Skip to content

Commit f152449

Browse files
authored
[DLStreamer] Improve VA context reuse (open-edge-platform#608)
1 parent 00ee34e commit f152449

File tree

1 file changed

+48
-17
lines changed

1 file changed

+48
-17
lines changed

libraries/dl-streamer/src/monolithic/gst/inference_elements/base/inference_impl.cpp

Lines changed: 48 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,8 @@ using namespace InferenceBackend;
5050

5151
namespace {
5252

53-
const int DEFAULT_GPU_DRM_ID = 128; // -> /dev/dri/renderD128
53+
const int DEFAULT_GPU_DRM_ID = 128; // -> /dev/dri/renderD128
54+
const int MAX_STREAMS_SHARING_VADISPLAY = 4; // Maximum number of streams sharing the same VADisplay context
5455

5556
inline std::shared_ptr<Allocator> CreateAllocator(const char *const allocator_name) {
5657
std::shared_ptr<Allocator> allocator;
@@ -276,6 +277,14 @@ GetPreferredImagePreproc(CapsFeature caps, const std::vector<ModelInputProcessor
276277
case VA_SURFACE_CAPS_FEATURE:
277278
case VA_MEMORY_CAPS_FEATURE:
278279
result = ImagePreprocessorType::VAAPI_SYSTEM;
280+
281+
// The VA context may come from other pipeline elements, so make sure the correct preprocessor type is used
282+
if (device.find("CPU") != std::string::npos) {
283+
GVA_WARNING(
284+
"Using VAAPI preprocessor with CPU device is not recommended, forcing using OpenCV preprocessor");
285+
result = ImagePreprocessorType::IE;
286+
}
287+
279288
break;
280289
case DMA_BUF_CAPS_FEATURE:
281290
#ifdef ENABLE_VPUX
@@ -429,7 +438,7 @@ void UpdateConfigWithLayerInfo(const std::vector<ModelInputProcessorInfo::Ptr> &
429438
if (gst_structure_get_int(it->params, "reverse_input_channels", &reverse_channels)) {
430439
config[KEY_BASE][KEY_MODEL_FORMAT] = reverse_channels ? "RGB" : "BGR";
431440
}
432-
441+
433442
const auto color_space = gst_structure_get_string(it->params, "color_space");
434443
if (color_space) {
435444
// Ensure that reverse_input_channels and color_space are not both defined
@@ -577,9 +586,25 @@ int getGPURenderDevId(GvaBaseInference *gva_base_inference) {
577586
return gpuRenderDevId;
578587
}
579588

580-
bool canReuseSharedVADispCtx(GvaBaseInference *gva_base_inference) {
589+
bool canReuseSharedVADispCtx(GvaBaseInference *gva_base_inference, size_t max_streams) {
590+
581591
const std::string device(gva_base_inference->device);
582592

593+
// Check reference count if display is set
594+
if (gva_base_inference->priv->va_display) {
595+
if (device.find("GPU") == device.npos) {
596+
return true; // For CPU/NPU/AUTO device fallback to default control flow and do not create a new/separate
597+
// VADisplay context
598+
}
599+
// This counts all shared_ptr references, not just streams, but is the best available heuristic
600+
auto use_count = gva_base_inference->priv->va_display.use_count();
601+
if (use_count > static_cast<long>(max_streams)) {
602+
GVA_INFO("VADisplay is used by more than %zu streams (use_count=%ld), not reusing.", max_streams,
603+
use_count);
604+
return false;
605+
}
606+
}
607+
583608
if (device.find("GPU.") == device.npos && device.find("GPU") != device.npos) {
584609
// GPU only i.e. all available accelerators
585610
return true;
@@ -597,27 +622,33 @@ bool canReuseSharedVADispCtx(GvaBaseInference *gva_base_inference) {
597622
return false;
598623
}
599624

625+
// Returns a dlstreamer::ContextPtr representing a VA display context.
626+
// The returned shared pointer may either reference a shared VA display (if reuse is possible) or a newly created one.
627+
// The caller is responsible for holding the returned pointer for as long as the VA display context is needed.
628+
// If a shared VA display is reused, its lifetime is managed by all holders of the shared pointer.
600629
dlstreamer::ContextPtr createVaDisplay(GvaBaseInference *gva_base_inference) {
601630
assert(gva_base_inference);
602631

603-
auto display = gva_base_inference->priv->va_display;
604632
const std::string device(gva_base_inference->device);
633+
dlstreamer::ContextPtr display = nullptr;
605634

606-
// Create a new VADisplay context only if the existing one i.e priv->va_display does not match
607-
if (!canReuseSharedVADispCtx(gva_base_inference)) {
608-
if (device.find("GPU.") != device.npos) {
609-
uint32_t rel_dev_index = 0;
610-
rel_dev_index = Utils::getRelativeGpuDeviceIndex(device);
611-
display = vaApiCreateVaDisplay(rel_dev_index);
612-
613-
GVA_INFO("Using new VADisplay (%p) ", static_cast<void *>(display.get()));
614-
return display;
615-
}
616-
}
617-
618-
if (display) {
635+
if ((gva_base_inference->priv->va_display) &&
636+
(canReuseSharedVADispCtx(gva_base_inference, MAX_STREAMS_SHARING_VADISPLAY))) {
637+
// Reuse existing VADisplay context (i.e. priv->va_display) if it fits
638+
display = gva_base_inference->priv->va_display;
619639
GVA_INFO("Using shared VADisplay (%p) from element %s", static_cast<void *>(display.get()),
620640
GST_ELEMENT_NAME(gva_base_inference));
641+
} else {
642+
// Create a new VADisplay context
643+
uint32_t rel_dev_index = Utils::getRelativeGpuDeviceIndex(device);
644+
display = vaApiCreateVaDisplay(rel_dev_index);
645+
GVA_INFO("Using new VADisplay (%p) ", static_cast<void *>(display.get()));
646+
}
647+
648+
if (!display) {
649+
GST_ERROR_OBJECT(GST_ELEMENT(gva_base_inference),
650+
"No shared VADisplay found for device '%s', failed to create or retrieve a VADisplay context.",
651+
device.c_str());
621652
}
622653

623654
return display;

0 commit comments

Comments
 (0)