Skip to content

Add zero-copy NVMM support with nvbufsurface (JetPack 5) #204

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
8 changes: 5 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ if(hasParent)
message("-- jetson-utils: building as submodule, ${hasParent}")
else()
message("-- jetson-utils: building as standalone")

# standalone project
project(jetson-utils)

Expand Down Expand Up @@ -70,7 +69,7 @@ else()
endif()

# option for enabling/disabling NVMM memory in multimedia stack
find_library(NVBUF_UTILS NAMES nvbuf_utils PATHS /usr/lib/aarch64-linux-gnu/tegra)
find_library(NVBUF_UTILS NAMES nvbuf_utils nvbufsurface PATHS /usr/lib/aarch64-linux-gnu/tegra)
message("-- nvbuf_utils: ${NVBUF_UTILS}")

if(NVBUF_UTILS)
Expand Down Expand Up @@ -103,7 +102,10 @@ cuda_add_library(jetson-utils SHARED ${jetsonUtilitySources})
target_link_libraries(jetson-utils GL GLU GLEW gstreamer-1.0 gstapp-1.0 gstpbutils-1.0 gstwebrtc-1.0 gstsdp-1.0 gstrtspserver-1.0 json-glib-1.0 soup-2.4 ${CUDA_nppicc_LIBRARY})

if(NVBUF_UTILS)
target_link_libraries(jetson-utils nvbuf_utils)
target_link_libraries(jetson-utils ${NVBUF_UTILS})
if(CUDA_VERSION_MAJOR GREATER 10)
target_link_libraries(jetson-utils nvbufsurftransform)
endif()
endif()

# transfer all headers to the include directory
Expand Down
25 changes: 20 additions & 5 deletions camera/gstCamera.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,17 +168,32 @@ bool gstCamera::buildLaunchStr()
ss << "appsink name=mysink";
}
else
{
ss << "v4l2src device=" << mOptions.resource.location << " do-timestamp=true ! ";

{
if( mOptions.codec != videoOptions::CODEC_UNKNOWN )
{
{
std::string camerasrc;
#if defined(__x86_64__) || defined(__amd64__)
camerasrc = "v4l2src";
ss << "v4l2src device=" << mOptions.resource.location << " do-timestamp=true ! ";
#else
if( mOptions.codec == videoOptions::CODEC_RAW && enable_nvmm )
camerasrc = "nvv4l2camerasrc";
else
camerasrc = "v4l2src";
#endif
ss << camerasrc << " device=" << mOptions.resource.location << " do-timestamp=true ! ";

ss << gst_codec_to_string(mOptions.codec) << ", ";

if( mOptions.codec == videoOptions::CODEC_RAW )
ss << "format=(string)" << gst_format_to_string(mFormatYUV) << ", ";

ss << "width=(int)" << GetWidth() << ", height=(int)" << GetHeight() << ", framerate=" << (int)mOptions.frameRate << "/1 ! ";

}
else
{
ss << "v4l2src device=" << mOptions.resource.location << " do-timestamp=true ! ";
}

//ss << "queue max-size-buffers=16 ! ";
Expand Down Expand Up @@ -243,7 +258,7 @@ bool gstCamera::buildLaunchStr()
// V4L2 decoders can only output NVMM memory, if we aren't using NVMM have nvvidconv convert it
if( mOptions.flipMethod != videoOptions::FLIP_NONE || (mOptions.codecType == videoOptions::CODEC_V4L2 && !enable_nvmm) )
{
if( (enable_nvmm && mOptions.codecType != videoOptions::CODEC_CPU) || mOptions.codecType == videoOptions::CODEC_V4L2 )
if( enable_nvmm || mOptions.codecType == videoOptions::CODEC_V4L2 )
ss << "nvvidconv flip-method=" << mOptions.flipMethod << " ! " << (enable_nvmm ? "video/x-raw(memory:NVMM) ! " : "video/x-raw ! ");
else
ss << "videoflip method=" << videoOptions::FlipMethodToStr(mOptions.flipMethod) << " ! "; // the videoflip enum varies slightly, but the strings are the same
Expand Down
66 changes: 48 additions & 18 deletions codec/gstBufferManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

#if NV_TENSORRT_MAJOR > 8 || (NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR >= 4)
#include <nvbufsurface.h> // JetPack 5
#include <nvbufsurftransform.h>
#endif
#endif

Expand Down Expand Up @@ -61,7 +62,7 @@ gstBufferManager::gstBufferManager( videoOptions* options )
// destructor
// Releases the JetPack 5 conversion surface, if one was created.
// NOTE(review): mSurfConv appears to be allocated lazily (NvBufSurfaceCreate)
// and mapped with NvBufSurfaceMapEglImage in Enqueue() -- confirm against the full file.
gstBufferManager::~gstBufferManager()
{
#if defined(ENABLE_NVMM) && (NV_TENSORRT_MAJOR > 8 || (NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR >= 4))
	// mSurfConv is only declared when NVMM is enabled under JetPack 5,
	// and it stays NULL until a layout conversion was actually needed.
	if( mSurfConv != NULL )
	{
		// the surface was mapped as an EGLImage, so release it with the matching
		// EGL unmap call (-1 = all buffers in the batch) before destroying it
		NvBufSurfaceUnMapEglImage(mSurfConv, -1);
		NvBufSurfaceDestroy(mSurfConv);
		mSurfConv = NULL;
	}
#endif
}


Expand Down Expand Up @@ -174,32 +175,59 @@ bool gstBufferManager::Enqueue( GstBuffer* gstBuffer, GstCaps* gstCaps )
LogError(LOG_GSTREAMER "gstBufferManager -- failed to get FD from NVMM memory\n");
return false;
}
#endif

NvBufferParams nvmmParams;

if( NvBufferGetParams(nvmmFD, &nvmmParams) != 0 )
{
LogError(LOG_GSTREAMER "gstBufferManager -- failed to get NVMM buffer params\n");
return false;
}

#endif
#ifdef DEBUG
LogVerbose(LOG_GSTREAMER "gstBufferManager -- NVMM buffer payload type: %s\n", nvmmParams.payloadType == NvBufferPayload_MemHandle ? "MemHandle" : "SurfArray");
LogVerbose(LOG_GSTREAMER "gstBufferManager -- NVMM buffer planes: %u format=%u\n", nvmmParams.num_planes, (uint32_t)nvmmParams.pixel_format);

for( uint32_t n=0; n < nvmmParams.num_planes; n++ )
LogVerbose(LOG_GSTREAMER "gstBufferManager -- NVMM buffer plane %u: %ux%u\n", n, nvmmParams.width[n], nvmmParams.height[n]);
#endif

#if NV_TENSORRT_MAJOR > 8 || (NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR >= 4)

EGLImageKHR eglImage = NULL;
// check whether the surface already uses the block-linear layout; if not, transform it into a block-linear surface
if (surf->surfaceList[0].layout != NVBUF_LAYOUT_BLOCK_LINEAR)
{
if (mSurfConv == NULL)
{
NvBufSurfaceCreateParams params;
params.gpuId = 0;
params.width = surf->surfaceList[0].width;
params.height = surf->surfaceList[0].height;
params.size = surf->surfaceList[0].dataSize;
params.colorFormat = surf->surfaceList[0].colorFormat;
params.layout = NVBUF_LAYOUT_BLOCK_LINEAR;
params.memType = NVBUF_MEM_SURFACE_ARRAY;
NvBufSurfaceCreate(&mSurfConv, 1, &params);

}
NvBufSurfTransformParams transformParams;
memset(&transformParams, 0, sizeof(transformParams));
NvBufSurfTransform(surf, mSurfConv, &transformParams);
NvBufSurfaceMapEglImage(mSurfConv, 0);
eglImage = mSurfConv->surfaceList[0].mappedAddr.eglImage;
}
else
{
NvBufSurfaceMapEglImage(surf, 0);
eglImage = surf->surfaceList[0].mappedAddr.eglImage;
}
#else
EGLImageKHR eglImage = NvEGLImageFromFd(NULL, nvmmFD);
#endif
if( !eglImage )
{
LogError(LOG_GSTREAMER "gstBufferManager -- failed to map EGLImage from NVMM buffer\n");
return false;
}

// nvfilter memory comes from nvvidconv, which handles NvReleaseFd() internally
GstMemory* gstMemory = gst_buffer_peek_memory(gstBuffer, 0);

Expand All @@ -213,19 +241,19 @@ bool gstBufferManager::Enqueue( GstBuffer* gstBuffer, GstCaps* gstCaps )

// update latest frame so capture thread can grab it
mNvmmMutex.Lock();


#if NV_TENSORRT_MAJOR < 8 || (NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR < 4)
if( mNvmmEGL != NULL )
{
{
NvDestroyEGLImage(NULL, mNvmmEGL);

if( mNvmmReleaseFD )
NvReleaseFd(mNvmmFD);
if( mNvmmReleaseFD )
NvReleaseFd(mNvmmFD);
}

#endif

mNvmmFD = nvmmFD;
mNvmmEGL = eglImage;
mNvmmReleaseFD = nvmmReleaseFD;

mNvmmMutex.Unlock();
}
else
Expand Down Expand Up @@ -332,15 +360,16 @@ int gstBufferManager::Dequeue( void** output, imageFormat format, uint64_t timeo
if( CUDA_FAILED(cudaGraphicsResourceGetMappedEglFrame(&eglFrame, eglResource, 0, 0)) )
return -1;

if( eglFrame.planeCount != 2 )
LogWarning(LOG_GSTREAMER "gstBufferManager -- unexpected number of planes in NVMM buffer (%u vs 2 expected)\n", eglFrame.planeCount);
// TODO: plane-count warning disabled for now, as buffers from nvv4l2camerasrc seem to have a plane count other than 2
// if( eglFrame.planeCount != 2 )
// LogWarning(LOG_GSTREAMER "gstBufferManager -- unexpected number of planes in NVMM buffer (%u vs 2 expected)\n", eglFrame.planeCount);

if( eglFrame.planeDesc[0].width != mOptions->width || eglFrame.planeDesc[0].height != mOptions->height )
{
LogError(LOG_GSTREAMER "gstBufferManager -- NVMM EGLImage dimensions mismatch (%ux%u when expected %ux%u)", eglFrame.planeDesc[0].width, eglFrame.planeDesc[0].height, mOptions->width, mOptions->height);
return -1;
}

// TODO: we could remove the transform in Enqueue if we could handle cudaEglFrameTypePitch here
if( eglFrame.frameType != cudaEglFrameTypeArray ) // cudaEglFrameTypePitch
{
LogError(LOG_GSTREAMER "gstBufferManager -- NVMM had unexpected frame type (was pitched pointer, expected CUDA array)\n");
Expand Down Expand Up @@ -395,10 +424,11 @@ int gstBufferManager::Dequeue( void** output, imageFormat format, uint64_t timeo
latestYUV = mNvmmCUDA;

CUDA(cudaGraphicsUnregisterResource(eglResource));
#if NV_TENSORRT_MAJOR < 8 || (NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR < 4)
NvDestroyEGLImage(NULL, eglImage);

if( nvmmReleaseFD )
NvReleaseFd(nvmmFD);
#endif
}
#endif

Expand Down
12 changes: 9 additions & 3 deletions codec/gstBufferManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,18 @@
#include "Event.h"
#include "Mutex.h"
#include "RingBuffer.h"

#if NV_TENSORRT_MAJOR > 8 || (NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR >= 4)
#include <nvbufsurface.h> // JetPack 5
#endif

#ifdef ENABLE_NVMM
#if !GST_CHECK_VERSION(1,0,0)
#undef ENABLE_NVMM // NVMM is only enabled for GStreamer 1.0 and newer
// #undef ENABLE_NVMM // NVMM is only enabled for GStreamer 1.0 and newer
#endif

#include "NvInfer.h"
#if NV_TENSORRT_MAJOR > 8 || (NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR >= 4)
#undef ENABLE_NVMM // debug NVMM under JetPack 5
// #undef ENABLE_NVMM // debug NVMM under JetPack 5
#endif
#endif

Expand Down Expand Up @@ -116,6 +118,10 @@ class gstBufferManager
void* mNvmmCUDA;
size_t mNvmmSize;
bool mNvmmReleaseFD;
#if NV_TENSORRT_MAJOR > 8 || (NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR >= 4)
// JetPack 5 nvbufsurface
NvBufSurface *mSurfConv = NULL;
#endif
#endif
};

Expand Down
7 changes: 6 additions & 1 deletion codec/gstUtility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,12 @@ const char* gst_codec_to_string( videoOptions::Codec codec )
{
switch(codec)
{
case videoOptions::CODEC_RAW: return "video/x-raw";
case videoOptions::CODEC_RAW:
#if defined(ENABLE_NVMM)
return "video/x-raw(memory:NVMM)";
#else
return "video/x-raw";
#endif
case videoOptions::CODEC_H264: return "video/x-h264";
case videoOptions::CODEC_H265: return "video/x-h265";
case videoOptions::CODEC_VP8: return "video/x-vp8";
Expand Down