mirror of
https://github.com/opencv/opencv_contrib.git
synced 2025-10-18 00:01:17 +08:00
Lazy loading nvcuda.dll
This commit is contained in:
@@ -29,11 +29,14 @@ CV_Error(cv::Error::HeaderIsNull, "Nvidia Optical Flow headers not found. Make s
|
|||||||
|
|
||||||
//macro for dll loading
|
//macro for dll loading
|
||||||
#if defined(_WIN64)
|
#if defined(_WIN64)
|
||||||
#define MODULENAME TEXT("nvofapi64.dll")
|
#define OF_MODULENAME TEXT("nvofapi64.dll")
|
||||||
|
#define CUDA_MODULENAME TEXT("nvcuda.dll")
|
||||||
#elif defined(_WIN32)
|
#elif defined(_WIN32)
|
||||||
#define MODULENAME TEXT("nvofapi.dll")
|
#define OF_MODULENAME TEXT("nvofapi.dll")
|
||||||
|
#define CUDA_MODULENAME TEXT("nvcuda.dll")
|
||||||
#else
|
#else
|
||||||
#define MODULENAME "libnvidia-opticalflow.so.1"
|
#define OF_MODULENAME "libnvidia-opticalflow.so.1"
|
||||||
|
#define CUDA_MODULENAME "libcuda.so"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define NVOF_API_CALL(nvOFAPI) \
|
#define NVOF_API_CALL(nvOFAPI) \
|
||||||
@@ -112,6 +115,114 @@ using namespace cv::cuda;
|
|||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
|
class LoadNvidiaModules
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
typedef int(*PFNCudaCuCtxGetCurrent)(CUcontext*);
|
||||||
|
typedef NV_OF_STATUS(NVOFAPI *PFNNvOFAPICreateInstanceCuda)
|
||||||
|
(uint32_t apiVer, NV_OF_CUDA_API_FUNCTION_LIST* cudaOf);
|
||||||
|
|
||||||
|
PFNCudaCuCtxGetCurrent m_cudaDriverAPIGetCurrentCtx;
|
||||||
|
PFNNvOFAPICreateInstanceCuda m_NvOFAPICreateInstanceCuda;
|
||||||
|
HMODULE m_hOFModule;
|
||||||
|
HMODULE m_hCudaModule;
|
||||||
|
bool m_isFailed;
|
||||||
|
|
||||||
|
LoadNvidiaModules() :
|
||||||
|
m_cudaDriverAPIGetCurrentCtx(NULL),
|
||||||
|
m_NvOFAPICreateInstanceCuda(NULL),
|
||||||
|
m_isFailed(false)
|
||||||
|
{
|
||||||
|
//Loading Cuda Library
|
||||||
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
HMODULE hCudaModule = LoadLibrary(CUDA_MODULENAME);
|
||||||
|
#else
|
||||||
|
void *hCudaModule = dlopen(CUDA_MODULENAME, RTLD_LAZY);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (hCudaModule == NULL)
|
||||||
|
{
|
||||||
|
m_isFailed = true;
|
||||||
|
CV_Error(Error::StsBadFunc, "Cannot find Cuda library.");
|
||||||
|
}
|
||||||
|
m_hCudaModule = hCudaModule;
|
||||||
|
|
||||||
|
#if defined(_WIN32)
|
||||||
|
m_cudaDriverAPIGetCurrentCtx = (PFNCudaCuCtxGetCurrent)GetProcAddress(m_hCudaModule, "cuCtxGetCurrent");
|
||||||
|
#else
|
||||||
|
m_cudaDriverAPIGetCurrentCtx = (PFNCudaCuCtxGetCurrent)dlsym(m_hCudaModule, "cuCtxGetCurrent");
|
||||||
|
#endif
|
||||||
|
if (!m_cudaDriverAPIGetCurrentCtx)
|
||||||
|
{
|
||||||
|
m_isFailed = true;
|
||||||
|
CV_Error(Error::StsBadFunc,
|
||||||
|
"Cannot find Cuda Driver API : cuCtxGetCurrent() entry in Cuda library");
|
||||||
|
}
|
||||||
|
|
||||||
|
//Loading Optical Flow Library
|
||||||
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
HMODULE hOFModule = LoadLibrary(OF_MODULENAME);
|
||||||
|
#else
|
||||||
|
void *hOFModule = dlopen(OF_MODULENAME, RTLD_LAZY);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (hOFModule == NULL)
|
||||||
|
{
|
||||||
|
m_isFailed = true;
|
||||||
|
CV_Error(Error::StsBadFunc, "Cannot find NvOF library.");
|
||||||
|
}
|
||||||
|
m_hOFModule = hOFModule;
|
||||||
|
|
||||||
|
#if defined(_WIN32)
|
||||||
|
m_NvOFAPICreateInstanceCuda = (PFNNvOFAPICreateInstanceCuda)GetProcAddress(m_hOFModule, "NvOFAPICreateInstanceCuda");
|
||||||
|
#else
|
||||||
|
m_NvOFAPICreateInstanceCuda = (PFNNvOFAPICreateInstanceCuda)dlsym(m_hOFModule, "NvOFAPICreateInstanceCuda");
|
||||||
|
#endif
|
||||||
|
if (!m_NvOFAPICreateInstanceCuda)
|
||||||
|
{
|
||||||
|
m_isFailed = true;
|
||||||
|
CV_Error(Error::StsBadFunc,
|
||||||
|
"Cannot find NvOFAPICreateInstanceCuda() entry in NVOF library");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
~LoadNvidiaModules()
|
||||||
|
{
|
||||||
|
if (NULL != m_hCudaModule)
|
||||||
|
{
|
||||||
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
FreeLibrary(m_hCudaModule);
|
||||||
|
#else
|
||||||
|
dlclose(m_hCudaModule);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
if (NULL != m_hOFModule)
|
||||||
|
{
|
||||||
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
FreeLibrary(m_hOFModule);
|
||||||
|
#else
|
||||||
|
dlclose(m_hOFModule);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
m_hCudaModule = NULL;
|
||||||
|
m_hOFModule = NULL;
|
||||||
|
m_cudaDriverAPIGetCurrentCtx = NULL;
|
||||||
|
m_NvOFAPICreateInstanceCuda = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
static LoadNvidiaModules& Init()
|
||||||
|
{
|
||||||
|
static LoadNvidiaModules LoadLibraryObj;
|
||||||
|
if (LoadLibraryObj.m_isFailed)
|
||||||
|
CV_Error(Error::StsError, "Can't initialize LoadNvidiaModules Class Object");
|
||||||
|
return LoadLibraryObj;
|
||||||
|
}
|
||||||
|
|
||||||
|
PFNCudaCuCtxGetCurrent GetCudaLibraryFunctionPtr() { return m_cudaDriverAPIGetCurrentCtx; }
|
||||||
|
PFNNvOFAPICreateInstanceCuda GetOFLibraryFunctionPtr() { return m_NvOFAPICreateInstanceCuda; }
|
||||||
|
};
|
||||||
|
|
||||||
class NvidiaOpticalFlowImpl : public cv::cuda::NvidiaOpticalFlow_1_0
|
class NvidiaOpticalFlowImpl : public cv::cuda::NvidiaOpticalFlow_1_0
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
@@ -169,7 +280,6 @@ private:
|
|||||||
NvOFHandle GetHandle() { return m_hOF; }
|
NvOFHandle GetHandle() { return m_hOF; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
HMODULE m_hModule; //module handle to load nvof dll
|
|
||||||
std::mutex m_lock;
|
std::mutex m_lock;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@@ -198,6 +308,8 @@ NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl(
|
|||||||
m_cuContext(nullptr), m_format(NV_OF_BUFFER_FORMAT_GRAYSCALE8),
|
m_cuContext(nullptr), m_format(NV_OF_BUFFER_FORMAT_GRAYSCALE8),
|
||||||
m_gridSize(NV_OF_OUTPUT_VECTOR_GRID_SIZE_4)
|
m_gridSize(NV_OF_OUTPUT_VECTOR_GRID_SIZE_4)
|
||||||
{
|
{
|
||||||
|
LoadNvidiaModules& LoadNvidiaModulesObj = LoadNvidiaModules::Init();
|
||||||
|
|
||||||
int nGpu = 0;
|
int nGpu = 0;
|
||||||
|
|
||||||
cuSafeCall(cudaGetDeviceCount(&nGpu));
|
cuSafeCall(cudaGetDeviceCount(&nGpu));
|
||||||
@@ -208,7 +320,8 @@ NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl(
|
|||||||
|
|
||||||
cuSafeCall(cudaSetDevice(m_gpuId));
|
cuSafeCall(cudaSetDevice(m_gpuId));
|
||||||
cuSafeCall(cudaFree(m_cuContext));
|
cuSafeCall(cudaFree(m_cuContext));
|
||||||
cuSafeCall(cuCtxGetCurrent(&m_cuContext));
|
|
||||||
|
cuSafeCall(LoadNvidiaModulesObj.GetCudaLibraryFunctionPtr()(&m_cuContext));
|
||||||
|
|
||||||
if (m_gridSize != NV_OF_OUTPUT_VECTOR_GRID_SIZE_4)
|
if (m_gridSize != NV_OF_OUTPUT_VECTOR_GRID_SIZE_4)
|
||||||
{
|
{
|
||||||
@@ -253,38 +366,9 @@ NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl(
|
|||||||
m_costBufElementSize = sizeof(uint32_t);
|
m_costBufElementSize = sizeof(uint32_t);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(_WIN32) || defined(_WIN64)
|
|
||||||
HMODULE hModule = LoadLibrary(MODULENAME);
|
|
||||||
#else
|
|
||||||
void *hModule = dlopen(MODULENAME, RTLD_LAZY);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (hModule == NULL)
|
|
||||||
{
|
|
||||||
CV_Error(Error::StsBadFunc,
|
|
||||||
"Cannot find NvOF library.");
|
|
||||||
}
|
|
||||||
m_hModule = hModule;
|
|
||||||
|
|
||||||
typedef NV_OF_STATUS(NVOFAPI *PFNNvOFAPICreateInstanceCuda)
|
|
||||||
(uint32_t apiVer, NV_OF_CUDA_API_FUNCTION_LIST* cudaOf);
|
|
||||||
|
|
||||||
#if defined(_WIN32)
|
|
||||||
PFNNvOFAPICreateInstanceCuda NvOFAPICreateInstanceCuda
|
|
||||||
= (PFNNvOFAPICreateInstanceCuda)GetProcAddress(m_hModule, "NvOFAPICreateInstanceCuda");
|
|
||||||
#else
|
|
||||||
PFNNvOFAPICreateInstanceCuda NvOFAPICreateInstanceCuda
|
|
||||||
= (PFNNvOFAPICreateInstanceCuda)dlsym(m_hModule, "NvOFAPICreateInstanceCuda");
|
|
||||||
#endif
|
|
||||||
if (!NvOFAPICreateInstanceCuda)
|
|
||||||
{
|
|
||||||
CV_Error(Error::StsBadFunc,
|
|
||||||
"Cannot find NvOFAPICreateInstanceCuda() entry in NVOF library");
|
|
||||||
}
|
|
||||||
|
|
||||||
m_ofAPI.reset(new NV_OF_CUDA_API_FUNCTION_LIST());
|
m_ofAPI.reset(new NV_OF_CUDA_API_FUNCTION_LIST());
|
||||||
|
|
||||||
NVOF_API_CALL(NvOFAPICreateInstanceCuda(NV_OF_API_VERSION, m_ofAPI.get()));
|
NVOF_API_CALL(LoadNvidiaModulesObj.GetOFLibraryFunctionPtr()(NV_OF_API_VERSION, m_ofAPI.get()));
|
||||||
NVOF_API_CALL(GetAPI()->nvCreateOpticalFlowCuda(m_cuContext, &m_hOF));
|
NVOF_API_CALL(GetAPI()->nvCreateOpticalFlowCuda(m_cuContext, &m_hOF));
|
||||||
|
|
||||||
memset(&m_initParams, 0, sizeof(m_initParams));
|
memset(&m_initParams, 0, sizeof(m_initParams));
|
||||||
@@ -416,9 +500,7 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cuSafeCall(cuCtxPushCurrent(m_cuContext));
|
|
||||||
inputStream.waitForCompletion();
|
inputStream.waitForCompletion();
|
||||||
cuSafeCall(cuCtxPopCurrent(&m_cuContext));
|
|
||||||
|
|
||||||
//Execute Call
|
//Execute Call
|
||||||
NV_OF_EXECUTE_INPUT_PARAMS exeInParams;
|
NV_OF_EXECUTE_INPUT_PARAMS exeInParams;
|
||||||
@@ -436,9 +518,7 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu
|
|||||||
m_hCostBuffer : nullptr;;
|
m_hCostBuffer : nullptr;;
|
||||||
NVOF_API_CALL(GetAPI()->nvOFExecute(GetHandle(), &exeInParams, &exeOutParams));
|
NVOF_API_CALL(GetAPI()->nvOFExecute(GetHandle(), &exeInParams, &exeOutParams));
|
||||||
|
|
||||||
cuSafeCall(cuCtxPushCurrent(m_cuContext));
|
|
||||||
outputStream.waitForCompletion();
|
outputStream.waitForCompletion();
|
||||||
cuSafeCall(cuCtxPopCurrent(&m_cuContext));
|
|
||||||
|
|
||||||
if (_flow.isMat())
|
if (_flow.isMat())
|
||||||
flowXYGpuMat.download(_flow);
|
flowXYGpuMat.download(_flow);
|
||||||
@@ -460,7 +540,7 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu
|
|||||||
else
|
else
|
||||||
CV_Error(Error::StsBadArg, "Incorrect cost buffer passed. Pass Mat or GpuMat");
|
CV_Error(Error::StsBadArg, "Incorrect cost buffer passed. Pass Mat or GpuMat");
|
||||||
}
|
}
|
||||||
cuSafeCall(cuCtxSynchronize());
|
cuSafeCall(cudaDeviceSynchronize());
|
||||||
}
|
}
|
||||||
|
|
||||||
void NvidiaOpticalFlowImpl::collectGarbage()
|
void NvidiaOpticalFlowImpl::collectGarbage()
|
||||||
|
Reference in New Issue
Block a user