mirror of
https://github.com/opencv/opencv_contrib.git
synced 2025-10-16 13:57:05 +08:00
Modified Stream support functionality.
This commit is contained in:
@@ -118,7 +118,9 @@ public:
|
||||
@param inputImage Input image.
|
||||
@param referenceImage Reference image of the same size and the same type as input image.
|
||||
@param flow A buffer consisting of inputImage.Size() / getGridSize() flow vectors in CV_16SC2 format.
|
||||
@param stream Stream for the asynchronous version.
|
||||
@param stream It is highly recommended that the CUDA streams for pre- and post-processing of optical flow vectors be set once per session in the create() function, as part of optical flow session creation.
|
||||
This parameter is left here for backward compatibility and may be removed in the future.
|
||||
The default value is the NULL stream.
|
||||
@param hint Hint buffer if client provides external hints. Must have same size as flow buffer.
|
||||
Caller can provide flow vectors as hints for optical flow calculation.
|
||||
@param cost Cost buffer contains numbers indicating the confidence associated with each of the generated flow vectors.
|
||||
@@ -435,6 +437,12 @@ public:
|
||||
@param enableExternalHints Optional Parameter. Flag to enable passing external hints buffer to calc(). Defaults to false.
|
||||
@param enableCostBuffer Optional Parameter. Flag to enable cost buffer output from calc(). Defaults to false.
|
||||
@param gpuId Optional parameter to select the GPU ID on which the optical flow should be computed. Useful in multi-GPU systems. Defaults to 0.
|
||||
@param inputStream The optical flow algorithm may optionally involve CUDA preprocessing of the input buffers.
|
||||
The input CUDA stream can be used to pipeline and synchronize the CUDA preprocessing tasks with the optical flow (OF) hardware engine.
|
||||
If the input stream is not set, the execute function will use the default stream, which is the NULL stream.
|
||||
@param outputStream The optical flow algorithm may optionally involve CUDA post-processing of the output flow vectors.
|
||||
The output CUDA stream can be used to pipeline and synchronize the CUDA post-processing tasks with the optical flow (OF) hardware engine.
|
||||
If the output stream is not set, the execute function will use the default stream, which is the NULL stream.
|
||||
*/
|
||||
CV_WRAP static Ptr<NvidiaOpticalFlow_1_0> create(
|
||||
int width,
|
||||
@@ -444,7 +452,9 @@ public:
|
||||
bool enableTemporalHints = false,
|
||||
bool enableExternalHints = false,
|
||||
bool enableCostBuffer = false,
|
||||
int gpuId = 0);
|
||||
int gpuId = 0,
|
||||
Stream& inputStream = Stream::Null(),
|
||||
Stream& outputStream = Stream::Null());
|
||||
};
|
||||
|
||||
//! @}
|
||||
|
@@ -181,8 +181,8 @@ int main(int argc, const char* argv[])
|
||||
Ptr<cuda::DensePyrLKOpticalFlow> lk = cuda::DensePyrLKOpticalFlow::create(Size(7, 7));
|
||||
Ptr<cuda::FarnebackOpticalFlow> farn = cuda::FarnebackOpticalFlow::create();
|
||||
Ptr<cuda::OpticalFlowDual_TVL1> tvl1 = cuda::OpticalFlowDual_TVL1::create();
|
||||
Ptr<cuda::NvidiaOpticalFlow_1_0> nvof = cuda::NvidiaOpticalFlow_1_0::create(
|
||||
frame0.size().width, frame0.size().height, NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST);
|
||||
Ptr<cuda::NvidiaOpticalFlow_1_0> nvof = cuda::NvidiaOpticalFlow_1_0::create(frame0.size().width, frame0.size().height,
|
||||
NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST, false, false, false, 0, Stream(), Stream());
|
||||
|
||||
{
|
||||
GpuMat d_frame0f;
|
||||
|
@@ -8,11 +8,11 @@
|
||||
|
||||
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
|
||||
|
||||
cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(int, int, NVIDIA_OF_PERF_LEVEL, bool, bool, bool, int) { throw_no_cuda(); return cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0>(); }
|
||||
cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(int, int, NVIDIA_OF_PERF_LEVEL, bool, bool, bool, int, Stream&, Stream&) { throw_no_cuda(); return cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0>(); }
|
||||
|
||||
#elif !defined HAVE_NVIDIA_OPTFLOW
|
||||
|
||||
cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(int, int, NVIDIA_OF_PERF_LEVEL, bool, bool, bool, int)
|
||||
cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(int, int, NVIDIA_OF_PERF_LEVEL, bool, bool, bool, int, Stream&, Stream&)
|
||||
{
|
||||
CV_Error(cv::Error::HeaderIsNull, "OpenCV was build without NVIDIA OpticalFlow support");
|
||||
}
|
||||
@@ -236,6 +236,8 @@ private:
|
||||
bool m_enableExternalHints;
|
||||
bool m_enableCostBuffer;
|
||||
int m_gpuId;
|
||||
Stream m_inputStream;
|
||||
Stream m_outputStream;
|
||||
|
||||
CUcontext m_cuContext;
|
||||
NV_OF_BUFFER_FORMAT m_format;
|
||||
@@ -286,13 +288,14 @@ protected:
|
||||
std::mutex m_lock;
|
||||
|
||||
public:
|
||||
NvidiaOpticalFlowImpl(int width, int height, NV_OF_PERF_LEVEL perfPreset,
|
||||
bool bEnableTemporalHints, bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId);
|
||||
NvidiaOpticalFlowImpl(int width, int height, NV_OF_PERF_LEVEL perfPreset, bool bEnableTemporalHints,
|
||||
bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId, Stream inputStream, Stream outputStream);
|
||||
|
||||
virtual void calc(InputArray inputImage, InputArray referenceImage,
|
||||
InputOutputArray flow, Stream& stream = Stream::Null(),
|
||||
InputArray hint = cv::noArray(), OutputArray cost = cv::noArray());
|
||||
|
||||
|
||||
virtual void collectGarbage();
|
||||
|
||||
virtual void upSampler(InputArray flow, int width, int height,
|
||||
@@ -303,11 +306,13 @@ public:
|
||||
|
||||
NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl(
|
||||
int width, int height, NV_OF_PERF_LEVEL perfPreset, bool bEnableTemporalHints,
|
||||
bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId) :
|
||||
bool bEnableExternalHints, bool bEnableCostBuffer, int gpuId,
|
||||
Stream inputStream, Stream outputStream) :
|
||||
m_width(width), m_height(height), m_preset(perfPreset),
|
||||
m_enableTemporalHints((NV_OF_BOOL)bEnableTemporalHints),
|
||||
m_enableExternalHints((NV_OF_BOOL)bEnableExternalHints),
|
||||
m_enableCostBuffer((NV_OF_BOOL)bEnableCostBuffer), m_gpuId(gpuId),
|
||||
m_inputStream(inputStream), m_outputStream(outputStream),
|
||||
m_cuContext(nullptr), m_format(NV_OF_BUFFER_FORMAT_GRAYSCALE8),
|
||||
m_gridSize(NV_OF_OUTPUT_VECTOR_GRID_SIZE_4)
|
||||
{
|
||||
@@ -387,6 +392,12 @@ NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl(
|
||||
|
||||
NVOF_API_CALL(GetAPI()->nvOFInit(GetHandle(), &m_initParams));
|
||||
|
||||
if (m_inputStream || m_outputStream)
|
||||
{
|
||||
NVOF_API_CALL(GetAPI()->nvOFSetIOCudaStreams(GetHandle(),
|
||||
StreamAccessor::getStream(m_inputStream), StreamAccessor::getStream(m_outputStream)));
|
||||
}
|
||||
|
||||
//Input Buffer 1
|
||||
NVOF_API_CALL(GetAPI()->nvOFCreateGPUBufferCuda(GetHandle(),
|
||||
&m_inputBufferDesc, NV_OF_CUDA_BUFFER_TYPE_CUDEVICEPTR, &m_hInputBuffer));
|
||||
@@ -432,13 +443,12 @@ NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl(
|
||||
void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOutputArray _flow,
|
||||
Stream& stream, InputArray hint, OutputArray cost)
|
||||
{
|
||||
Stream inputStream = {};
|
||||
Stream outputStream = {};
|
||||
if (stream)
|
||||
inputStream = stream;
|
||||
|
||||
NVOF_API_CALL(GetAPI()->nvOFSetIOCudaStreams(GetHandle(),
|
||||
StreamAccessor::getStream(inputStream), StreamAccessor::getStream(outputStream)));
|
||||
if (stream && !m_inputStream)
|
||||
{
|
||||
m_inputStream = stream;
|
||||
NVOF_API_CALL(GetAPI()->nvOFSetIOCudaStreams(GetHandle(),
|
||||
StreamAccessor::getStream(m_inputStream), StreamAccessor::getStream(m_outputStream)));
|
||||
}
|
||||
|
||||
GpuMat frame0GpuMat(_frame0.size(), _frame0.type(), (void*)m_frame0cuDevPtr,
|
||||
m_inputBufferStrideInfo.strideInfo[0].strideXInBytes);
|
||||
@@ -452,12 +462,14 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu
|
||||
if (_frame0.isMat())
|
||||
{
|
||||
//Get Mats from InputArrays
|
||||
frame0GpuMat.upload(_frame0);
|
||||
Mat __frame0 = _frame0.getMat();
|
||||
frame0GpuMat.upload(__frame0, m_inputStream);
|
||||
}
|
||||
else if (_frame0.isGpuMat())
|
||||
{
|
||||
//Get GpuMats from InputArrays
|
||||
_frame0.copyTo(frame0GpuMat);
|
||||
GpuMat __frame0 = _frame0.getGpuMat();
|
||||
__frame0.copyTo(frame0GpuMat, m_inputStream);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -469,12 +481,14 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu
|
||||
if (_frame1.isMat())
|
||||
{
|
||||
//Get Mats from InputArrays
|
||||
frame1GpuMat.upload(_frame1);
|
||||
Mat __frame1 = _frame1.getMat();
|
||||
frame1GpuMat.upload(__frame1, m_inputStream);
|
||||
}
|
||||
else if (_frame1.isGpuMat())
|
||||
{
|
||||
//Get GpuMats from InputArrays
|
||||
_frame1.copyTo(frame1GpuMat);
|
||||
GpuMat __frame1 = _frame1.getGpuMat();
|
||||
__frame1.copyTo(frame1GpuMat, m_inputStream);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -490,12 +504,14 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu
|
||||
if (hint.isMat())
|
||||
{
|
||||
//Get Mat from InputArray hint
|
||||
hintGpuMat.upload(hint);
|
||||
Mat _hint = hint.getMat();
|
||||
hintGpuMat.upload(_hint, m_inputStream);
|
||||
}
|
||||
else if(hint.isGpuMat())
|
||||
{
|
||||
//Get GpuMat from InputArray hint
|
||||
hint.copyTo(hintGpuMat);
|
||||
GpuMat _hint = hint.getGpuMat();
|
||||
_hint.copyTo(hintGpuMat, m_inputStream);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -503,8 +519,6 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu
|
||||
}
|
||||
}
|
||||
|
||||
inputStream.waitForCompletion();
|
||||
|
||||
//Execute Call
|
||||
NV_OF_EXECUTE_INPUT_PARAMS exeInParams;
|
||||
NV_OF_EXECUTE_OUTPUT_PARAMS exeOutParams;
|
||||
@@ -518,15 +532,13 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu
|
||||
memset(&exeOutParams, 0, sizeof(exeOutParams));
|
||||
exeOutParams.outputBuffer = m_hOutputBuffer;
|
||||
exeOutParams.outputCostBuffer = m_initParams.enableOutputCost == NV_OF_TRUE ?
|
||||
m_hCostBuffer : nullptr;;
|
||||
m_hCostBuffer : nullptr;
|
||||
NVOF_API_CALL(GetAPI()->nvOFExecute(GetHandle(), &exeInParams, &exeOutParams));
|
||||
|
||||
outputStream.waitForCompletion();
|
||||
|
||||
if (_flow.isMat())
|
||||
flowXYGpuMat.download(_flow);
|
||||
flowXYGpuMat.download(_flow, m_outputStream);
|
||||
else if(_flow.isGpuMat())
|
||||
flowXYGpuMat.copyTo(_flow);
|
||||
flowXYGpuMat.copyTo(_flow, m_outputStream);
|
||||
else
|
||||
CV_Error(Error::StsBadArg, "Incorrect flow buffer passed. Pass Mat or GpuMat");
|
||||
|
||||
@@ -537,13 +549,13 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu
|
||||
m_costBufferStrideInfo.strideInfo[0].strideXInBytes);
|
||||
|
||||
if (cost.isMat())
|
||||
costGpuMat.download(cost);
|
||||
costGpuMat.download(cost, m_outputStream);
|
||||
else if(cost.isGpuMat())
|
||||
costGpuMat.copyTo(cost);
|
||||
costGpuMat.copyTo(cost, m_outputStream);
|
||||
else
|
||||
CV_Error(Error::StsBadArg, "Incorrect cost buffer passed. Pass Mat or GpuMat");
|
||||
}
|
||||
cuSafeCall(cudaDeviceSynchronize());
|
||||
m_outputStream.waitForCompletion();
|
||||
}
|
||||
|
||||
void NvidiaOpticalFlowImpl::collectGarbage()
|
||||
@@ -574,6 +586,14 @@ void NvidiaOpticalFlowImpl::collectGarbage()
|
||||
NVOF_API_CALL(GetAPI()->nvOFDestroyGPUBufferCuda(m_hCostBuffer));
|
||||
}
|
||||
}
|
||||
if (m_inputStream)
|
||||
{
|
||||
m_inputStream.waitForCompletion();
|
||||
}
|
||||
if (m_outputStream)
|
||||
{
|
||||
m_outputStream.waitForCompletion();
|
||||
}
|
||||
if (m_hOF)
|
||||
{
|
||||
NVOF_API_CALL(GetAPI()->nvOFDestroy(m_hOF));
|
||||
@@ -586,7 +606,8 @@ void NvidiaOpticalFlowImpl::upSampler(InputArray _flow, int width, int height,
|
||||
Mat flow;
|
||||
if (_flow.isMat())
|
||||
{
|
||||
_flow.copyTo(flow);
|
||||
Mat __flow = _flow.getMat();
|
||||
__flow.copyTo(flow);
|
||||
}
|
||||
else if (_flow.isGpuMat())
|
||||
{
|
||||
@@ -640,7 +661,8 @@ void NvidiaOpticalFlowImpl::upSampler(InputArray _flow, int width, int height,
|
||||
Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(
|
||||
int width, int height, NVIDIA_OF_PERF_LEVEL perfPreset,
|
||||
bool bEnableTemporalHints, bool bEnableExternalHints,
|
||||
bool bEnableCostBuffer, int gpuId)
|
||||
bool bEnableCostBuffer, int gpuId,
|
||||
Stream& inputStream, Stream& outputStream)
|
||||
{
|
||||
return makePtr<NvidiaOpticalFlowImpl>(
|
||||
width,
|
||||
@@ -649,6 +671,8 @@ Ptr<cv::cuda::NvidiaOpticalFlow_1_0> cv::cuda::NvidiaOpticalFlow_1_0::create(
|
||||
bEnableTemporalHints,
|
||||
bEnableExternalHints,
|
||||
bEnableCostBuffer,
|
||||
gpuId);
|
||||
gpuId,
|
||||
inputStream,
|
||||
outputStream);
|
||||
}
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user