mirror of
https://github.com/opencv/opencv_contrib.git
synced 2025-10-18 17:24:28 +08:00
fix cuda mem leak and move cuda malloc out of critical path
This commit is contained in:
@@ -87,25 +87,22 @@ namespace cv { namespace cuda { namespace device
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int findCorners_gpu(const cudaTextureObject_t &eigTex, const int &rows, const int &cols, float threshold, PtrStepSzb mask, float2* corners, int max_count, cudaStream_t stream)
|
int findCorners_gpu(const cudaTextureObject_t &eigTex, const int &rows, const int &cols, float threshold, PtrStepSzb mask, float2* corners, int max_count, int* counterPtr, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
int* counter_ptr;
|
cudaSafeCall( cudaMemsetAsync(counterPtr, 0, sizeof(int), stream) );
|
||||||
cudaSafeCall( cudaMalloc(&counter_ptr, sizeof(int)) );
|
|
||||||
|
|
||||||
cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(int), stream) );
|
|
||||||
|
|
||||||
dim3 block(16, 16);
|
dim3 block(16, 16);
|
||||||
dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
|
dim3 grid(divUp(cols, block.x), divUp(rows, block.y));
|
||||||
|
|
||||||
if (mask.data)
|
if (mask.data)
|
||||||
findCorners<<<grid, block, 0, stream>>>(threshold, SingleMask(mask), corners, max_count, rows, cols, eigTex, counter_ptr);
|
findCorners<<<grid, block, 0, stream>>>(threshold, SingleMask(mask), corners, max_count, rows, cols, eigTex, counterPtr);
|
||||||
else
|
else
|
||||||
findCorners<<<grid, block, 0, stream>>>(threshold, WithOutMask(), corners, max_count, rows, cols, eigTex, counter_ptr);
|
findCorners<<<grid, block, 0, stream>>>(threshold, WithOutMask(), corners, max_count, rows, cols, eigTex, counterPtr);
|
||||||
|
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
int count;
|
int count;
|
||||||
cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost, stream) );
|
cudaSafeCall( cudaMemcpyAsync(&count, counterPtr, sizeof(int), cudaMemcpyDeviceToHost, stream) );
|
||||||
if (stream)
|
if (stream)
|
||||||
cudaSafeCall(cudaStreamSynchronize(stream));
|
cudaSafeCall(cudaStreamSynchronize(stream));
|
||||||
else
|
else
|
||||||
|
@@ -55,7 +55,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
{
|
{
|
||||||
namespace gfft
|
namespace gfft
|
||||||
{
|
{
|
||||||
int findCorners_gpu(const cudaTextureObject_t &eigTex_, const int &rows, const int &cols, float threshold, PtrStepSzb mask, float2* corners, int max_count, cudaStream_t stream);
|
int findCorners_gpu(const cudaTextureObject_t &eigTex_, const int &rows, const int &cols, float threshold, PtrStepSzb mask, float2* corners, int max_count, int* counterPtr, cudaStream_t stream);
|
||||||
void sortCorners_gpu(const cudaTextureObject_t &eigTex_, float2* corners, int count, cudaStream_t stream);
|
void sortCorners_gpu(const cudaTextureObject_t &eigTex_, float2* corners, int count, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
@@ -67,7 +67,7 @@ namespace
|
|||||||
public:
|
public:
|
||||||
GoodFeaturesToTrackDetector(int srcType, int maxCorners, double qualityLevel, double minDistance,
|
GoodFeaturesToTrackDetector(int srcType, int maxCorners, double qualityLevel, double minDistance,
|
||||||
int blockSize, bool useHarrisDetector, double harrisK);
|
int blockSize, bool useHarrisDetector, double harrisK);
|
||||||
|
~GoodFeaturesToTrackDetector();
|
||||||
void detect(InputArray image, OutputArray corners, InputArray mask, Stream& stream);
|
void detect(InputArray image, OutputArray corners, InputArray mask, Stream& stream);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@@ -82,6 +82,8 @@ namespace
|
|||||||
GpuMat buf_;
|
GpuMat buf_;
|
||||||
GpuMat eig_;
|
GpuMat eig_;
|
||||||
GpuMat tmpCorners_;
|
GpuMat tmpCorners_;
|
||||||
|
|
||||||
|
int* counterPtr_;
|
||||||
};
|
};
|
||||||
|
|
||||||
GoodFeaturesToTrackDetector::GoodFeaturesToTrackDetector(int srcType, int maxCorners, double qualityLevel, double minDistance,
|
GoodFeaturesToTrackDetector::GoodFeaturesToTrackDetector(int srcType, int maxCorners, double qualityLevel, double minDistance,
|
||||||
@@ -93,6 +95,12 @@ namespace
|
|||||||
cornerCriteria_ = useHarrisDetector ?
|
cornerCriteria_ = useHarrisDetector ?
|
||||||
cuda::createHarrisCorner(srcType, blockSize, 3, harrisK) :
|
cuda::createHarrisCorner(srcType, blockSize, 3, harrisK) :
|
||||||
cuda::createMinEigenValCorner(srcType, blockSize, 3);
|
cuda::createMinEigenValCorner(srcType, blockSize, 3);
|
||||||
|
cudaSafeCall(cudaMalloc(&counterPtr_, sizeof(int)));
|
||||||
|
}
|
||||||
|
|
||||||
|
GoodFeaturesToTrackDetector::~GoodFeaturesToTrackDetector() // releases the device-side corner counter (counterPtr_) obtained via cudaMalloc
|
||||||
|
{
|
||||||
|
cudaFree(counterPtr_); // deliberately NOT wrapped in cudaSafeCall: that macro reports failure by throwing, and an exception escaping an (implicitly noexcept) destructor calls std::terminate; a failed free during teardown is ignored instead
|
||||||
}
|
}
|
||||||
|
|
||||||
void GoodFeaturesToTrackDetector::detect(InputArray _image, OutputArray _corners, InputArray _mask, Stream& stream)
|
void GoodFeaturesToTrackDetector::detect(InputArray _image, OutputArray _corners, InputArray _mask, Stream& stream)
|
||||||
@@ -125,7 +133,7 @@ namespace
|
|||||||
PtrStepSzf eig = eig_;
|
PtrStepSzf eig = eig_;
|
||||||
cv::cuda::device::createTextureObjectPitch2D<float>(&eigTex_, eig, texDesc);
|
cv::cuda::device::createTextureObjectPitch2D<float>(&eigTex_, eig, texDesc);
|
||||||
|
|
||||||
int total = findCorners_gpu(eigTex_, eig_.rows, eig_.cols, static_cast<float>(maxVal * qualityLevel_), mask, tmpCorners_.ptr<float2>(), tmpCorners_.cols, stream_);
|
int total = findCorners_gpu(eigTex_, eig_.rows, eig_.cols, static_cast<float>(maxVal * qualityLevel_), mask, tmpCorners_.ptr<float2>(), tmpCorners_.cols, counterPtr_, stream_);
|
||||||
|
|
||||||
|
|
||||||
if (total == 0)
|
if (total == 0)
|
||||||
|
Reference in New Issue
Block a user