Merge pull request #3999 from incubus-ank:fix/cuda_fast_detectAsync_memory_leak

Add missing cudaFree #3999. I found a missing cudaFree call (a device memory leak) in cv::cuda::FAST_Impl::detectAsync(). I created an issue but did not receive a response: https://github.com/opencv/opencv_contrib/issues/3994 ### Pull Request Readiness Checklist - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is an accuracy test, performance test and test data in opencv_extra repository, if applicable. Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
2025-10-18 00:01:17 +08:00 · 2025-09-12 19:02:37 +03:00
parent 095981ca99
commit b01e6a7581
1 changed files with 7 additions and 4 deletions
--- a/modules/cudafeatures2d/src/fast.cpp
+++ b/modules/cudafeatures2d/src/fast.cpp
@@ -66,6 +66,7 @@ namespace
    {
    public:
        FAST_Impl(int threshold, bool nonmaxSuppression, int max_npoints);
+        ~FAST_Impl();

        virtual void detect(InputArray _image, std::vector<KeyPoint>& keypoints, InputArray _mask);
        virtual void detectAsync(InputArray _image, OutputArray _keypoints, InputArray _mask, Stream& stream);
@@ -95,6 +96,12 @@ namespace
    FAST_Impl::FAST_Impl(int threshold, bool nonmaxSuppression, int max_npoints) :
        threshold_(threshold), nonmaxSuppression_(nonmaxSuppression), max_npoints_(max_npoints)
    {
+        cudaSafeCall( cudaMalloc(&d_counter, sizeof(unsigned int)) );
+    }
+
+    FAST_Impl::~FAST_Impl()
+    {
+        cudaSafeCall( cudaFree(d_counter) );
    }

    void FAST_Impl::detect(InputArray _image, std::vector<KeyPoint>& keypoints, InputArray _mask)
@@ -116,8 +123,6 @@ namespace
    {
        using namespace cv::cuda::device::fast;

-        cudaSafeCall( cudaMalloc(&d_counter, sizeof(unsigned int)) );
-
        const GpuMat img = _image.getGpuMat();
        const GpuMat mask = _mask.getGpuMat();

@@ -165,8 +170,6 @@ namespace
            kpLoc.colRange(0, count).copyTo(locRow, stream);
            keypoints.row(1).setTo(Scalar::all(0), stream);
        }
-
-        cudaSafeCall( cudaFree(d_counter) );
    }

    void FAST_Impl::convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints)