diff --git a/modules/cudalegacy/src/cuda/NCVBroxOpticalFlow.cu b/modules/cudalegacy/src/cuda/NCVBroxOpticalFlow.cu index a7f83c715..3a527a010 100644 --- a/modules/cudalegacy/src/cuda/NCVBroxOpticalFlow.cu +++ b/modules/cudalegacy/src/cuda/NCVBroxOpticalFlow.cu @@ -876,17 +876,17 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc, Texture texIyy(kLevelHeight, kLevelWidth, Iyy.ptr(), kPitchTex, true, cudaFilterModeLinear, cudaAddressModeMirror); Texture texIy0(kLevelHeight, kLevelWidth, Iy0.ptr(), kPitchTex, true, cudaFilterModeLinear, cudaAddressModeMirror); Texture texIxy(kLevelHeight, kLevelWidth, Ixy.ptr(), kPitchTex, true, cudaFilterModeLinear, cudaAddressModeMirror); - Texture texDiffX(1, kLevelSizeInBytes / sizeof(float), diffusivity_x.ptr(), kLevelSizeInBytes); - Texture texDiffY(1, kLevelSizeInBytes / sizeof(float), diffusivity_y.ptr(), kLevelSizeInBytes); + Texture texDiffX(kLevelSizeInBytes, diffusivity_x.ptr()); + Texture texDiffY(kLevelSizeInBytes, diffusivity_y.ptr()); // flow - Texture texU(1, kLevelSizeInBytes / sizeof(float), ptrU->ptr(), kLevelSizeInBytes); - Texture texV(1, kLevelSizeInBytes / sizeof(float), ptrV->ptr(), kLevelSizeInBytes); + Texture texU(kLevelSizeInBytes, ptrU->ptr()); + Texture texV(kLevelSizeInBytes, ptrV->ptr()); // flow increments - Texture texDu(1, kLevelSizeInBytes / sizeof(float), du.ptr(), kLevelSizeInBytes); - Texture texDv(1, kLevelSizeInBytes / sizeof(float), dv.ptr(), kLevelSizeInBytes); - Texture texDuNew(1, kLevelSizeInBytes / sizeof(float), du_new.ptr(), kLevelSizeInBytes); - Texture texDvNew(1, kLevelSizeInBytes / sizeof(float), dv_new.ptr(), kLevelSizeInBytes); + Texture texDu(kLevelSizeInBytes, du.ptr()); + Texture texDv(kLevelSizeInBytes, dv.ptr()); + Texture texDuNew(kLevelSizeInBytes, du_new.ptr()); + Texture texDvNew(kLevelSizeInBytes, dv_new.ptr()); dim3 psor_blocks(iDivUp(kLevelWidth, PSOR_TILE_WIDTH), iDivUp(kLevelHeight, PSOR_TILE_HEIGHT)); dim3 psor_threads(PSOR_TILE_WIDTH, PSOR_TILE_HEIGHT); diff --git a/modules/cudawarping/test/test_remap.cpp b/modules/cudawarping/test/test_remap.cpp index ec7586638..b751072bb 100644 --- a/modules/cudawarping/test/test_remap.cpp +++ b/modules/cudawarping/test/test_remap.cpp @@ -173,7 +173,7 @@ CUDA_TEST_P(Remap, Accuracy) INSTANTIATE_TEST_CASE_P(CUDA_Warping, Remap, testing::Combine( ALL_DEVICES, - DIFFERENT_SIZES, + DIFFERENT_SIZES_EXTRA, testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP)), @@ -198,7 +198,7 @@ CUDA_TEST_P(RemapOutOfScope, Regression_18224) INSTANTIATE_TEST_CASE_P(CUDA_Warping, RemapOutOfScope, testing::Combine( ALL_DEVICES, - DIFFERENT_SIZES, + DIFFERENT_SIZES_EXTRA, testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), testing::Values(BorderType(cv::BORDER_CONSTANT)), diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp index 078373a44..27a26102e 100644 --- a/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp +++ b/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp @@ -94,13 +94,14 @@ namespace cv { namespace cudev { __host__ UniqueTexture(const size_t sizeInBytes, T* data, const bool normalizedCoords = false, const cudaTextureFilterMode filterMode = cudaFilterModePoint, const cudaTextureAddressMode addressMode = cudaAddressModeClamp, const cudaTextureReadMode readMode = cudaReadModeElementType) { - create(1, static_cast(sizeInBytes/sizeof(T)), data, sizeInBytes, normalizedCoords, filterMode, addressMode, readMode); + create(sizeInBytes, data, normalizedCoords, filterMode, addressMode, readMode); } __host__ ~UniqueTexture() { if (tex != cudaTextureObject_t()) { try { CV_CUDEV_SAFE_CALL(cudaDestroyTextureObject(tex)); + CV_CUDEV_SAFE_CALL(cudaFree(internalSrc)); } catch (const cv::Exception& ex) { std::ostringstream os; @@ -132,29 +133,9 @@ namespace cv { namespace cudev { __host__ explicit operator bool() const noexcept { return tex != cudaTextureObject_t(); } private: - - template - __host__ void create(const int rows, const int cols, T1* data, const size_t step, const bool normalizedCoords, const cudaTextureFilterMode filterMode, + __host__ void createTextureObject(cudaResourceDesc texRes, const bool normalizedCoords, const cudaTextureFilterMode filterMode, const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode) { - cudaResourceDesc texRes; - std::memset(&texRes, 0, sizeof(texRes)); - if (rows == 1) { - CV_Assert(rows == 1 && cols*sizeof(T) == step); - texRes.resType = cudaResourceTypeLinear; - texRes.res.linear.devPtr = data; - texRes.res.linear.sizeInBytes = step; - texRes.res.linear.desc = cudaCreateChannelDesc(); - } - else { - texRes.resType = cudaResourceTypePitch2D; - texRes.res.pitch2D.devPtr = data; - texRes.res.pitch2D.height = rows; - texRes.res.pitch2D.width = cols; - texRes.res.pitch2D.pitchInBytes = step; - texRes.res.pitch2D.desc = cudaCreateChannelDesc(); - } - cudaTextureDesc texDescr; std::memset(&texDescr, 0, sizeof(texDescr)); texDescr.normalizedCoords = normalizedCoords; @@ -163,10 +144,53 @@ namespace cv { namespace cudev { texDescr.addressMode[1] = addressMode; texDescr.addressMode[2] = addressMode; texDescr.readMode = readMode; - CV_CUDEV_SAFE_CALL(cudaCreateTextureObject(&tex, &texRes, &texDescr, 0)); } + template + __host__ void create(const size_t sizeInBytes, T1* data, const bool normalizedCoords, const cudaTextureFilterMode filterMode, + const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode) + { + cudaResourceDesc texRes; + std::memset(&texRes, 0, sizeof(texRes)); + texRes.resType = cudaResourceTypeLinear; + texRes.res.linear.devPtr = data; + texRes.res.linear.sizeInBytes = sizeInBytes; + texRes.res.linear.desc = cudaCreateChannelDesc(); + createTextureObject(texRes, normalizedCoords, filterMode, addressMode, readMode); + } + + __host__ void create(const size_t sizeInBytes, uint64* data, const bool normalizedCoords, const cudaTextureFilterMode filterMode, + const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode) + { + create(sizeInBytes, (uint2*)data, normalizedCoords, filterMode, addressMode, readMode); + } + + template + __host__ void create(const int rows, const int cols, T1* data, const size_t step, const bool normalizedCoords, const cudaTextureFilterMode filterMode, + const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode) + { + cudaResourceDesc texRes; + std::memset(&texRes, 0, sizeof(texRes)); + texRes.resType = cudaResourceTypePitch2D; + texRes.res.pitch2D.height = rows; + texRes.res.pitch2D.width = cols; + // temporary fix for single row/columns until TexturePtr is reworked + if (rows == 1 || cols == 1) { + size_t dStep = 0; + CV_CUDEV_SAFE_CALL(cudaMallocPitch(&internalSrc, &dStep, cols * sizeof(T1), rows)); + CV_CUDEV_SAFE_CALL(cudaMemcpy2D(internalSrc, dStep, data, step, cols * sizeof(T1), rows, cudaMemcpyDeviceToDevice)); + texRes.res.pitch2D.devPtr = internalSrc; + texRes.res.pitch2D.pitchInBytes = dStep; + } + else { + texRes.res.pitch2D.devPtr = data; + texRes.res.pitch2D.pitchInBytes = step; + } + texRes.res.pitch2D.desc = cudaCreateChannelDesc(); + createTextureObject(texRes, normalizedCoords, filterMode, addressMode, readMode); + } + __host__ void create(const int rows, const int cols, uint64* data, const size_t step, const bool normalizedCoords, const cudaTextureFilterMode filterMode, const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode) { @@ -175,6 +199,7 @@ namespace cv { namespace cudev { private: cudaTextureObject_t tex; + T* internalSrc = 0; }; /** @brief sharable smart CUDA texture object