mirror of
https://github.com/opencv/opencv_contrib.git
synced 2025-10-16 05:17:39 +08:00
cudev: fix 1D error introduced in PR 3378
This commit is contained in:
@@ -876,17 +876,17 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
|
|||||||
Texture texIyy(kLevelHeight, kLevelWidth, Iyy.ptr(), kPitchTex, true, cudaFilterModeLinear, cudaAddressModeMirror);
|
Texture texIyy(kLevelHeight, kLevelWidth, Iyy.ptr(), kPitchTex, true, cudaFilterModeLinear, cudaAddressModeMirror);
|
||||||
Texture texIy0(kLevelHeight, kLevelWidth, Iy0.ptr(), kPitchTex, true, cudaFilterModeLinear, cudaAddressModeMirror);
|
Texture texIy0(kLevelHeight, kLevelWidth, Iy0.ptr(), kPitchTex, true, cudaFilterModeLinear, cudaAddressModeMirror);
|
||||||
Texture texIxy(kLevelHeight, kLevelWidth, Ixy.ptr(), kPitchTex, true, cudaFilterModeLinear, cudaAddressModeMirror);
|
Texture texIxy(kLevelHeight, kLevelWidth, Ixy.ptr(), kPitchTex, true, cudaFilterModeLinear, cudaAddressModeMirror);
|
||||||
Texture texDiffX(1, kLevelSizeInBytes / sizeof(float), diffusivity_x.ptr(), kLevelSizeInBytes);
|
Texture texDiffX(kLevelSizeInBytes, diffusivity_x.ptr());
|
||||||
Texture texDiffY(1, kLevelSizeInBytes / sizeof(float), diffusivity_y.ptr(), kLevelSizeInBytes);
|
Texture texDiffY(kLevelSizeInBytes, diffusivity_y.ptr());
|
||||||
|
|
||||||
// flow
|
// flow
|
||||||
Texture texU(1, kLevelSizeInBytes / sizeof(float), ptrU->ptr(), kLevelSizeInBytes);
|
Texture texU(kLevelSizeInBytes, ptrU->ptr());
|
||||||
Texture texV(1, kLevelSizeInBytes / sizeof(float), ptrV->ptr(), kLevelSizeInBytes);
|
Texture texV(kLevelSizeInBytes, ptrV->ptr());
|
||||||
// flow increments
|
// flow increments
|
||||||
Texture texDu(1, kLevelSizeInBytes / sizeof(float), du.ptr(), kLevelSizeInBytes);
|
Texture texDu(kLevelSizeInBytes, du.ptr());
|
||||||
Texture texDv(1, kLevelSizeInBytes / sizeof(float), dv.ptr(), kLevelSizeInBytes);
|
Texture texDv(kLevelSizeInBytes, dv.ptr());
|
||||||
Texture texDuNew(1, kLevelSizeInBytes / sizeof(float), du_new.ptr(), kLevelSizeInBytes);
|
Texture texDuNew(kLevelSizeInBytes, du_new.ptr());
|
||||||
Texture texDvNew(1, kLevelSizeInBytes / sizeof(float), dv_new.ptr(), kLevelSizeInBytes);
|
Texture texDvNew(kLevelSizeInBytes, dv_new.ptr());
|
||||||
|
|
||||||
dim3 psor_blocks(iDivUp(kLevelWidth, PSOR_TILE_WIDTH), iDivUp(kLevelHeight, PSOR_TILE_HEIGHT));
|
dim3 psor_blocks(iDivUp(kLevelWidth, PSOR_TILE_WIDTH), iDivUp(kLevelHeight, PSOR_TILE_HEIGHT));
|
||||||
dim3 psor_threads(PSOR_TILE_WIDTH, PSOR_TILE_HEIGHT);
|
dim3 psor_threads(PSOR_TILE_WIDTH, PSOR_TILE_HEIGHT);
|
||||||
|
@@ -173,7 +173,7 @@ CUDA_TEST_P(Remap, Accuracy)
|
|||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(CUDA_Warping, Remap, testing::Combine(
|
INSTANTIATE_TEST_CASE_P(CUDA_Warping, Remap, testing::Combine(
|
||||||
ALL_DEVICES,
|
ALL_DEVICES,
|
||||||
DIFFERENT_SIZES,
|
DIFFERENT_SIZES_EXTRA,
|
||||||
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
|
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
|
||||||
testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
|
testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
|
||||||
testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP)),
|
testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP)),
|
||||||
@@ -198,7 +198,7 @@ CUDA_TEST_P(RemapOutOfScope, Regression_18224)
|
|||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(CUDA_Warping, RemapOutOfScope, testing::Combine(
|
INSTANTIATE_TEST_CASE_P(CUDA_Warping, RemapOutOfScope, testing::Combine(
|
||||||
ALL_DEVICES,
|
ALL_DEVICES,
|
||||||
DIFFERENT_SIZES,
|
DIFFERENT_SIZES_EXTRA,
|
||||||
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
|
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
|
||||||
testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)),
|
testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)),
|
||||||
testing::Values(BorderType(cv::BORDER_CONSTANT)),
|
testing::Values(BorderType(cv::BORDER_CONSTANT)),
|
||||||
|
@@ -94,13 +94,14 @@ namespace cv { namespace cudev {
|
|||||||
__host__ UniqueTexture(const size_t sizeInBytes, T* data, const bool normalizedCoords = false, const cudaTextureFilterMode filterMode = cudaFilterModePoint,
|
__host__ UniqueTexture(const size_t sizeInBytes, T* data, const bool normalizedCoords = false, const cudaTextureFilterMode filterMode = cudaFilterModePoint,
|
||||||
const cudaTextureAddressMode addressMode = cudaAddressModeClamp, const cudaTextureReadMode readMode = cudaReadModeElementType)
|
const cudaTextureAddressMode addressMode = cudaAddressModeClamp, const cudaTextureReadMode readMode = cudaReadModeElementType)
|
||||||
{
|
{
|
||||||
create(1, static_cast<int>(sizeInBytes/sizeof(T)), data, sizeInBytes, normalizedCoords, filterMode, addressMode, readMode);
|
create(sizeInBytes, data, normalizedCoords, filterMode, addressMode, readMode);
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ ~UniqueTexture() {
|
__host__ ~UniqueTexture() {
|
||||||
if (tex != cudaTextureObject_t()) {
|
if (tex != cudaTextureObject_t()) {
|
||||||
try {
|
try {
|
||||||
CV_CUDEV_SAFE_CALL(cudaDestroyTextureObject(tex));
|
CV_CUDEV_SAFE_CALL(cudaDestroyTextureObject(tex));
|
||||||
|
CV_CUDEV_SAFE_CALL(cudaFree(internalSrc));
|
||||||
}
|
}
|
||||||
catch (const cv::Exception& ex) {
|
catch (const cv::Exception& ex) {
|
||||||
std::ostringstream os;
|
std::ostringstream os;
|
||||||
@@ -132,29 +133,9 @@ namespace cv { namespace cudev {
|
|||||||
__host__ explicit operator bool() const noexcept { return tex != cudaTextureObject_t(); }
|
__host__ explicit operator bool() const noexcept { return tex != cudaTextureObject_t(); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
__host__ void createTextureObject(cudaResourceDesc texRes, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
|
||||||
template <class T1>
|
|
||||||
__host__ void create(const int rows, const int cols, T1* data, const size_t step, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
|
|
||||||
const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
|
const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
|
||||||
{
|
{
|
||||||
cudaResourceDesc texRes;
|
|
||||||
std::memset(&texRes, 0, sizeof(texRes));
|
|
||||||
if (rows == 1) {
|
|
||||||
CV_Assert(rows == 1 && cols*sizeof(T) == step);
|
|
||||||
texRes.resType = cudaResourceTypeLinear;
|
|
||||||
texRes.res.linear.devPtr = data;
|
|
||||||
texRes.res.linear.sizeInBytes = step;
|
|
||||||
texRes.res.linear.desc = cudaCreateChannelDesc<T1>();
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
texRes.resType = cudaResourceTypePitch2D;
|
|
||||||
texRes.res.pitch2D.devPtr = data;
|
|
||||||
texRes.res.pitch2D.height = rows;
|
|
||||||
texRes.res.pitch2D.width = cols;
|
|
||||||
texRes.res.pitch2D.pitchInBytes = step;
|
|
||||||
texRes.res.pitch2D.desc = cudaCreateChannelDesc<T1>();
|
|
||||||
}
|
|
||||||
|
|
||||||
cudaTextureDesc texDescr;
|
cudaTextureDesc texDescr;
|
||||||
std::memset(&texDescr, 0, sizeof(texDescr));
|
std::memset(&texDescr, 0, sizeof(texDescr));
|
||||||
texDescr.normalizedCoords = normalizedCoords;
|
texDescr.normalizedCoords = normalizedCoords;
|
||||||
@@ -163,10 +144,53 @@ namespace cv { namespace cudev {
|
|||||||
texDescr.addressMode[1] = addressMode;
|
texDescr.addressMode[1] = addressMode;
|
||||||
texDescr.addressMode[2] = addressMode;
|
texDescr.addressMode[2] = addressMode;
|
||||||
texDescr.readMode = readMode;
|
texDescr.readMode = readMode;
|
||||||
|
|
||||||
CV_CUDEV_SAFE_CALL(cudaCreateTextureObject(&tex, &texRes, &texDescr, 0));
|
CV_CUDEV_SAFE_CALL(cudaCreateTextureObject(&tex, &texRes, &texDescr, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class T1>
|
||||||
|
__host__ void create(const size_t sizeInBytes, T1* data, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
|
||||||
|
const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
|
||||||
|
{
|
||||||
|
cudaResourceDesc texRes;
|
||||||
|
std::memset(&texRes, 0, sizeof(texRes));
|
||||||
|
texRes.resType = cudaResourceTypeLinear;
|
||||||
|
texRes.res.linear.devPtr = data;
|
||||||
|
texRes.res.linear.sizeInBytes = sizeInBytes;
|
||||||
|
texRes.res.linear.desc = cudaCreateChannelDesc<T1>();
|
||||||
|
createTextureObject(texRes, normalizedCoords, filterMode, addressMode, readMode);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ void create(const size_t sizeInBytes, uint64* data, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
|
||||||
|
const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
|
||||||
|
{
|
||||||
|
create<uint2>(sizeInBytes, (uint2*)data, normalizedCoords, filterMode, addressMode, readMode);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T1>
|
||||||
|
__host__ void create(const int rows, const int cols, T1* data, const size_t step, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
|
||||||
|
const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
|
||||||
|
{
|
||||||
|
cudaResourceDesc texRes;
|
||||||
|
std::memset(&texRes, 0, sizeof(texRes));
|
||||||
|
texRes.resType = cudaResourceTypePitch2D;
|
||||||
|
texRes.res.pitch2D.height = rows;
|
||||||
|
texRes.res.pitch2D.width = cols;
|
||||||
|
// temporary fix for single row/columns until TexturePtr is reworked
|
||||||
|
if (rows == 1 || cols == 1) {
|
||||||
|
size_t dStep = 0;
|
||||||
|
CV_CUDEV_SAFE_CALL(cudaMallocPitch(&internalSrc, &dStep, cols * sizeof(T1), rows));
|
||||||
|
CV_CUDEV_SAFE_CALL(cudaMemcpy2D(internalSrc, dStep, data, step, cols * sizeof(T1), rows, cudaMemcpyDeviceToDevice));
|
||||||
|
texRes.res.pitch2D.devPtr = internalSrc;
|
||||||
|
texRes.res.pitch2D.pitchInBytes = dStep;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
texRes.res.pitch2D.devPtr = data;
|
||||||
|
texRes.res.pitch2D.pitchInBytes = step;
|
||||||
|
}
|
||||||
|
texRes.res.pitch2D.desc = cudaCreateChannelDesc<T1>();
|
||||||
|
createTextureObject(texRes, normalizedCoords, filterMode, addressMode, readMode);
|
||||||
|
}
|
||||||
|
|
||||||
__host__ void create(const int rows, const int cols, uint64* data, const size_t step, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
|
__host__ void create(const int rows, const int cols, uint64* data, const size_t step, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
|
||||||
const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
|
const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
|
||||||
{
|
{
|
||||||
@@ -175,6 +199,7 @@ namespace cv { namespace cudev {
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
cudaTextureObject_t tex;
|
cudaTextureObject_t tex;
|
||||||
|
T* internalSrc = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
/** @brief sharable smart CUDA texture object
|
/** @brief sharable smart CUDA texture object
|
||||||
|
Reference in New Issue
Block a user