mirror of
https://github.com/opencv/opencv_contrib.git
synced 2025-10-20 04:25:42 +08:00
[moved from opencv] CUDA 10.1 Build Issue Fix
original commit: 5a2faab2e6
This commit is contained in:
@@ -157,43 +157,6 @@ CV_CUDEV_SHFL_VEC_INST(double)
|
|||||||
|
|
||||||
// shfl_up
|
// shfl_up
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
__device__ __forceinline__ T compatible_shfl_up(T val, uint delta, int width = warpSize)
|
|
||||||
{
|
|
||||||
#if __CUDACC_VER_MAJOR__ < 9
|
|
||||||
|
|
||||||
return shfl_up(val, delta, width);
|
|
||||||
|
|
||||||
#else // __CUDACC_VER_MAJOR__ < 9
|
|
||||||
|
|
||||||
#if CV_CUDEV_ARCH >= 700
|
|
||||||
return shfl_up_sync(0xFFFFFFFFU, val, delta, width);
|
|
||||||
#else
|
|
||||||
const int block_size = Block::blockSize();
|
|
||||||
const int residual = block_size & (warpSize - 1);
|
|
||||||
|
|
||||||
if (0 == residual)
|
|
||||||
return shfl_up_sync(0xFFFFFFFFU, val, delta, width);
|
|
||||||
else
|
|
||||||
{
|
|
||||||
const int n_warps = divUp(block_size, warpSize);
|
|
||||||
const int warp_id = Warp::warpId();
|
|
||||||
|
|
||||||
if (warp_id < n_warps - 1)
|
|
||||||
return shfl_up_sync(0xFFFFFFFFU, val, delta, width);
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// We are at the last threads of a block whose number of threads
|
|
||||||
// is not a multiple of the warp size
|
|
||||||
uint mask = (1LU << residual) - 1;
|
|
||||||
return shfl_up_sync(mask, val, delta, width);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif // __CUDACC_VER_MAJOR__ < 9
|
|
||||||
}
|
|
||||||
|
|
||||||
#if __CUDACC_VER_MAJOR__ >= 9
|
#if __CUDACC_VER_MAJOR__ >= 9
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@@ -300,6 +263,43 @@ CV_CUDEV_SHFL_UP_VEC_INST(double)
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
__device__ __forceinline__ T compatible_shfl_up(T val, uint delta, int width = warpSize)
|
||||||
|
{
|
||||||
|
#if __CUDACC_VER_MAJOR__ < 9
|
||||||
|
|
||||||
|
return shfl_up(val, delta, width);
|
||||||
|
|
||||||
|
#else // __CUDACC_VER_MAJOR__ < 9
|
||||||
|
|
||||||
|
#if CV_CUDEV_ARCH >= 700
|
||||||
|
return shfl_up_sync(0xFFFFFFFFU, val, delta, width);
|
||||||
|
#else
|
||||||
|
const int block_size = Block::blockSize();
|
||||||
|
const int residual = block_size & (warpSize - 1);
|
||||||
|
|
||||||
|
if (0 == residual)
|
||||||
|
return shfl_up_sync(0xFFFFFFFFU, val, delta, width);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const int n_warps = divUp(block_size, warpSize);
|
||||||
|
const int warp_id = Warp::warpId();
|
||||||
|
|
||||||
|
if (warp_id < n_warps - 1)
|
||||||
|
return shfl_up_sync(0xFFFFFFFFU, val, delta, width);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// We are at the last threads of a block whose number of threads
|
||||||
|
// is not a multiple of the warp size
|
||||||
|
uint mask = (1LU << residual) - 1;
|
||||||
|
return shfl_up_sync(mask, val, delta, width);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // __CUDACC_VER_MAJOR__ < 9
|
||||||
|
}
|
||||||
|
|
||||||
// shfl_down
|
// shfl_down
|
||||||
|
|
||||||
__device__ __forceinline__ uchar shfl_down(uchar val, uint delta, int width = warpSize)
|
__device__ __forceinline__ uchar shfl_down(uchar val, uint delta, int width = warpSize)
|
||||||
|
Reference in New Issue
Block a user