1
0
mirror of https://github.com/opencv/opencv_contrib.git synced 2025-10-17 07:04:18 +08:00

Merge pull request #3355 from cudawarped:cudacodec_add_resize_crop

Add scaling and cropping options to `cudacodec::VideoReader`
This commit is contained in:
Alexander Smorkalov
2022-09-22 12:06:56 +03:00
committed by GitHub
8 changed files with 107 additions and 12 deletions

View File

@@ -6,7 +6,7 @@ set(the_description "CUDA-accelerated Video Encoding/Decoding")
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wshadow)
ocv_add_module(cudacodec opencv_core opencv_videoio OPTIONAL opencv_cudev WRAP python)
ocv_add_module(cudacodec opencv_core opencv_videoio opencv_cudaarithm opencv_cudawarping OPTIONAL opencv_cudev WRAP python)
ocv_module_include_directories()
ocv_glob_module_sources()

View File

@@ -309,6 +309,9 @@ struct CV_EXPORTS_W_SIMPLE FormatInfo
CV_PROP_RW double fps;
CV_PROP_RW int ulNumDecodeSurfaces;//!< Maximum number of internal decode surfaces.
CV_PROP_RW DeinterlaceMode deinterlaceMode;
CV_PROP_RW cv::Size targetSz;//!< Post-processed size of the output frame.
CV_PROP_RW cv::Rect srcRoi;//!< Region of interest decoded from video source.
CV_PROP_RW cv::Rect targetRoi;//!< Region of interest in the output frame containing the decoded frame.
};
/** @brief cv::cudacodec::VideoReader generic properties identifier.
@@ -516,6 +519,10 @@ surfaces it requires for correct functionality and optimal video memory usage bu
overall application. The optimal number of decode surfaces (in terms of performance and memory utilization) should be decided by experimentation for each application,
but it cannot go below the number determined by NVDEC.
@param rawMode Allow the raw encoded data which has been read up until the last call to grab() to be retrieved by calling retrieve(rawData,RAW_DATA_IDX).
@param targetSz Post-processed size of the output frame, defaults to the size of the encoded video source. Width and height should be multiples of 2;
values that are not are truncated to a multiple of 2.
@param srcRoi Region of interest decoded from the video source, defaults to the full frame. x/width should be multiples of 4 and y/height multiples of 2;
values that are not are truncated to the required multiple.
@param targetRoi Region of interest within the output frame to copy and resize the decoded frame to, defaults to the full frame. x/width should be
multiples of 4 and y/height multiples of 2; values that are not are truncated to the required multiple.
*/
struct CV_EXPORTS_W_SIMPLE VideoReaderInitParams {
CV_WRAP VideoReaderInitParams() : udpSource(false), allowFrameDrop(false), minNumDecodeSurfaces(0), rawMode(0) {};
@@ -523,6 +530,9 @@ struct CV_EXPORTS_W_SIMPLE VideoReaderInitParams {
CV_PROP_RW bool allowFrameDrop;
CV_PROP_RW int minNumDecodeSurfaces;
CV_PROP_RW bool rawMode;
CV_PROP_RW cv::Size targetSz;
CV_PROP_RW cv::Rect srcRoi;
CV_PROP_RW cv::Rect targetRoi;
};
/** @brief Creates video reader.

View File

@@ -148,6 +148,14 @@ void cv::cudacodec::detail::VideoDecoder::create(const FormatInfo& videoFormat)
createInfo_.ulTargetHeight = videoFormat.height;
createInfo_.ulMaxWidth = videoFormat.ulMaxWidth;
createInfo_.ulMaxHeight = videoFormat.ulMaxHeight;
createInfo_.display_area.left = videoFormat.displayArea.x;
createInfo_.display_area.right = videoFormat.displayArea.x + videoFormat.displayArea.width;
createInfo_.display_area.top = videoFormat.displayArea.y;
createInfo_.display_area.bottom = videoFormat.displayArea.y + videoFormat.displayArea.height;
createInfo_.target_rect.left = videoFormat.targetRoi.x;
createInfo_.target_rect.right = videoFormat.targetRoi.x + videoFormat.targetRoi.width;
createInfo_.target_rect.top = videoFormat.targetRoi.y;
createInfo_.target_rect.bottom = videoFormat.targetRoi.y + videoFormat.targetRoi.height;
createInfo_.ulNumOutputSurfaces = 2;
createInfo_.ulCreationFlags = videoCreateFlags;
createInfo_.vidLock = lock_;

View File

@@ -49,10 +49,17 @@ namespace cv { namespace cudacodec { namespace detail {
class VideoDecoder
{
public:
VideoDecoder(const Codec& codec, const int minNumDecodeSurfaces, CUcontext ctx, CUvideoctxlock lock) : ctx_(ctx), lock_(lock), decoder_(0)
// Stores the codec, the requested minimum number of decode surfaces and the
// requested scaling/cropping geometry in videoFormat_. The decoder handle
// itself starts out null (decoder_(0)).
//
// targetSz, srcRoi and targetRoi are not stored verbatim: each component is
// truncated to the multiple required for it (2 for heights, y offsets and the
// target size; 4 for x offsets and ROI widths).
VideoDecoder(const Codec& codec, const int minNumDecodeSurfaces, cv::Size targetSz, cv::Rect srcRoi, cv::Rect targetRoi, CUcontext ctx, CUvideoctxlock lock) :
    ctx_(ctx), lock_(lock), decoder_(0)
{
    // Drops the remainder so that the result is a multiple of `multiple`.
    const auto truncateTo = [](const int value, const int multiple) { return value - value % multiple; };

    videoFormat_.codec = codec;
    videoFormat_.ulNumDecodeSurfaces = minNumDecodeSurfaces;

    // alignment enforced by nvcuvid, likely due to chroma subsampling
    videoFormat_.targetSz.width = truncateTo(targetSz.width, 2);
    videoFormat_.targetSz.height = truncateTo(targetSz.height, 2);

    videoFormat_.srcRoi.x = truncateTo(srcRoi.x, 4);
    videoFormat_.srcRoi.width = truncateTo(srcRoi.width, 4);
    videoFormat_.srcRoi.y = truncateTo(srcRoi.y, 2);
    videoFormat_.srcRoi.height = truncateTo(srcRoi.height, 2);

    videoFormat_.targetRoi.x = truncateTo(targetRoi.x, 4);
    videoFormat_.targetRoi.width = truncateTo(targetRoi.width, 4);
    videoFormat_.targetRoi.y = truncateTo(targetRoi.y, 2);
    videoFormat_.targetRoi.height = truncateTo(targetRoi.height, 2);
}
~VideoDecoder()
@@ -66,6 +73,9 @@ public:
// Get the code-type currently used.
cudaVideoCodec codec() const { return static_cast<cudaVideoCodec>(videoFormat_.codec); }
int nDecodeSurfaces() const { return videoFormat_.ulNumDecodeSurfaces; }
cv::Size getTargetSz() const { return videoFormat_.targetSz; }
cv::Rect getSrcRoi() const { return videoFormat_.srcRoi; }
cv::Rect getTargetRoi() const { return videoFormat_.targetRoi; }
unsigned long frameWidth() const { return videoFormat_.ulWidth; }
unsigned long frameHeight() const { return videoFormat_.ulHeight; }
@@ -89,7 +99,7 @@ public:
cuSafeCall( cuvidMapVideoFrame(decoder_, picIdx, &ptr, &pitch, &videoProcParams) );
return cuda::GpuMat(frameHeight() * 3 / 2, frameWidth(), CV_8UC1, (void*) ptr, pitch);
return cuda::GpuMat(targetHeight() * 3 / 2, targetWidth(), CV_8UC1, (void*) ptr, pitch);
}
void unmapFrame(cuda::GpuMat& frame)

View File

@@ -120,10 +120,19 @@ int CUDAAPI cv::cudacodec::detail::VideoParser::HandleVideoSequence(void* userDa
newFormat.nBitDepthMinus8 = format->bit_depth_luma_minus8;
newFormat.ulWidth = format->coded_width;
newFormat.ulHeight = format->coded_height;
newFormat.width = format->coded_width;
newFormat.height = format->coded_height;
newFormat.displayArea = Rect(Point(format->display_area.left, format->display_area.top), Point(format->display_area.right, format->display_area.bottom));
newFormat.fps = format->frame_rate.numerator / static_cast<float>(format->frame_rate.denominator);
newFormat.targetSz = thiz->videoDecoder_->getTargetSz();
newFormat.width = newFormat.targetSz.width ? newFormat.targetSz.width : format->coded_width;
newFormat.height = newFormat.targetSz.height ? newFormat.targetSz.height : format->coded_height;
newFormat.srcRoi = thiz->videoDecoder_->getSrcRoi();
if (newFormat.srcRoi.empty()) {
format->display_area.right = format->coded_width;
format->display_area.bottom = format->coded_height;
newFormat.displayArea = Rect(Point(format->display_area.left, format->display_area.top), Point(format->display_area.right, format->display_area.bottom));
}
else
newFormat.displayArea = newFormat.srcRoi;
newFormat.targetRoi = thiz->videoDecoder_->getTargetRoi();
newFormat.ulNumDecodeSurfaces = min(!thiz->allowFrameDrop_ ? max(thiz->videoDecoder_->nDecodeSurfaces(), static_cast<int>(format->min_num_decode_surfaces)) :
format->min_num_decode_surfaces * 2, 32);
if (format->progressive_sequence)

View File

@@ -86,7 +86,8 @@ namespace
class VideoReaderImpl : public VideoReader
{
public:
explicit VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces, const bool allowFrameDrop = false , const bool udpSource = false);
explicit VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces, const bool allowFrameDrop = false , const bool udpSource = false,
const Size targetSz = Size(), const Rect srcRoi = Rect(), const Rect targetRoi = Rect());
~VideoReaderImpl();
bool nextFrame(GpuMat& frame, Stream& stream) CV_OVERRIDE;
@@ -131,7 +132,8 @@ namespace
return videoSource_->format();
}
VideoReaderImpl::VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces, const bool allowFrameDrop, const bool udpSource) :
VideoReaderImpl::VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces, const bool allowFrameDrop, const bool udpSource,
const Size targetSz, const Rect srcRoi, const Rect targetRoi) :
videoSource_(source),
lock_(0)
{
@@ -143,7 +145,7 @@ namespace
cuSafeCall( cuCtxGetCurrent(&ctx) );
cuSafeCall( cuvidCtxLockCreate(&lock_, ctx) );
frameQueue_.reset(new FrameQueue());
videoDecoder_.reset(new VideoDecoder(videoSource_->format().codec, minNumDecodeSurfaces, ctx, lock_));
videoDecoder_.reset(new VideoDecoder(videoSource_->format().codec, minNumDecodeSurfaces, targetSz, srcRoi, targetRoi, ctx, lock_));
videoParser_.reset(new VideoParser(videoDecoder_, frameQueue_, allowFrameDrop, udpSource));
videoSource_->setVideoParser(videoParser_);
videoSource_->start();
@@ -357,13 +359,15 @@ Ptr<VideoReader> cv::cudacodec::createVideoReader(const String& filename, const
videoSource.reset(new CuvidVideoSource(filename));
}
return makePtr<VideoReaderImpl>(videoSource, params.minNumDecodeSurfaces, params.allowFrameDrop, params.udpSource);
return makePtr<VideoReaderImpl>(videoSource, params.minNumDecodeSurfaces, params.allowFrameDrop, params.udpSource, params.targetSz,
params.srcRoi, params.targetRoi);
}
// Creates a VideoReader that reads from a caller-supplied RawVideoSource.
// The source is wrapped in a RawVideoSourceWrapper (constructed with
// params.rawMode) and handed to a new VideoReaderImpl together with the
// settings taken from params.
Ptr<VideoReader> cv::cudacodec::createVideoReader(const Ptr<RawVideoSource>& source, const VideoReaderInitParams params)
{
Ptr<VideoSource> videoSource(new RawVideoSourceWrapper(source, params.rawMode));
// NOTE(review): two consecutive return statements follow. As written only the
// first is reachable; they appear to be the before/after lines of a diff
// rendering (the second one forwards allowFrameDrop, udpSource, targetSz,
// srcRoi and targetRoi as well) - confirm against the real source file.
return makePtr<VideoReaderImpl>(videoSource, params.minNumDecodeSurfaces);
return makePtr<VideoReaderImpl>(videoSource, params.minNumDecodeSurfaces, params.allowFrameDrop, params.udpSource, params.targetSz,
params.srcRoi, params.targetRoi);
}
#endif // HAVE_NVCUVID

View File

@@ -47,6 +47,8 @@
#include "opencv2/ts/cuda_test.hpp"
#include "opencv2/cudacodec.hpp"
#include "opencv2/cudawarping.hpp"
#include "opencv2/cudaarithm.hpp"
#include "cvconfig.h"

View File

@@ -54,6 +54,10 @@ PARAM_TEST_CASE(CheckExtraData, cv::cuda::DeviceInfo, check_extra_data_params_t)
{
};
// Parameterised fixture for the VideoReader scaling/cropping test.
// Parameters, in order: CUDA device, video path relative to the test data
// directory, target size, source ROI and target ROI. The size and both ROIs
// are given as floating-point fractions: the Scaling.Reader test multiplies
// them by the source frame size (target size, source ROI) or by the resulting
// target size (target ROI) to obtain pixel values.
PARAM_TEST_CASE(Scaling, cv::cuda::DeviceInfo, std::string, Size2f, Rect2f, Rect2f)
{
};
PARAM_TEST_CASE(Video, cv::cuda::DeviceInfo, std::string)
{
};
@@ -177,6 +181,47 @@ CUDA_TEST_P(CheckKeyFrame, Reader)
}
}
// Checks that a VideoReader created with targetSz/srcRoi/targetRoi reports the
// expected (aligned) geometry and that the decoded frame is close to a
// reference obtained by cropping and resizing an unscaled decode of the same
// first frame.
CUDA_TEST_P(Scaling, Reader)
{
cv::cuda::setDevice(GET_PARAM(0).deviceID());
std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../" + GET_PARAM(1);
// Fractional size/ROI parameters; converted to pixels below.
const Size2f targetSzIn = GET_PARAM(2);
const Rect2f srcRoiIn = GET_PARAM(3);
const Rect2f targetRoiIn = GET_PARAM(4);

// Reference: first frame decoded in grayscale with default reader settings.
// The reader is scoped so it is released before the second reader is created.
GpuMat frameOr;
{
cv::Ptr<cv::cudacodec::VideoReader> readerGs = cv::cudacodec::createVideoReader(inputFile);
readerGs->set(cudacodec::ColorFormat::GRAY);
ASSERT_TRUE(readerGs->nextFrame(frameOr));
}

// Requested geometry in pixels. The float products are implicitly converted
// to int when the Size/Rect objects are constructed.
// targetSz and srcRoi are relative to the reference frame size, targetRoi is
// relative to the requested target size.
cudacodec::VideoReaderInitParams params;
params.targetSz = Size(frameOr.cols * targetSzIn.width, frameOr.rows * targetSzIn.height);
params.srcRoi = Rect(frameOr.cols * srcRoiIn.x, frameOr.rows * srcRoiIn.y, frameOr.cols * srcRoiIn.width, frameOr.rows * srcRoiIn.height);
params.targetRoi = Rect(params.targetSz.width * targetRoiIn.x, params.targetSz.height * targetRoiIn.y, params.targetSz.width * targetRoiIn.width,
params.targetSz.height * targetRoiIn.height);
// Reader under test; the second argument is passed as an empty initializer.
cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile, {}, params);
reader->set(cudacodec::ColorFormat::GRAY);
GpuMat frame;
ASSERT_TRUE(reader->nextFrame(frame));
const cudacodec::FormatInfo format = reader->format();

// Expected geometry: the requested values truncated to the documented
// alignment (multiples of 2 for the target size, y and height; multiples of 4
// for ROI x and width).
Size targetSzOut;
targetSzOut.width = params.targetSz.width - params.targetSz.width % 2; targetSzOut.height = params.targetSz.height - params.targetSz.height % 2;
Rect srcRoiOut, targetRoiOut;
srcRoiOut.x = params.srcRoi.x - params.srcRoi.x % 4; srcRoiOut.width = params.srcRoi.width - params.srcRoi.width % 4;
srcRoiOut.y = params.srcRoi.y - params.srcRoi.y % 2; srcRoiOut.height = params.srcRoi.height - params.srcRoi.height % 2;
targetRoiOut.x = params.targetRoi.x - params.targetRoi.x % 4; targetRoiOut.width = params.targetRoi.width - params.targetRoi.width % 4;
targetRoiOut.y = params.targetRoi.y - params.targetRoi.y % 2; targetRoiOut.height = params.targetRoi.height - params.targetRoi.height % 2;
// The reader must report exactly the aligned geometry and produce a frame of
// the aligned target size.
ASSERT_TRUE(format.valid && format.targetSz == targetSzOut && format.srcRoi == srcRoiOut && format.targetRoi == targetRoiOut);
ASSERT_TRUE(frame.size() == targetSzOut);

// Build the expected image by cropping the reference frame to the source ROI
// and resizing it to the target ROI size.
GpuMat frameGs;
cv::cuda::resize(frameOr(srcRoiOut), frameGs, targetRoiOut.size(), 0, 0, INTER_AREA);
// assert on mean absolute error due to different resize algorithms
// (L1 norm of the difference divided by the number of pixels compared)
const double mae = cv::cuda::norm(frameGs, frame(targetRoiOut), NORM_L1)/frameGs.size().area();
ASSERT_LT(mae, 2.35);
}
CUDA_TEST_P(Video, Reader)
{
cv::cuda::setDevice(GET_PARAM(0).deviceID());
@@ -431,7 +476,14 @@ INSTANTIATE_TEST_CASE_P(CUDA_Codec, CheckSet, testing::Combine(
ALL_DEVICES,
testing::Values("highgui/video/big_buck_bunny.mp4")));
#define VIDEO_SRC_R "highgui/video/big_buck_bunny.mp4", "cv/video/768x576.avi", "cv/video/1920x1080.avi", "highgui/video/big_buck_bunny.avi", \
#define VIDEO_SRC_SCALING "highgui/video/big_buck_bunny.mp4"
#define TARGET_SZ Size2f(1,1), Size2f(0.8,0.9), Size2f(2.3,1.8)
#define SRC_ROI Rect2f(0,0,1,1), Rect2f(0.25,0.25,0.5,0.5)
#define TARGET_ROI Rect2f(0,0,1,1), Rect2f(0.2,0.3,0.6,0.7)
INSTANTIATE_TEST_CASE_P(CUDA_Codec, Scaling, testing::Combine(
ALL_DEVICES, testing::Values(VIDEO_SRC_SCALING), testing::Values(TARGET_SZ), testing::Values(SRC_ROI), testing::Values(TARGET_ROI)));
#define VIDEO_SRC_R "highgui/video/big_buck_bunny.mp4", "cv/video/768x576.avi", "cv/video/1920x1080.avi", "highgui/video/big_buck_bunny.avi", \
"highgui/video/big_buck_bunny.h264", "highgui/video/big_buck_bunny.h265", "highgui/video/big_buck_bunny.mpg", \
"highgui/video/sample_322x242_15frames.yuv420p.libvpx-vp9.mp4", "highgui/video/sample_322x242_15frames.yuv420p.libaom-av1.mp4", \
"cv/tracking/faceocc2/data/faceocc2.webm"