# Patch summary (modules/cudacodec): adds optional output scaling / source cropping to cv::cudacodec::VideoReader.
#
# - CMakeLists.txt: opencv_cudaarithm and opencv_cudawarping are added to the ocv_add_module() dependency list
#   (test_precomp.hpp now includes both headers; the new test calls cv::cuda::resize and cv::cuda::norm).
# - cudacodec.hpp: FormatInfo and VideoReaderInitParams each gain targetSz, srcRoi and targetRoi. The
#   VideoReaderInitParams constructor's initializer list is unchanged, so the three new members are
#   default-constructed.
# - video_decoder.hpp: the VideoDecoder constructor takes targetSz/srcRoi/targetRoi and truncates each value with
#   `v - v % k` (k = 2 for targetSz width/height and ROI y/height, k = 4 for ROI x/width) before storing it in
#   videoFormat_; getTargetSz()/getSrcRoi()/getTargetRoi() return the stored values. The code that wraps the
#   cuvidMapVideoFrame() result in a GpuMat now sizes it from targetHeight()/targetWidth(), which are not
#   defined anywhere in this patch.
# - video_decoder.cpp: VideoDecoder::create() fills createInfo_.display_area from videoFormat.displayArea and
#   createInfo_.target_rect from videoFormat.targetRoi.
# - video_parser.cpp: HandleVideoSequence takes width/height from targetSz when non-zero, otherwise from the coded
#   size. When srcRoi is empty it overwrites format->display_area.right/bottom with the coded width/height before
#   building displayArea; otherwise displayArea is set to srcRoi.
# - video_reader.cpp: VideoReaderImpl forwards the three new parameters to VideoDecoder. The createVideoReader
#   overload that wraps its source in RawVideoSourceWrapper now also forwards params.allowFrameDrop and
#   params.udpSource, which it previously did not pass.
# - test_video.cpp: new parameterized Scaling test checks the reported FormatInfo and compares the decoded frame
#   against cv::cuda::resize of the unscaled frame using a mean-absolute-error threshold of 2.35.
#
# NOTE(review): this copy of the patch appears to have lost its line breaks and its template argument lists
# (e.g. `static_cast(videoFormat_.codec)`, `makePtr(videoSource, ...)`, `const Ptr& source`); it presumably will
# not apply as-is and should be regenerated from the original commit - TODO confirm.
diff --git a/modules/cudacodec/CMakeLists.txt b/modules/cudacodec/CMakeLists.txt index 071404ecc..9281024e8 100644 --- a/modules/cudacodec/CMakeLists.txt +++ b/modules/cudacodec/CMakeLists.txt @@ -6,7 +6,7 @@ set(the_description "CUDA-accelerated Video Encoding/Decoding") ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wshadow) -ocv_add_module(cudacodec opencv_core opencv_videoio OPTIONAL opencv_cudev WRAP python) +ocv_add_module(cudacodec opencv_core opencv_videoio opencv_cudaarithm opencv_cudawarping OPTIONAL opencv_cudev WRAP python) ocv_module_include_directories() ocv_glob_module_sources() diff --git a/modules/cudacodec/include/opencv2/cudacodec.hpp b/modules/cudacodec/include/opencv2/cudacodec.hpp index 54ec5bed0..5aacba0ab 100644 --- a/modules/cudacodec/include/opencv2/cudacodec.hpp +++ b/modules/cudacodec/include/opencv2/cudacodec.hpp @@ -309,6 +309,9 @@ struct CV_EXPORTS_W_SIMPLE FormatInfo CV_PROP_RW double fps; CV_PROP_RW int ulNumDecodeSurfaces;//!< Maximum number of internal decode surfaces. CV_PROP_RW DeinterlaceMode deinterlaceMode; + CV_PROP_RW cv::Size targetSz;//!< Post-processed size of the output frame. + CV_PROP_RW cv::Rect srcRoi;//!< Region of interest decoded from video source. + CV_PROP_RW cv::Rect targetRoi;//!< Region of interest in the output frame containing the decoded frame. }; /** @brief cv::cudacodec::VideoReader generic properties identifier. @@ -516,6 +519,10 @@ surfaces it requires for correct functionality and optimal video memory usage bu overall application. The optimal number of decode surfaces (in terms of performance and memory utilization) should be decided by experimentation for each application, but it cannot go below the number determined by NVDEC. @param rawMode Allow the raw encoded data which has been read up until the last call to grab() to be retrieved by calling retrieve(rawData,RAW_DATA_IDX). 
+@param targetSz Post-processed size (width/height should be multiples of 2) of the output frame, defaults to the size of the encoded video source. +@param srcRoi Region of interest (x/width should be multiples of 4 and y/height multiples of 2) decoded from video source, defaults to the full frame. +@param targetRoi Region of interest (x/width should be multiples of 4 and y/height multiples of 2) within the output frame to copy and resize the decoded frame to, +defaults to the full frame. */ struct CV_EXPORTS_W_SIMPLE VideoReaderInitParams { CV_WRAP VideoReaderInitParams() : udpSource(false), allowFrameDrop(false), minNumDecodeSurfaces(0), rawMode(0) {}; @@ -523,6 +530,9 @@ struct CV_EXPORTS_W_SIMPLE VideoReaderInitParams { CV_PROP_RW bool allowFrameDrop; CV_PROP_RW int minNumDecodeSurfaces; CV_PROP_RW bool rawMode; + CV_PROP_RW cv::Size targetSz; + CV_PROP_RW cv::Rect srcRoi; + CV_PROP_RW cv::Rect targetRoi; }; /** @brief Creates video reader. diff --git a/modules/cudacodec/src/video_decoder.cpp b/modules/cudacodec/src/video_decoder.cpp index 69845a7a0..f828b08c1 100644 --- a/modules/cudacodec/src/video_decoder.cpp +++ b/modules/cudacodec/src/video_decoder.cpp @@ -148,6 +148,14 @@ void cv::cudacodec::detail::VideoDecoder::create(const FormatInfo& videoFormat) createInfo_.ulTargetHeight = videoFormat.height; createInfo_.ulMaxWidth = videoFormat.ulMaxWidth; createInfo_.ulMaxHeight = videoFormat.ulMaxHeight; + createInfo_.display_area.left = videoFormat.displayArea.x; + createInfo_.display_area.right = videoFormat.displayArea.x + videoFormat.displayArea.width; + createInfo_.display_area.top = videoFormat.displayArea.y; + createInfo_.display_area.bottom = videoFormat.displayArea.y + videoFormat.displayArea.height; + createInfo_.target_rect.left = videoFormat.targetRoi.x; + createInfo_.target_rect.right = videoFormat.targetRoi.x + videoFormat.targetRoi.width; + createInfo_.target_rect.top = videoFormat.targetRoi.y; + createInfo_.target_rect.bottom = 
videoFormat.targetRoi.y + videoFormat.targetRoi.height; createInfo_.ulNumOutputSurfaces = 2; createInfo_.ulCreationFlags = videoCreateFlags; createInfo_.vidLock = lock_; diff --git a/modules/cudacodec/src/video_decoder.hpp b/modules/cudacodec/src/video_decoder.hpp index 98d8e6525..76d731f20 100644 --- a/modules/cudacodec/src/video_decoder.hpp +++ b/modules/cudacodec/src/video_decoder.hpp @@ -49,10 +49,17 @@ namespace cv { namespace cudacodec { namespace detail { class VideoDecoder { public: - VideoDecoder(const Codec& codec, const int minNumDecodeSurfaces, CUcontext ctx, CUvideoctxlock lock) : ctx_(ctx), lock_(lock), decoder_(0) + VideoDecoder(const Codec& codec, const int minNumDecodeSurfaces, cv::Size targetSz, cv::Rect srcRoi, cv::Rect targetRoi, CUcontext ctx, CUvideoctxlock lock) : + ctx_(ctx), lock_(lock), decoder_(0) { videoFormat_.codec = codec; videoFormat_.ulNumDecodeSurfaces = minNumDecodeSurfaces; + // alignment enforced by nvcuvid, likely due to chroma subsampling + videoFormat_.targetSz.width = targetSz.width - targetSz.width % 2; videoFormat_.targetSz.height = targetSz.height - targetSz.height % 2; + videoFormat_.srcRoi.x = srcRoi.x - srcRoi.x % 4; videoFormat_.srcRoi.width = srcRoi.width - srcRoi.width % 4; + videoFormat_.srcRoi.y = srcRoi.y - srcRoi.y % 2; videoFormat_.srcRoi.height = srcRoi.height - srcRoi.height % 2; + videoFormat_.targetRoi.x = targetRoi.x - targetRoi.x % 4; videoFormat_.targetRoi.width = targetRoi.width - targetRoi.width % 4; + videoFormat_.targetRoi.y = targetRoi.y - targetRoi.y % 2; videoFormat_.targetRoi.height = targetRoi.height - targetRoi.height % 2; } ~VideoDecoder() @@ -66,6 +73,9 @@ public: // Get the code-type currently used. 
cudaVideoCodec codec() const { return static_cast(videoFormat_.codec); } int nDecodeSurfaces() const { return videoFormat_.ulNumDecodeSurfaces; } + cv::Size getTargetSz() const { return videoFormat_.targetSz; } + cv::Rect getSrcRoi() const { return videoFormat_.srcRoi; } + cv::Rect getTargetRoi() const { return videoFormat_.targetRoi; } unsigned long frameWidth() const { return videoFormat_.ulWidth; } unsigned long frameHeight() const { return videoFormat_.ulHeight; } @@ -89,7 +99,7 @@ public: cuSafeCall( cuvidMapVideoFrame(decoder_, picIdx, &ptr, &pitch, &videoProcParams) ); - return cuda::GpuMat(frameHeight() * 3 / 2, frameWidth(), CV_8UC1, (void*) ptr, pitch); + return cuda::GpuMat(targetHeight() * 3 / 2, targetWidth(), CV_8UC1, (void*) ptr, pitch); } void unmapFrame(cuda::GpuMat& frame) diff --git a/modules/cudacodec/src/video_parser.cpp b/modules/cudacodec/src/video_parser.cpp index feda982c5..8bccd065a 100644 --- a/modules/cudacodec/src/video_parser.cpp +++ b/modules/cudacodec/src/video_parser.cpp @@ -120,10 +120,19 @@ int CUDAAPI cv::cudacodec::detail::VideoParser::HandleVideoSequence(void* userDa newFormat.nBitDepthMinus8 = format->bit_depth_luma_minus8; newFormat.ulWidth = format->coded_width; newFormat.ulHeight = format->coded_height; - newFormat.width = format->coded_width; - newFormat.height = format->coded_height; - newFormat.displayArea = Rect(Point(format->display_area.left, format->display_area.top), Point(format->display_area.right, format->display_area.bottom)); newFormat.fps = format->frame_rate.numerator / static_cast(format->frame_rate.denominator); + newFormat.targetSz = thiz->videoDecoder_->getTargetSz(); + newFormat.width = newFormat.targetSz.width ? newFormat.targetSz.width : format->coded_width; + newFormat.height = newFormat.targetSz.height ? 
newFormat.targetSz.height : format->coded_height; + newFormat.srcRoi = thiz->videoDecoder_->getSrcRoi(); + if (newFormat.srcRoi.empty()) { + format->display_area.right = format->coded_width; + format->display_area.bottom = format->coded_height; + newFormat.displayArea = Rect(Point(format->display_area.left, format->display_area.top), Point(format->display_area.right, format->display_area.bottom)); + } + else + newFormat.displayArea = newFormat.srcRoi; + newFormat.targetRoi = thiz->videoDecoder_->getTargetRoi(); newFormat.ulNumDecodeSurfaces = min(!thiz->allowFrameDrop_ ? max(thiz->videoDecoder_->nDecodeSurfaces(), static_cast(format->min_num_decode_surfaces)) : format->min_num_decode_surfaces * 2, 32); if (format->progressive_sequence) diff --git a/modules/cudacodec/src/video_reader.cpp b/modules/cudacodec/src/video_reader.cpp index 903defaf3..5eea8199e 100644 --- a/modules/cudacodec/src/video_reader.cpp +++ b/modules/cudacodec/src/video_reader.cpp @@ -86,7 +86,8 @@ namespace class VideoReaderImpl : public VideoReader { public: - explicit VideoReaderImpl(const Ptr& source, const int minNumDecodeSurfaces, const bool allowFrameDrop = false , const bool udpSource = false); + explicit VideoReaderImpl(const Ptr& source, const int minNumDecodeSurfaces, const bool allowFrameDrop = false , const bool udpSource = false, + const Size targetSz = Size(), const Rect srcRoi = Rect(), const Rect targetRoi = Rect()); ~VideoReaderImpl(); bool nextFrame(GpuMat& frame, Stream& stream) CV_OVERRIDE; @@ -131,7 +132,8 @@ namespace return videoSource_->format(); } - VideoReaderImpl::VideoReaderImpl(const Ptr& source, const int minNumDecodeSurfaces, const bool allowFrameDrop, const bool udpSource) : + VideoReaderImpl::VideoReaderImpl(const Ptr& source, const int minNumDecodeSurfaces, const bool allowFrameDrop, const bool udpSource, + const Size targetSz, const Rect srcRoi, const Rect targetRoi) : videoSource_(source), lock_(0) { @@ -143,7 +145,7 @@ namespace cuSafeCall( 
cuCtxGetCurrent(&ctx) ); cuSafeCall( cuvidCtxLockCreate(&lock_, ctx) ); frameQueue_.reset(new FrameQueue()); - videoDecoder_.reset(new VideoDecoder(videoSource_->format().codec, minNumDecodeSurfaces, ctx, lock_)); + videoDecoder_.reset(new VideoDecoder(videoSource_->format().codec, minNumDecodeSurfaces, targetSz, srcRoi, targetRoi, ctx, lock_)); videoParser_.reset(new VideoParser(videoDecoder_, frameQueue_, allowFrameDrop, udpSource)); videoSource_->setVideoParser(videoParser_); videoSource_->start(); @@ -357,13 +359,15 @@ Ptr cv::cudacodec::createVideoReader(const String& filename, const videoSource.reset(new CuvidVideoSource(filename)); } - return makePtr(videoSource, params.minNumDecodeSurfaces, params.allowFrameDrop, params.udpSource); + return makePtr(videoSource, params.minNumDecodeSurfaces, params.allowFrameDrop, params.udpSource, params.targetSz, + params.srcRoi, params.targetRoi); } Ptr cv::cudacodec::createVideoReader(const Ptr& source, const VideoReaderInitParams params) { Ptr videoSource(new RawVideoSourceWrapper(source, params.rawMode)); - return makePtr(videoSource, params.minNumDecodeSurfaces); + return makePtr(videoSource, params.minNumDecodeSurfaces, params.allowFrameDrop, params.udpSource, params.targetSz, + params.srcRoi, params.targetRoi); } #endif // HAVE_NVCUVID diff --git a/modules/cudacodec/test/test_precomp.hpp b/modules/cudacodec/test/test_precomp.hpp index dd5848252..7d38b1142 100644 --- a/modules/cudacodec/test/test_precomp.hpp +++ b/modules/cudacodec/test/test_precomp.hpp @@ -47,6 +47,8 @@ #include "opencv2/ts/cuda_test.hpp" #include "opencv2/cudacodec.hpp" +#include "opencv2/cudawarping.hpp" +#include "opencv2/cudaarithm.hpp" #include "cvconfig.h" diff --git a/modules/cudacodec/test/test_video.cpp b/modules/cudacodec/test/test_video.cpp index 216d0bc0e..b194ffa2f 100644 --- a/modules/cudacodec/test/test_video.cpp +++ b/modules/cudacodec/test/test_video.cpp @@ -54,6 +54,10 @@ PARAM_TEST_CASE(CheckExtraData, cv::cuda::DeviceInfo, 
check_extra_data_params_t) { }; +PARAM_TEST_CASE(Scaling, cv::cuda::DeviceInfo, std::string, Size2f, Rect2f, Rect2f) +{ +}; + PARAM_TEST_CASE(Video, cv::cuda::DeviceInfo, std::string) { }; @@ -177,6 +181,47 @@ CUDA_TEST_P(CheckKeyFrame, Reader) } } +CUDA_TEST_P(Scaling, Reader) +{ + cv::cuda::setDevice(GET_PARAM(0).deviceID()); + std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../" + GET_PARAM(1); + const Size2f targetSzIn = GET_PARAM(2); + const Rect2f srcRoiIn = GET_PARAM(3); + const Rect2f targetRoiIn = GET_PARAM(4); + + GpuMat frameOr; + { + cv::Ptr readerGs = cv::cudacodec::createVideoReader(inputFile); + readerGs->set(cudacodec::ColorFormat::GRAY); + ASSERT_TRUE(readerGs->nextFrame(frameOr)); + } + + cudacodec::VideoReaderInitParams params; + params.targetSz = Size(frameOr.cols * targetSzIn.width, frameOr.rows * targetSzIn.height); + params.srcRoi = Rect(frameOr.cols * srcRoiIn.x, frameOr.rows * srcRoiIn.y, frameOr.cols * srcRoiIn.width, frameOr.rows * srcRoiIn.height); + params.targetRoi = Rect(params.targetSz.width * targetRoiIn.x, params.targetSz.height * targetRoiIn.y, params.targetSz.width * targetRoiIn.width, + params.targetSz.height * targetRoiIn.height); + cv::Ptr reader = cv::cudacodec::createVideoReader(inputFile, {}, params); + reader->set(cudacodec::ColorFormat::GRAY); + GpuMat frame; + ASSERT_TRUE(reader->nextFrame(frame)); + const cudacodec::FormatInfo format = reader->format(); + Size targetSzOut; + targetSzOut.width = params.targetSz.width - params.targetSz.width % 2; targetSzOut.height = params.targetSz.height - params.targetSz.height % 2; + Rect srcRoiOut, targetRoiOut; + srcRoiOut.x = params.srcRoi.x - params.srcRoi.x % 4; srcRoiOut.width = params.srcRoi.width - params.srcRoi.width % 4; + srcRoiOut.y = params.srcRoi.y - params.srcRoi.y % 2; srcRoiOut.height = params.srcRoi.height - params.srcRoi.height % 2; + targetRoiOut.x = params.targetRoi.x - params.targetRoi.x % 4; targetRoiOut.width = params.targetRoi.width 
- params.targetRoi.width % 4; + targetRoiOut.y = params.targetRoi.y - params.targetRoi.y % 2; targetRoiOut.height = params.targetRoi.height - params.targetRoi.height % 2; + ASSERT_TRUE(format.valid && format.targetSz == targetSzOut && format.srcRoi == srcRoiOut && format.targetRoi == targetRoiOut); + ASSERT_TRUE(frame.size() == targetSzOut); + GpuMat frameGs; + cv::cuda::resize(frameOr(srcRoiOut), frameGs, targetRoiOut.size(), 0, 0, INTER_AREA); + // assert on mean absolute error due to different resize algorithms + const double mae = cv::cuda::norm(frameGs, frame(targetRoiOut), NORM_L1)/frameGs.size().area(); + ASSERT_LT(mae, 2.35); +} + CUDA_TEST_P(Video, Reader) { cv::cuda::setDevice(GET_PARAM(0).deviceID()); @@ -431,7 +476,14 @@ INSTANTIATE_TEST_CASE_P(CUDA_Codec, CheckSet, testing::Combine( ALL_DEVICES, testing::Values("highgui/video/big_buck_bunny.mp4"))); -#define VIDEO_SRC_R "highgui/video/big_buck_bunny.mp4", "cv/video/768x576.avi", "cv/video/1920x1080.avi", "highgui/video/big_buck_bunny.avi", \ +#define VIDEO_SRC_SCALING "highgui/video/big_buck_bunny.mp4" +#define TARGET_SZ Size2f(1,1), Size2f(0.8,0.9), Size2f(2.3,1.8) +#define SRC_ROI Rect2f(0,0,1,1), Rect2f(0.25,0.25,0.5,0.5) +#define TARGET_ROI Rect2f(0,0,1,1), Rect2f(0.2,0.3,0.6,0.7) +INSTANTIATE_TEST_CASE_P(CUDA_Codec, Scaling, testing::Combine( + ALL_DEVICES, testing::Values(VIDEO_SRC_SCALING), testing::Values(TARGET_SZ), testing::Values(SRC_ROI), testing::Values(TARGET_ROI))); + +#define VIDEO_SRC_R "highgui/video/big_buck_bunny.mp4", "cv/video/768x576.avi", "cv/video/1920x1080.avi", "highgui/video/big_buck_bunny.avi", \ "highgui/video/big_buck_bunny.h264", "highgui/video/big_buck_bunny.h265", "highgui/video/big_buck_bunny.mpg", \ "highgui/video/sample_322x242_15frames.yuv420p.libvpx-vp9.mp4", "highgui/video/sample_322x242_15frames.yuv420p.libaom-av1.mp4", \ "cv/tracking/faceocc2/data/faceocc2.webm"