mirror of
https://github.com/opencv/opencv_contrib.git
synced 2025-10-17 07:04:18 +08:00
Merge pull request #3355 from cudawarped:cudacodec_add_resize_crop
Add scaling and cropping options to `cudacodec::VideoReader`
This commit is contained in:
@@ -6,7 +6,7 @@ set(the_description "CUDA-accelerated Video Encoding/Decoding")
|
||||
|
||||
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wshadow)
|
||||
|
||||
ocv_add_module(cudacodec opencv_core opencv_videoio OPTIONAL opencv_cudev WRAP python)
|
||||
ocv_add_module(cudacodec opencv_core opencv_videoio opencv_cudaarithm opencv_cudawarping OPTIONAL opencv_cudev WRAP python)
|
||||
|
||||
ocv_module_include_directories()
|
||||
ocv_glob_module_sources()
|
||||
|
@@ -309,6 +309,9 @@ struct CV_EXPORTS_W_SIMPLE FormatInfo
|
||||
CV_PROP_RW double fps;
|
||||
CV_PROP_RW int ulNumDecodeSurfaces;//!< Maximum number of internal decode surfaces.
|
||||
CV_PROP_RW DeinterlaceMode deinterlaceMode;
|
||||
CV_PROP_RW cv::Size targetSz;//!< Post-processed size of the output frame.
|
||||
CV_PROP_RW cv::Rect srcRoi;//!< Region of interest decoded from video source.
|
||||
CV_PROP_RW cv::Rect targetRoi;//!< Region of interest in the output frame containing the decoded frame.
|
||||
};
|
||||
|
||||
/** @brief cv::cudacodec::VideoReader generic properties identifier.
|
||||
@@ -516,6 +519,10 @@ surfaces it requires for correct functionality and optimal video memory usage bu
|
||||
overall application. The optimal number of decode surfaces (in terms of performance and memory utilization) should be decided by experimentation for each application,
|
||||
but it cannot go below the number determined by NVDEC.
|
||||
@param rawMode Allow the raw encoded data which has been read up until the last call to grab() to be retrieved by calling retrieve(rawData,RAW_DATA_IDX).
|
||||
@param targetSz Post-processed size (width/height should be multiples of 2) of the output frame, defaults to the size of the encoded video source.
|
||||
@param srcRoi Region of interest (x/width should be multiples of 4 and y/height multiples of 2) decoded from video source, defaults to the full frame.
|
||||
@param targetRoi Region of interest (x/width should be multiples of 4 and y/height multiples of 2) within the output frame to copy and resize the decoded frame to,
|
||||
defaults to the full frame.
|
||||
*/
|
||||
struct CV_EXPORTS_W_SIMPLE VideoReaderInitParams {
|
||||
CV_WRAP VideoReaderInitParams() : udpSource(false), allowFrameDrop(false), minNumDecodeSurfaces(0), rawMode(0) {};
|
||||
@@ -523,6 +530,9 @@ struct CV_EXPORTS_W_SIMPLE VideoReaderInitParams {
|
||||
CV_PROP_RW bool allowFrameDrop;
|
||||
CV_PROP_RW int minNumDecodeSurfaces;
|
||||
CV_PROP_RW bool rawMode;
|
||||
CV_PROP_RW cv::Size targetSz;
|
||||
CV_PROP_RW cv::Rect srcRoi;
|
||||
CV_PROP_RW cv::Rect targetRoi;
|
||||
};
|
||||
|
||||
/** @brief Creates video reader.
|
||||
|
@@ -148,6 +148,14 @@ void cv::cudacodec::detail::VideoDecoder::create(const FormatInfo& videoFormat)
|
||||
createInfo_.ulTargetHeight = videoFormat.height;
|
||||
createInfo_.ulMaxWidth = videoFormat.ulMaxWidth;
|
||||
createInfo_.ulMaxHeight = videoFormat.ulMaxHeight;
|
||||
createInfo_.display_area.left = videoFormat.displayArea.x;
|
||||
createInfo_.display_area.right = videoFormat.displayArea.x + videoFormat.displayArea.width;
|
||||
createInfo_.display_area.top = videoFormat.displayArea.y;
|
||||
createInfo_.display_area.bottom = videoFormat.displayArea.y + videoFormat.displayArea.height;
|
||||
createInfo_.target_rect.left = videoFormat.targetRoi.x;
|
||||
createInfo_.target_rect.right = videoFormat.targetRoi.x + videoFormat.targetRoi.width;
|
||||
createInfo_.target_rect.top = videoFormat.targetRoi.y;
|
||||
createInfo_.target_rect.bottom = videoFormat.targetRoi.y + videoFormat.targetRoi.height;
|
||||
createInfo_.ulNumOutputSurfaces = 2;
|
||||
createInfo_.ulCreationFlags = videoCreateFlags;
|
||||
createInfo_.vidLock = lock_;
|
||||
|
@@ -49,10 +49,17 @@ namespace cv { namespace cudacodec { namespace detail {
|
||||
class VideoDecoder
|
||||
{
|
||||
public:
|
||||
VideoDecoder(const Codec& codec, const int minNumDecodeSurfaces, CUcontext ctx, CUvideoctxlock lock) : ctx_(ctx), lock_(lock), decoder_(0)
|
||||
// Records the requested decode configuration in videoFormat_; decoder_ is initialised to 0 here.
// @param codec                 Stored in videoFormat_.codec.
// @param minNumDecodeSurfaces  Stored in videoFormat_.ulNumDecodeSurfaces.
// @param targetSz              Requested output size; width/height are truncated toward zero to a multiple of 2.
// @param srcRoi                Requested source region; x/width are truncated toward zero to a multiple of 4, y/height to a multiple of 2.
// @param targetRoi             Requested region inside the output frame; aligned the same way as srcRoi.
// @param ctx                   Stored in ctx_.
// @param lock                  Stored in lock_.
VideoDecoder(const Codec& codec, const int minNumDecodeSurfaces, cv::Size targetSz, cv::Rect srcRoi, cv::Rect targetRoi, CUcontext ctx, CUvideoctxlock lock) :
|
||||
ctx_(ctx), lock_(lock), decoder_(0)
|
||||
{
|
||||
videoFormat_.codec = codec;
|
||||
videoFormat_.ulNumDecodeSurfaces = minNumDecodeSurfaces;
|
||||
// alignment enforced by nvcuvid, likely due to chroma subsampling
|
||||
// `v - v % k` drops the remainder, so each value below becomes a multiple of k (2 for the size).
videoFormat_.targetSz.width = targetSz.width - targetSz.width % 2; videoFormat_.targetSz.height = targetSz.height - targetSz.height % 2;
|
||||
// Horizontal ROI components use a multiple of 4, vertical components a multiple of 2.
videoFormat_.srcRoi.x = srcRoi.x - srcRoi.x % 4; videoFormat_.srcRoi.width = srcRoi.width - srcRoi.width % 4;
|
||||
videoFormat_.srcRoi.y = srcRoi.y - srcRoi.y % 2; videoFormat_.srcRoi.height = srcRoi.height - srcRoi.height % 2;
|
||||
videoFormat_.targetRoi.x = targetRoi.x - targetRoi.x % 4; videoFormat_.targetRoi.width = targetRoi.width - targetRoi.width % 4;
|
||||
videoFormat_.targetRoi.y = targetRoi.y - targetRoi.y % 2; videoFormat_.targetRoi.height = targetRoi.height - targetRoi.height % 2;
|
||||
}
|
||||
|
||||
~VideoDecoder()
|
||||
@@ -66,6 +73,9 @@ public:
|
||||
// Get the codec type currently used (videoFormat_.codec cast to cudaVideoCodec).
|
||||
cudaVideoCodec codec() const { return static_cast<cudaVideoCodec>(videoFormat_.codec); }
|
||||
// Number of decode surfaces currently stored in videoFormat_.ulNumDecodeSurfaces.
int nDecodeSurfaces() const { return videoFormat_.ulNumDecodeSurfaces; }
|
||||
// Output frame size stored in videoFormat_.targetSz (already aligned by the constructor).
cv::Size getTargetSz() const { return videoFormat_.targetSz; }
|
||||
// Source region of interest stored in videoFormat_.srcRoi (already aligned by the constructor).
cv::Rect getSrcRoi() const { return videoFormat_.srcRoi; }
|
||||
// Region of interest within the output frame stored in videoFormat_.targetRoi (already aligned by the constructor).
cv::Rect getTargetRoi() const { return videoFormat_.targetRoi; }
|
||||
|
||||
// Returns videoFormat_.ulWidth.
unsigned long frameWidth() const { return videoFormat_.ulWidth; }
|
||||
// Returns videoFormat_.ulHeight.
unsigned long frameHeight() const { return videoFormat_.ulHeight; }
|
||||
@@ -89,7 +99,7 @@ public:
|
||||
|
||||
cuSafeCall( cuvidMapVideoFrame(decoder_, picIdx, &ptr, &pitch, &videoProcParams) );
|
||||
|
||||
return cuda::GpuMat(frameHeight() * 3 / 2, frameWidth(), CV_8UC1, (void*) ptr, pitch);
|
||||
return cuda::GpuMat(targetHeight() * 3 / 2, targetWidth(), CV_8UC1, (void*) ptr, pitch);
|
||||
}
|
||||
|
||||
void unmapFrame(cuda::GpuMat& frame)
|
||||
|
@@ -120,10 +120,19 @@ int CUDAAPI cv::cudacodec::detail::VideoParser::HandleVideoSequence(void* userDa
|
||||
newFormat.nBitDepthMinus8 = format->bit_depth_luma_minus8;
|
||||
newFormat.ulWidth = format->coded_width;
|
||||
newFormat.ulHeight = format->coded_height;
|
||||
newFormat.width = format->coded_width;
|
||||
newFormat.height = format->coded_height;
|
||||
newFormat.displayArea = Rect(Point(format->display_area.left, format->display_area.top), Point(format->display_area.right, format->display_area.bottom));
|
||||
newFormat.fps = format->frame_rate.numerator / static_cast<float>(format->frame_rate.denominator);
|
||||
newFormat.targetSz = thiz->videoDecoder_->getTargetSz();
|
||||
newFormat.width = newFormat.targetSz.width ? newFormat.targetSz.width : format->coded_width;
|
||||
newFormat.height = newFormat.targetSz.height ? newFormat.targetSz.height : format->coded_height;
|
||||
newFormat.srcRoi = thiz->videoDecoder_->getSrcRoi();
|
||||
if (newFormat.srcRoi.empty()) {
|
||||
format->display_area.right = format->coded_width;
|
||||
format->display_area.bottom = format->coded_height;
|
||||
newFormat.displayArea = Rect(Point(format->display_area.left, format->display_area.top), Point(format->display_area.right, format->display_area.bottom));
|
||||
}
|
||||
else
|
||||
newFormat.displayArea = newFormat.srcRoi;
|
||||
newFormat.targetRoi = thiz->videoDecoder_->getTargetRoi();
|
||||
newFormat.ulNumDecodeSurfaces = min(!thiz->allowFrameDrop_ ? max(thiz->videoDecoder_->nDecodeSurfaces(), static_cast<int>(format->min_num_decode_surfaces)) :
|
||||
format->min_num_decode_surfaces * 2, 32);
|
||||
if (format->progressive_sequence)
|
||||
|
@@ -86,7 +86,8 @@ namespace
|
||||
class VideoReaderImpl : public VideoReader
|
||||
{
|
||||
public:
|
||||
explicit VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces, const bool allowFrameDrop = false , const bool udpSource = false);
|
||||
explicit VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces, const bool allowFrameDrop = false , const bool udpSource = false,
|
||||
const Size targetSz = Size(), const Rect srcRoi = Rect(), const Rect targetRoi = Rect());
|
||||
~VideoReaderImpl();
|
||||
|
||||
bool nextFrame(GpuMat& frame, Stream& stream) CV_OVERRIDE;
|
||||
@@ -131,7 +132,8 @@ namespace
|
||||
return videoSource_->format();
|
||||
}
|
||||
|
||||
VideoReaderImpl::VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces, const bool allowFrameDrop, const bool udpSource) :
|
||||
VideoReaderImpl::VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces, const bool allowFrameDrop, const bool udpSource,
|
||||
const Size targetSz, const Rect srcRoi, const Rect targetRoi) :
|
||||
videoSource_(source),
|
||||
lock_(0)
|
||||
{
|
||||
@@ -143,7 +145,7 @@ namespace
|
||||
cuSafeCall( cuCtxGetCurrent(&ctx) );
|
||||
cuSafeCall( cuvidCtxLockCreate(&lock_, ctx) );
|
||||
frameQueue_.reset(new FrameQueue());
|
||||
videoDecoder_.reset(new VideoDecoder(videoSource_->format().codec, minNumDecodeSurfaces, ctx, lock_));
|
||||
videoDecoder_.reset(new VideoDecoder(videoSource_->format().codec, minNumDecodeSurfaces, targetSz, srcRoi, targetRoi, ctx, lock_));
|
||||
videoParser_.reset(new VideoParser(videoDecoder_, frameQueue_, allowFrameDrop, udpSource));
|
||||
videoSource_->setVideoParser(videoParser_);
|
||||
videoSource_->start();
|
||||
@@ -357,13 +359,15 @@ Ptr<VideoReader> cv::cudacodec::createVideoReader(const String& filename, const
|
||||
videoSource.reset(new CuvidVideoSource(filename));
|
||||
}
|
||||
|
||||
return makePtr<VideoReaderImpl>(videoSource, params.minNumDecodeSurfaces, params.allowFrameDrop, params.udpSource);
|
||||
return makePtr<VideoReaderImpl>(videoSource, params.minNumDecodeSurfaces, params.allowFrameDrop, params.udpSource, params.targetSz,
|
||||
params.srcRoi, params.targetRoi);
|
||||
}
|
||||
|
||||
// Creates a VideoReader on top of a user-supplied RawVideoSource.
// The source is wrapped in a RawVideoSourceWrapper (with params.rawMode) and handed to VideoReaderImpl.
// @param source Raw video source to wrap.
// @param params Initialisation parameters forwarded to the reader implementation.
// @return Pointer to the newly created VideoReaderImpl.
Ptr<VideoReader> cv::cudacodec::createVideoReader(const Ptr<RawVideoSource>& source, const VideoReaderInitParams params)
|
||||
{
|
||||
Ptr<VideoSource> videoSource(new RawVideoSourceWrapper(source, params.rawMode));
|
||||
// NOTE(review): two return statements appear here because this text is a rendered diff; the line
// directly below (only minNumDecodeSurfaces forwarded) is presumably the pre-change version — confirm
// against the real source file.
return makePtr<VideoReaderImpl>(videoSource, params.minNumDecodeSurfaces);
|
||||
// Forwards frame-drop, UDP and the scaling/cropping settings as well.
return makePtr<VideoReaderImpl>(videoSource, params.minNumDecodeSurfaces, params.allowFrameDrop, params.udpSource, params.targetSz,
|
||||
params.srcRoi, params.targetRoi);
|
||||
}
|
||||
|
||||
#endif // HAVE_NVCUVID
|
||||
|
@@ -47,6 +47,8 @@
|
||||
#include "opencv2/ts/cuda_test.hpp"
|
||||
|
||||
#include "opencv2/cudacodec.hpp"
|
||||
#include "opencv2/cudawarping.hpp"
|
||||
#include "opencv2/cudaarithm.hpp"
|
||||
|
||||
#include "cvconfig.h"
|
||||
|
||||
|
@@ -54,6 +54,10 @@ PARAM_TEST_CASE(CheckExtraData, cv::cuda::DeviceInfo, check_extra_data_params_t)
|
||||
{
|
||||
};
|
||||
|
||||
// Fixture for the scaling/cropping reader test. Parameters, in order, as read by the Scaling test body:
// device, input video path, target size factors, source ROI factors, target ROI factors.
PARAM_TEST_CASE(Scaling, cv::cuda::DeviceInfo, std::string, Size2f, Rect2f, Rect2f)
|
||||
{
|
||||
};
|
||||
|
||||
PARAM_TEST_CASE(Video, cv::cuda::DeviceInfo, std::string)
|
||||
{
|
||||
};
|
||||
@@ -177,6 +181,47 @@ CUDA_TEST_P(CheckKeyFrame, Reader)
|
||||
}
|
||||
}
|
||||
|
||||
// Checks that a reader created with targetSz/srcRoi/targetRoi reports the aligned values through
// format(), outputs a frame of the aligned target size, and that the target ROI of that frame is close
// (mean absolute error) to a resized crop of the first frame decoded without scaling.
CUDA_TEST_P(Scaling, Reader)
|
||||
{
|
||||
cv::cuda::setDevice(GET_PARAM(0).deviceID());
|
||||
std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../" + GET_PARAM(1);
|
||||
// The size/ROI parameters are fractional factors; they are multiplied by pixel dimensions below.
const Size2f targetSzIn = GET_PARAM(2);
|
||||
const Rect2f srcRoiIn = GET_PARAM(3);
|
||||
const Rect2f targetRoiIn = GET_PARAM(4);
|
||||
|
||||
// Reference: first frame decoded with default parameters, in grayscale.
GpuMat frameOr;
|
||||
{
|
||||
cv::Ptr<cv::cudacodec::VideoReader> readerGs = cv::cudacodec::createVideoReader(inputFile);
|
||||
readerGs->set(cudacodec::ColorFormat::GRAY);
|
||||
ASSERT_TRUE(readerGs->nextFrame(frameOr));
|
||||
}
|
||||
|
||||
cudacodec::VideoReaderInitParams params;
|
||||
// Target size and source ROI are expressed relative to the reference frame size.
params.targetSz = Size(frameOr.cols * targetSzIn.width, frameOr.rows * targetSzIn.height);
|
||||
params.srcRoi = Rect(frameOr.cols * srcRoiIn.x, frameOr.rows * srcRoiIn.y, frameOr.cols * srcRoiIn.width, frameOr.rows * srcRoiIn.height);
|
||||
// Target ROI is expressed relative to the requested target size.
params.targetRoi = Rect(params.targetSz.width * targetRoiIn.x, params.targetSz.height * targetRoiIn.y, params.targetSz.width * targetRoiIn.width,
|
||||
params.targetSz.height * targetRoiIn.height);
|
||||
cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile, {}, params);
|
||||
reader->set(cudacodec::ColorFormat::GRAY);
|
||||
GpuMat frame;
|
||||
ASSERT_TRUE(reader->nextFrame(frame));
|
||||
const cudacodec::FormatInfo format = reader->format();
|
||||
// Expected values: same `v - v % k` alignment as applied to the requested values
// (multiple of 2 for sizes and y/height, multiple of 4 for x/width).
Size targetSzOut;
|
||||
targetSzOut.width = params.targetSz.width - params.targetSz.width % 2; targetSzOut.height = params.targetSz.height - params.targetSz.height % 2;
|
||||
Rect srcRoiOut, targetRoiOut;
|
||||
srcRoiOut.x = params.srcRoi.x - params.srcRoi.x % 4; srcRoiOut.width = params.srcRoi.width - params.srcRoi.width % 4;
|
||||
srcRoiOut.y = params.srcRoi.y - params.srcRoi.y % 2; srcRoiOut.height = params.srcRoi.height - params.srcRoi.height % 2;
|
||||
targetRoiOut.x = params.targetRoi.x - params.targetRoi.x % 4; targetRoiOut.width = params.targetRoi.width - params.targetRoi.width % 4;
|
||||
targetRoiOut.y = params.targetRoi.y - params.targetRoi.y % 2; targetRoiOut.height = params.targetRoi.height - params.targetRoi.height % 2;
|
||||
ASSERT_TRUE(format.valid && format.targetSz == targetSzOut && format.srcRoi == srcRoiOut && format.targetRoi == targetRoiOut);
|
||||
ASSERT_TRUE(frame.size() == targetSzOut);
|
||||
// Build the comparison image: crop the reference frame to the source ROI and resize it to the target ROI size.
GpuMat frameGs;
|
||||
cv::cuda::resize(frameOr(srcRoiOut), frameGs, targetRoiOut.size(), 0, 0, INTER_AREA);
|
||||
// assert on mean absolute error due to different resize algorithms
|
||||
// L1 norm of the difference divided by the pixel count gives the per-pixel mean absolute error.
const double mae = cv::cuda::norm(frameGs, frame(targetRoiOut), NORM_L1)/frameGs.size().area();
|
||||
ASSERT_LT(mae, 2.35);
|
||||
}
|
||||
|
||||
CUDA_TEST_P(Video, Reader)
|
||||
{
|
||||
cv::cuda::setDevice(GET_PARAM(0).deviceID());
|
||||
@@ -431,7 +476,14 @@ INSTANTIATE_TEST_CASE_P(CUDA_Codec, CheckSet, testing::Combine(
|
||||
ALL_DEVICES,
|
||||
testing::Values("highgui/video/big_buck_bunny.mp4")));
|
||||
|
||||
#define VIDEO_SRC_R "highgui/video/big_buck_bunny.mp4", "cv/video/768x576.avi", "cv/video/1920x1080.avi", "highgui/video/big_buck_bunny.avi", \
|
||||
// Parameter values for the Scaling test: a single input video ...
#define VIDEO_SRC_SCALING "highgui/video/big_buck_bunny.mp4"
|
||||
// ... target size factors (the test multiplies them by the unscaled frame's cols/rows) ...
#define TARGET_SZ Size2f(1,1), Size2f(0.8,0.9), Size2f(2.3,1.8)
|
||||
// ... source ROI factors (x, y, width, height; multiplied by the unscaled frame's cols/rows) ...
#define SRC_ROI Rect2f(0,0,1,1), Rect2f(0.25,0.25,0.5,0.5)
|
||||
// ... and target ROI factors (x, y, width, height; multiplied by the requested target size).
#define TARGET_ROI Rect2f(0,0,1,1), Rect2f(0.2,0.3,0.6,0.7)
|
||||
// testing::Combine runs the Scaling test for every combination of the value lists below.
INSTANTIATE_TEST_CASE_P(CUDA_Codec, Scaling, testing::Combine(
|
||||
ALL_DEVICES, testing::Values(VIDEO_SRC_SCALING), testing::Values(TARGET_SZ), testing::Values(SRC_ROI), testing::Values(TARGET_ROI)));
|
||||
|
||||
#define VIDEO_SRC_R "highgui/video/big_buck_bunny.mp4", "cv/video/768x576.avi", "cv/video/1920x1080.avi", "highgui/video/big_buck_bunny.avi", \
|
||||
"highgui/video/big_buck_bunny.h264", "highgui/video/big_buck_bunny.h265", "highgui/video/big_buck_bunny.mpg", \
|
||||
"highgui/video/sample_322x242_15frames.yuv420p.libvpx-vp9.mp4", "highgui/video/sample_322x242_15frames.yuv420p.libaom-av1.mp4", \
|
||||
"cv/tracking/faceocc2/data/faceocc2.webm"
|
||||
|
Reference in New Issue
Block a user