From 2cc5ead103f622611a73e044ef0e9bb853992105 Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Fri, 9 Nov 2018 00:48:48 +0300 Subject: [PATCH] moved DIS optical flow from opencv_contrib to opencv, moved TVL1 from opencv to opencv_contrib --- modules/optflow/include/opencv2/optflow.hpp | 289 ++-- .../optflow/perf/opencl/perf_dis_optflow.cpp | 72 - .../perf/opencl/perf_optflow_dualTVL1.cpp | 111 ++ modules/optflow/perf/perf_disflow.cpp | 66 - modules/optflow/perf/perf_tvl1optflow.cpp | 31 + .../perf/perf_variational_refinement.cpp | 40 - modules/optflow/samples/dis_opticalflow.cpp | 74 - modules/optflow/samples/gpc_evaluate.cpp | 2 +- .../samples/optical_flow_evaluation.cpp | 8 +- modules/optflow/samples/pcaflow_demo.cpp | 2 +- modules/optflow/src/deepflow.cpp | 2 +- modules/optflow/src/dis_flow.cpp | 1503 ----------------- modules/optflow/src/opencl/dis_flow.cl | 522 ------ .../optflow/src/opencl/optical_flow_tvl1.cl | 378 +++++ modules/optflow/src/optical_flow_io.cpp | 139 -- modules/optflow/src/tvl1flow.cpp | 1488 ++++++++++++++++ .../optflow/src/variational_refinement.cpp | 1192 ------------- ...test_dis.cpp => test_optflow_tvl1flow.cpp} | 85 +- modules/optflow/test/test_OF_accuracy.cpp | 46 - .../optflow/test/test_OF_reproducibility.cpp | 160 -- modules/optflow/test/test_tvl1optflow.cpp | 173 ++ modules/superres/CMakeLists.txt | 2 +- modules/superres/src/optical_flow.cpp | 5 +- samples/python2/dis_opt_flow.py | 114 -- 24 files changed, 2362 insertions(+), 4142 deletions(-) delete mode 100644 modules/optflow/perf/opencl/perf_dis_optflow.cpp create mode 100644 modules/optflow/perf/opencl/perf_optflow_dualTVL1.cpp delete mode 100644 modules/optflow/perf/perf_disflow.cpp create mode 100644 modules/optflow/perf/perf_tvl1optflow.cpp delete mode 100644 modules/optflow/perf/perf_variational_refinement.cpp delete mode 100644 modules/optflow/samples/dis_opticalflow.cpp delete mode 100644 modules/optflow/src/dis_flow.cpp delete mode 100644 modules/optflow/src/opencl/dis_flow.cl create mode 100644 modules/optflow/src/opencl/optical_flow_tvl1.cl delete mode 100644 modules/optflow/src/optical_flow_io.cpp create mode 100644 modules/optflow/src/tvl1flow.cpp delete mode 100644 modules/optflow/src/variational_refinement.cpp rename modules/optflow/test/ocl/{test_dis.cpp => test_optflow_tvl1flow.cpp} (50%) delete mode 100644 modules/optflow/test/test_OF_reproducibility.cpp create mode 100644 modules/optflow/test/test_tvl1optflow.cpp delete mode 100644 samples/python2/dis_opt_flow.py diff --git a/modules/optflow/include/opencv2/optflow.hpp b/modules/optflow/include/opencv2/optflow.hpp index e68d5b78d..65802750d 100644 --- a/modules/optflow/include/opencv2/optflow.hpp +++ b/modules/optflow/include/opencv2/optflow.hpp @@ -73,7 +73,7 @@ namespace cv { namespace optflow { - + //! @addtogroup optflow //! @{ @@ -137,85 +137,6 @@ CV_EXPORTS_W void calcOpticalFlowSparseToDense ( InputArray from, InputArray to, bool use_post_proc = true, float fgs_lambda = 500.0f, float fgs_sigma = 1.5f ); -/** @brief Read a .flo file - -@param path Path to the file to be loaded - -The function readOpticalFlow loads a flow field from a file and returns it as a single matrix. -Resulting Mat has a type CV_32FC2 - floating-point, 2-channel. First channel corresponds to the -flow in the horizontal direction (u), second - vertical (v). - */ -CV_EXPORTS_W Mat readOpticalFlow( const String& path ); -/** @brief Write a .flo to disk - -@param path Path to the file to be written -@param flow Flow field to be stored - -The function stores a flow field in a file, returns true on success, false otherwise. -The flow field must be a 2-channel, floating-point matrix (CV_32FC2). First channel corresponds -to the flow in the horizontal direction (u), second - vertical (v). - */ -CV_EXPORTS_W bool writeOpticalFlow( const String& path, InputArray flow ); - -/** @brief Variational optical flow refinement - -This class implements variational refinement of the input flow field, i.e. -it uses input flow to initialize the minimization of the following functional: -\f$E(U) = \int_{\Omega} \delta \Psi(E_I) + \gamma \Psi(E_G) + \alpha \Psi(E_S) \f$, -where \f$E_I,E_G,E_S\f$ are color constancy, gradient constancy and smoothness terms -respectively. \f$\Psi(s^2)=\sqrt{s^2+\epsilon^2}\f$ is a robust penalizer to limit the -influence of outliers. A complete formulation and a description of the minimization -procedure can be found in @cite Brox2004 -*/ -class CV_EXPORTS_W VariationalRefinement : public DenseOpticalFlow -{ -public: - /** @brief @ref calc function overload to handle separate horizontal (u) and vertical (v) flow components - (to avoid extra splits/merges) */ - CV_WRAP virtual void calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v) = 0; - - /** @brief Number of outer (fixed-point) iterations in the minimization procedure. - @see setFixedPointIterations */ - CV_WRAP virtual int getFixedPointIterations() const = 0; - /** @copybrief getFixedPointIterations @see getFixedPointIterations */ - CV_WRAP virtual void setFixedPointIterations(int val) = 0; - - /** @brief Number of inner successive over-relaxation (SOR) iterations - in the minimization procedure to solve the respective linear system. - @see setSorIterations */ - CV_WRAP virtual int getSorIterations() const = 0; - /** @copybrief getSorIterations @see getSorIterations */ - CV_WRAP virtual void setSorIterations(int val) = 0; - - /** @brief Relaxation factor in SOR - @see setOmega */ - CV_WRAP virtual float getOmega() const = 0; - /** @copybrief getOmega @see getOmega */ - CV_WRAP virtual void setOmega(float val) = 0; - - /** @brief Weight of the smoothness term - @see setAlpha */ - CV_WRAP virtual float getAlpha() const = 0; - /** @copybrief getAlpha @see getAlpha */ - CV_WRAP virtual void setAlpha(float val) = 0; - - /** @brief Weight of the color constancy term - @see setDelta */ - CV_WRAP virtual float getDelta() const = 0; - /** @copybrief getDelta @see getDelta */ - CV_WRAP virtual void setDelta(float val) = 0; - - /** @brief Weight of the gradient constancy term - @see setGamma */ - CV_WRAP virtual float getGamma() const = 0; - /** @copybrief getGamma @see getGamma */ - CV_WRAP virtual void setGamma(float val) = 0; -}; - -/** @brief Creates an instance of VariationalRefinement -*/ -CV_EXPORTS_W Ptr createVariationalFlowRefinement(); - /** @brief DeepFlow optical flow algorithm implementation. The class implements the DeepFlow optical flow algorithm described in @cite Weinzaepfel2013 . See @@ -252,107 +173,131 @@ CV_EXPORTS_W Ptr createOptFlow_Farneback(); //! Additional interface to the SparseToDenseFlow algorithm - calcOpticalFlowSparseToDense() CV_EXPORTS_W Ptr createOptFlow_SparseToDense(); -/** @brief DIS optical flow algorithm. +/** @brief "Dual TV L1" Optical Flow Algorithm. -This class implements the Dense Inverse Search (DIS) optical flow algorithm. More -details about the algorithm can be found at @cite Kroeger2016 . Includes three presets with preselected -parameters to provide reasonable trade-off between speed and quality. However, even the slowest preset is -still relatively fast, use DeepFlow if you need better quality and don't care about speed. +The class implements the "Dual TV L1" optical flow algorithm described in @cite Zach2007 and +@cite Javier2012 . +Here are important members of the class that control the algorithm, which you can set after +constructing the class instance: -This implementation includes several additional features compared to the algorithm described in the paper, -including spatial propagation of flow vectors (@ref getUseSpatialPropagation), as well as an option to -utilize an initial flow approximation passed to @ref calc (which is, essentially, temporal propagation, -if the previous frame's flow field is passed). +- member double tau + Time step of the numerical scheme. + +- member double lambda + Weight parameter for the data term, attachment parameter. This is the most relevant + parameter, which determines the smoothness of the output. The smaller this parameter is, + the smoother the solutions we obtain. It depends on the range of motions of the images, so + its value should be adapted to each image sequence. + +- member double theta + Weight parameter for (u - v)\^2, tightness parameter. It serves as a link between the + attachment and the regularization terms. In theory, it should have a small value in order + to maintain both parts in correspondence. The method is stable for a large range of values + of this parameter. + +- member int nscales + Number of scales used to create the pyramid of images. + +- member int warps + Number of warpings per scale. Represents the number of times that I1(x+u0) and grad( + I1(x+u0) ) are computed per scale. This is a parameter that assures the stability of the + method. It also affects the running time, so it is a compromise between speed and + accuracy. + +- member double epsilon + Stopping criterion threshold used in the numerical scheme, which is a trade-off between + precision and running time. A small value will yield more accurate solutions at the + expense of a slower convergence. + +- member int iterations + Stopping criterion iterations number used in the numerical scheme. + +C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow". +Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation". */ -class CV_EXPORTS_W DISOpticalFlow : public DenseOpticalFlow +class CV_EXPORTS_W DualTVL1OpticalFlow : public DenseOpticalFlow { public: - enum - { - PRESET_ULTRAFAST = 0, - PRESET_FAST = 1, - PRESET_MEDIUM = 2 - }; + //! @brief Time step of the numerical scheme + /** @see setTau */ + CV_WRAP virtual double getTau() const = 0; + /** @copybrief getTau @see getTau */ + CV_WRAP virtual void setTau(double val) = 0; + //! @brief Weight parameter for the data term, attachment parameter + /** @see setLambda */ + CV_WRAP virtual double getLambda() const = 0; + /** @copybrief getLambda @see getLambda */ + CV_WRAP virtual void setLambda(double val) = 0; + //! @brief Weight parameter for (u - v)^2, tightness parameter + /** @see setTheta */ + CV_WRAP virtual double getTheta() const = 0; + /** @copybrief getTheta @see getTheta */ + CV_WRAP virtual void setTheta(double val) = 0; + //! @brief coefficient for additional illumination variation term + /** @see setGamma */ + CV_WRAP virtual double getGamma() const = 0; + /** @copybrief getGamma @see getGamma */ + CV_WRAP virtual void setGamma(double val) = 0; + //! @brief Number of scales used to create the pyramid of images + /** @see setScalesNumber */ + CV_WRAP virtual int getScalesNumber() const = 0; + /** @copybrief getScalesNumber @see getScalesNumber */ + CV_WRAP virtual void setScalesNumber(int val) = 0; + //! @brief Number of warpings per scale + /** @see setWarpingsNumber */ + CV_WRAP virtual int getWarpingsNumber() const = 0; + /** @copybrief getWarpingsNumber @see getWarpingsNumber */ + CV_WRAP virtual void setWarpingsNumber(int val) = 0; + //! @brief Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time + /** @see setEpsilon */ + CV_WRAP virtual double getEpsilon() const = 0; + /** @copybrief getEpsilon @see getEpsilon */ + CV_WRAP virtual void setEpsilon(double val) = 0; + //! @brief Inner iterations (between outlier filtering) used in the numerical scheme + /** @see setInnerIterations */ + CV_WRAP virtual int getInnerIterations() const = 0; + /** @copybrief getInnerIterations @see getInnerIterations */ + CV_WRAP virtual void setInnerIterations(int val) = 0; + //! @brief Outer iterations (number of inner loops) used in the numerical scheme + /** @see setOuterIterations */ + CV_WRAP virtual int getOuterIterations() const = 0; + /** @copybrief getOuterIterations @see getOuterIterations */ + CV_WRAP virtual void setOuterIterations(int val) = 0; + //! @brief Use initial flow + /** @see setUseInitialFlow */ + CV_WRAP virtual bool getUseInitialFlow() const = 0; + /** @copybrief getUseInitialFlow @see getUseInitialFlow */ + CV_WRAP virtual void setUseInitialFlow(bool val) = 0; + //! @brief Step between scales (<1) + /** @see setScaleStep */ + CV_WRAP virtual double getScaleStep() const = 0; + /** @copybrief getScaleStep @see getScaleStep */ + CV_WRAP virtual void setScaleStep(double val) = 0; + //! @brief Median filter kernel size (1 = no filter) (3 or 5) + /** @see setMedianFiltering */ + CV_WRAP virtual int getMedianFiltering() const = 0; + /** @copybrief getMedianFiltering @see getMedianFiltering */ + CV_WRAP virtual void setMedianFiltering(int val) = 0; - /** @brief Finest level of the Gaussian pyramid on which the flow is computed (zero level - corresponds to the original image resolution). The final flow is obtained by bilinear upscaling. - @see setFinestScale */ - CV_WRAP virtual int getFinestScale() const = 0; - /** @copybrief getFinestScale @see getFinestScale */ - CV_WRAP virtual void setFinestScale(int val) = 0; - - /** @brief Size of an image patch for matching (in pixels). Normally, default 8x8 patches work well - enough in most cases. - @see setPatchSize */ - CV_WRAP virtual int getPatchSize() const = 0; - /** @copybrief getPatchSize @see getPatchSize */ - CV_WRAP virtual void setPatchSize(int val) = 0; - - /** @brief Stride between neighbor patches. Must be less than patch size. Lower values correspond - to higher flow quality. - @see setPatchStride */ - CV_WRAP virtual int getPatchStride() const = 0; - /** @copybrief getPatchStride @see getPatchStride */ - CV_WRAP virtual void setPatchStride(int val) = 0; - - /** @brief Maximum number of gradient descent iterations in the patch inverse search stage. Higher values - may improve quality in some cases. - @see setGradientDescentIterations */ - CV_WRAP virtual int getGradientDescentIterations() const = 0; - /** @copybrief getGradientDescentIterations @see getGradientDescentIterations */ - CV_WRAP virtual void setGradientDescentIterations(int val) = 0; - - /** @brief Number of fixed point iterations of variational refinement per scale. Set to zero to - disable variational refinement completely. Higher values will typically result in more smooth and - high-quality flow. - @see setGradientDescentIterations */ - CV_WRAP virtual int getVariationalRefinementIterations() const = 0; - /** @copybrief getGradientDescentIterations @see getGradientDescentIterations */ - CV_WRAP virtual void setVariationalRefinementIterations(int val) = 0; - - /** @brief Weight of the smoothness term - @see setVariationalRefinementAlpha */ - CV_WRAP virtual float getVariationalRefinementAlpha() const = 0; - /** @copybrief getVariationalRefinementAlpha @see getVariationalRefinementAlpha */ - CV_WRAP virtual void setVariationalRefinementAlpha(float val) = 0; - - /** @brief Weight of the color constancy term - @see setVariationalRefinementDelta */ - CV_WRAP virtual float getVariationalRefinementDelta() const = 0; - /** @copybrief getVariationalRefinementDelta @see getVariationalRefinementDelta */ - CV_WRAP virtual void setVariationalRefinementDelta(float val) = 0; - - /** @brief Weight of the gradient constancy term - @see setVariationalRefinementGamma */ - CV_WRAP virtual float getVariationalRefinementGamma() const = 0; - /** @copybrief getVariationalRefinementGamma @see getVariationalRefinementGamma */ - CV_WRAP virtual void setVariationalRefinementGamma(float val) = 0; - - - /** @brief Whether to use mean-normalization of patches when computing patch distance. It is turned on - by default as it typically provides a noticeable quality boost because of increased robustness to - illumination variations. Turn it off if you are certain that your sequence doesn't contain any changes - in illumination. - @see setUseMeanNormalization */ - CV_WRAP virtual bool getUseMeanNormalization() const = 0; - /** @copybrief getUseMeanNormalization @see getUseMeanNormalization */ - CV_WRAP virtual void setUseMeanNormalization(bool val) = 0; - - /** @brief Whether to use spatial propagation of good optical flow vectors. This option is turned on by - default, as it tends to work better on average and can sometimes help recover from major errors - introduced by the coarse-to-fine scheme employed by the DIS optical flow algorithm. Turning this - option off can make the output flow field a bit smoother, however. - @see setUseSpatialPropagation */ - CV_WRAP virtual bool getUseSpatialPropagation() const = 0; - /** @copybrief getUseSpatialPropagation @see getUseSpatialPropagation */ - CV_WRAP virtual void setUseSpatialPropagation(bool val) = 0; + /** @brief Creates instance of cv::DualTVL1OpticalFlow*/ + CV_WRAP static Ptr create( + double tau = 0.25, + double lambda = 0.15, + double theta = 0.3, + int nscales = 5, + int warps = 5, + double epsilon = 0.01, + int innnerIterations = 30, + int outerIterations = 10, + double scaleStep = 0.8, + double gamma = 0.0, + int medianFiltering = 5, + bool useInitialFlow = false); }; -/** @brief Creates an instance of DISOpticalFlow - -@param preset one of PRESET_ULTRAFAST, PRESET_FAST and PRESET_MEDIUM +/** @brief Creates instance of cv::DenseOpticalFlow */ -CV_EXPORTS_W Ptr createOptFlow_DIS(int preset = DISOpticalFlow::PRESET_FAST); +CV_EXPORTS_W Ptr createOptFlow_DualTVL1(); //! @} diff --git a/modules/optflow/perf/opencl/perf_dis_optflow.cpp b/modules/optflow/perf/opencl/perf_dis_optflow.cpp deleted file mode 100644 index f299d4c49..000000000 --- a/modules/optflow/perf/opencl/perf_dis_optflow.cpp +++ /dev/null @@ -1,72 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. -#include "../perf_precomp.hpp" -#include "opencv2/ts/ocl_perf.hpp" - -namespace opencv_test { namespace { - -#ifdef HAVE_OPENCL - -void MakeArtificialExample(UMat &dst_frame1, UMat &dst_frame2); - -typedef tuple DISParams; -typedef TestBaseWithParam DenseOpticalFlow_DIS; - -OCL_PERF_TEST_P(DenseOpticalFlow_DIS, perf, - Combine(Values("PRESET_ULTRAFAST", "PRESET_FAST", "PRESET_MEDIUM"), Values(szVGA, sz720p, sz1080p))) -{ - DISParams params = GetParam(); - - // use strings to print preset names in the perf test results: - String preset_string = get<0>(params); - int preset = DISOpticalFlow::PRESET_FAST; - if (preset_string == "PRESET_ULTRAFAST") - preset = DISOpticalFlow::PRESET_ULTRAFAST; - else if (preset_string == "PRESET_FAST") - preset = DISOpticalFlow::PRESET_FAST; - else if (preset_string == "PRESET_MEDIUM") - preset = DISOpticalFlow::PRESET_MEDIUM; - Size sz = get<1>(params); - - UMat frame1(sz, CV_8U); - UMat frame2(sz, CV_8U); - UMat flow; - - MakeArtificialExample(frame1, frame2); - - Ptr algo = createOptFlow_DIS(preset); - - OCL_TEST_CYCLE_N(10) - { - algo->calc(frame1, frame2, flow); - } - - SANITY_CHECK_NOTHING(); -} - -void MakeArtificialExample(UMat &dst_frame1, UMat &dst_frame2) -{ - int src_scale = 2; - int OF_scale = 6; - double sigma = dst_frame1.cols / 300; - - UMat tmp(Size(dst_frame1.cols / (1 << src_scale), dst_frame1.rows / (1 << src_scale)), CV_8U); - randu(tmp, 0, 255); - resize(tmp, dst_frame1, dst_frame1.size(), 0.0, 0.0, INTER_LINEAR_EXACT); - resize(tmp, dst_frame2, dst_frame2.size(), 0.0, 0.0, INTER_LINEAR_EXACT); - - Mat displacement_field(Size(dst_frame1.cols / (1 << OF_scale), dst_frame1.rows / (1 << OF_scale)), - CV_32FC2); - randn(displacement_field, 0.0, sigma); - resize(displacement_field, displacement_field, dst_frame2.size(), 0.0, 0.0, INTER_CUBIC); - for (int i = 0; i < displacement_field.rows; i++) - for (int j = 0; j < displacement_field.cols; j++) - displacement_field.at(i, j) += Vec2f((float)j, (float)i); - - remap(dst_frame2, dst_frame2, displacement_field, Mat(), INTER_LINEAR, BORDER_REPLICATE); -} - -#endif // HAVE_OPENCL - -}} // namespace diff --git a/modules/optflow/perf/opencl/perf_optflow_dualTVL1.cpp b/modules/optflow/perf/opencl/perf_optflow_dualTVL1.cpp new file mode 100644 index 000000000..1fa465d4c --- /dev/null +++ b/modules/optflow/perf/opencl/perf_optflow_dualTVL1.cpp @@ -0,0 +1,111 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "../perf_precomp.hpp" +#include "opencv2/ts/ocl_perf.hpp" + +#ifdef HAVE_OPENCL + +namespace opencv_test { +namespace ocl { + +///////////// OpticalFlow Dual TVL1 //////////////////////// +typedef tuple< tuple, bool> OpticalFlowDualTVL1Params; +typedef TestBaseWithParam OpticalFlowDualTVL1Fixture; + +OCL_PERF_TEST_P(OpticalFlowDualTVL1Fixture, OpticalFlowDualTVL1, + ::testing::Combine( + ::testing::Values(make_tuple(-1, 0.3), + make_tuple(3, 0.5)), + ::testing::Bool() + ) + ) + { + Mat frame0 = imread(getDataPath("cv/optflow/RubberWhale1.png"), IMREAD_GRAYSCALE); + ASSERT_FALSE(frame0.empty()) << "can't load RubberWhale1.png"; + + Mat frame1 = imread(getDataPath("cv/optflow/RubberWhale2.png"), IMREAD_GRAYSCALE); + ASSERT_FALSE(frame1.empty()) << "can't load RubberWhale2.png"; + + const Size srcSize = frame0.size(); + + const OpticalFlowDualTVL1Params params = GetParam(); + const tuple filteringScale = get<0>(params); + const int medianFiltering = get<0>(filteringScale); + const double scaleStep = get<1>(filteringScale); + const bool useInitFlow = get<1>(params); + double eps = 0.9; + + UMat uFrame0; frame0.copyTo(uFrame0); + UMat uFrame1; frame1.copyTo(uFrame1); + UMat uFlow(srcSize, CV_32FC2); + declare.in(uFrame0, uFrame1, WARMUP_READ).out(uFlow, WARMUP_READ); + + //create algorithm + Ptr alg = createOptFlow_DualTVL1(); + + //set parameters + alg->setScaleStep(scaleStep); + alg->setMedianFiltering(medianFiltering); + + if (useInitFlow) + { + //calculate initial flow as result of optical flow + alg->calc(uFrame0, uFrame1, uFlow); + } + + //set flag to use initial flow + alg->setUseInitialFlow(useInitFlow); + OCL_TEST_CYCLE() + alg->calc(uFrame0, uFrame1, uFlow); + + SANITY_CHECK(uFlow, eps, ERROR_RELATIVE); + } +} + +} // namespace opencv_test::ocl + +#endif // HAVE_OPENCL diff --git a/modules/optflow/perf/perf_disflow.cpp b/modules/optflow/perf/perf_disflow.cpp deleted file mode 100644 index 04443f573..000000000 --- a/modules/optflow/perf/perf_disflow.cpp +++ /dev/null @@ -1,66 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. -#include "perf_precomp.hpp" - -namespace opencv_test { namespace { - -void MakeArtificialExample(Mat &dst_frame1, Mat &dst_frame2); - -typedef tuple DISParams; -typedef TestBaseWithParam DenseOpticalFlow_DIS; - -PERF_TEST_P(DenseOpticalFlow_DIS, perf, - Combine(Values("PRESET_ULTRAFAST", "PRESET_FAST", "PRESET_MEDIUM"), Values(szVGA, sz720p, sz1080p))) -{ - DISParams params = GetParam(); - - // use strings to print preset names in the perf test results: - String preset_string = get<0>(params); - int preset = DISOpticalFlow::PRESET_FAST; - if (preset_string == "PRESET_ULTRAFAST") - preset = DISOpticalFlow::PRESET_ULTRAFAST; - else if (preset_string == "PRESET_FAST") - preset = DISOpticalFlow::PRESET_FAST; - else if (preset_string == "PRESET_MEDIUM") - preset = DISOpticalFlow::PRESET_MEDIUM; - Size sz = get<1>(params); - - Mat frame1(sz, CV_8U); - Mat frame2(sz, CV_8U); - Mat flow; - - MakeArtificialExample(frame1, frame2); - - TEST_CYCLE_N(10) - { - Ptr algo = createOptFlow_DIS(preset); - algo->calc(frame1, frame2, flow); - } - - SANITY_CHECK_NOTHING(); -} - -void MakeArtificialExample(Mat &dst_frame1, Mat &dst_frame2) -{ - int src_scale = 2; - int OF_scale = 6; - double sigma = dst_frame1.cols / 300; - - Mat tmp(Size(dst_frame1.cols / (1 << src_scale), dst_frame1.rows / (1 << src_scale)), CV_8U); - randu(tmp, 0, 255); - resize(tmp, dst_frame1, dst_frame1.size(), 0.0, 0.0, INTER_LINEAR_EXACT); - resize(tmp, dst_frame2, dst_frame2.size(), 0.0, 0.0, INTER_LINEAR_EXACT); - - Mat displacement_field(Size(dst_frame1.cols / (1 << OF_scale), dst_frame1.rows / (1 << OF_scale)), - CV_32FC2); - randn(displacement_field, 0.0, sigma); - resize(displacement_field, displacement_field, dst_frame2.size(), 0.0, 0.0, INTER_CUBIC); - for (int i = 0; i < displacement_field.rows; i++) - for (int j = 0; j < displacement_field.cols; j++) - displacement_field.at(i, j) += Vec2f((float)j, (float)i); - - remap(dst_frame2, dst_frame2, displacement_field, Mat(), INTER_LINEAR, BORDER_REPLICATE); -} - -}} // namespace diff --git a/modules/optflow/perf/perf_tvl1optflow.cpp b/modules/optflow/perf/perf_tvl1optflow.cpp new file mode 100644 index 000000000..917c28fcf --- /dev/null +++ b/modules/optflow/perf/perf_tvl1optflow.cpp @@ -0,0 +1,31 @@ +#include "perf_precomp.hpp" + +namespace opencv_test { namespace { +using namespace perf; + +typedef TestBaseWithParam< std::pair > ImagePair; + +std::pair impair(const char* im1, const char* im2) +{ + return std::make_pair(string(im1), string(im2)); +} + +PERF_TEST_P(ImagePair, OpticalFlowDual_TVL1, testing::Values(impair("cv/optflow/RubberWhale1.png", "cv/optflow/RubberWhale2.png"))) +{ + declare.time(260); + + Mat frame1 = imread(getDataPath(GetParam().first), IMREAD_GRAYSCALE); + Mat frame2 = imread(getDataPath(GetParam().second), IMREAD_GRAYSCALE); + ASSERT_FALSE(frame1.empty()); + ASSERT_FALSE(frame2.empty()); + + Mat flow; + + Ptr tvl1 = createOptFlow_DualTVL1(); + + TEST_CYCLE() tvl1->calc(frame1, frame2, flow); + + SANITY_CHECK_NOTHING(); +} + +}} // namespace diff --git a/modules/optflow/perf/perf_variational_refinement.cpp b/modules/optflow/perf/perf_variational_refinement.cpp deleted file mode 100644 index 0a953040d..000000000 --- a/modules/optflow/perf/perf_variational_refinement.cpp +++ /dev/null @@ -1,40 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. -#include "perf_precomp.hpp" - -namespace opencv_test { namespace { - -typedef tuple VarRefParams; -typedef TestBaseWithParam DenseOpticalFlow_VariationalRefinement; - -PERF_TEST_P(DenseOpticalFlow_VariationalRefinement, perf, Combine(Values(szQVGA, szVGA), Values(5, 10), Values(5, 10))) -{ - VarRefParams params = GetParam(); - Size sz = get<0>(params); - int sorIter = get<1>(params); - int fixedPointIter = get<2>(params); - - Mat frame1(sz, CV_8U); - Mat frame2(sz, CV_8U); - Mat flow(sz, CV_32FC2); - - randu(frame1, 0, 255); - randu(frame2, 0, 255); - flow.setTo(0.0f); - - TEST_CYCLE_N(10) - { - Ptr var = createVariationalFlowRefinement(); - var->setAlpha(20.0f); - var->setGamma(10.0f); - var->setDelta(5.0f); - var->setSorIterations(sorIter); - var->setFixedPointIterations(fixedPointIter); - var->calc(frame1, frame2, flow); - } - - SANITY_CHECK_NOTHING(); -} - -}} // namespace diff --git a/modules/optflow/samples/dis_opticalflow.cpp b/modules/optflow/samples/dis_opticalflow.cpp deleted file mode 100644 index 1d4037c04..000000000 --- a/modules/optflow/samples/dis_opticalflow.cpp +++ /dev/null @@ -1,74 +0,0 @@ - -#include "opencv2/core/utility.hpp" -#include "opencv2/highgui.hpp" -#include "opencv2/imgproc.hpp" -#include "opencv2/optflow.hpp" - -using namespace std; -using namespace cv; -using namespace optflow; - -static void help() -{ - printf("Usage: dis_optflow \n"); -} - -int main(int argc, char **argv) -{ - VideoCapture cap; - - if (argc < 2) - { - help(); - exit(1); - } - - cap.open(argv[1]); - if(!cap.isOpened()) - { - printf("ERROR: Cannot open file %s\n", argv[1]); - return -1; - } - - Mat prevgray, gray, rgb, frame; - Mat flow, flow_uv[2]; - Mat mag, ang; - Mat hsv_split[3], hsv; - char ret; - - namedWindow("flow", 1); - namedWindow("orig", 1); - - Ptr algorithm = createOptFlow_DIS(DISOpticalFlow::PRESET_MEDIUM); - - while(true) - { - cap >> frame; - if (frame.empty()) - break; - - cvtColor(frame, gray, COLOR_BGR2GRAY); - - if (!prevgray.empty()) - { - algorithm->calc(prevgray, gray, flow); - split(flow, flow_uv); - multiply(flow_uv[1], -1, flow_uv[1]); - cartToPolar(flow_uv[0], flow_uv[1], mag, ang, true); - normalize(mag, mag, 0, 1, NORM_MINMAX); - hsv_split[0] = ang; - hsv_split[1] = mag; - hsv_split[2] = Mat::ones(ang.size(), ang.type()); - merge(hsv_split, 3, hsv); - cvtColor(hsv, rgb, COLOR_HSV2BGR); - imshow("flow", rgb); - imshow("orig", frame); - } - - if ((ret = (char)waitKey(20)) > 0) - break; - std::swap(prevgray, gray); - } - - return 0; -} diff --git a/modules/optflow/samples/gpc_evaluate.cpp b/modules/optflow/samples/gpc_evaluate.cpp index f787103d0..528483fd5 100644 --- a/modules/optflow/samples/gpc_evaluate.cpp +++ b/modules/optflow/samples/gpc_evaluate.cpp @@ -101,7 +101,7 @@ int main( int argc, const char **argv ) Mat from = imread( fromPath ); Mat to = imread( toPath ); - Mat gt = optflow::readOpticalFlow( gtPath ); + Mat gt = readOpticalFlow( gtPath ); std::vector< std::pair< Point2i, Point2i > > corr; TickMeter meter; diff --git a/modules/optflow/samples/optical_flow_evaluation.cpp b/modules/optflow/samples/optical_flow_evaluation.cpp index cdd641f77..cd2af6b2d 100644 --- a/modules/optflow/samples/optical_flow_evaluation.cpp +++ b/modules/optflow/samples/optical_flow_evaluation.cpp @@ -269,11 +269,11 @@ int main( int argc, char** argv ) algorithm = createOptFlow_PCAFlow(); } else if ( method == "DISflow_ultrafast" ) - algorithm = createOptFlow_DIS(DISOpticalFlow::PRESET_ULTRAFAST); + algorithm = DISOpticalFlow::create(DISOpticalFlow::PRESET_ULTRAFAST); else if (method == "DISflow_fast") - algorithm = createOptFlow_DIS(DISOpticalFlow::PRESET_FAST); + algorithm = DISOpticalFlow::create(DISOpticalFlow::PRESET_FAST); else if (method == "DISflow_medium") - algorithm = createOptFlow_DIS(DISOpticalFlow::PRESET_MEDIUM); + algorithm = DISOpticalFlow::create(DISOpticalFlow::PRESET_MEDIUM); else { printf("Wrong method!\n"); @@ -300,7 +300,7 @@ int main( int argc, char** argv ) if ( !groundtruth_path.empty() ) { // compare to ground truth - ground_truth = optflow::readOpticalFlow(groundtruth_path); + ground_truth = readOpticalFlow(groundtruth_path); if ( flow.size() != ground_truth.size() || flow.channels() != 2 || ground_truth.channels() != 2 ) { diff --git a/modules/optflow/samples/pcaflow_demo.cpp b/modules/optflow/samples/pcaflow_demo.cpp index ea9a60712..ec0c3f903 100644 --- a/modules/optflow/samples/pcaflow_demo.cpp +++ b/modules/optflow/samples/pcaflow_demo.cpp @@ -135,7 +135,7 @@ int main( int argc, const char **argv ) Mat i1 = imread( img1 ); Mat i2 = imread( img2 ); - Mat gt = optflow::readOpticalFlow( groundtruth ); + Mat gt = readOpticalFlow( groundtruth ); Mat i1g, i2g; cvtColor( i1, i1g, COLOR_BGR2GRAY ); diff --git a/modules/optflow/src/deepflow.cpp b/modules/optflow/src/deepflow.cpp index 92423fa32..6b7cc2e74 100644 --- a/modules/optflow/src/deepflow.cpp +++ b/modules/optflow/src/deepflow.cpp @@ -147,7 +147,7 @@ void OpticalFlowDeepFlow::calc( InputArray _I0, InputArray _I1, InputOutputArray for ( int level = levelCount - 1; level >= 0; --level ) { //iterate through all levels, beginning with the most coarse - Ptr var = createVariationalFlowRefinement(); + Ptr var = VariationalRefinement::create(); var->setAlpha(4 * alpha); var->setDelta(delta / 3); diff --git a/modules/optflow/src/dis_flow.cpp b/modules/optflow/src/dis_flow.cpp deleted file mode 100644 index 77d62e971..000000000 --- a/modules/optflow/src/dis_flow.cpp +++ /dev/null @@ -1,1503 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" -#include "opencl_kernels_optflow.hpp" - -using namespace std; -#define EPS 0.001F -#define INF 1E+10F - -namespace cv -{ -namespace optflow -{ - -class DISOpticalFlowImpl CV_FINAL : public DISOpticalFlow -{ - public: - DISOpticalFlowImpl(); - - void calc(InputArray I0, InputArray I1, InputOutputArray flow) CV_OVERRIDE; - void collectGarbage() CV_OVERRIDE; - - protected: //!< algorithm parameters - int finest_scale, coarsest_scale; - int patch_size; - int patch_stride; - int grad_descent_iter; - int variational_refinement_iter; - float variational_refinement_alpha; - float variational_refinement_gamma; - float variational_refinement_delta; - bool use_mean_normalization; - bool use_spatial_propagation; - - protected: //!< some auxiliary variables - int border_size; - int w, h; //!< flow buffer width and height on the current scale - int ws, hs; //!< sparse flow buffer width and height on the current scale - - public: - int getFinestScale() const CV_OVERRIDE { return finest_scale; } - void setFinestScale(int val) CV_OVERRIDE { finest_scale = val; } - int getPatchSize() const CV_OVERRIDE { return patch_size; } - void setPatchSize(int val) CV_OVERRIDE { patch_size = val; } - int getPatchStride() const CV_OVERRIDE { return patch_stride; } - void setPatchStride(int val) CV_OVERRIDE { patch_stride = val; } - int getGradientDescentIterations() const CV_OVERRIDE { return grad_descent_iter; } - void setGradientDescentIterations(int val) CV_OVERRIDE { grad_descent_iter = val; } - int getVariationalRefinementIterations() const CV_OVERRIDE { return variational_refinement_iter; } - void setVariationalRefinementIterations(int val) CV_OVERRIDE { variational_refinement_iter = val; } - float getVariationalRefinementAlpha() const CV_OVERRIDE { return variational_refinement_alpha; } - void setVariationalRefinementAlpha(float val) CV_OVERRIDE { variational_refinement_alpha = val; } - float getVariationalRefinementDelta() const CV_OVERRIDE { return variational_refinement_delta; } - void setVariationalRefinementDelta(float val) CV_OVERRIDE { variational_refinement_delta = val; } - float getVariationalRefinementGamma() const CV_OVERRIDE { return variational_refinement_gamma; } - void setVariationalRefinementGamma(float val) CV_OVERRIDE { variational_refinement_gamma = val; } - - bool getUseMeanNormalization() const CV_OVERRIDE { return use_mean_normalization; } - void setUseMeanNormalization(bool val) CV_OVERRIDE { use_mean_normalization = val; } - bool getUseSpatialPropagation() const CV_OVERRIDE { return use_spatial_propagation; } - void setUseSpatialPropagation(bool val) CV_OVERRIDE { use_spatial_propagation = val; } - - protected: //!< internal buffers - vector > I0s; //!< Gaussian pyramid for the current frame - vector > I1s; //!< Gaussian pyramid for the next frame - vector > I1s_ext; //!< I1s with borders - - vector > I0xs; //!< Gaussian pyramid for the x gradient of the current frame - vector > I0ys; //!< Gaussian pyramid for the y gradient of the current frame - - vector > Ux; //!< x component of the flow vectors - vector > Uy; //!< y component of the flow vectors - - vector > initial_Ux; //!< x component of the initial flow field, if one was passed as an input - vector > initial_Uy; //!< y component of the initial flow field, if one was passed as an input - - Mat_ U; //!< a buffer for the merged flow - - Mat_ Sx; //!< intermediate sparse flow representation (x component) - Mat_ Sy; //!< intermediate sparse flow representation (y component) - - /* Structure tensor components: */ - Mat_ I0xx_buf; //!< sum of squares of x gradient values - Mat_ I0yy_buf; //!< sum of squares of y gradient values - Mat_ I0xy_buf; //!< sum of x and y gradient products - - /* Extra buffers that are useful if patch mean-normalization is used: */ - Mat_ I0x_buf; //!< sum of x gradient values - Mat_ I0y_buf; //!< sum of y gradient values - - /* Auxiliary buffers used in structure tensor computation: */ - Mat_ I0xx_buf_aux; - Mat_ I0yy_buf_aux; - Mat_ I0xy_buf_aux; - Mat_ I0x_buf_aux; - Mat_ I0y_buf_aux; - - vector > variational_refinement_processors; - - private: //!< private methods and parallel sections - void prepareBuffers(Mat &I0, Mat &I1, Mat &flow, bool use_flow); - void precomputeStructureTensor(Mat &dst_I0xx, Mat &dst_I0yy, Mat &dst_I0xy, Mat &dst_I0x, Mat &dst_I0y, Mat &I0x, - Mat &I0y); - - struct PatchInverseSearch_ParBody : public ParallelLoopBody - { - DISOpticalFlowImpl *dis; - int nstripes, stripe_sz; - int hs; - Mat *Sx, *Sy, *Ux, *Uy, *I0, *I1, *I0x, *I0y; - int num_iter, pyr_level; - - PatchInverseSearch_ParBody(DISOpticalFlowImpl &_dis, int _nstripes, int _hs, Mat &dst_Sx, Mat &dst_Sy, - Mat &src_Ux, Mat &src_Uy, Mat &_I0, Mat &_I1, Mat &_I0x, Mat &_I0y, int _num_iter, - int _pyr_level); - void operator()(const Range &range) const CV_OVERRIDE; - }; - - struct Densification_ParBody : public ParallelLoopBody - { - DISOpticalFlowImpl *dis; - int nstripes, stripe_sz; - int h; - Mat *Ux, *Uy, *Sx, *Sy, *I0, *I1; - - Densification_ParBody(DISOpticalFlowImpl &_dis, int _nstripes, int _h, Mat &dst_Ux, Mat &dst_Uy, Mat &src_Sx, - Mat &src_Sy, Mat &_I0, Mat &_I1); - void operator()(const Range &range) const CV_OVERRIDE; - }; - -#ifdef HAVE_OPENCL - vector u_I0s; //!< Gaussian pyramid for the current frame - vector u_I1s; //!< Gaussian pyramid for the next frame - vector u_I1s_ext; //!< I1s with borders - - vector u_I0xs; //!< Gaussian pyramid for the x gradient of the current frame - vector u_I0ys; //!< Gaussian pyramid for the y gradient of the current frame - - vector u_Ux; //!< x component of the flow vectors - vector u_Uy; //!< y component of the flow vectors - - vector u_initial_Ux; //!< x component of the initial flow field, if one was passed as an input - vector u_initial_Uy; //!< y component of the initial flow field, if one was passed as an input - - UMat u_U; //!< a buffer for the merged flow - - UMat u_Sx; //!< intermediate sparse flow representation (x component) - UMat u_Sy; //!< intermediate sparse flow representation (y component) - - /* Structure tensor components: */ - UMat u_I0xx_buf; //!< sum of squares of x gradient values - UMat u_I0yy_buf; //!< sum of squares of y gradient values - UMat u_I0xy_buf; //!< sum of x and y gradient products - - /* Extra buffers that are useful if patch mean-normalization is used: */ - UMat u_I0x_buf; //!< sum of x gradient values - UMat u_I0y_buf; //!< sum of y gradient values - - /* Auxiliary buffers used in structure tensor computation: */ - UMat u_I0xx_buf_aux; - UMat u_I0yy_buf_aux; - UMat u_I0xy_buf_aux; - UMat u_I0x_buf_aux; - UMat u_I0y_buf_aux; - - bool ocl_precomputeStructureTensor(UMat &dst_I0xx, UMat &dst_I0yy, UMat &dst_I0xy, - UMat &dst_I0x, UMat &dst_I0y, UMat &I0x, UMat &I0y); - void ocl_prepareBuffers(UMat &I0, UMat &I1, UMat &flow, bool use_flow); - bool ocl_calc(InputArray I0, InputArray I1, InputOutputArray flow); - bool ocl_Densification(UMat &dst_Ux, UMat &dst_Uy, UMat &src_Sx, UMat &src_Sy, UMat &_I0, UMat &_I1); - bool ocl_PatchInverseSearch(UMat &src_Ux, UMat &src_Uy, - UMat &I0, UMat &I1, UMat &I0x, UMat &I0y, int num_iter, int pyr_level); -#endif -}; - -DISOpticalFlowImpl::DISOpticalFlowImpl() -{ - finest_scale = 2; - patch_size = 8; - patch_stride = 4; - grad_descent_iter = 16; - variational_refinement_iter = 5; - variational_refinement_alpha = 20.f; - variational_refinement_gamma = 10.f; - variational_refinement_delta = 5.f; - - border_size = 16; - use_mean_normalization = true; - use_spatial_propagation = true; - - /* Use separate variational refinement instances for different scales to avoid repeated memory allocation: */ - int max_possible_scales = 10; - for (int i = 0; i < max_possible_scales; i++) - variational_refinement_processors.push_back(createVariationalFlowRefinement()); -} - -void DISOpticalFlowImpl::prepareBuffers(Mat &I0, Mat &I1, Mat &flow, bool use_flow) -{ - I0s.resize(coarsest_scale + 1); - I1s.resize(coarsest_scale + 1); - I1s_ext.resize(coarsest_scale + 1); - I0xs.resize(coarsest_scale + 1); - I0ys.resize(coarsest_scale + 1); - Ux.resize(coarsest_scale + 1); - Uy.resize(coarsest_scale + 1); - - Mat flow_uv[2]; - if (use_flow) - { - split(flow, flow_uv); - initial_Ux.resize(coarsest_scale + 1); - initial_Uy.resize(coarsest_scale + 1); - } - - int fraction = 1; - int cur_rows = 0, cur_cols = 0; - - for (int i = 0; i <= coarsest_scale; i++) - { - /* Avoid initializing the pyramid levels above the finest scale, as they won't be used anyway */ - if (i == finest_scale) - { - cur_rows = I0.rows / fraction; - cur_cols = I0.cols / fraction; - I0s[i].create(cur_rows, cur_cols); - resize(I0, I0s[i], I0s[i].size(), 0.0, 0.0, INTER_AREA); - I1s[i].create(cur_rows, cur_cols); - resize(I1, I1s[i], I1s[i].size(), 0.0, 0.0, INTER_AREA); - - /* These buffers are reused in each scale so we initialize them once on the finest scale: */ - Sx.create(cur_rows / patch_stride, cur_cols / patch_stride); - Sy.create(cur_rows / patch_stride, cur_cols / patch_stride); - I0xx_buf.create(cur_rows / patch_stride, cur_cols / patch_stride); - I0yy_buf.create(cur_rows / patch_stride, cur_cols / patch_stride); - I0xy_buf.create(cur_rows / patch_stride, cur_cols / patch_stride); - I0x_buf.create(cur_rows / patch_stride, cur_cols / patch_stride); - I0y_buf.create(cur_rows / patch_stride, cur_cols / patch_stride); - - I0xx_buf_aux.create(cur_rows, cur_cols / patch_stride); - I0yy_buf_aux.create(cur_rows, cur_cols / patch_stride); - I0xy_buf_aux.create(cur_rows, cur_cols / patch_stride); - I0x_buf_aux.create(cur_rows, cur_cols / patch_stride); - I0y_buf_aux.create(cur_rows, cur_cols / patch_stride); - - U.create(cur_rows, cur_cols); - } - else if (i > finest_scale) - { - cur_rows = I0s[i - 1].rows / 2; - cur_cols = I0s[i - 1].cols / 2; - I0s[i].create(cur_rows, cur_cols); - resize(I0s[i - 1], I0s[i], I0s[i].size(), 0.0, 0.0, INTER_AREA); - I1s[i].create(cur_rows, cur_cols); - resize(I1s[i - 1], I1s[i], I1s[i].size(), 0.0, 0.0, INTER_AREA); - } - - if (i >= finest_scale) - { - I1s_ext[i].create(cur_rows + 2 * border_size, cur_cols + 2 * border_size); - copyMakeBorder(I1s[i], I1s_ext[i], border_size, border_size, border_size, border_size, BORDER_REPLICATE); - I0xs[i].create(cur_rows, cur_cols); - I0ys[i].create(cur_rows, cur_cols); - spatialGradient(I0s[i], I0xs[i], I0ys[i]); - Ux[i].create(cur_rows, cur_cols); - Uy[i].create(cur_rows, cur_cols); - variational_refinement_processors[i]->setAlpha(variational_refinement_alpha); - variational_refinement_processors[i]->setDelta(variational_refinement_delta); - variational_refinement_processors[i]->setGamma(variational_refinement_gamma); - variational_refinement_processors[i]->setSorIterations(5); - variational_refinement_processors[i]->setFixedPointIterations(variational_refinement_iter); - - if (use_flow) - { - resize(flow_uv[0], initial_Ux[i], Size(cur_cols, cur_rows)); - initial_Ux[i] /= fraction; - resize(flow_uv[1], initial_Uy[i], Size(cur_cols, cur_rows)); - initial_Uy[i] /= fraction; - } - } - - fraction *= 2; - } -} - -/* This function computes the structure tensor elements (local sums of I0x^2, I0x*I0y and I0y^2). - * A simple box filter is not used instead because we need to compute these sums on a sparse grid - * and store them densely in the output buffers. - */ -void DISOpticalFlowImpl::precomputeStructureTensor(Mat &dst_I0xx, Mat &dst_I0yy, Mat &dst_I0xy, Mat &dst_I0x, - Mat &dst_I0y, Mat &I0x, Mat &I0y) -{ - float *I0xx_ptr = dst_I0xx.ptr(); - float *I0yy_ptr = dst_I0yy.ptr(); - float *I0xy_ptr = dst_I0xy.ptr(); - float *I0x_ptr = dst_I0x.ptr(); - float *I0y_ptr = dst_I0y.ptr(); - - float *I0xx_aux_ptr = I0xx_buf_aux.ptr(); - float *I0yy_aux_ptr = I0yy_buf_aux.ptr(); - float *I0xy_aux_ptr = I0xy_buf_aux.ptr(); - float *I0x_aux_ptr = I0x_buf_aux.ptr(); - float *I0y_aux_ptr = I0y_buf_aux.ptr(); - - /* Separable box filter: horizontal pass */ - for (int i = 0; i < h; i++) - { - float sum_xx = 0.0f, sum_yy = 0.0f, sum_xy = 0.0f, sum_x = 0.0f, sum_y = 0.0f; - short *x_row = I0x.ptr(i); - short *y_row = I0y.ptr(i); - for (int j = 0; j < patch_size; j++) - { - sum_xx += x_row[j] * x_row[j]; - sum_yy += y_row[j] * y_row[j]; - sum_xy += x_row[j] * y_row[j]; - sum_x += x_row[j]; - sum_y += y_row[j]; - } - I0xx_aux_ptr[i * ws] = sum_xx; - I0yy_aux_ptr[i * ws] = sum_yy; - I0xy_aux_ptr[i * ws] = sum_xy; - I0x_aux_ptr[i * ws] = sum_x; - I0y_aux_ptr[i * ws] = sum_y; - int js = 1; - for (int j = patch_size; j < w; j++) - { - sum_xx += (x_row[j] * x_row[j] - x_row[j - patch_size] * x_row[j - patch_size]); - sum_yy += (y_row[j] * y_row[j] - y_row[j - patch_size] * y_row[j - patch_size]); - sum_xy += (x_row[j] * y_row[j] - x_row[j - patch_size] * y_row[j - patch_size]); - sum_x += (x_row[j] - x_row[j - patch_size]); - sum_y += (y_row[j] - y_row[j - patch_size]); - if ((j - patch_size + 1) % patch_stride == 0) - { - I0xx_aux_ptr[i * ws + js] = sum_xx; - I0yy_aux_ptr[i * ws + js] = sum_yy; - I0xy_aux_ptr[i * ws + js] = sum_xy; - I0x_aux_ptr[i * ws + js] = sum_x; - I0y_aux_ptr[i * ws + js] = sum_y; - js++; - } - } - } - - AutoBuffer sum_xx(ws), sum_yy(ws), sum_xy(ws), sum_x(ws), sum_y(ws); - for (int j = 0; j < ws; j++) - { - sum_xx[j] = 0.0f; - sum_yy[j] = 0.0f; - sum_xy[j] = 0.0f; - sum_x[j] = 0.0f; - sum_y[j] = 0.0f; - } - - /* Separable box filter: vertical pass */ - for (int i = 0; i < patch_size; i++) - for (int j = 0; j < ws; j++) - { - sum_xx[j] += I0xx_aux_ptr[i * ws + j]; - sum_yy[j] += I0yy_aux_ptr[i * ws + j]; - sum_xy[j] += I0xy_aux_ptr[i * ws + j]; - sum_x[j] += I0x_aux_ptr[i * ws + j]; - sum_y[j] += I0y_aux_ptr[i * ws + j]; - } - for (int j = 0; j < ws; j++) - { - I0xx_ptr[j] = sum_xx[j]; - I0yy_ptr[j] = sum_yy[j]; - I0xy_ptr[j] = sum_xy[j]; - I0x_ptr[j] = sum_x[j]; - I0y_ptr[j] = sum_y[j]; - } - int is = 1; - for (int i = patch_size; i < h; i++) - { - for (int j = 0; j < ws; j++) - { - sum_xx[j] += (I0xx_aux_ptr[i * ws + j] - I0xx_aux_ptr[(i - patch_size) * ws + j]); - sum_yy[j] += (I0yy_aux_ptr[i * ws + j] - I0yy_aux_ptr[(i - patch_size) * ws + j]); - sum_xy[j] += (I0xy_aux_ptr[i * ws + j] - I0xy_aux_ptr[(i - patch_size) * ws + j]); - sum_x[j] += (I0x_aux_ptr[i * ws + j] - I0x_aux_ptr[(i - patch_size) * ws + j]); - sum_y[j] += (I0y_aux_ptr[i * ws + j] - I0y_aux_ptr[(i - patch_size) * ws + j]); - } - if ((i - patch_size + 1) % patch_stride == 0) - { - for (int j = 0; j < ws; j++) - { - I0xx_ptr[is * ws + j] = sum_xx[j]; - I0yy_ptr[is * ws + j] = sum_yy[j]; - I0xy_ptr[is * ws + j] = sum_xy[j]; - I0x_ptr[is * ws + j] = sum_x[j]; - I0y_ptr[is * ws + j] = sum_y[j]; - } - is++; - } - } -} - -DISOpticalFlowImpl::PatchInverseSearch_ParBody::PatchInverseSearch_ParBody(DISOpticalFlowImpl &_dis, int _nstripes, - int _hs, Mat &dst_Sx, Mat &dst_Sy, - Mat &src_Ux, Mat &src_Uy, Mat &_I0, Mat &_I1, - Mat &_I0x, Mat &_I0y, int _num_iter, - int _pyr_level) - : dis(&_dis), nstripes(_nstripes), hs(_hs), Sx(&dst_Sx), Sy(&dst_Sy), Ux(&src_Ux), Uy(&src_Uy), I0(&_I0), I1(&_I1), - I0x(&_I0x), I0y(&_I0y), num_iter(_num_iter), pyr_level(_pyr_level) -{ - stripe_sz = (int)ceil(hs / (double)nstripes); -} - -/////////////////////////////////////////////* Patch processing functions *///////////////////////////////////////////// - -/* Some auxiliary macros */ -#define HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION \ - v_float32x4 w00v = v_setall_f32(w00); \ - v_float32x4 w01v = v_setall_f32(w01); \ - v_float32x4 w10v = v_setall_f32(w10); \ - v_float32x4 w11v = v_setall_f32(w11); \ - \ - v_uint8x16 I0_row_16, I1_row_16, I1_row_shifted_16, I1_row_next_16, I1_row_next_shifted_16; \ - v_uint16x8 I0_row_8, I1_row_8, I1_row_shifted_8, I1_row_next_8, I1_row_next_shifted_8, tmp; \ - v_uint32x4 I0_row_4_left, I1_row_4_left, I1_row_shifted_4_left, I1_row_next_4_left, I1_row_next_shifted_4_left; \ - v_uint32x4 I0_row_4_right, I1_row_4_right, I1_row_shifted_4_right, I1_row_next_4_right, \ - I1_row_next_shifted_4_right; \ - v_float32x4 I_diff_left, I_diff_right; \ - \ - /* Preload and expand the first row of I1: */ \ - I1_row_16 = v_load(I1_ptr); \ - I1_row_shifted_16 = v_extract<1>(I1_row_16, I1_row_16); \ - v_expand(I1_row_16, I1_row_8, tmp); \ - v_expand(I1_row_shifted_16, I1_row_shifted_8, tmp); \ - v_expand(I1_row_8, I1_row_4_left, I1_row_4_right); \ - v_expand(I1_row_shifted_8, I1_row_shifted_4_left, I1_row_shifted_4_right); \ - I1_ptr += I1_stride; - -#define HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION \ - /* Load the next row of I1: */ \ - I1_row_next_16 = v_load(I1_ptr); \ - /* Circular shift left by 1 element: */ \ - I1_row_next_shifted_16 = v_extract<1>(I1_row_next_16, I1_row_next_16); \ - /* Expand to 8 ushorts (we only need the first 8 values): */ \ - v_expand(I1_row_next_16, I1_row_next_8, tmp); \ - v_expand(I1_row_next_shifted_16, I1_row_next_shifted_8, tmp); \ - /* Separate the left and right halves: */ \ - v_expand(I1_row_next_8, I1_row_next_4_left, I1_row_next_4_right); \ - v_expand(I1_row_next_shifted_8, I1_row_next_shifted_4_left, I1_row_next_shifted_4_right); \ - \ - /* Load current row of I0: */ \ - I0_row_16 = v_load(I0_ptr); \ - v_expand(I0_row_16, I0_row_8, tmp); \ - v_expand(I0_row_8, I0_row_4_left, I0_row_4_right); \ - \ - /* Compute diffs between I0 and bilinearly interpolated I1: */ \ - I_diff_left = w00v * v_cvt_f32(v_reinterpret_as_s32(I1_row_4_left)) + \ - w01v * v_cvt_f32(v_reinterpret_as_s32(I1_row_shifted_4_left)) + \ - w10v * v_cvt_f32(v_reinterpret_as_s32(I1_row_next_4_left)) + \ - w11v * v_cvt_f32(v_reinterpret_as_s32(I1_row_next_shifted_4_left)) - \ - v_cvt_f32(v_reinterpret_as_s32(I0_row_4_left)); \ - I_diff_right = w00v * v_cvt_f32(v_reinterpret_as_s32(I1_row_4_right)) + \ - w01v * v_cvt_f32(v_reinterpret_as_s32(I1_row_shifted_4_right)) + \ - w10v * v_cvt_f32(v_reinterpret_as_s32(I1_row_next_4_right)) + \ - w11v * v_cvt_f32(v_reinterpret_as_s32(I1_row_next_shifted_4_right)) - \ - v_cvt_f32(v_reinterpret_as_s32(I0_row_4_right)); - -#define HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW \ - I0_ptr += I0_stride; \ - I1_ptr += I1_stride; \ - \ - I1_row_4_left = I1_row_next_4_left; \ - I1_row_4_right = I1_row_next_4_right; \ - I1_row_shifted_4_left = I1_row_next_shifted_4_left; \ - I1_row_shifted_4_right = I1_row_next_shifted_4_right; - -/* This function essentially performs one iteration of gradient descent when finding the most similar patch in I1 for a - * given one in I0. It assumes that I0_ptr and I1_ptr already point to the corresponding patches and w00, w01, w10, w11 - * are precomputed bilinear interpolation weights. It returns the SSD (sum of squared differences) between these patches - * and computes the values (dst_dUx, dst_dUy) that are used in the flow vector update. HAL acceleration is implemented - * only for the default patch size (8x8). Everything is processed in floats as using fixed-point approximations harms - * the quality significantly. - */ -inline float processPatch(float &dst_dUx, float &dst_dUy, uchar *I0_ptr, uchar *I1_ptr, short *I0x_ptr, short *I0y_ptr, - int I0_stride, int I1_stride, float w00, float w01, float w10, float w11, int patch_sz) -{ - float SSD = 0.0f; -#ifdef CV_SIMD128 - if (patch_sz == 8) - { - /* Variables to accumulate the sums */ - v_float32x4 Ux_vec = v_setall_f32(0); - v_float32x4 Uy_vec = v_setall_f32(0); - v_float32x4 SSD_vec = v_setall_f32(0); - - v_int16x8 I0x_row, I0y_row; - v_int32x4 I0x_row_4_left, I0x_row_4_right, I0y_row_4_left, I0y_row_4_right; - - HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION; - for (int row = 0; row < 8; row++) - { - HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION; - I0x_row = v_load(I0x_ptr); - v_expand(I0x_row, I0x_row_4_left, I0x_row_4_right); - I0y_row = v_load(I0y_ptr); - v_expand(I0y_row, I0y_row_4_left, I0y_row_4_right); - - /* Update the sums: */ - Ux_vec += I_diff_left * v_cvt_f32(I0x_row_4_left) + I_diff_right * v_cvt_f32(I0x_row_4_right); - Uy_vec += I_diff_left * v_cvt_f32(I0y_row_4_left) + I_diff_right * v_cvt_f32(I0y_row_4_right); - SSD_vec += I_diff_left * I_diff_left + I_diff_right * I_diff_right; - - I0x_ptr += I0_stride; - I0y_ptr += I0_stride; - HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW; - } - - /* Final reduce operations: */ - dst_dUx = v_reduce_sum(Ux_vec); - dst_dUy = v_reduce_sum(Uy_vec); - SSD = v_reduce_sum(SSD_vec); - } - else - { -#endif - dst_dUx = 0.0f; - dst_dUy = 0.0f; - float diff; - for (int i = 0; i < patch_sz; i++) - for (int j = 0; j < patch_sz; j++) - { - diff = w00 * I1_ptr[i * I1_stride + j] + w01 * I1_ptr[i * I1_stride + j + 1] + - w10 * I1_ptr[(i + 1) * I1_stride + j] + w11 * I1_ptr[(i + 1) * I1_stride + j + 1] - - I0_ptr[i * I0_stride + j]; - - SSD += diff * diff; - dst_dUx += diff * I0x_ptr[i * I0_stride + j]; - dst_dUy += diff * I0y_ptr[i * I0_stride + j]; - } -#ifdef CV_SIMD128 - } -#endif - return SSD; -} - -/* Same as processPatch, but with patch mean normalization, which improves robustness under changing - * lighting conditions - */ -inline float processPatchMeanNorm(float &dst_dUx, float &dst_dUy, uchar *I0_ptr, uchar *I1_ptr, short *I0x_ptr, - short *I0y_ptr, int I0_stride, int I1_stride, float w00, float w01, float w10, - float w11, int patch_sz, float x_grad_sum, float y_grad_sum) -{ - float sum_diff = 0.0, sum_diff_sq = 0.0; - float sum_I0x_mul = 0.0, sum_I0y_mul = 0.0; - float n = (float)patch_sz * patch_sz; - -#ifdef CV_SIMD128 - if (patch_sz == 8) - { - /* Variables to accumulate the sums */ - v_float32x4 sum_I0x_mul_vec = v_setall_f32(0); - v_float32x4 sum_I0y_mul_vec = v_setall_f32(0); - v_float32x4 sum_diff_vec = v_setall_f32(0); - v_float32x4 sum_diff_sq_vec = v_setall_f32(0); - - v_int16x8 I0x_row, I0y_row; - v_int32x4 I0x_row_4_left, I0x_row_4_right, I0y_row_4_left, I0y_row_4_right; - - HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION; - for (int row = 0; row < 8; row++) - { - HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION; - I0x_row = v_load(I0x_ptr); - v_expand(I0x_row, I0x_row_4_left, I0x_row_4_right); - I0y_row = v_load(I0y_ptr); - v_expand(I0y_row, I0y_row_4_left, I0y_row_4_right); - - /* Update the sums: */ - sum_I0x_mul_vec += I_diff_left * v_cvt_f32(I0x_row_4_left) + I_diff_right * v_cvt_f32(I0x_row_4_right); - sum_I0y_mul_vec += I_diff_left * v_cvt_f32(I0y_row_4_left) + I_diff_right * v_cvt_f32(I0y_row_4_right); - sum_diff_sq_vec += I_diff_left * I_diff_left + I_diff_right * I_diff_right; - sum_diff_vec += I_diff_left + I_diff_right; - - I0x_ptr += I0_stride; - I0y_ptr += I0_stride; - HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW; - } - - /* Final reduce operations: */ - sum_I0x_mul = v_reduce_sum(sum_I0x_mul_vec); - sum_I0y_mul = v_reduce_sum(sum_I0y_mul_vec); - sum_diff = v_reduce_sum(sum_diff_vec); - sum_diff_sq = v_reduce_sum(sum_diff_sq_vec); - } - else - { -#endif - float diff; - for (int i = 0; i < patch_sz; i++) - for (int j = 0; j < patch_sz; j++) - { - diff = w00 * I1_ptr[i * I1_stride + j] + w01 * I1_ptr[i * I1_stride + j + 1] + - w10 * I1_ptr[(i + 1) * I1_stride + j] + w11 * I1_ptr[(i + 1) * I1_stride + j + 1] - - I0_ptr[i * I0_stride + j]; - - sum_diff += diff; - sum_diff_sq += diff * diff; - - sum_I0x_mul += diff * I0x_ptr[i * I0_stride + j]; - sum_I0y_mul += diff * I0y_ptr[i * I0_stride + j]; - } -#ifdef CV_SIMD128 - } -#endif - dst_dUx = sum_I0x_mul - sum_diff * x_grad_sum / n; - dst_dUy = sum_I0y_mul - sum_diff * y_grad_sum / n; - return sum_diff_sq - sum_diff * sum_diff / n; -} - -/* Similar to processPatch, but compute only the sum of squared differences (SSD) between the patches */ -inline float computeSSD(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int I1_stride, float w00, float w01, float w10, - float w11, int patch_sz) -{ - float SSD = 0.0f; -#ifdef CV_SIMD128 - if (patch_sz == 8) - { - v_float32x4 SSD_vec = v_setall_f32(0); - HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION; - for (int row = 0; row < 8; row++) - { - HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION; - SSD_vec += I_diff_left * I_diff_left + I_diff_right * I_diff_right; - HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW; - } - SSD = v_reduce_sum(SSD_vec); - } - else - { -#endif - float diff; - for (int i = 0; i < patch_sz; i++) - for (int j = 0; j < patch_sz; j++) - { - diff = w00 * I1_ptr[i * I1_stride + j] + w01 * I1_ptr[i * I1_stride + j + 1] + - w10 * I1_ptr[(i + 1) * I1_stride + j] + w11 * I1_ptr[(i + 1) * I1_stride + j + 1] - - I0_ptr[i * I0_stride + j]; - SSD += diff * diff; - } -#ifdef CV_SIMD128 - } -#endif - return SSD; -} - -/* Same as computeSSD, but with patch mean normalization */ -inline float computeSSDMeanNorm(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int I1_stride, float w00, float w01, - float w10, float w11, int patch_sz) -{ - float sum_diff = 0.0f, sum_diff_sq = 0.0f; - float n = (float)patch_sz * patch_sz; -#ifdef CV_SIMD128 - if (patch_sz == 8) - { - v_float32x4 sum_diff_vec = v_setall_f32(0); - v_float32x4 sum_diff_sq_vec = v_setall_f32(0); - HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION; - for (int row = 0; row < 8; row++) - { - HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION; - sum_diff_sq_vec += I_diff_left * I_diff_left + I_diff_right * I_diff_right; - sum_diff_vec += I_diff_left + I_diff_right; - HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW; - } - sum_diff = v_reduce_sum(sum_diff_vec); - sum_diff_sq = v_reduce_sum(sum_diff_sq_vec); - } - else - { -#endif - float diff; - for (int i = 0; i < patch_sz; i++) - for (int j = 0; j < patch_sz; j++) - { - diff = w00 * I1_ptr[i * I1_stride + j] + w01 * I1_ptr[i * I1_stride + j + 1] + - w10 * I1_ptr[(i + 1) * I1_stride + j] + w11 * I1_ptr[(i + 1) * I1_stride + j + 1] - - I0_ptr[i * I0_stride + j]; - - sum_diff += diff; - sum_diff_sq += diff * diff; - } -#ifdef CV_SIMD128 - } -#endif - return sum_diff_sq - sum_diff * sum_diff / n; -} - -#undef HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION -#undef HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION -#undef HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -void DISOpticalFlowImpl::PatchInverseSearch_ParBody::operator()(const Range &range) const -{ - // force separate processing of stripes if we are using spatial propagation: - if (dis->use_spatial_propagation && range.end > range.start + 1) - { - for (int n = range.start; n < range.end; n++) - (*this)(Range(n, n + 1)); - return; - } - int psz = dis->patch_size; - int psz2 = psz / 2; - int w_ext = dis->w + 2 * dis->border_size; //!< width of I1_ext - int bsz = dis->border_size; - - /* Input dense flow */ - float *Ux_ptr = Ux->ptr(); - float *Uy_ptr = Uy->ptr(); - - /* Output sparse flow */ - float *Sx_ptr = Sx->ptr(); - float *Sy_ptr = Sy->ptr(); - - uchar *I0_ptr = I0->ptr(); - uchar *I1_ptr = I1->ptr(); - short *I0x_ptr = I0x->ptr(); - short *I0y_ptr = I0y->ptr(); - - /* Precomputed structure tensor */ - float *xx_ptr = dis->I0xx_buf.ptr(); - float *yy_ptr = dis->I0yy_buf.ptr(); - float *xy_ptr = dis->I0xy_buf.ptr(); - /* And extra buffers for mean-normalization: */ - float *x_ptr = dis->I0x_buf.ptr(); - float *y_ptr = dis->I0y_buf.ptr(); - - bool use_temporal_candidates = false; - float *initial_Ux_ptr = NULL, *initial_Uy_ptr = NULL; - if (!dis->initial_Ux.empty()) - { - initial_Ux_ptr = dis->initial_Ux[pyr_level].ptr(); - initial_Uy_ptr = dis->initial_Uy[pyr_level].ptr(); - use_temporal_candidates = true; - } - - int i, j, dir; - int start_is, end_is, start_js, end_js; - int start_i, start_j; - float i_lower_limit = bsz - psz + 1.0f; - float i_upper_limit = bsz + dis->h - 1.0f; - float j_lower_limit = bsz - psz + 1.0f; - float j_upper_limit = bsz + dis->w - 1.0f; - float dUx, dUy, i_I1, j_I1, w00, w01, w10, w11, dx, dy; - -#define INIT_BILINEAR_WEIGHTS(Ux, Uy) \ - i_I1 = min(max(i + Uy + bsz, i_lower_limit), i_upper_limit); \ - j_I1 = min(max(j + Ux + bsz, j_lower_limit), j_upper_limit); \ - \ - w11 = (i_I1 - floor(i_I1)) * (j_I1 - floor(j_I1)); \ - w10 = (i_I1 - floor(i_I1)) * (floor(j_I1) + 1 - j_I1); \ - w01 = (floor(i_I1) + 1 - i_I1) * (j_I1 - floor(j_I1)); \ - w00 = (floor(i_I1) + 1 - i_I1) * (floor(j_I1) + 1 - j_I1); - -#define COMPUTE_SSD(dst, Ux, Uy) \ - INIT_BILINEAR_WEIGHTS(Ux, Uy); \ - if (dis->use_mean_normalization) \ - dst = computeSSDMeanNorm(I0_ptr + i * dis->w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, dis->w, w_ext, w00, \ - w01, w10, w11, psz); \ - else \ - dst = computeSSD(I0_ptr + i * dis->w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, dis->w, w_ext, w00, w01, \ - w10, w11, psz); - - int num_inner_iter = (int)floor(dis->grad_descent_iter / (float)num_iter); - for (int iter = 0; iter < num_iter; iter++) - { - if (iter % 2 == 0) - { - dir = 1; - start_is = min(range.start * stripe_sz, hs); - end_is = min(range.end * stripe_sz, hs); - start_js = 0; - end_js = dis->ws; - start_i = start_is * dis->patch_stride; - start_j = 0; - } - else - { - dir = -1; - start_is = min(range.end * stripe_sz, hs) - 1; - end_is = min(range.start * stripe_sz, hs) - 1; - start_js = dis->ws - 1; - end_js = -1; - start_i = start_is * dis->patch_stride; - start_j = (dis->ws - 1) * dis->patch_stride; - } - - i = start_i; - for (int is = start_is; dir * is < dir * end_is; is += dir) - { - j = start_j; - for (int js = start_js; dir * js < dir * end_js; js += dir) - { - if (iter == 0) - { - /* Using result form the previous pyramid level as the very first approximation: */ - Sx_ptr[is * dis->ws + js] = Ux_ptr[(i + psz2) * dis->w + j + psz2]; - Sy_ptr[is * dis->ws + js] = Uy_ptr[(i + psz2) * dis->w + j + psz2]; - } - - float min_SSD = INF, cur_SSD; - if (use_temporal_candidates || dis->use_spatial_propagation) - { - COMPUTE_SSD(min_SSD, Sx_ptr[is * dis->ws + js], Sy_ptr[is * dis->ws + js]); - } - - if (use_temporal_candidates) - { - /* Try temporal candidates (vectors from the initial flow field that was passed to the function) */ - COMPUTE_SSD(cur_SSD, initial_Ux_ptr[(i + psz2) * dis->w + j + psz2], - initial_Uy_ptr[(i + psz2) * dis->w + j + psz2]); - if (cur_SSD < min_SSD) - { - min_SSD = cur_SSD; - Sx_ptr[is * dis->ws + js] = initial_Ux_ptr[(i + psz2) * dis->w + j + psz2]; - Sy_ptr[is * dis->ws + js] = initial_Uy_ptr[(i + psz2) * dis->w + j + psz2]; - } - } - - if (dis->use_spatial_propagation) - { - /* Try spatial candidates: */ - if (dir * js > dir * start_js) - { - COMPUTE_SSD(cur_SSD, Sx_ptr[is * dis->ws + js - dir], Sy_ptr[is * dis->ws + js - dir]); - if (cur_SSD < min_SSD) - { - min_SSD = cur_SSD; - Sx_ptr[is * dis->ws + js] = Sx_ptr[is * dis->ws + js - dir]; - Sy_ptr[is * dis->ws + js] = Sy_ptr[is * dis->ws + js - dir]; - } - } - /* Flow vectors won't actually propagate across different stripes, which is the reason for keeping - * the number of stripes constant. It works well enough in practice and doesn't introduce any - * visible seams. - */ - if (dir * is > dir * start_is) - { - COMPUTE_SSD(cur_SSD, Sx_ptr[(is - dir) * dis->ws + js], Sy_ptr[(is - dir) * dis->ws + js]); - if (cur_SSD < min_SSD) - { - min_SSD = cur_SSD; - Sx_ptr[is * dis->ws + js] = Sx_ptr[(is - dir) * dis->ws + js]; - Sy_ptr[is * dis->ws + js] = Sy_ptr[(is - dir) * dis->ws + js]; - } - } - } - - /* Use the best candidate as a starting point for the gradient descent: */ - float cur_Ux = Sx_ptr[is * dis->ws + js]; - float cur_Uy = Sy_ptr[is * dis->ws + js]; - - /* Computing the inverse of the structure tensor: */ - float detH = xx_ptr[is * dis->ws + js] * yy_ptr[is * dis->ws + js] - - xy_ptr[is * dis->ws + js] * xy_ptr[is * dis->ws + js]; - if (abs(detH) < EPS) - detH = EPS; - float invH11 = yy_ptr[is * dis->ws + js] / detH; - float invH12 = -xy_ptr[is * dis->ws + js] / detH; - float invH22 = xx_ptr[is * dis->ws + js] / detH; - float prev_SSD = INF, SSD; - float x_grad_sum = x_ptr[is * dis->ws + js]; - float y_grad_sum = y_ptr[is * dis->ws + js]; - - for (int t = 0; t < num_inner_iter; t++) - { - INIT_BILINEAR_WEIGHTS(cur_Ux, cur_Uy); - if (dis->use_mean_normalization) - SSD = processPatchMeanNorm(dUx, dUy, I0_ptr + i * dis->w + j, - I1_ptr + (int)i_I1 * w_ext + (int)j_I1, I0x_ptr + i * dis->w + j, - I0y_ptr + i * dis->w + j, dis->w, w_ext, w00, w01, w10, w11, psz, - x_grad_sum, y_grad_sum); - else - SSD = processPatch(dUx, dUy, I0_ptr + i * dis->w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, - I0x_ptr + i * dis->w + j, I0y_ptr + i * dis->w + j, dis->w, w_ext, w00, w01, - w10, w11, psz); - - dx = invH11 * dUx + invH12 * dUy; - dy = invH12 * dUx + invH22 * dUy; - cur_Ux -= dx; - cur_Uy -= dy; - - /* Break when patch distance stops decreasing */ - if (SSD >= prev_SSD) - break; - prev_SSD = SSD; - } - - /* If gradient descent converged to a flow vector that is very far from the initial approximation - * (more than patch size) then we don't use it. Noticeably improves the robustness. - */ - if (norm(Vec2f(cur_Ux - Sx_ptr[is * dis->ws + js], cur_Uy - Sy_ptr[is * dis->ws + js])) <= psz) - { - Sx_ptr[is * dis->ws + js] = cur_Ux; - Sy_ptr[is * dis->ws + js] = cur_Uy; - } - j += dir * dis->patch_stride; - } - i += dir * dis->patch_stride; - } - } -#undef INIT_BILINEAR_WEIGHTS -#undef COMPUTE_SSD -} - -DISOpticalFlowImpl::Densification_ParBody::Densification_ParBody(DISOpticalFlowImpl &_dis, int _nstripes, int _h, - Mat &dst_Ux, Mat &dst_Uy, Mat &src_Sx, Mat &src_Sy, - Mat &_I0, Mat &_I1) - : dis(&_dis), nstripes(_nstripes), h(_h), Ux(&dst_Ux), Uy(&dst_Uy), Sx(&src_Sx), Sy(&src_Sy), I0(&_I0), I1(&_I1) -{ - stripe_sz = (int)ceil(h / (double)nstripes); -} - -/* This function transforms a sparse optical flow field obtained by PatchInverseSearch (which computes flow values - * on a sparse grid defined by patch_stride) into a dense optical flow field by weighted averaging of values from the - * overlapping patches. - */ -void DISOpticalFlowImpl::Densification_ParBody::operator()(const Range &range) const -{ - int start_i = min(range.start * stripe_sz, h); - int end_i = min(range.end * stripe_sz, h); - - /* Input sparse flow */ - float *Sx_ptr = Sx->ptr(); - float *Sy_ptr = Sy->ptr(); - - /* Output dense flow */ - float *Ux_ptr = Ux->ptr(); - float *Uy_ptr = Uy->ptr(); - - uchar *I0_ptr = I0->ptr(); - uchar *I1_ptr = I1->ptr(); - - int psz = dis->patch_size; - int pstr = dis->patch_stride; - int i_l, i_u; - int j_l, j_u; - float i_m, j_m, diff; - - /* These values define the set of sparse grid locations that contain patches overlapping with the current dense flow - * location */ - int start_is, end_is; - int start_js, end_js; - -/* Some helper macros for updating this set of sparse grid locations */ -#define UPDATE_SPARSE_I_COORDINATES \ - if (i % pstr == 0 && i + psz <= h) \ - end_is++; \ - if (i - psz >= 0 && (i - psz) % pstr == 0 && start_is < end_is) \ - start_is++; - -#define UPDATE_SPARSE_J_COORDINATES \ - if (j % pstr == 0 && j + psz <= dis->w) \ - end_js++; \ - if (j - psz >= 0 && (j - psz) % pstr == 0 && start_js < end_js) \ - start_js++; - - start_is = 0; - end_is = -1; - for (int i = 0; i < start_i; i++) - { - UPDATE_SPARSE_I_COORDINATES; - } - for (int i = start_i; i < end_i; i++) - { - UPDATE_SPARSE_I_COORDINATES; - start_js = 0; - end_js = -1; - for (int j = 0; j < dis->w; j++) - { - UPDATE_SPARSE_J_COORDINATES; - float coef, sum_coef = 0.0f; - float sum_Ux = 0.0f; - float sum_Uy = 0.0f; - - /* Iterate through all the patches that overlap the current location (i,j) */ - for (int is = start_is; is <= end_is; is++) - for (int js = start_js; js <= end_js; js++) - { - j_m = min(max(j + Sx_ptr[is * dis->ws + js], 0.0f), dis->w - 1.0f - EPS); - i_m = min(max(i + Sy_ptr[is * dis->ws + js], 0.0f), dis->h - 1.0f - EPS); - j_l = (int)j_m; - j_u = j_l + 1; - i_l = (int)i_m; - i_u = i_l + 1; - diff = (j_m - j_l) * (i_m - i_l) * I1_ptr[i_u * dis->w + j_u] + - (j_u - j_m) * (i_m - i_l) * I1_ptr[i_u * dis->w + j_l] + - (j_m - j_l) * (i_u - i_m) * I1_ptr[i_l * dis->w + j_u] + - (j_u - j_m) * (i_u - i_m) * I1_ptr[i_l * dis->w + j_l] - I0_ptr[i * dis->w + j]; - coef = 1 / max(1.0f, abs(diff)); - sum_Ux += coef * Sx_ptr[is * dis->ws + js]; - sum_Uy += coef * Sy_ptr[is * dis->ws + js]; - sum_coef += coef; - } - Ux_ptr[i * dis->w + j] = sum_Ux / sum_coef; - Uy_ptr[i * dis->w + j] = sum_Uy / sum_coef; - } - } -#undef UPDATE_SPARSE_I_COORDINATES -#undef UPDATE_SPARSE_J_COORDINATES -} - -#ifdef HAVE_OPENCL -bool DISOpticalFlowImpl::ocl_PatchInverseSearch(UMat &src_Ux, UMat &src_Uy, - UMat &I0, UMat &I1, UMat &I0x, UMat &I0y, int num_iter, int pyr_level) -{ - size_t globalSize[] = {(size_t)ws, (size_t)hs}; - size_t localSize[] = {16, 16}; - int idx; - int num_inner_iter = (int)floor(grad_descent_iter / (float)num_iter); - - for (int iter = 0; iter < num_iter; iter++) - { - if (iter == 0) - { - ocl::Kernel k1("dis_patch_inverse_search_fwd_1", ocl::optflow::dis_flow_oclsrc); - size_t global_sz[] = {(size_t)hs * 8}; - size_t local_sz[] = {8}; - idx = 0; - - idx = k1.set(idx, ocl::KernelArg::PtrReadOnly(src_Ux)); - idx = k1.set(idx, ocl::KernelArg::PtrReadOnly(src_Uy)); - idx = k1.set(idx, ocl::KernelArg::PtrReadOnly(I0)); - idx = k1.set(idx, ocl::KernelArg::PtrReadOnly(I1)); - idx = k1.set(idx, (int)border_size); - idx = k1.set(idx, (int)patch_size); - idx = k1.set(idx, (int)patch_stride); - idx = k1.set(idx, (int)w); - idx = k1.set(idx, (int)h); - idx = k1.set(idx, (int)ws); - idx = k1.set(idx, (int)hs); - idx = k1.set(idx, (int)pyr_level); - idx = k1.set(idx, ocl::KernelArg::PtrWriteOnly(u_Sx)); - idx = k1.set(idx, ocl::KernelArg::PtrWriteOnly(u_Sy)); - if (!k1.run(1, global_sz, local_sz, false)) - return false; - - ocl::Kernel k2("dis_patch_inverse_search_fwd_2", ocl::optflow::dis_flow_oclsrc); - idx = 0; - - idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(src_Ux)); - idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(src_Uy)); - idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(I0)); - idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(I1)); - idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(I0x)); - idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(I0y)); - idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(u_I0xx_buf)); - idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(u_I0yy_buf)); - idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(u_I0xy_buf)); - idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(u_I0x_buf)); - idx = k2.set(idx, ocl::KernelArg::PtrReadOnly(u_I0y_buf)); - idx = k2.set(idx, (int)border_size); - idx = k2.set(idx, (int)patch_size); - idx = k2.set(idx, (int)patch_stride); - idx = k2.set(idx, (int)w); - idx = k2.set(idx, (int)h); - idx = k2.set(idx, (int)ws); - idx = k2.set(idx, (int)hs); - idx = k2.set(idx, (int)num_inner_iter); - idx = k2.set(idx, (int)pyr_level); - idx = k2.set(idx, ocl::KernelArg::PtrReadWrite(u_Sx)); - idx = k2.set(idx, ocl::KernelArg::PtrReadWrite(u_Sy)); - if (!k2.run(2, globalSize, localSize, false)) - return false; - } - else - { - ocl::Kernel k3("dis_patch_inverse_search_bwd_1", ocl::optflow::dis_flow_oclsrc); - size_t global_sz[] = {(size_t)hs * 8}; - size_t local_sz[] = {8}; - idx = 0; - - idx = k3.set(idx, ocl::KernelArg::PtrReadOnly(I0)); - idx = k3.set(idx, ocl::KernelArg::PtrReadOnly(I1)); - idx = k3.set(idx, (int)border_size); - idx = k3.set(idx, (int)patch_size); - idx = k3.set(idx, (int)patch_stride); - idx = k3.set(idx, (int)w); - idx = k3.set(idx, (int)h); - idx = k3.set(idx, (int)ws); - idx = k3.set(idx, (int)hs); - idx = k3.set(idx, (int)pyr_level); - idx = k3.set(idx, ocl::KernelArg::PtrReadWrite(u_Sx)); - idx = k3.set(idx, ocl::KernelArg::PtrReadWrite(u_Sy)); - if (!k3.run(1, global_sz, local_sz, false)) - return false; - - ocl::Kernel k4("dis_patch_inverse_search_bwd_2", ocl::optflow::dis_flow_oclsrc); - idx = 0; - - idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(I0)); - idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(I1)); - idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(I0x)); - idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(I0y)); - idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(u_I0xx_buf)); - idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(u_I0yy_buf)); - idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(u_I0xy_buf)); - idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(u_I0x_buf)); - idx = k4.set(idx, ocl::KernelArg::PtrReadOnly(u_I0y_buf)); - idx = k4.set(idx, (int)border_size); - idx = k4.set(idx, (int)patch_size); - idx = k4.set(idx, (int)patch_stride); - idx = k4.set(idx, (int)w); - idx = k4.set(idx, (int)h); - idx = k4.set(idx, (int)ws); - idx = k4.set(idx, (int)hs); - idx = k4.set(idx, (int)num_inner_iter); - idx = k4.set(idx, ocl::KernelArg::PtrReadWrite(u_Sx)); - idx = k4.set(idx, ocl::KernelArg::PtrReadWrite(u_Sy)); - if (!k4.run(2, globalSize, localSize, false)) - return false; - } - } - return true; -} - -bool DISOpticalFlowImpl::ocl_Densification(UMat &dst_Ux, UMat &dst_Uy, UMat &src_Sx, UMat &src_Sy, UMat &_I0, UMat &_I1) -{ - size_t globalSize[] = {(size_t)w, (size_t)h}; - size_t localSize[] = {16, 16}; - - ocl::Kernel kernel("dis_densification", ocl::optflow::dis_flow_oclsrc); - kernel.args(ocl::KernelArg::PtrReadOnly(src_Sx), - ocl::KernelArg::PtrReadOnly(src_Sy), - ocl::KernelArg::PtrReadOnly(_I0), - ocl::KernelArg::PtrReadOnly(_I1), - (int)patch_size, (int)patch_stride, - (int)w, (int)h, (int)ws, - ocl::KernelArg::PtrWriteOnly(dst_Ux), - ocl::KernelArg::PtrWriteOnly(dst_Uy)); - return kernel.run(2, globalSize, localSize, false); -} - -void DISOpticalFlowImpl::ocl_prepareBuffers(UMat &I0, UMat &I1, UMat &flow, bool use_flow) -{ - u_I0s.resize(coarsest_scale + 1); - u_I1s.resize(coarsest_scale + 1); - u_I1s_ext.resize(coarsest_scale + 1); - u_I0xs.resize(coarsest_scale + 1); - u_I0ys.resize(coarsest_scale + 1); - u_Ux.resize(coarsest_scale + 1); - u_Uy.resize(coarsest_scale + 1); - - vector flow_uv(2); - if (use_flow) - { - split(flow, flow_uv); - u_initial_Ux.resize(coarsest_scale + 1); - u_initial_Uy.resize(coarsest_scale + 1); - } - - int fraction = 1; - int cur_rows = 0, cur_cols = 0; - - for (int i = 0; i <= coarsest_scale; i++) - { - /* Avoid initializing the pyramid levels above the finest scale, as they won't be used anyway */ - if (i == finest_scale) - { - cur_rows = I0.rows / fraction; - cur_cols = I0.cols / fraction; - u_I0s[i].create(cur_rows, cur_cols, CV_8UC1); - resize(I0, u_I0s[i], u_I0s[i].size(), 0.0, 0.0, INTER_AREA); - u_I1s[i].create(cur_rows, cur_cols, CV_8UC1); - resize(I1, u_I1s[i], u_I1s[i].size(), 0.0, 0.0, INTER_AREA); - - /* These buffers are reused in each scale so we initialize them once on the finest scale: */ - u_Sx.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1); - u_Sy.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1); - u_I0xx_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1); - u_I0yy_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1); - u_I0xy_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1); - u_I0x_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1); - u_I0y_buf.create(cur_rows / patch_stride, cur_cols / patch_stride, CV_32FC1); - - u_I0xx_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1); - u_I0yy_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1); - u_I0xy_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1); - u_I0x_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1); - u_I0y_buf_aux.create(cur_rows, cur_cols / patch_stride, CV_32FC1); - - u_U.create(cur_rows, cur_cols, CV_32FC2); - } - else if (i > finest_scale) - { - cur_rows = u_I0s[i - 1].rows / 2; - cur_cols = u_I0s[i - 1].cols / 2; - u_I0s[i].create(cur_rows, cur_cols, CV_8UC1); - resize(u_I0s[i - 1], u_I0s[i], u_I0s[i].size(), 0.0, 0.0, INTER_AREA); - u_I1s[i].create(cur_rows, cur_cols, CV_8UC1); - resize(u_I1s[i - 1], u_I1s[i], u_I1s[i].size(), 0.0, 0.0, INTER_AREA); - } - - if (i >= finest_scale) - { - u_I1s_ext[i].create(cur_rows + 2 * border_size, cur_cols + 2 * border_size, CV_8UC1); - copyMakeBorder(u_I1s[i], u_I1s_ext[i], border_size, border_size, border_size, border_size, BORDER_REPLICATE); - u_I0xs[i].create(cur_rows, cur_cols, CV_16SC1); - u_I0ys[i].create(cur_rows, cur_cols, CV_16SC1); - spatialGradient(u_I0s[i], u_I0xs[i], u_I0ys[i]); - u_Ux[i].create(cur_rows, cur_cols, CV_32FC1); - u_Uy[i].create(cur_rows, cur_cols, CV_32FC1); - variational_refinement_processors[i]->setAlpha(variational_refinement_alpha); - variational_refinement_processors[i]->setDelta(variational_refinement_delta); - variational_refinement_processors[i]->setGamma(variational_refinement_gamma); - variational_refinement_processors[i]->setSorIterations(5); - variational_refinement_processors[i]->setFixedPointIterations(variational_refinement_iter); - - if (use_flow) - { - resize(flow_uv[0], u_initial_Ux[i], Size(cur_cols, cur_rows)); - divide(u_initial_Ux[i], static_cast(fraction), u_initial_Ux[i]); - resize(flow_uv[1], u_initial_Uy[i], Size(cur_cols, cur_rows)); - divide(u_initial_Uy[i], static_cast(fraction), u_initial_Uy[i]); - } - } - - fraction *= 2; - } -} - -bool DISOpticalFlowImpl::ocl_precomputeStructureTensor(UMat &dst_I0xx, UMat &dst_I0yy, UMat &dst_I0xy, - UMat &dst_I0x, UMat &dst_I0y, UMat &I0x, UMat &I0y) -{ - size_t globalSizeX[] = {(size_t)h}; - size_t localSizeX[] = {16}; - - ocl::Kernel kernelX("dis_precomputeStructureTensor_hor", ocl::optflow::dis_flow_oclsrc); - kernelX.args(ocl::KernelArg::PtrReadOnly(I0x), - ocl::KernelArg::PtrReadOnly(I0y), - (int)patch_size, (int)patch_stride, - (int)w, (int)h, (int)ws, - ocl::KernelArg::PtrWriteOnly(u_I0xx_buf_aux), - ocl::KernelArg::PtrWriteOnly(u_I0yy_buf_aux), - ocl::KernelArg::PtrWriteOnly(u_I0xy_buf_aux), - ocl::KernelArg::PtrWriteOnly(u_I0x_buf_aux), - ocl::KernelArg::PtrWriteOnly(u_I0y_buf_aux)); - if (!kernelX.run(1, globalSizeX, localSizeX, false)) - return false; - - size_t globalSizeY[] = {(size_t)ws}; - size_t localSizeY[] = {16}; - - ocl::Kernel kernelY("dis_precomputeStructureTensor_ver", ocl::optflow::dis_flow_oclsrc); - kernelY.args(ocl::KernelArg::PtrReadOnly(u_I0xx_buf_aux), - ocl::KernelArg::PtrReadOnly(u_I0yy_buf_aux), - ocl::KernelArg::PtrReadOnly(u_I0xy_buf_aux), - ocl::KernelArg::PtrReadOnly(u_I0x_buf_aux), - ocl::KernelArg::PtrReadOnly(u_I0y_buf_aux), - (int)patch_size, (int)patch_stride, - (int)w, (int)h, (int)ws, - ocl::KernelArg::PtrWriteOnly(dst_I0xx), - ocl::KernelArg::PtrWriteOnly(dst_I0yy), - ocl::KernelArg::PtrWriteOnly(dst_I0xy), - ocl::KernelArg::PtrWriteOnly(dst_I0x), - ocl::KernelArg::PtrWriteOnly(dst_I0y)); - return kernelY.run(1, globalSizeY, localSizeY, false); -} - -bool DISOpticalFlowImpl::ocl_calc(InputArray I0, InputArray I1, InputOutputArray flow) -{ - UMat I0Mat = I0.getUMat(); - UMat I1Mat = I1.getUMat(); - bool use_input_flow = false; - if (flow.sameSize(I0) && flow.depth() == CV_32F && flow.channels() == 2) - use_input_flow = true; - else - flow.create(I1Mat.size(), CV_32FC2); - UMat &u_flowMat = flow.getUMatRef(); - coarsest_scale = min((int)(log(max(I0Mat.cols, I0Mat.rows) / (4.0 * patch_size)) / log(2.0) + 0.5), /* Original code serach for maximal movement of width/4 */ - (int)(log(min(I0Mat.cols, I0Mat.rows) / patch_size) / log(2.0))); /* Deepest pyramid level greater or equal than patch*/ - - ocl_prepareBuffers(I0Mat, I1Mat, u_flowMat, use_input_flow); - u_Ux[coarsest_scale].setTo(0.0f); - u_Uy[coarsest_scale].setTo(0.0f); - - for (int i = coarsest_scale; i >= finest_scale; i--) - { - w = u_I0s[i].cols; - h = u_I0s[i].rows; - ws = 1 + (w - patch_size) / patch_stride; - hs = 1 + (h - patch_size) / patch_stride; - - if (!ocl_precomputeStructureTensor(u_I0xx_buf, u_I0yy_buf, u_I0xy_buf, - u_I0x_buf, u_I0y_buf, u_I0xs[i], u_I0ys[i])) - return false; - - if (!ocl_PatchInverseSearch(u_Ux[i], u_Uy[i], u_I0s[i], u_I1s_ext[i], u_I0xs[i], u_I0ys[i], 2, i)) - return false; - - if (!ocl_Densification(u_Ux[i], u_Uy[i], u_Sx, u_Sy, u_I0s[i], u_I1s[i])) - return false; - - if (variational_refinement_iter > 0) - variational_refinement_processors[i]->calcUV(u_I0s[i], u_I1s[i], - u_Ux[i].getMat(ACCESS_WRITE), u_Uy[i].getMat(ACCESS_WRITE)); - - if (i > finest_scale) - { - resize(u_Ux[i], u_Ux[i - 1], u_Ux[i - 1].size()); - resize(u_Uy[i], u_Uy[i - 1], u_Uy[i - 1].size()); - multiply(u_Ux[i - 1], 2, u_Ux[i - 1]); - multiply(u_Uy[i - 1], 2, u_Uy[i - 1]); - } - } - vector uxy(2); - uxy[0] = u_Ux[finest_scale]; - uxy[1] = u_Uy[finest_scale]; - merge(uxy, u_U); - resize(u_U, u_flowMat, u_flowMat.size()); - multiply(u_flowMat, 1 << finest_scale, u_flowMat); - - return true; -} -#endif - -void DISOpticalFlowImpl::calc(InputArray I0, InputArray I1, InputOutputArray flow) -{ - CV_Assert(!I0.empty() && I0.depth() == CV_8U && I0.channels() == 1); - CV_Assert(!I1.empty() && I1.depth() == CV_8U && I1.channels() == 1); - CV_Assert(I0.sameSize(I1)); - CV_Assert(I0.isContinuous()); - CV_Assert(I1.isContinuous()); - - CV_OCL_RUN(ocl::Device::getDefault().isIntel() && flow.isUMat() && - (patch_size == 8) && (use_spatial_propagation == true), - ocl_calc(I0, I1, flow)); - - Mat I0Mat = I0.getMat(); - Mat I1Mat = I1.getMat(); - bool use_input_flow = false; - if (flow.sameSize(I0) && flow.depth() == CV_32F && flow.channels() == 2) - use_input_flow = true; - else - flow.create(I1Mat.size(), CV_32FC2); - Mat flowMat = flow.getMat(); - coarsest_scale = min((int)(log(max(I0Mat.cols, I0Mat.rows) / (4.0 * patch_size)) / log(2.0) + 0.5), /* Original code serach for maximal movement of width/4 */ - (int)(log(min(I0Mat.cols, I0Mat.rows) / patch_size) / log(2.0))); /* Deepest pyramid level greater or equal than patch*/ - int num_stripes = getNumThreads(); - - prepareBuffers(I0Mat, I1Mat, flowMat, use_input_flow); - Ux[coarsest_scale].setTo(0.0f); - Uy[coarsest_scale].setTo(0.0f); - - for (int i = coarsest_scale; i >= finest_scale; i--) - { - w = I0s[i].cols; - h = I0s[i].rows; - ws = 1 + (w - patch_size) / patch_stride; - hs = 1 + (h - patch_size) / patch_stride; - - precomputeStructureTensor(I0xx_buf, I0yy_buf, I0xy_buf, I0x_buf, I0y_buf, I0xs[i], I0ys[i]); - if (use_spatial_propagation) - { - /* Use a fixed number of stripes regardless the number of threads to make inverse search - * with spatial propagation reproducible - */ - parallel_for_(Range(0, 8), PatchInverseSearch_ParBody(*this, 8, hs, Sx, Sy, Ux[i], Uy[i], I0s[i], - I1s_ext[i], I0xs[i], I0ys[i], 2, i)); - } - else - { - parallel_for_(Range(0, num_stripes), - PatchInverseSearch_ParBody(*this, num_stripes, hs, Sx, Sy, Ux[i], Uy[i], I0s[i], I1s_ext[i], - I0xs[i], I0ys[i], 1, i)); - } - - parallel_for_(Range(0, num_stripes), - Densification_ParBody(*this, num_stripes, I0s[i].rows, Ux[i], Uy[i], Sx, Sy, I0s[i], I1s[i])); - if (variational_refinement_iter > 0) - variational_refinement_processors[i]->calcUV(I0s[i], I1s[i], Ux[i], Uy[i]); - - if (i > finest_scale) - { - resize(Ux[i], Ux[i - 1], Ux[i - 1].size()); - resize(Uy[i], Uy[i - 1], Uy[i - 1].size()); - Ux[i - 1] *= 2; - Uy[i - 1] *= 2; - } - } - Mat uxy[] = {Ux[finest_scale], Uy[finest_scale]}; - merge(uxy, 2, U); - resize(U, flowMat, flowMat.size()); - flowMat *= 1 << finest_scale; -} - -void DISOpticalFlowImpl::collectGarbage() -{ - I0s.clear(); - I1s.clear(); - I1s_ext.clear(); - I0xs.clear(); - I0ys.clear(); - Ux.clear(); - Uy.clear(); - U.release(); - Sx.release(); - Sy.release(); - I0xx_buf.release(); - I0yy_buf.release(); - I0xy_buf.release(); - I0xx_buf_aux.release(); - I0yy_buf_aux.release(); - I0xy_buf_aux.release(); - -#ifdef HAVE_OPENCL - u_I0s.clear(); - u_I1s.clear(); - u_I1s_ext.clear(); - u_I0xs.clear(); - u_I0ys.clear(); - u_Ux.clear(); - u_Uy.clear(); - u_U.release(); - u_Sx.release(); - u_Sy.release(); - u_I0xx_buf.release(); - u_I0yy_buf.release(); - u_I0xy_buf.release(); - u_I0xx_buf_aux.release(); - u_I0yy_buf_aux.release(); - u_I0xy_buf_aux.release(); -#endif - - for (int i = finest_scale; i <= coarsest_scale; i++) - variational_refinement_processors[i]->collectGarbage(); - variational_refinement_processors.clear(); -} - -Ptr createOptFlow_DIS(int preset) -{ - Ptr dis = makePtr(); - dis->setPatchSize(8); - if (preset == DISOpticalFlow::PRESET_ULTRAFAST) - { - dis->setFinestScale(2); - dis->setPatchStride(4); - dis->setGradientDescentIterations(12); - dis->setVariationalRefinementIterations(0); - } - else if (preset == DISOpticalFlow::PRESET_FAST) - { - dis->setFinestScale(2); - dis->setPatchStride(4); - dis->setGradientDescentIterations(16); - dis->setVariationalRefinementIterations(5); - } - else if (preset == DISOpticalFlow::PRESET_MEDIUM) - { - dis->setFinestScale(1); - dis->setPatchStride(3); - dis->setGradientDescentIterations(25); - dis->setVariationalRefinementIterations(5); - } - - return dis; -} -} -} diff --git a/modules/optflow/src/opencl/dis_flow.cl b/modules/optflow/src/opencl/dis_flow.cl deleted file mode 100644 index 54c642d9e..000000000 --- a/modules/optflow/src/opencl/dis_flow.cl +++ /dev/null @@ -1,522 +0,0 @@ -// This file is part of OpenCV project. -// It is subject to the license terms in the LICENSE file found in the top-level directory -// of this distribution and at http://opencv.org/license.html. - -#define EPS 0.001f -#define INF 1E+10F - -__kernel void dis_precomputeStructureTensor_hor(__global const short *I0x, - __global const short *I0y, - int patch_size, int patch_stride, - int w, int h, int ws, - __global float *I0xx_aux_ptr, - __global float *I0yy_aux_ptr, - __global float *I0xy_aux_ptr, - __global float *I0x_aux_ptr, - __global float *I0y_aux_ptr) -{ - - int i = get_global_id(0); - - if (i >= h) return; - - const __global short *x_row = I0x + i * w; - const __global short *y_row = I0y + i * w; - - float sum_xx = 0.0f, sum_yy = 0.0f, sum_xy = 0.0f, sum_x = 0.0f, sum_y = 0.0f; - float8 x_vec = convert_float8(vload8(0, x_row)); - float8 y_vec = convert_float8(vload8(0, y_row)); - sum_xx = dot(x_vec.lo, x_vec.lo) + dot(x_vec.hi, x_vec.hi); - sum_yy = dot(y_vec.lo, y_vec.lo) + dot(y_vec.hi, y_vec.hi); - sum_xy = dot(x_vec.lo, y_vec.lo) + dot(x_vec.hi, y_vec.hi); - sum_x = dot(x_vec.lo, 1.0f) + dot(x_vec.hi, 1.0f); - sum_y = dot(y_vec.lo, 1.0f) + dot(y_vec.hi, 1.0f); - - I0xx_aux_ptr[i * ws] = sum_xx; - I0yy_aux_ptr[i * ws] = sum_yy; - I0xy_aux_ptr[i * ws] = sum_xy; - I0x_aux_ptr[i * ws] = sum_x; - I0y_aux_ptr[i * ws] = sum_y; - - int js = 1; - for (int j = patch_size; j < w; j++) - { - short x_val1 = x_row[j]; - short x_val2 = x_row[j - patch_size]; - short y_val1 = y_row[j]; - short y_val2 = y_row[j - patch_size]; - sum_xx += (x_val1 * x_val1 - x_val2 * x_val2); - sum_yy += (y_val1 * y_val1 - y_val2 * y_val2); - sum_xy += (x_val1 * y_val1 - x_val2 * y_val2); - sum_x += (x_val1 - x_val2); - sum_y += (y_val1 - y_val2); - if ((j - patch_size + 1) % patch_stride == 0) - { - int index = i * ws + js; - I0xx_aux_ptr[index] = sum_xx; - I0yy_aux_ptr[index] = sum_yy; - I0xy_aux_ptr[index] = sum_xy; - I0x_aux_ptr[index] = sum_x; - I0y_aux_ptr[index] = sum_y; - js++; - } - } -} - -__kernel void dis_precomputeStructureTensor_ver(__global const float *I0xx_aux_ptr, - __global const float *I0yy_aux_ptr, - __global const float *I0xy_aux_ptr, - __global const float *I0x_aux_ptr, - __global const float *I0y_aux_ptr, - int patch_size, int patch_stride, - int w, int h, int ws, - __global float *I0xx_ptr, - __global float *I0yy_ptr, - __global float *I0xy_ptr, - __global float *I0x_ptr, - __global float *I0y_ptr) -{ - int j = get_global_id(0); - - if (j >= ws) return; - - float sum_xx, sum_yy, sum_xy, sum_x, sum_y; - sum_xx = sum_yy = sum_xy = sum_x = sum_y = 0.0f; - - for (int i = 0; i < patch_size; i++) - { - sum_xx += I0xx_aux_ptr[i * ws + j]; - sum_yy += I0yy_aux_ptr[i * ws + j]; - sum_xy += I0xy_aux_ptr[i * ws + j]; - sum_x += I0x_aux_ptr[i * ws + j]; - sum_y += I0y_aux_ptr[i * ws + j]; - } - I0xx_ptr[j] = sum_xx; - I0yy_ptr[j] = sum_yy; - I0xy_ptr[j] = sum_xy; - I0x_ptr[j] = sum_x; - I0y_ptr[j] = sum_y; - - int is = 1; - for (int i = patch_size; i < h; i++) - { - sum_xx += (I0xx_aux_ptr[i * ws + j] - I0xx_aux_ptr[(i - patch_size) * ws + j]); - sum_yy += (I0yy_aux_ptr[i * ws + j] - I0yy_aux_ptr[(i - patch_size) * ws + j]); - sum_xy += (I0xy_aux_ptr[i * ws + j] - I0xy_aux_ptr[(i - patch_size) * ws + j]); - sum_x += (I0x_aux_ptr[i * ws + j] - I0x_aux_ptr[(i - patch_size) * ws + j]); - sum_y += (I0y_aux_ptr[i * ws + j] - I0y_aux_ptr[(i - patch_size) * ws + j]); - - if ((i - patch_size + 1) % patch_stride == 0) - { - I0xx_ptr[is * ws + j] = sum_xx; - I0yy_ptr[is * ws + j] = sum_yy; - I0xy_ptr[is * ws + j] = sum_xy; - I0x_ptr[is * ws + j] = sum_x; - I0y_ptr[is * ws + j] = sum_y; - is++; - } - } -} - -__kernel void dis_densification(__global const float *sx, __global const float *sy, - __global const uchar *i0, __global const uchar *i1, - int psz, int pstr, - int w, int h, int ws, - __global float *ux, __global float *uy) -{ - int x = get_global_id(0); - int y = get_global_id(1); - int i, j; - - if (x >= w || y >= h) return; - - int start_is, end_is; - int start_js, end_js; - - end_is = min(y / pstr, (h - psz) / pstr); - start_is = max(0, y - psz + pstr) / pstr; - start_is = min(start_is, end_is); - - end_js = min(x / pstr, (w - psz) / pstr); - start_js = max(0, x - psz + pstr) / pstr; - start_js = min(start_js, end_js); - - float coef, sum_coef = 0.0f; - float sum_Ux = 0.0f; - float sum_Uy = 0.0f; - - int i_l, i_u; - int j_l, j_u; - float i_m, j_m, diff; - - i = y; - j = x; - - /* Iterate through all the patches that overlap the current location (i,j) */ - for (int is = start_is; is <= end_is; is++) - for (int js = start_js; js <= end_js; js++) - { - float sx_val = sx[is * ws + js]; - float sy_val = sy[is * ws + js]; - uchar2 i1_vec1, i1_vec2; - - j_m = min(max(j + sx_val, 0.0f), w - 1.0f - EPS); - i_m = min(max(i + sy_val, 0.0f), h - 1.0f - EPS); - j_l = (int)j_m; - j_u = j_l + 1; - i_l = (int)i_m; - i_u = i_l + 1; - i1_vec1 = vload2(0, i1 + i_u * w + j_l); - i1_vec2 = vload2(0, i1 + i_l * w + j_l); - diff = (j_m - j_l) * (i_m - i_l) * i1_vec1.y + - (j_u - j_m) * (i_m - i_l) * i1_vec1.x + - (j_m - j_l) * (i_u - i_m) * i1_vec2.y + - (j_u - j_m) * (i_u - i_m) * i1_vec2.x - i0[i * w + j]; - coef = 1 / max(1.0f, fabs(diff)); - sum_Ux += coef * sx_val; - sum_Uy += coef * sy_val; - sum_coef += coef; - } - - ux[i * w + j] = sum_Ux / sum_coef; - uy[i * w + j] = sum_Uy / sum_coef; -} - -#define INIT_BILINEAR_WEIGHTS(Ux, Uy) \ - i_I1 = min(max(i + Uy + bsz, i_lower_limit), i_upper_limit); \ - j_I1 = min(max(j + Ux + bsz, j_lower_limit), j_upper_limit); \ - \ - w11 = (i_I1 - floor(i_I1)) * (j_I1 - floor(j_I1)); \ - w10 = (i_I1 - floor(i_I1)) * (floor(j_I1) + 1 - j_I1); \ - w01 = (floor(i_I1) + 1 - i_I1) * (j_I1 - floor(j_I1)); \ - w00 = (floor(i_I1) + 1 - i_I1) * (floor(j_I1) + 1 - j_I1); - -float computeSSDMeanNorm(const __global uchar *I0_ptr, const __global uchar *I1_ptr, - int I0_stride, int I1_stride, - float w00, float w01, float w10, float w11, int patch_sz, int i) -{ - float sum_diff = 0.0f, sum_diff_sq = 0.0f; - int n = patch_sz * patch_sz; - - uchar8 I1_vec1, I1_vec2, I0_vec; - uchar I1_val1, I1_val2; - - I0_vec = vload8(0, I0_ptr + i * I0_stride); - I1_vec1 = vload8(0, I1_ptr + i * I1_stride); - I1_vec2 = vload8(0, I1_ptr + (i + 1) * I1_stride); - I1_val1 = I1_ptr[i * I1_stride + 8]; - I1_val2 = I1_ptr[(i + 1) * I1_stride + 8]; - - float8 vec = w00 * convert_float8(I1_vec1) + w01 * convert_float8((uchar8)(I1_vec1.s123, I1_vec1.s4567, I1_val1)) + - w10 * convert_float8(I1_vec2) + w11 * convert_float8((uchar8)(I1_vec2.s123, I1_vec2.s4567, I1_val2)) - - convert_float8(I0_vec); - - sum_diff = (dot(vec.lo, 1.0) + dot(vec.hi, 1.0)); - sum_diff_sq = (dot(vec.lo, vec.lo) + dot(vec.hi, vec.hi)); - - sum_diff = sub_group_reduce_add(sum_diff); - sum_diff_sq = sub_group_reduce_add(sum_diff_sq); - - return sum_diff_sq - sum_diff * sum_diff / n; -} - -__kernel void dis_patch_inverse_search_fwd_1(__global const float *Ux_ptr, __global const float *Uy_ptr, - __global const uchar *I0_ptr, __global const uchar *I1_ptr, - int border_size, int patch_size, int patch_stride, - int w, int h, int ws, int hs, int pyr_level, - __global float *Sx_ptr, __global float *Sy_ptr) -{ - int id = get_global_id(0); - int is = id / 8; - if (id >= (hs * 8)) return; - - int i = is * patch_stride; - int j = 0; - int psz = patch_size; - int psz2 = psz / 2; - int w_ext = w + 2 * border_size; - int bsz = border_size; - - float i_lower_limit = bsz - psz + 1.0f; - float i_upper_limit = bsz + h - 1.0f; - float j_lower_limit = bsz - psz + 1.0f; - float j_upper_limit = bsz + w - 1.0f; - float i_I1, j_I1, w00, w01, w10, w11; - - float prev_Ux = Ux_ptr[(i + psz2) * w + j + psz2]; - float prev_Uy = Uy_ptr[(i + psz2) * w + j + psz2]; - Sx_ptr[is * ws] = prev_Ux; - Sy_ptr[is * ws] = prev_Uy; - j += patch_stride; - - int sid = get_sub_group_local_id(); - for (int js = 1; js < ws; js++, j += patch_stride) - { - float min_SSD, cur_SSD; - float Ux = Ux_ptr[(i + psz2) * w + j + psz2]; - float Uy = Uy_ptr[(i + psz2) * w + j + psz2]; - - INIT_BILINEAR_WEIGHTS(Ux, Uy); - min_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, - w, w_ext, w00, w01, w10, w11, psz, sid); - - INIT_BILINEAR_WEIGHTS(prev_Ux, prev_Uy); - cur_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, - w, w_ext, w00, w01, w10, w11, psz, sid); - if (cur_SSD < min_SSD) - { - Ux = prev_Ux; - Uy = prev_Uy; - } - - prev_Ux = Ux; - prev_Uy = Uy; - Sx_ptr[is * ws + js] = Ux; - Sy_ptr[is * ws + js] = Uy; - } -} - -float3 processPatchMeanNorm(const __global uchar *I0_ptr, const __global uchar *I1_ptr, - const __global short *I0x_ptr, const __global short *I0y_ptr, - int I0_stride, int I1_stride, float w00, float w01, float w10, - float w11, int patch_sz, float x_grad_sum, float y_grad_sum) -{ - float sum_diff = 0.0, sum_diff_sq = 0.0; - float sum_I0x_mul = 0.0, sum_I0y_mul = 0.0; - int n = patch_sz * patch_sz; - uchar8 I1_vec1, I1_vec2; - uchar I1_val1, I1_val2; - - for (int i = 0; i < 8; i++) - { - uchar8 I0_vec = vload8(0, I0_ptr + i * I0_stride); - - I1_vec1 = (i == 0) ? vload8(0, I1_ptr + i * I1_stride) : I1_vec2; - I1_vec2 = vload8(0, I1_ptr + (i + 1) * I1_stride); - I1_val1 = (i == 0) ? I1_ptr[i * I1_stride + patch_sz] : I1_val2; - I1_val2 = I1_ptr[(i + 1) * I1_stride + patch_sz]; - - float8 vec = w00 * convert_float8(I1_vec1) + w01 * convert_float8((uchar8)(I1_vec1.s123, I1_vec1.s4567, I1_val1)) + - w10 * convert_float8(I1_vec2) + w11 * convert_float8((uchar8)(I1_vec2.s123, I1_vec2.s4567, I1_val2)) - - convert_float8(I0_vec); - - sum_diff += (dot(vec.lo, 1.0) + dot(vec.hi, 1.0)); - sum_diff_sq += (dot(vec.lo, vec.lo) + dot(vec.hi, vec.hi)); - - short8 I0x_vec = vload8(0, I0x_ptr + i * I0_stride); - short8 I0y_vec = vload8(0, I0y_ptr + i * I0_stride); - - sum_I0x_mul += dot(vec.lo, convert_float4(I0x_vec.lo)); - sum_I0x_mul += dot(vec.hi, convert_float4(I0x_vec.hi)); - sum_I0y_mul += dot(vec.lo, convert_float4(I0y_vec.lo)); - sum_I0y_mul += dot(vec.hi, convert_float4(I0y_vec.hi)); - } - - float dst_dUx = sum_I0x_mul - sum_diff * x_grad_sum / n; - float dst_dUy = sum_I0y_mul - sum_diff * y_grad_sum / n; - float SSD = sum_diff_sq - sum_diff * sum_diff / n; - - return (float3)(SSD, dst_dUx, dst_dUy); -} - -__kernel void dis_patch_inverse_search_fwd_2(__global const float *Ux_ptr, __global const float *Uy_ptr, - __global const uchar *I0_ptr, __global const uchar *I1_ptr, - __global const short *I0x_ptr, __global const short *I0y_ptr, - __global const float *xx_ptr, __global const float *yy_ptr, - __global const float *xy_ptr, - __global const float *x_ptr, __global const float *y_ptr, - int border_size, int patch_size, int patch_stride, - int w, int h, int ws, int hs, int num_inner_iter, int pyr_level, - __global float *Sx_ptr, __global float *Sy_ptr) -{ - int js = get_global_id(0); - int is = get_global_id(1); - int i = is * patch_stride; - int j = js * patch_stride; - int psz = patch_size; - int psz2 = psz / 2; - int w_ext = w + 2 * border_size; - int bsz = border_size; - int index = is * ws + js; - - if (js >= ws || is >= hs) return; - - float Ux = Sx_ptr[index]; - float Uy = Sy_ptr[index]; - float cur_Ux = Ux; - float cur_Uy = Uy; - float cur_xx = xx_ptr[index]; - float cur_yy = yy_ptr[index]; - float cur_xy = xy_ptr[index]; - float detH = cur_xx * cur_yy - cur_xy * cur_xy; - - if (fabs(detH) < EPS) detH = EPS; - - float invH11 = cur_yy / detH; - float invH12 = -cur_xy / detH; - float invH22 = cur_xx / detH; - float prev_SSD = INF, SSD; - float x_grad_sum = x_ptr[index]; - float y_grad_sum = y_ptr[index]; - - float i_lower_limit = bsz - psz + 1.0f; - float i_upper_limit = bsz + h - 1.0f; - float j_lower_limit = bsz - psz + 1.0f; - float j_upper_limit = bsz + w - 1.0f; - float dUx, dUy, i_I1, j_I1, w00, w01, w10, w11, dx, dy; - float3 res; - - for (int t = 0; t < num_inner_iter; t++) - { - INIT_BILINEAR_WEIGHTS(cur_Ux, cur_Uy); - res = processPatchMeanNorm(I0_ptr + i * w + j, - I1_ptr + (int)i_I1 * w_ext + (int)j_I1, I0x_ptr + i * w + j, - I0y_ptr + i * w + j, w, w_ext, w00, w01, w10, w11, psz, - x_grad_sum, y_grad_sum); - - SSD = res.x; - dUx = res.y; - dUy = res.z; - dx = invH11 * dUx + invH12 * dUy; - dy = invH12 * dUx + invH22 * dUy; - - cur_Ux -= dx; - cur_Uy -= dy; - - if (SSD >= prev_SSD) - break; - prev_SSD = SSD; - } - - float2 vec = (float2)(cur_Ux - Ux, cur_Uy - Uy); - if (dot(vec, vec) <= (float)(psz * psz)) - { - Sx_ptr[index] = cur_Ux; - Sy_ptr[index] = cur_Uy; - } -} - -__kernel void dis_patch_inverse_search_bwd_1(__global const uchar *I0_ptr, __global const uchar *I1_ptr, - int border_size, int patch_size, int patch_stride, - int w, int h, int ws, int hs, int pyr_level, - __global float *Sx_ptr, __global float *Sy_ptr) -{ - int id = get_global_id(0); - int is = id / 8; - if (id >= (hs * 8)) return; - - is = (hs - 1 - is); - int i = is * patch_stride; - int j = (ws - 2) * patch_stride; - int psz = patch_size; - int psz2 = psz / 2; - int w_ext = w + 2 * border_size; - int bsz = border_size; - - float i_lower_limit = bsz - psz + 1.0f; - float i_upper_limit = bsz + h - 1.0f; - float j_lower_limit = bsz - psz + 1.0f; - float j_upper_limit = bsz + w - 1.0f; - float i_I1, j_I1, w00, w01, w10, w11; - - int sid = get_sub_group_local_id(); - for (int js = (ws - 2); js > -1; js--, j -= patch_stride) - { - float min_SSD, cur_SSD; - float2 Ux = vload2(0, Sx_ptr + is * ws + js); - float2 Uy = vload2(0, Sy_ptr + is * ws + js); - - INIT_BILINEAR_WEIGHTS(Ux.x, Uy.x); - min_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, - w, w_ext, w00, w01, w10, w11, psz, sid); - - INIT_BILINEAR_WEIGHTS(Ux.y, Uy.y); - cur_SSD = computeSSDMeanNorm(I0_ptr + i * w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, - w, w_ext, w00, w01, w10, w11, psz, sid); - if (cur_SSD < min_SSD) - { - Sx_ptr[is * ws + js] = Ux.y; - Sy_ptr[is * ws + js] = Uy.y; - } - } -} - -__kernel void dis_patch_inverse_search_bwd_2(__global const uchar *I0_ptr, __global const uchar *I1_ptr, - __global const short *I0x_ptr, __global const short *I0y_ptr, - __global const float *xx_ptr, __global const float *yy_ptr, - __global const float *xy_ptr, - __global const float *x_ptr, __global const float *y_ptr, - int border_size, int patch_size, int patch_stride, - int w, int h, int ws, int hs, int num_inner_iter, - __global float *Sx_ptr, __global float *Sy_ptr) -{ - int js = get_global_id(0); - int is = get_global_id(1); - if (js >= ws || is >= hs) return; - - js = (ws - 1 - js); - is = (hs - 1 - is); - - int j = js * patch_stride; - int i = is * patch_stride; - int psz = patch_size; - int psz2 = psz / 2; - int w_ext = w + 2 * border_size; - int bsz = border_size; - int index = is * ws + js; - - float Ux = Sx_ptr[index]; - float Uy = Sy_ptr[index]; - float cur_Ux = Ux; - float cur_Uy = Uy; - float cur_xx = xx_ptr[index]; - float cur_yy = yy_ptr[index]; - float cur_xy = xy_ptr[index]; - float detH = cur_xx * cur_yy - cur_xy * cur_xy; - - if (fabs(detH) < EPS) detH = EPS; - - float invH11 = cur_yy / detH; - float invH12 = -cur_xy / detH; - float invH22 = cur_xx / detH; - float prev_SSD = INF, SSD; - float x_grad_sum = x_ptr[index]; - float y_grad_sum = y_ptr[index]; - - float i_lower_limit = bsz - psz + 1.0f; - float i_upper_limit = bsz + h - 1.0f; - float j_lower_limit = bsz - psz + 1.0f; - float j_upper_limit = bsz + w - 1.0f; - float dUx, dUy, i_I1, j_I1, w00, w01, w10, w11, dx, dy; - float3 res; - - for (int t = 0; t < num_inner_iter; t++) - { - INIT_BILINEAR_WEIGHTS(cur_Ux, cur_Uy); - res = processPatchMeanNorm(I0_ptr + i * w + j, - I1_ptr + (int)i_I1 * w_ext + (int)j_I1, I0x_ptr + i * w + j, - I0y_ptr + i * w + j, w, w_ext, w00, w01, w10, w11, psz, - x_grad_sum, y_grad_sum); - - SSD = res.x; - dUx = res.y; - dUy = res.z; - dx = invH11 * dUx + invH12 * dUy; - dy = invH12 * dUx + invH22 * dUy; - - cur_Ux -= dx; - cur_Uy -= dy; - - if (SSD >= prev_SSD) - break; - prev_SSD = SSD; - } - - float2 vec = (float2)(cur_Ux - Ux, cur_Uy - Uy); - if ((dot(vec, vec)) <= (float)(psz * psz)) - { - Sx_ptr[index] = cur_Ux; - Sy_ptr[index] = cur_Uy; - } -} - diff --git a/modules/optflow/src/opencl/optical_flow_tvl1.cl b/modules/optflow/src/opencl/optical_flow_tvl1.cl new file mode 100644 index 000000000..ce67879fe --- /dev/null +++ b/modules/optflow/src/opencl/optical_flow_tvl1.cl @@ -0,0 +1,378 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jin Ma jin@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +__kernel void centeredGradientKernel(__global const float* src_ptr, int src_col, int src_row, int src_step, + __global float* dx, __global float* dy, int d_step) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if((x < src_col)&&(y < src_row)) + { + int src_x1 = (x + 1) < (src_col -1)? (x + 1) : (src_col - 1); + int src_x2 = (x - 1) > 0 ? (x -1) : 0; + dx[y * d_step+ x] = 0.5f * (src_ptr[y * src_step + src_x1] - src_ptr[y * src_step+ src_x2]); + + int src_y1 = (y+1) < (src_row - 1) ? (y + 1) : (src_row - 1); + int src_y2 = (y - 1) > 0 ? (y - 1) : 0; + dy[y * d_step+ x] = 0.5f * (src_ptr[src_y1 * src_step + x] - src_ptr[src_y2 * src_step+ x]); + } + +} + +inline float bicubicCoeff(float x_) +{ + + float x = fabs(x_); + if (x <= 1.0f) + return x * x * (1.5f * x - 2.5f) + 1.0f; + else if (x < 2.0f) + return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f; + else + return 0.0f; +} + +__kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_col, int I0_row, + image2d_t tex_I1, image2d_t tex_I1x, image2d_t tex_I1y, + __global const float* u1, int u1_step, + __global const float* u2, + __global float* I1w, + __global float* I1wx, /*int I1wx_step,*/ + __global float* I1wy, /*int I1wy_step,*/ + __global float* grad, /*int grad_step,*/ + __global float* rho, + int I1w_step, + int u2_step, + int u1_offset_x, + int u1_offset_y, + int u2_offset_x, + int u2_offset_y) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < I0_col&&y < I0_row) + { + //float u1Val = u1(y, x); + float u1Val = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; + //float u2Val = u2(y, x); + float u2Val = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; + + float wx = x + u1Val; + float wy = y + u2Val; + + int xmin = ceil(wx - 2.0f); + int xmax = floor(wx + 2.0f); + + int ymin = ceil(wy - 2.0f); + int ymax = floor(wy + 2.0f); + + float sum = 0.0f; + float sumx = 0.0f; + float sumy = 0.0f; + float wsum = 0.0f; + sampler_t sampleri = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; + for (int cy = ymin; cy <= ymax; ++cy) + { + for (int cx = xmin; cx <= xmax; ++cx) + { + float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy); + //sum += w * tex2D(tex_I1 , cx, cy); + int2 cood = (int2)(cx, cy); + sum += w * read_imagef(tex_I1, sampleri, cood).x; + //sumx += w * tex2D(tex_I1x, cx, cy); + sumx += w * read_imagef(tex_I1x, sampleri, cood).x; + //sumy += w * tex2D(tex_I1y, cx, cy); + sumy += w * read_imagef(tex_I1y, sampleri, cood).x; + wsum += w; + } + } + float coeff = 1.0f / wsum; + float I1wVal = sum * coeff; + float I1wxVal = sumx * coeff; + float I1wyVal = sumy * coeff; + I1w[y * I1w_step + x] = I1wVal; + I1wx[y * I1w_step + x] = I1wxVal; + I1wy[y * I1w_step + x] = I1wyVal; + float Ix2 = I1wxVal * I1wxVal; + float Iy2 = I1wyVal * I1wyVal; + + // store the |Grad(I1)|^2 + grad[y * I1w_step + x] = Ix2 + Iy2; + + // compute the constant part of the rho function + float I0Val = I0[y * I0_step + x]; + rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val; + } +} + +inline float readImage(__global const float *image, int x, int y, int rows, int cols, int elemCntPerRow) +{ + int i0 = clamp(x, 0, cols - 1); + int j0 = clamp(y, 0, rows - 1); + + return image[j0 * elemCntPerRow + i0]; +} + +__kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step, int I0_col, int I0_row, + __global const float* tex_I1, __global const float* tex_I1x, __global const float* tex_I1y, + __global const float* u1, int u1_step, + __global const float* u2, + __global float* I1w, + __global float* I1wx, /*int I1wx_step,*/ + __global float* I1wy, /*int I1wy_step,*/ + __global float* grad, /*int grad_step,*/ + __global float* rho, + int I1w_step, + int u2_step, + int I1_step, + int I1x_step) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < I0_col&&y < I0_row) + { + //float u1Val = u1(y, x); + float u1Val = u1[y * u1_step + x]; + //float u2Val = u2(y, x); + float u2Val = u2[y * u2_step + x]; + + float wx = x + u1Val; + float wy = y + u2Val; + + int xmin = ceil(wx - 2.0f); + int xmax = floor(wx + 2.0f); + + int ymin = ceil(wy - 2.0f); + int ymax = floor(wy + 2.0f); + + float sum = 0.0f; + float sumx = 0.0f; + float sumy = 0.0f; + float wsum = 0.0f; + + for (int cy = ymin; cy <= ymax; ++cy) + { + for (int cx = xmin; cx <= xmax; ++cx) + { + float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy); + + int2 cood = (int2)(cx, cy); + sum += w * readImage(tex_I1, cood.x, cood.y, I0_col, I0_row, I1_step); + sumx += w * readImage(tex_I1x, cood.x, cood.y, I0_col, I0_row, I1x_step); + sumy += w * readImage(tex_I1y, cood.x, cood.y, I0_col, I0_row, I1x_step); + wsum += w; + } + } + + float coeff = 1.0f / wsum; + + float I1wVal = sum * coeff; + float I1wxVal = sumx * coeff; + float I1wyVal = sumy * coeff; + + I1w[y * I1w_step + x] = I1wVal; + I1wx[y * I1w_step + x] = I1wxVal; + I1wy[y * I1w_step + x] = I1wyVal; + + float Ix2 = I1wxVal * I1wxVal; + float Iy2 = I1wyVal * I1wyVal; + + // store the |Grad(I1)|^2 + grad[y * I1w_step + x] = Ix2 + Iy2; + + // compute the constant part of the rho function + float I0Val = I0[y * I0_step + x]; + rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val; + } + +} + + +__kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col, int u1_row, int u1_step, + __global const float* u2, + __global float* p11, int p11_step, + __global float* p12, + __global float* p21, + __global float* p22, + float taut, + int u2_step, + int u1_offset_x, + int u1_offset_y, + int u2_offset_x, + int u2_offset_y) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < u1_col && y < u1_row) + { + int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1); + float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; + + int src_y1 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1); + float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; + + int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1); + float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; + + int src_y2 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1); + float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; + + float g1 = hypot(u1x, u1y); + float g2 = hypot(u2x, u2y); + + float ng1 = 1.0f + taut * g1; + float ng2 = 1.0f + taut * g2; + + p11[y * p11_step + x] = (p11[y * p11_step + x] + taut * u1x) / ng1; + p12[y * p11_step + x] = (p12[y * p11_step + x] + taut * u1y) / ng1; + p21[y * p11_step + x] = (p21[y * p11_step + x] + taut * u2x) / ng2; + p22[y * p11_step + x] = (p22[y * p11_step + x] + taut * u2y) / ng2; + } + +} + +inline float divergence(__global const float* v1, __global const float* v2, int y, int x, int v1_step, int v2_step) +{ + + if (x > 0 && y > 0) + { + float v1x = v1[y * v1_step + x] - v1[y * v1_step + x - 1]; + float v2y = v2[y * v2_step + x] - v2[(y - 1) * v2_step + x]; + return v1x + v2y; + } + else + { + if (y > 0) + return v1[y * v1_step + 0] + v2[y * v2_step + 0] - v2[(y - 1) * v2_step + 0]; + else + { + if (x > 0) + return v1[0 * v1_step + x] - v1[0 * v1_step + x - 1] + v2[0 * v2_step + x]; + else + return v1[0 * v1_step + 0] + v2[0 * v2_step + 0]; + } + } + +} + +__kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx_row, int I1wx_step, + __global const float* I1wy, /*int I1wy_step,*/ + __global const float* grad, /*int grad_step,*/ + __global const float* rho_c, /*int rho_c_step,*/ + __global const float* p11, /*int p11_step,*/ + __global const float* p12, /*int p12_step,*/ + __global const float* p21, /*int p21_step,*/ + __global const float* p22, /*int p22_step,*/ + __global float* u1, int u1_step, + __global float* u2, + __global float* error, float l_t, float theta, int u2_step, + int u1_offset_x, + int u1_offset_y, + int u2_offset_x, + int u2_offset_y, + char calc_error) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < I1wx_col && y < I1wx_row) + { + float I1wxVal = I1wx[y * I1wx_step + x]; + float I1wyVal = I1wy[y * I1wx_step + x]; + float gradVal = grad[y * I1wx_step + x]; + float u1OldVal = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x]; + float u2OldVal = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x]; + + float rho = rho_c[y * I1wx_step + x] + (I1wxVal * u1OldVal + I1wyVal * u2OldVal); + + // estimate the values of the variable (v1, v2) (thresholding operator TH) + + float d1 = 0.0f; + float d2 = 0.0f; + + if (rho < -l_t * gradVal) + { + d1 = l_t * I1wxVal; + d2 = l_t * I1wyVal; + } + else if (rho > l_t * gradVal) + { + d1 = -l_t * I1wxVal; + d2 = -l_t * I1wyVal; + } + else if (gradVal > 1.192092896e-07f) + { + float fi = -rho / gradVal; + d1 = fi * I1wxVal; + d2 = fi * I1wyVal; + } + + float v1 = u1OldVal + d1; + float v2 = u2OldVal + d2; + + // compute the divergence of the dual variable (p1, p2) + + float div_p1 = divergence(p11, p12, y, x, I1wx_step, I1wx_step); + float div_p2 = divergence(p21, p22, y, x, I1wx_step, I1wx_step); + + // estimate the values of the optical flow (u1, u2) + + float u1NewVal = v1 + theta * div_p1; + float u2NewVal = v2 + theta * div_p2; + + u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal; + u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal; + + if(calc_error) + { + float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); + float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); + error[y * I1wx_step + x] = n1 + n2; + } + } +} diff --git a/modules/optflow/src/optical_flow_io.cpp b/modules/optflow/src/optical_flow_io.cpp deleted file mode 100644 index ad70b121f..000000000 --- a/modules/optflow/src/optical_flow_io.cpp +++ /dev/null @@ -1,139 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// - // - // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. - // - // By downloading, copying, installing or using the software you agree to this license. - // If you do not agree to this license, do not download, install, - // copy or use the software. - // - // - // License Agreement - // For Open Source Computer Vision Library - // - // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. - // Copyright (C) 2009, Willow Garage Inc., all rights reserved. - // Third party copyrights are property of their respective owners. - // - // Redistribution and use in source and binary forms, with or without modification, - // are permitted provided that the following conditions are met: - // - // * Redistribution's of source code must retain the above copyright notice, - // this list of conditions and the following disclaimer. - // - // * Redistribution's in binary form must reproduce the above copyright notice, - // this list of conditions and the following disclaimer in the documentation - // and/or other materials provided with the distribution. - // - // * The name of the copyright holders may not be used to endorse or promote products - // derived from this software without specific prior written permission. - // - // This software is provided by the copyright holders and contributors "as is" and - // any express or implied warranties, including, but not limited to, the implied - // warranties of merchantability and fitness for a particular purpose are disclaimed. - // In no event shall the Intel Corporation or contributors be liable for any direct, - // indirect, incidental, special, exemplary, or consequential damages - // (including, but not limited to, procurement of substitute goods or services; - // loss of use, data, or profits; or business interruption) however caused - // and on any theory of liability, whether in contract, strict liability, - // or tort (including negligence or otherwise) arising in any way out of - // the use of this software, even if advised of the possibility of such damage. - // - //M*/ -#include "precomp.hpp" -#include -#include - -namespace cv { -namespace optflow { -const float FLOW_TAG_FLOAT = 202021.25f; -const char *FLOW_TAG_STRING = "PIEH"; -CV_EXPORTS_W Mat readOpticalFlow( const String& path ) -{ -// CV_Assert(sizeof(float) == 4); - //FIXME: ensure right sizes of int and float - here and in writeOpticalFlow() - - Mat_ flow; - std::ifstream file(path.c_str(), std::ios_base::binary); - if ( !file.good() ) - return flow; // no file - return empty matrix - - float tag; - file.read((char*) &tag, sizeof(float)); - if ( tag != FLOW_TAG_FLOAT ) - return flow; - - int width, height; - - file.read((char*) &width, 4); - file.read((char*) &height, 4); - - flow.create(height, width); - - for ( int i = 0; i < flow.rows; ++i ) - { - for ( int j = 0; j < flow.cols; ++j ) - { - Point2f u; - file.read((char*) &u.x, sizeof(float)); - file.read((char*) &u.y, sizeof(float)); - if ( !file.good() ) - { - flow.release(); - return flow; - } - - flow(i, j) = u; - } - } - file.close(); - return flow; -} -CV_EXPORTS_W bool writeOpticalFlow( const String& path, InputArray flow ) -{ -// CV_Assert(sizeof(float) == 4); - - const int nChannels = 2; - - Mat input = flow.getMat(); - if ( input.channels() != nChannels || input.depth() != CV_32F || path.length() == 0 ) - return false; - - std::ofstream file(path.c_str(), std::ofstream::binary); - if ( !file.good() ) - return false; - - int nRows, nCols; - - nRows = (int) input.size().height; - nCols = (int) input.size().width; - - const int headerSize = 12; - char header[headerSize]; - memcpy(header, FLOW_TAG_STRING, 4); - // size of ints is known - has been asserted in the current function - memcpy(header + 4, reinterpret_cast(&nCols), sizeof(nCols)); - memcpy(header + 8, reinterpret_cast(&nRows), sizeof(nRows)); - file.write(header, headerSize); - if ( !file.good() ) - return false; - -// if ( input.isContinuous() ) //matrix is continous - treat it as a single row -// { -// nCols *= nRows; -// nRows = 1; -// } - - int row; - char* p; - for ( row = 0; row < nRows; row++ ) - { - p = input.ptr(row); - file.write(p, nCols * nChannels * sizeof(float)); - if ( !file.good() ) - return false; - } - file.close(); - return true; -} -} -} diff --git a/modules/optflow/src/tvl1flow.cpp b/modules/optflow/src/tvl1flow.cpp new file mode 100644 index 000000000..4131ac59a --- /dev/null +++ b/modules/optflow/src/tvl1flow.cpp @@ -0,0 +1,1488 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +/* +// +// This implementation is based on Javier Sánchez Pérez implementation. +// Original BSD license: +// +// Copyright (c) 2011, Javier Sánchez Pérez, Enric Meinhardt Llopis +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, this +// list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +*/ + +#include "precomp.hpp" +#include "opencl_kernels_optflow.hpp" + +#include +#include +#include +#include "opencv2/core/opencl/ocl_defs.hpp" + +namespace cv { +namespace optflow { + +class OpticalFlowDual_TVL1 : public DualTVL1OpticalFlow +{ +public: + + OpticalFlowDual_TVL1(double tau_, double lambda_, double theta_, int nscales_, int warps_, + double epsilon_, int innerIterations_, int outerIterations_, + double scaleStep_, double gamma_, int medianFiltering_, + bool useInitialFlow_) : + tau(tau_), lambda(lambda_), theta(theta_), gamma(gamma_), nscales(nscales_), + warps(warps_), epsilon(epsilon_), innerIterations(innerIterations_), + outerIterations(outerIterations_), useInitialFlow(useInitialFlow_), + scaleStep(scaleStep_), medianFiltering(medianFiltering_) + { + } + OpticalFlowDual_TVL1(); + + void calc(InputArray I0, InputArray I1, InputOutputArray flow) CV_OVERRIDE; + void collectGarbage() CV_OVERRIDE; + + inline double getTau() const CV_OVERRIDE { return tau; } + inline void setTau(double val) CV_OVERRIDE { tau = val; } + inline double getLambda() const CV_OVERRIDE { return lambda; } + inline void setLambda(double val) CV_OVERRIDE { lambda = val; } + inline double getTheta() const CV_OVERRIDE { return theta; } + inline void setTheta(double val) CV_OVERRIDE { theta = val; } + inline double getGamma() const CV_OVERRIDE { return gamma; } + inline void setGamma(double val) CV_OVERRIDE { gamma = val; } + inline int getScalesNumber() const CV_OVERRIDE { return nscales; } + inline void setScalesNumber(int val) CV_OVERRIDE { nscales = val; } + inline int getWarpingsNumber() const CV_OVERRIDE { return warps; } + inline void setWarpingsNumber(int val) CV_OVERRIDE { warps = val; } + inline double getEpsilon() const CV_OVERRIDE { return epsilon; } + inline void setEpsilon(double val) CV_OVERRIDE { epsilon = val; } + inline int getInnerIterations() const CV_OVERRIDE { return innerIterations; } + inline void setInnerIterations(int val) CV_OVERRIDE { innerIterations = val; } + inline int getOuterIterations() const CV_OVERRIDE { return outerIterations; } + inline void setOuterIterations(int val) CV_OVERRIDE { outerIterations = val; } + inline bool getUseInitialFlow() const CV_OVERRIDE { return useInitialFlow; } + inline void setUseInitialFlow(bool val) CV_OVERRIDE { useInitialFlow = val; } + inline double getScaleStep() const CV_OVERRIDE { return scaleStep; } + inline void setScaleStep(double val) CV_OVERRIDE { scaleStep = val; } + inline int getMedianFiltering() const CV_OVERRIDE { return medianFiltering; } + inline void setMedianFiltering(int val) CV_OVERRIDE { medianFiltering = val; } + +protected: + double tau; + double lambda; + double theta; + double gamma; + int nscales; + int warps; + double epsilon; + int innerIterations; + int outerIterations; + bool useInitialFlow; + double scaleStep; + int medianFiltering; + +private: + void procOneScale(const Mat_& I0, const Mat_& I1, Mat_& u1, Mat_& u2, Mat_& u3); + +#ifdef HAVE_OPENCL + bool procOneScale_ocl(const UMat& I0, const UMat& I1, UMat& u1, UMat& u2); + + bool calc_ocl(InputArray I0, InputArray I1, InputOutputArray flow); +#endif + struct dataMat + { + std::vector > I0s; + std::vector > I1s; + std::vector > u1s; + std::vector > u2s; + std::vector > u3s; + + Mat_ I1x_buf; + Mat_ I1y_buf; + + Mat_ flowMap1_buf; + Mat_ flowMap2_buf; + + Mat_ I1w_buf; + Mat_ I1wx_buf; + Mat_ I1wy_buf; + + Mat_ grad_buf; + Mat_ rho_c_buf; + + Mat_ v1_buf; + Mat_ v2_buf; + Mat_ v3_buf; + + Mat_ p11_buf; + Mat_ p12_buf; + Mat_ p21_buf; + Mat_ p22_buf; + Mat_ p31_buf; + Mat_ p32_buf; + + Mat_ div_p1_buf; + Mat_ div_p2_buf; + Mat_ div_p3_buf; + + Mat_ u1x_buf; + Mat_ u1y_buf; + Mat_ u2x_buf; + Mat_ u2y_buf; + Mat_ u3x_buf; + Mat_ u3y_buf; + } dm; + +#ifdef HAVE_OPENCL + struct dataUMat + { + std::vector I0s; + std::vector I1s; + std::vector u1s; + std::vector u2s; + + UMat I1x_buf; + UMat I1y_buf; + + UMat I1w_buf; + UMat I1wx_buf; + UMat I1wy_buf; + + UMat grad_buf; + UMat rho_c_buf; + + UMat p11_buf; + UMat p12_buf; + UMat p21_buf; + UMat p22_buf; + + UMat diff_buf; + UMat norm_buf; + } dum; +#endif +}; + +#ifdef HAVE_OPENCL +namespace cv_ocl_tvl1flow +{ + bool centeredGradient(const UMat &src, UMat &dx, UMat &dy); + + bool warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UMat &I1y, + UMat &u1, UMat &u2, UMat &I1w, UMat &I1wx, UMat &I1wy, + UMat &grad, UMat &rho); + + bool estimateU(UMat &I1wx, UMat &I1wy, UMat &grad, + UMat &rho_c, UMat &p11, UMat &p12, + UMat &p21, UMat &p22, UMat &u1, + UMat &u2, UMat &error, float l_t, float theta, char calc_error); + + bool estimateDualVariables(UMat &u1, UMat &u2, + UMat &p11, UMat &p12, UMat &p21, UMat &p22, float taut); +} + +bool cv_ocl_tvl1flow::centeredGradient(const UMat &src, UMat &dx, UMat &dy) +{ + size_t globalsize[2] = { (size_t)src.cols, (size_t)src.rows }; + + ocl::Kernel kernel; + if (!kernel.create("centeredGradientKernel", ocl::optflow::optical_flow_tvl1_oclsrc, "")) + return false; + + int idxArg = 0; + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(src));//src mat + idxArg = kernel.set(idxArg, (int)(src.cols));//src mat col + idxArg = kernel.set(idxArg, (int)(src.rows));//src mat rows + idxArg = kernel.set(idxArg, (int)(src.step / src.elemSize()));//src mat step + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dx));//res mat dx + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(dy));//res mat dy + idxArg = kernel.set(idxArg, (int)(dx.step/dx.elemSize()));//res mat step + return kernel.run(2, globalsize, NULL, false); +} + +bool cv_ocl_tvl1flow::warpBackward(const UMat &I0, const UMat &I1, UMat &I1x, UMat &I1y, + UMat &u1, UMat &u2, UMat &I1w, UMat &I1wx, UMat &I1wy, + UMat &grad, UMat &rho) +{ + size_t globalsize[2] = { (size_t)I0.cols, (size_t)I0.rows }; + + ocl::Kernel kernel; + if (!kernel.create("warpBackwardKernel", ocl::optflow::optical_flow_tvl1_oclsrc, "")) + return false; + + int idxArg = 0; + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I0));//I0 mat + int I0_step = (int)(I0.step / I0.elemSize()); + idxArg = kernel.set(idxArg, I0_step);//I0_step + idxArg = kernel.set(idxArg, (int)(I0.cols));//I0_col + idxArg = kernel.set(idxArg, (int)(I0.rows));//I0_row + ocl::Image2D imageI1(I1); + ocl::Image2D imageI1x(I1x); + ocl::Image2D imageI1y(I1y); + idxArg = kernel.set(idxArg, imageI1);//image2d_t tex_I1 + idxArg = kernel.set(idxArg, imageI1x);//image2d_t tex_I1x + idxArg = kernel.set(idxArg, imageI1y);//image2d_t tex_I1y + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u1));//const float* u1 + idxArg = kernel.set(idxArg, (int)(u1.step / u1.elemSize()));//int u1_step + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u2));//const float* u2 + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1w));///float* I1w + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1wx));//float* I1wx + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(I1wy));//float* I1wy + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(grad));//float* grad + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(rho));//float* rho + idxArg = kernel.set(idxArg, (int)(I1w.step / I1w.elemSize()));//I1w_step + idxArg = kernel.set(idxArg, (int)(u2.step / u2.elemSize()));//u2_step + int u1_offset_x = (int)((u1.offset) % (u1.step)); + u1_offset_x = (int)(u1_offset_x / u1.elemSize()); + idxArg = kernel.set(idxArg, (int)u1_offset_x );//u1_offset_x + idxArg = kernel.set(idxArg, (int)(u1.offset/u1.step));//u1_offset_y + int u2_offset_x = (int)((u2.offset) % (u2.step)); + u2_offset_x = (int) (u2_offset_x / u2.elemSize()); + idxArg = kernel.set(idxArg, (int)u2_offset_x);//u2_offset_x + idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step));//u2_offset_y + return kernel.run(2, globalsize, NULL, false); +} + +bool cv_ocl_tvl1flow::estimateU(UMat &I1wx, UMat &I1wy, UMat &grad, + UMat &rho_c, UMat &p11, UMat &p12, + UMat &p21, UMat &p22, UMat &u1, + UMat &u2, UMat &error, float l_t, float theta, char calc_error) +{ + size_t globalsize[2] = { (size_t)I1wx.cols, (size_t)I1wx.rows }; + + ocl::Kernel kernel; + if (!kernel.create("estimateUKernel", ocl::optflow::optical_flow_tvl1_oclsrc, "")) + return false; + + int idxArg = 0; + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I1wx)); //const float* I1wx + idxArg = kernel.set(idxArg, (int)(I1wx.cols)); //int I1wx_col + idxArg = kernel.set(idxArg, (int)(I1wx.rows)); //int I1wx_row + idxArg = kernel.set(idxArg, (int)(I1wx.step/I1wx.elemSize())); //int I1wx_step + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(I1wy)); //const float* I1wy + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(grad)); //const float* grad + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(rho_c)); //const float* rho_c + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p11)); //const float* p11 + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p12)); //const float* p12 + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p21)); //const float* p21 + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(p22)); //const float* p22 + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(u1)); //float* u1 + idxArg = kernel.set(idxArg, (int)(u1.step / u1.elemSize())); //int u1_step + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(u2)); //float* u2 + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrWriteOnly(error)); //float* error + idxArg = kernel.set(idxArg, (float)l_t); //float l_t + idxArg = kernel.set(idxArg, (float)theta); //float theta + idxArg = kernel.set(idxArg, (int)(u2.step / u2.elemSize()));//int u2_step + int u1_offset_x = (int)(u1.offset % u1.step); + u1_offset_x = (int) (u1_offset_x / u1.elemSize()); + idxArg = kernel.set(idxArg, (int)u1_offset_x); //int u1_offset_x + idxArg = kernel.set(idxArg, (int)(u1.offset/u1.step)); //int u1_offset_y + int u2_offset_x = (int)(u2.offset % u2.step); + u2_offset_x = (int)(u2_offset_x / u2.elemSize()); + idxArg = kernel.set(idxArg, (int)u2_offset_x ); //int u2_offset_x + idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y + idxArg = kernel.set(idxArg, (char)calc_error); //char calc_error + + return kernel.run(2, globalsize, NULL, false); +} + +bool cv_ocl_tvl1flow::estimateDualVariables(UMat &u1, UMat &u2, + UMat &p11, UMat &p12, UMat &p21, UMat &p22, float taut) +{ + size_t globalsize[2] = { (size_t)u1.cols, (size_t)u1.rows }; + + ocl::Kernel kernel; + if (!kernel.create("estimateDualVariablesKernel", ocl::optflow::optical_flow_tvl1_oclsrc, "")) + return false; + + int idxArg = 0; + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u1));// const float* u1 + idxArg = kernel.set(idxArg, (int)(u1.cols)); //int u1_col + idxArg = kernel.set(idxArg, (int)(u1.rows)); //int u1_row + idxArg = kernel.set(idxArg, (int)(u1.step/u1.elemSize())); //int u1_step + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(u2)); // const float* u2 + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p11)); // float* p11 + idxArg = kernel.set(idxArg, (int)(p11.step/p11.elemSize())); //int p11_step + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p12)); // float* p12 + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p21)); // float* p21 + idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadWrite(p22)); // float* p22 + idxArg = kernel.set(idxArg, (float)(taut)); //float taut + idxArg = kernel.set(idxArg, (int)(u2.step/u2.elemSize())); //int u2_step + int u1_offset_x = (int)(u1.offset % u1.step); + u1_offset_x = (int)(u1_offset_x / u1.elemSize()); + idxArg = kernel.set(idxArg, u1_offset_x); //int u1_offset_x + idxArg = kernel.set(idxArg, (int)(u1.offset / u1.step)); //int u1_offset_y + int u2_offset_x = (int)(u2.offset % u2.step); + u2_offset_x = (int)(u2_offset_x / u2.elemSize()); + idxArg = kernel.set(idxArg, u2_offset_x); //int u2_offset_x + idxArg = kernel.set(idxArg, (int)(u2.offset / u2.step)); //int u2_offset_y + + return kernel.run(2, globalsize, NULL, false); + +} +#endif + +OpticalFlowDual_TVL1::OpticalFlowDual_TVL1() +{ + tau = 0.25; + lambda = 0.15; + theta = 0.3; + nscales = 5; + warps = 5; + epsilon = 0.01; + gamma = 0.; + innerIterations = 30; + outerIterations = 10; + useInitialFlow = false; + medianFiltering = 5; + scaleStep = 0.8; +} + +void OpticalFlowDual_TVL1::calc(InputArray _I0, InputArray _I1, InputOutputArray _flow) +{ + CV_INSTRUMENT_REGION(); + +#ifndef __APPLE__ + CV_OCL_RUN(_flow.isUMat() && + ocl::Image2D::isFormatSupported(CV_32F, 1, false), + calc_ocl(_I0, _I1, _flow)) +#endif + + Mat I0 = _I0.getMat(); + Mat I1 = _I1.getMat(); + + CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 ); + CV_Assert( I0.size() == I1.size() ); + CV_Assert( I0.type() == I1.type() ); + CV_Assert( !useInitialFlow || (_flow.size() == I0.size() && _flow.type() == CV_32FC2) ); + CV_Assert( nscales > 0 ); + bool use_gamma = gamma != 0; + // allocate memory for the pyramid structure + dm.I0s.resize(nscales); + dm.I1s.resize(nscales); + dm.u1s.resize(nscales); + dm.u2s.resize(nscales); + dm.u3s.resize(nscales); + + I0.convertTo(dm.I0s[0], dm.I0s[0].depth(), I0.depth() == CV_8U ? 1.0 : 255.0); + I1.convertTo(dm.I1s[0], dm.I1s[0].depth(), I1.depth() == CV_8U ? 1.0 : 255.0); + + dm.u1s[0].create(I0.size()); + dm.u2s[0].create(I0.size()); + if (use_gamma) dm.u3s[0].create(I0.size()); + + if (useInitialFlow) + { + Mat_ mv[] = { dm.u1s[0], dm.u2s[0] }; + split(_flow.getMat(), mv); + } + + dm.I1x_buf.create(I0.size()); + dm.I1y_buf.create(I0.size()); + + dm.flowMap1_buf.create(I0.size()); + dm.flowMap2_buf.create(I0.size()); + + dm.I1w_buf.create(I0.size()); + dm.I1wx_buf.create(I0.size()); + dm.I1wy_buf.create(I0.size()); + + dm.grad_buf.create(I0.size()); + dm.rho_c_buf.create(I0.size()); + + dm.v1_buf.create(I0.size()); + dm.v2_buf.create(I0.size()); + dm.v3_buf.create(I0.size()); + + dm.p11_buf.create(I0.size()); + dm.p12_buf.create(I0.size()); + dm.p21_buf.create(I0.size()); + dm.p22_buf.create(I0.size()); + dm.p31_buf.create(I0.size()); + dm.p32_buf.create(I0.size()); + + dm.div_p1_buf.create(I0.size()); + dm.div_p2_buf.create(I0.size()); + dm.div_p3_buf.create(I0.size()); + + dm.u1x_buf.create(I0.size()); + dm.u1y_buf.create(I0.size()); + dm.u2x_buf.create(I0.size()); + dm.u2y_buf.create(I0.size()); + dm.u3x_buf.create(I0.size()); + dm.u3y_buf.create(I0.size()); + + // create the scales + for (int s = 1; s < nscales; ++s) + { + resize(dm.I0s[s - 1], dm.I0s[s], Size(), scaleStep, scaleStep, INTER_LINEAR); + resize(dm.I1s[s - 1], dm.I1s[s], Size(), scaleStep, scaleStep, INTER_LINEAR); + + if (dm.I0s[s].cols < 16 || dm.I0s[s].rows < 16) + { + nscales = s; + break; + } + + if (useInitialFlow) + { + resize(dm.u1s[s - 1], dm.u1s[s], Size(), scaleStep, scaleStep, INTER_LINEAR); + resize(dm.u2s[s - 1], dm.u2s[s], Size(), scaleStep, scaleStep, INTER_LINEAR); + + multiply(dm.u1s[s], Scalar::all(scaleStep), dm.u1s[s]); + multiply(dm.u2s[s], Scalar::all(scaleStep), dm.u2s[s]); + } + else + { + dm.u1s[s].create(dm.I0s[s].size()); + dm.u2s[s].create(dm.I0s[s].size()); + } + if (use_gamma) dm.u3s[s].create(dm.I0s[s].size()); + } + if (!useInitialFlow) + { + dm.u1s[nscales - 1].setTo(Scalar::all(0)); + dm.u2s[nscales - 1].setTo(Scalar::all(0)); + } + if (use_gamma) dm.u3s[nscales - 1].setTo(Scalar::all(0)); + // pyramidal structure for computing the optical flow + for (int s = nscales - 1; s >= 0; --s) + { + // compute the optical flow at the current scale + procOneScale(dm.I0s[s], dm.I1s[s], dm.u1s[s], dm.u2s[s], dm.u3s[s]); + + // if this was the last scale, finish now + if (s == 0) + break; + + // otherwise, upsample the optical flow + + // zoom the optical flow for the next finer scale + resize(dm.u1s[s], dm.u1s[s - 1], dm.I0s[s - 1].size(), 0, 0, INTER_LINEAR); + resize(dm.u2s[s], dm.u2s[s - 1], dm.I0s[s - 1].size(), 0, 0, INTER_LINEAR); + if (use_gamma) resize(dm.u3s[s], dm.u3s[s - 1], dm.I0s[s - 1].size(), 0, 0, INTER_LINEAR); + + // scale the optical flow with the appropriate zoom factor (don't scale u3!) + multiply(dm.u1s[s - 1], Scalar::all(1 / scaleStep), dm.u1s[s - 1]); + multiply(dm.u2s[s - 1], Scalar::all(1 / scaleStep), dm.u2s[s - 1]); + } + + Mat uxy[] = { dm.u1s[0], dm.u2s[0] }; + merge(uxy, 2, _flow); +} + +#ifdef HAVE_OPENCL +bool OpticalFlowDual_TVL1::calc_ocl(InputArray _I0, InputArray _I1, InputOutputArray _flow) +{ + UMat I0 = _I0.getUMat(); + UMat I1 = _I1.getUMat(); + + CV_Assert(I0.type() == CV_8UC1 || I0.type() == CV_32FC1); + CV_Assert(I0.size() == I1.size()); + CV_Assert(I0.type() == I1.type()); + CV_Assert(!useInitialFlow || (_flow.size() == I0.size() && _flow.type() == CV_32FC2)); + CV_Assert(nscales > 0); + + // allocate memory for the pyramid structure + dum.I0s.resize(nscales); + dum.I1s.resize(nscales); + dum.u1s.resize(nscales); + dum.u2s.resize(nscales); + //I0s_step == I1s_step + double alpha = I0.depth() == CV_8U ? 1.0 : 255.0; + + I0.convertTo(dum.I0s[0], CV_32F, alpha); + I1.convertTo(dum.I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0); + + dum.u1s[0].create(I0.size(), CV_32FC1); + dum.u2s[0].create(I0.size(), CV_32FC1); + + if (useInitialFlow) + { + std::vector umv; + umv.push_back(dum.u1s[0]); + umv.push_back(dum.u2s[0]); + cv::split(_flow,umv); + } + + dum.I1x_buf.create(I0.size(), CV_32FC1); + dum.I1y_buf.create(I0.size(), CV_32FC1); + + dum.I1w_buf.create(I0.size(), CV_32FC1); + dum.I1wx_buf.create(I0.size(), CV_32FC1); + dum.I1wy_buf.create(I0.size(), CV_32FC1); + + dum.grad_buf.create(I0.size(), CV_32FC1); + dum.rho_c_buf.create(I0.size(), CV_32FC1); + + dum.p11_buf.create(I0.size(), CV_32FC1); + dum.p12_buf.create(I0.size(), CV_32FC1); + dum.p21_buf.create(I0.size(), CV_32FC1); + dum.p22_buf.create(I0.size(), CV_32FC1); + + dum.diff_buf.create(I0.size(), CV_32FC1); + + // create the scales + for (int s = 1; s < nscales; ++s) + { + resize(dum.I0s[s - 1], dum.I0s[s], Size(), scaleStep, scaleStep, INTER_LINEAR); + resize(dum.I1s[s - 1], dum.I1s[s], Size(), scaleStep, scaleStep, INTER_LINEAR); + + if (dum.I0s[s].cols < 16 || dum.I0s[s].rows < 16) + { + nscales = s; + break; + } + + if (useInitialFlow) + { + resize(dum.u1s[s - 1], dum.u1s[s], Size(), scaleStep, scaleStep, INTER_LINEAR); + resize(dum.u2s[s - 1], dum.u2s[s], Size(), scaleStep, scaleStep, INTER_LINEAR); + + //scale by scale factor + multiply(dum.u1s[s], Scalar::all(scaleStep), dum.u1s[s]); + multiply(dum.u2s[s], Scalar::all(scaleStep), dum.u2s[s]); + } + } + + // pyramidal structure for computing the optical flow + for (int s = nscales - 1; s >= 0; --s) + { + // compute the optical flow at the current scale + if (!OpticalFlowDual_TVL1::procOneScale_ocl(dum.I0s[s], dum.I1s[s], dum.u1s[s], dum.u2s[s])) + return false; + + // if this was the last scale, finish now + if (s == 0) + break; + + // zoom the optical flow for the next finer scale + resize(dum.u1s[s], dum.u1s[s - 1], dum.I0s[s - 1].size(), 0, 0, INTER_LINEAR); + resize(dum.u2s[s], dum.u2s[s - 1], dum.I0s[s - 1].size(), 0, 0, INTER_LINEAR); + + // scale the optical flow with the appropriate zoom factor + multiply(dum.u1s[s - 1], Scalar::all(1 / scaleStep), dum.u1s[s - 1]); + multiply(dum.u2s[s - 1], Scalar::all(1 / scaleStep), dum.u2s[s - 1]); + } + + std::vector uxy; + uxy.push_back(dum.u1s[0]); + uxy.push_back(dum.u2s[0]); + merge(uxy, _flow); + return true; +} +#endif + +//////////////////////////////////////////////////////////// +// buildFlowMap + +struct BuildFlowMapBody : ParallelLoopBody +{ + void operator() (const Range& range) const CV_OVERRIDE; + + Mat_ u1; + Mat_ u2; + mutable Mat_ map1; + mutable Mat_ map2; +}; + +void BuildFlowMapBody::operator() (const Range& range) const +{ + for (int y = range.start; y < range.end; ++y) + { + const float* u1Row = u1[y]; + const float* u2Row = u2[y]; + + float* map1Row = map1[y]; + float* map2Row = map2[y]; + + for (int x = 0; x < u1.cols; ++x) + { + map1Row[x] = x + u1Row[x]; + map2Row[x] = y + u2Row[x]; + } + } +} + +static void buildFlowMap(const Mat_& u1, const Mat_& u2, + Mat_& map1, Mat_& map2) +{ + CV_DbgAssert( u2.size() == u1.size() ); + CV_DbgAssert( map1.size() == u1.size() ); + CV_DbgAssert( map2.size() == u1.size() ); + + BuildFlowMapBody body; + + body.u1 = u1; + body.u2 = u2; + body.map1 = map1; + body.map2 = map2; + + parallel_for_(Range(0, u1.rows), body); +} + +//////////////////////////////////////////////////////////// +// centeredGradient + +struct CenteredGradientBody : ParallelLoopBody +{ + void operator() (const Range& range) const CV_OVERRIDE; + + Mat_ src; + mutable Mat_ dx; + mutable Mat_ dy; +}; + +void CenteredGradientBody::operator() (const Range& range) const +{ + const int last_col = src.cols - 1; + + for (int y = range.start; y < range.end; ++y) + { + const float* srcPrevRow = src[y - 1]; + const float* srcCurRow = src[y]; + const float* srcNextRow = src[y + 1]; + + float* dxRow = dx[y]; + float* dyRow = dy[y]; + + for (int x = 1; x < last_col; ++x) + { + dxRow[x] = 0.5f * (srcCurRow[x + 1] - srcCurRow[x - 1]); + dyRow[x] = 0.5f * (srcNextRow[x] - srcPrevRow[x]); + } + } +} + +static void centeredGradient(const Mat_& src, Mat_& dx, Mat_& dy) +{ + CV_DbgAssert( src.rows > 2 && src.cols > 2 ); + CV_DbgAssert( dx.size() == src.size() ); + CV_DbgAssert( dy.size() == src.size() ); + + const int last_row = src.rows - 1; + const int last_col = src.cols - 1; + + // compute the gradient on the center body of the image + { + CenteredGradientBody body; + + body.src = src; + body.dx = dx; + body.dy = dy; + + parallel_for_(Range(1, last_row), body); + } + + // compute the gradient on the first and last rows + for (int x = 1; x < last_col; ++x) + { + dx(0, x) = 0.5f * (src(0, x + 1) - src(0, x - 1)); + dy(0, x) = 0.5f * (src(1, x) - src(0, x)); + + dx(last_row, x) = 0.5f * (src(last_row, x + 1) - src(last_row, x - 1)); + dy(last_row, x) = 0.5f * (src(last_row, x) - src(last_row - 1, x)); + } + + // compute the gradient on the first and last columns + for (int y = 1; y < last_row; ++y) + { + dx(y, 0) = 0.5f * (src(y, 1) - src(y, 0)); + dy(y, 0) = 0.5f * (src(y + 1, 0) - src(y - 1, 0)); + + dx(y, last_col) = 0.5f * (src(y, last_col) - src(y, last_col - 1)); + dy(y, last_col) = 0.5f * (src(y + 1, last_col) - src(y - 1, last_col)); + } + + // compute the gradient at the four corners + dx(0, 0) = 0.5f * (src(0, 1) - src(0, 0)); + dy(0, 0) = 0.5f * (src(1, 0) - src(0, 0)); + + dx(0, last_col) = 0.5f * (src(0, last_col) - src(0, last_col - 1)); + dy(0, last_col) = 0.5f * (src(1, last_col) - src(0, last_col)); + + dx(last_row, 0) = 0.5f * (src(last_row, 1) - src(last_row, 0)); + dy(last_row, 0) = 0.5f * (src(last_row, 0) - src(last_row - 1, 0)); + + dx(last_row, last_col) = 0.5f * (src(last_row, last_col) - src(last_row, last_col - 1)); + dy(last_row, last_col) = 0.5f * (src(last_row, last_col) - src(last_row - 1, last_col)); +} + +//////////////////////////////////////////////////////////// +// forwardGradient + +struct ForwardGradientBody : ParallelLoopBody +{ + void operator() (const Range& range) const CV_OVERRIDE; + + Mat_ src; + mutable Mat_ dx; + mutable Mat_ dy; +}; + +void ForwardGradientBody::operator() (const Range& range) const +{ + const int last_col = src.cols - 1; + + for (int y = range.start; y < range.end; ++y) + { + const float* srcCurRow = src[y]; + const float* srcNextRow = src[y + 1]; + + float* dxRow = dx[y]; + float* dyRow = dy[y]; + + for (int x = 0; x < last_col; ++x) + { + dxRow[x] = srcCurRow[x + 1] - srcCurRow[x]; + dyRow[x] = srcNextRow[x] - srcCurRow[x]; + } + } +} + +static void forwardGradient(const Mat_& src, Mat_& dx, Mat_& dy) +{ + CV_DbgAssert( src.rows > 2 && src.cols > 2 ); + CV_DbgAssert( dx.size() == src.size() ); + CV_DbgAssert( dy.size() == src.size() ); + + const int last_row = src.rows - 1; + const int last_col = src.cols - 1; + + // compute the gradient on the central body of the image + { + ForwardGradientBody body; + + body.src = src; + body.dx = dx; + body.dy = dy; + + parallel_for_(Range(0, last_row), body); + } + + // compute the gradient on the last row + for (int x = 0; x < last_col; ++x) + { + dx(last_row, x) = src(last_row, x + 1) - src(last_row, x); + dy(last_row, x) = 0.0f; + } + + // compute the gradient on the last column + for (int y = 0; y < last_row; ++y) + { + dx(y, last_col) = 0.0f; + dy(y, last_col) = src(y + 1, last_col) - src(y, last_col); + } + + dx(last_row, last_col) = 0.0f; + dy(last_row, last_col) = 0.0f; +} + +//////////////////////////////////////////////////////////// +// divergence + +struct DivergenceBody : ParallelLoopBody +{ + void operator() (const Range& range) const CV_OVERRIDE; + + Mat_ v1; + Mat_ v2; + mutable Mat_ div; +}; + +void DivergenceBody::operator() (const Range& range) const +{ + for (int y = range.start; y < range.end; ++y) + { + const float* v1Row = v1[y]; + const float* v2PrevRow = v2[y - 1]; + const float* v2CurRow = v2[y]; + + float* divRow = div[y]; + + for(int x = 1; x < v1.cols; ++x) + { + const float v1x = v1Row[x] - v1Row[x - 1]; + const float v2y = v2CurRow[x] - v2PrevRow[x]; + + divRow[x] = v1x + v2y; + } + } +} + +static void divergence(const Mat_& v1, const Mat_& v2, Mat_& div) +{ + CV_DbgAssert( v1.rows > 2 && v1.cols > 2 ); + CV_DbgAssert( v2.size() == v1.size() ); + CV_DbgAssert( div.size() == v1.size() ); + + { + DivergenceBody body; + + body.v1 = v1; + body.v2 = v2; + body.div = div; + + parallel_for_(Range(1, v1.rows), body); + } + + // compute the divergence on the first row + for(int x = 1; x < v1.cols; ++x) + div(0, x) = v1(0, x) - v1(0, x - 1) + v2(0, x); + + // compute the divergence on the first column + for (int y = 1; y < v1.rows; ++y) + div(y, 0) = v1(y, 0) + v2(y, 0) - v2(y - 1, 0); + + div(0, 0) = v1(0, 0) + v2(0, 0); +} + +//////////////////////////////////////////////////////////// +// calcGradRho + +struct CalcGradRhoBody : ParallelLoopBody +{ + void operator() (const Range& range) const CV_OVERRIDE; + + Mat_ I0; + Mat_ I1w; + Mat_ I1wx; + Mat_ I1wy; + Mat_ u1; + Mat_ u2; + mutable Mat_ grad; + mutable Mat_ rho_c; +}; + +void CalcGradRhoBody::operator() (const Range& range) const +{ + for (int y = range.start; y < range.end; ++y) + { + const float* I0Row = I0[y]; + const float* I1wRow = I1w[y]; + const float* I1wxRow = I1wx[y]; + const float* I1wyRow = I1wy[y]; + const float* u1Row = u1[y]; + const float* u2Row = u2[y]; + + float* gradRow = grad[y]; + float* rhoRow = rho_c[y]; + + for (int x = 0; x < I0.cols; ++x) + { + const float Ix2 = I1wxRow[x] * I1wxRow[x]; + const float Iy2 = I1wyRow[x] * I1wyRow[x]; + + // store the |Grad(I1)|^2 + gradRow[x] = Ix2 + Iy2; + + // compute the constant part of the rho function + rhoRow[x] = (I1wRow[x] - I1wxRow[x] * u1Row[x] - I1wyRow[x] * u2Row[x] - I0Row[x]); + } + } +} + +static void calcGradRho(const Mat_& I0, const Mat_& I1w, const Mat_& I1wx, const Mat_& I1wy, const Mat_& u1, const Mat_& u2, + Mat_& grad, Mat_& rho_c) +{ + CV_DbgAssert( I1w.size() == I0.size() ); + CV_DbgAssert( I1wx.size() == I0.size() ); + CV_DbgAssert( I1wy.size() == I0.size() ); + CV_DbgAssert( u1.size() == I0.size() ); + CV_DbgAssert( u2.size() == I0.size() ); + CV_DbgAssert( grad.size() == I0.size() ); + CV_DbgAssert( rho_c.size() == I0.size() ); + + CalcGradRhoBody body; + + body.I0 = I0; + body.I1w = I1w; + body.I1wx = I1wx; + body.I1wy = I1wy; + body.u1 = u1; + body.u2 = u2; + body.grad = grad; + body.rho_c = rho_c; + + parallel_for_(Range(0, I0.rows), body); +} + +//////////////////////////////////////////////////////////// +// estimateV + +struct EstimateVBody : ParallelLoopBody +{ + void operator() (const Range& range) const CV_OVERRIDE; + + Mat_ I1wx; + Mat_ I1wy; + Mat_ u1; + Mat_ u2; + Mat_ u3; + Mat_ grad; + Mat_ rho_c; + mutable Mat_ v1; + mutable Mat_ v2; + mutable Mat_ v3; + float l_t; + float gamma; +}; + +void EstimateVBody::operator() (const Range& range) const +{ + bool use_gamma = gamma != 0; + for (int y = range.start; y < range.end; ++y) + { + const float* I1wxRow = I1wx[y]; + const float* I1wyRow = I1wy[y]; + const float* u1Row = u1[y]; + const float* u2Row = u2[y]; + const float* u3Row = use_gamma?u3[y]:NULL; + const float* gradRow = grad[y]; + const float* rhoRow = rho_c[y]; + + float* v1Row = v1[y]; + float* v2Row = v2[y]; + float* v3Row = use_gamma ? v3[y]:NULL; + + for (int x = 0; x < I1wx.cols; ++x) + { + const float rho = use_gamma ? rhoRow[x] + (I1wxRow[x] * u1Row[x] + I1wyRow[x] * u2Row[x]) + gamma * u3Row[x] : + rhoRow[x] + (I1wxRow[x] * u1Row[x] + I1wyRow[x] * u2Row[x]); + float d1 = 0.0f; + float d2 = 0.0f; + float d3 = 0.0f; + if (rho < -l_t * gradRow[x]) + { + d1 = l_t * I1wxRow[x]; + d2 = l_t * I1wyRow[x]; + if (use_gamma) d3 = l_t * gamma; + } + else if (rho > l_t * gradRow[x]) + { + d1 = -l_t * I1wxRow[x]; + d2 = -l_t * I1wyRow[x]; + if (use_gamma) d3 = -l_t * gamma; + } + else if (gradRow[x] > std::numeric_limits::epsilon()) + { + float fi = -rho / gradRow[x]; + d1 = fi * I1wxRow[x]; + d2 = fi * I1wyRow[x]; + if (use_gamma) d3 = fi * gamma; + } + + v1Row[x] = u1Row[x] + d1; + v2Row[x] = u2Row[x] + d2; + if (use_gamma) v3Row[x] = u3Row[x] + d3; + } + } +} + +static void estimateV(const Mat_& I1wx, const Mat_& I1wy, const Mat_& u1, const Mat_& u2, const Mat_& u3, const Mat_& grad, const Mat_& rho_c, + Mat_& v1, Mat_& v2, Mat_& v3, float l_t, float gamma) +{ + CV_DbgAssert( I1wy.size() == I1wx.size() ); + CV_DbgAssert( u1.size() == I1wx.size() ); + CV_DbgAssert( u2.size() == I1wx.size() ); + CV_DbgAssert( grad.size() == I1wx.size() ); + CV_DbgAssert( rho_c.size() == I1wx.size() ); + CV_DbgAssert( v1.size() == I1wx.size() ); + CV_DbgAssert( v2.size() == I1wx.size() ); + + EstimateVBody body; + bool use_gamma = gamma != 0; + body.I1wx = I1wx; + body.I1wy = I1wy; + body.u1 = u1; + body.u2 = u2; + if (use_gamma) body.u3 = u3; + body.grad = grad; + body.rho_c = rho_c; + body.v1 = v1; + body.v2 = v2; + if (use_gamma) body.v3 = v3; + body.l_t = l_t; + body.gamma = gamma; + parallel_for_(Range(0, I1wx.rows), body); +} + +//////////////////////////////////////////////////////////// +// estimateU + +static float estimateU(const Mat_& v1, const Mat_& v2, const Mat_& v3, + const Mat_& div_p1, const Mat_& div_p2, const Mat_& div_p3, + Mat_& u1, Mat_& u2, Mat_& u3, + float theta, float gamma) +{ + CV_DbgAssert( v2.size() == v1.size() ); + CV_DbgAssert( div_p1.size() == v1.size() ); + CV_DbgAssert( div_p2.size() == v1.size() ); + CV_DbgAssert( u1.size() == v1.size() ); + CV_DbgAssert( u2.size() == v1.size() ); + + float error = 0.0f; + bool use_gamma = gamma != 0; + for (int y = 0; y < v1.rows; ++y) + { + const float* v1Row = v1[y]; + const float* v2Row = v2[y]; + const float* v3Row = use_gamma?v3[y]:NULL; + const float* divP1Row = div_p1[y]; + const float* divP2Row = div_p2[y]; + const float* divP3Row = use_gamma?div_p3[y]:NULL; + + float* u1Row = u1[y]; + float* u2Row = u2[y]; + float* u3Row = use_gamma?u3[y]:NULL; + + + for (int x = 0; x < v1.cols; ++x) + { + const float u1k = u1Row[x]; + const float u2k = u2Row[x]; + const float u3k = use_gamma?u3Row[x]:0; + + u1Row[x] = v1Row[x] + theta * divP1Row[x]; + u2Row[x] = v2Row[x] + theta * divP2Row[x]; + if (use_gamma) u3Row[x] = v3Row[x] + theta * divP3Row[x]; + error += use_gamma?(u1Row[x] - u1k) * (u1Row[x] - u1k) + (u2Row[x] - u2k) * (u2Row[x] - u2k) + (u3Row[x] - u3k) * (u3Row[x] - u3k): + (u1Row[x] - u1k) * (u1Row[x] - u1k) + (u2Row[x] - u2k) * (u2Row[x] - u2k); + } + } + + return error; +} + +//////////////////////////////////////////////////////////// +// estimateDualVariables + +struct EstimateDualVariablesBody : ParallelLoopBody +{ + void operator() (const Range& range) const CV_OVERRIDE; + + Mat_ u1x; + Mat_ u1y; + Mat_ u2x; + Mat_ u2y; + Mat_ u3x; + Mat_ u3y; + mutable Mat_ p11; + mutable Mat_ p12; + mutable Mat_ p21; + mutable Mat_ p22; + mutable Mat_ p31; + mutable Mat_ p32; + float taut; + bool use_gamma; +}; + +void EstimateDualVariablesBody::operator() (const Range& range) const +{ + for (int y = range.start; y < range.end; ++y) + { + const float* u1xRow = u1x[y]; + const float* u1yRow = u1y[y]; + const float* u2xRow = u2x[y]; + const float* u2yRow = u2y[y]; + const float* u3xRow = u3x[y]; + const float* u3yRow = u3y[y]; + + float* p11Row = p11[y]; + float* p12Row = p12[y]; + float* p21Row = p21[y]; + float* p22Row = p22[y]; + float* p31Row = p31[y]; + float* p32Row = p32[y]; + + for (int x = 0; x < u1x.cols; ++x) + { + const float g1 = static_cast(hypot(u1xRow[x], u1yRow[x])); + const float g2 = static_cast(hypot(u2xRow[x], u2yRow[x])); + + const float ng1 = 1.0f + taut * g1; + const float ng2 = 1.0f + taut * g2; + + p11Row[x] = (p11Row[x] + taut * u1xRow[x]) / ng1; + p12Row[x] = (p12Row[x] + taut * u1yRow[x]) / ng1; + p21Row[x] = (p21Row[x] + taut * u2xRow[x]) / ng2; + p22Row[x] = (p22Row[x] + taut * u2yRow[x]) / ng2; + + if (use_gamma) + { + const float g3 = static_cast(hypot(u3xRow[x], u3yRow[x])); + const float ng3 = 1.0f + taut * g3; + p31Row[x] = (p31Row[x] + taut * u3xRow[x]) / ng3; + p32Row[x] = (p32Row[x] + taut * u3yRow[x]) / ng3; + } + } + } +} + +static void estimateDualVariables(const Mat_& u1x, const Mat_& u1y, + const Mat_& u2x, const Mat_& u2y, + const Mat_& u3x, const Mat_& u3y, + Mat_& p11, Mat_& p12, + Mat_& p21, Mat_& p22, + Mat_& p31, Mat_& p32, + float taut, bool use_gamma) +{ + CV_DbgAssert( u1y.size() == u1x.size() ); + CV_DbgAssert( u2x.size() == u1x.size() ); + CV_DbgAssert( u3x.size() == u1x.size() ); + CV_DbgAssert( u2y.size() == u1x.size() ); + CV_DbgAssert( u3y.size() == u1x.size() ); + CV_DbgAssert( p11.size() == u1x.size() ); + CV_DbgAssert( p12.size() == u1x.size() ); + CV_DbgAssert( p21.size() == u1x.size() ); + CV_DbgAssert( p22.size() == u1x.size() ); + CV_DbgAssert( p31.size() == u1x.size() ); + CV_DbgAssert( p32.size() == u1x.size() ); + + EstimateDualVariablesBody body; + + body.u1x = u1x; + body.u1y = u1y; + body.u2x = u2x; + body.u2y = u2y; + body.u3x = u3x; + body.u3y = u3y; + body.p11 = p11; + body.p12 = p12; + body.p21 = p21; + body.p22 = p22; + body.p31 = p31; + body.p32 = p32; + body.taut = taut; + body.use_gamma = use_gamma; + + parallel_for_(Range(0, u1x.rows), body); +} + +#ifdef HAVE_OPENCL +bool OpticalFlowDual_TVL1::procOneScale_ocl(const UMat& I0, const UMat& I1, UMat& u1, UMat& u2) +{ + using namespace cv_ocl_tvl1flow; + + const double scaledEpsilon = epsilon * epsilon * I0.size().area(); + + CV_DbgAssert(I1.size() == I0.size()); + CV_DbgAssert(I1.type() == I0.type()); + CV_DbgAssert(u1.empty() || u1.size() == I0.size()); + CV_DbgAssert(u2.size() == u1.size()); + + if (u1.empty()) + { + u1.create(I0.size(), CV_32FC1); + u1.setTo(Scalar::all(0)); + + u2.create(I0.size(), CV_32FC1); + u2.setTo(Scalar::all(0)); + } + + UMat I1x = dum.I1x_buf(Rect(0, 0, I0.cols, I0.rows)); + UMat I1y = dum.I1y_buf(Rect(0, 0, I0.cols, I0.rows)); + + if (!centeredGradient(I1, I1x, I1y)) + return false; + + UMat I1w = dum.I1w_buf(Rect(0, 0, I0.cols, I0.rows)); + UMat I1wx = dum.I1wx_buf(Rect(0, 0, I0.cols, I0.rows)); + UMat I1wy = dum.I1wy_buf(Rect(0, 0, I0.cols, I0.rows)); + + UMat grad = dum.grad_buf(Rect(0, 0, I0.cols, I0.rows)); + UMat rho_c = dum.rho_c_buf(Rect(0, 0, I0.cols, I0.rows)); + + UMat p11 = dum.p11_buf(Rect(0, 0, I0.cols, I0.rows)); + UMat p12 = dum.p12_buf(Rect(0, 0, I0.cols, I0.rows)); + UMat p21 = dum.p21_buf(Rect(0, 0, I0.cols, I0.rows)); + UMat p22 = dum.p22_buf(Rect(0, 0, I0.cols, I0.rows)); + p11.setTo(Scalar::all(0)); + p12.setTo(Scalar::all(0)); + p21.setTo(Scalar::all(0)); + p22.setTo(Scalar::all(0)); + + UMat diff = dum.diff_buf(Rect(0, 0, I0.cols, I0.rows)); + + const float l_t = static_cast(lambda * theta); + const float taut = static_cast(tau / theta); + int n; + + for (int warpings = 0; warpings < warps; ++warpings) + { + if (!warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c)) + return false; + + double error = std::numeric_limits::max(); + double prev_error = 0; + + for (int n_outer = 0; error > scaledEpsilon && n_outer < outerIterations; ++n_outer) + { + if (medianFiltering > 1) { + cv::medianBlur(u1, u1, medianFiltering); + cv::medianBlur(u2, u2, medianFiltering); + } + for (int n_inner = 0; error > scaledEpsilon && n_inner < innerIterations; ++n_inner) + { + // some tweaks to make sum operation less frequently + n = n_inner + n_outer*innerIterations; + char calc_error = (n & 0x1) && (prev_error < scaledEpsilon); + if (!estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, + u1, u2, diff, l_t, static_cast(theta), calc_error)) + return false; + if (calc_error) + { + error = cv::sum(diff)[0]; + prev_error = error; + } + else + { + error = std::numeric_limits::max(); + prev_error -= scaledEpsilon; + } + if (!estimateDualVariables(u1, u2, p11, p12, p21, p22, taut)) + return false; + } + } + } + return true; +} +#endif + +void OpticalFlowDual_TVL1::procOneScale(const Mat_& I0, const Mat_& I1, Mat_& u1, Mat_& u2, Mat_& u3) +{ + const float scaledEpsilon = static_cast(epsilon * epsilon * I0.size().area()); + + CV_DbgAssert( I1.size() == I0.size() ); + CV_DbgAssert( I1.type() == I0.type() ); + CV_DbgAssert( u1.size() == I0.size() ); + CV_DbgAssert( u2.size() == u1.size() ); + + Mat_ I1x = dm.I1x_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ I1y = dm.I1y_buf(Rect(0, 0, I0.cols, I0.rows)); + centeredGradient(I1, I1x, I1y); + + Mat_ flowMap1 = dm.flowMap1_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ flowMap2 = dm.flowMap2_buf(Rect(0, 0, I0.cols, I0.rows)); + + Mat_ I1w = dm.I1w_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ I1wx = dm.I1wx_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ I1wy = dm.I1wy_buf(Rect(0, 0, I0.cols, I0.rows)); + + Mat_ grad = dm.grad_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ rho_c = dm.rho_c_buf(Rect(0, 0, I0.cols, I0.rows)); + + Mat_ v1 = dm.v1_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ v2 = dm.v2_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ v3 = dm.v3_buf(Rect(0, 0, I0.cols, I0.rows)); + + Mat_ p11 = dm.p11_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ p12 = dm.p12_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ p21 = dm.p21_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ p22 = dm.p22_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ p31 = dm.p31_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ p32 = dm.p32_buf(Rect(0, 0, I0.cols, I0.rows)); + p11.setTo(Scalar::all(0)); + p12.setTo(Scalar::all(0)); + p21.setTo(Scalar::all(0)); + p22.setTo(Scalar::all(0)); + bool use_gamma = gamma != 0.; + if (use_gamma) p31.setTo(Scalar::all(0)); + if (use_gamma) p32.setTo(Scalar::all(0)); + + Mat_ div_p1 = dm.div_p1_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ div_p2 = dm.div_p2_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ div_p3 = dm.div_p3_buf(Rect(0, 0, I0.cols, I0.rows)); + + Mat_ u1x = dm.u1x_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ u1y = dm.u1y_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ u2x = dm.u2x_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ u2y = dm.u2y_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ u3x = dm.u3x_buf(Rect(0, 0, I0.cols, I0.rows)); + Mat_ u3y = dm.u3y_buf(Rect(0, 0, I0.cols, I0.rows)); + + const float l_t = static_cast(lambda * theta); + const float taut = static_cast(tau / theta); + + for (int warpings = 0; warpings < warps; ++warpings) + { + // compute the warping of the target image and its derivatives + buildFlowMap(u1, u2, flowMap1, flowMap2); + remap(I1, I1w, flowMap1, flowMap2, INTER_CUBIC); + remap(I1x, I1wx, flowMap1, flowMap2, INTER_CUBIC); + remap(I1y, I1wy, flowMap1, flowMap2, INTER_CUBIC); + //calculate I1(x+u0) and its gradient + calcGradRho(I0, I1w, I1wx, I1wy, u1, u2, grad, rho_c); + + float error = std::numeric_limits::max(); + for (int n_outer = 0; error > scaledEpsilon && n_outer < outerIterations; ++n_outer) + { + if (medianFiltering > 1) { + cv::medianBlur(u1, u1, medianFiltering); + cv::medianBlur(u2, u2, medianFiltering); + } + for (int n_inner = 0; error > scaledEpsilon && n_inner < innerIterations; ++n_inner) + { + // estimate the values of the variable (v1, v2) (thresholding operator TH) + estimateV(I1wx, I1wy, u1, u2, u3, grad, rho_c, v1, v2, v3, l_t, static_cast(gamma)); + + // compute the divergence of the dual variable (p1, p2, p3) + divergence(p11, p12, div_p1); + divergence(p21, p22, div_p2); + if (use_gamma) divergence(p31, p32, div_p3); + + // estimate the values of the optical flow (u1, u2) + error = estimateU(v1, v2, v3, div_p1, div_p2, div_p3, u1, u2, u3, static_cast(theta), static_cast(gamma)); + + // compute the gradient of the optical flow (Du1, Du2) + forwardGradient(u1, u1x, u1y); + forwardGradient(u2, u2x, u2y); + if (use_gamma) forwardGradient(u3, u3x, u3y); + + // estimate the values of the dual variable (p1, p2, p3) + estimateDualVariables(u1x, u1y, u2x, u2y, u3x, u3y, p11, p12, p21, p22, p31, p32, taut, use_gamma); + } + } + } +} + +void OpticalFlowDual_TVL1::collectGarbage() +{ + //dataMat structure dm + dm.I0s.clear(); + dm.I1s.clear(); + dm.u1s.clear(); + dm.u2s.clear(); + + dm.I1x_buf.release(); + dm.I1y_buf.release(); + + dm.flowMap1_buf.release(); + dm.flowMap2_buf.release(); + + dm.I1w_buf.release(); + dm.I1wx_buf.release(); + dm.I1wy_buf.release(); + + dm.grad_buf.release(); + dm.rho_c_buf.release(); + + dm.v1_buf.release(); + dm.v2_buf.release(); + + dm.p11_buf.release(); + dm.p12_buf.release(); + dm.p21_buf.release(); + dm.p22_buf.release(); + + dm.div_p1_buf.release(); + dm.div_p2_buf.release(); + + dm.u1x_buf.release(); + dm.u1y_buf.release(); + dm.u2x_buf.release(); + dm.u2y_buf.release(); + +#ifdef HAVE_OPENCL + //dataUMat structure dum + dum.I0s.clear(); + dum.I1s.clear(); + dum.u1s.clear(); + dum.u2s.clear(); + + dum.I1x_buf.release(); + dum.I1y_buf.release(); + + dum.I1w_buf.release(); + dum.I1wx_buf.release(); + dum.I1wy_buf.release(); + + dum.grad_buf.release(); + dum.rho_c_buf.release(); + + dum.p11_buf.release(); + dum.p12_buf.release(); + dum.p21_buf.release(); + dum.p22_buf.release(); + + dum.diff_buf.release(); + dum.norm_buf.release(); +#endif +} + +Ptr createOptFlow_DualTVL1() +{ + return makePtr(); +} + +Ptr DualTVL1OpticalFlow::create( + double tau, double lambda, double theta, int nscales, int warps, + double epsilon, int innerIterations, int outerIterations, double scaleStep, + double gamma, int medianFilter, bool useInitialFlow) +{ + return makePtr(tau, lambda, theta, nscales, warps, + epsilon, innerIterations, outerIterations, + scaleStep, gamma, medianFilter, useInitialFlow); +} + +} +} diff --git a/modules/optflow/src/variational_refinement.cpp b/modules/optflow/src/variational_refinement.cpp deleted file mode 100644 index 8fe8e0415..000000000 --- a/modules/optflow/src/variational_refinement.cpp +++ /dev/null @@ -1,1192 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" -#include "opencv2/core/hal/intrin.hpp" - -using namespace std; - -namespace cv -{ -namespace optflow -{ - -class VariationalRefinementImpl CV_FINAL : public VariationalRefinement -{ - public: - VariationalRefinementImpl(); - - void calc(InputArray I0, InputArray I1, InputOutputArray flow) CV_OVERRIDE; - void calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v) CV_OVERRIDE; - void collectGarbage() CV_OVERRIDE; - - protected: //!< algorithm parameters - int fixedPointIterations, sorIterations; - float omega; - float alpha, delta, gamma; - float zeta, epsilon; - - public: - int getFixedPointIterations() const CV_OVERRIDE { return fixedPointIterations; } - void setFixedPointIterations(int val) CV_OVERRIDE { fixedPointIterations = val; } - int getSorIterations() const CV_OVERRIDE { return sorIterations; } - void setSorIterations(int val) CV_OVERRIDE { sorIterations = val; } - float getOmega() const CV_OVERRIDE { return omega; } - void setOmega(float val) CV_OVERRIDE { omega = val; } - float getAlpha() const CV_OVERRIDE { return alpha; } - void setAlpha(float val) CV_OVERRIDE { alpha = val; } - float getDelta() const CV_OVERRIDE { return delta; } - void setDelta(float val) CV_OVERRIDE { delta = val; } - float getGamma() const CV_OVERRIDE { return gamma; } - void setGamma(float val) CV_OVERRIDE { gamma = val; } - - protected: //!< internal buffers - /* This struct defines a special data layout for Mat_. Original buffer is split into two: one for "red" - * elements (sum of indices is even) and one for "black" (sum of indices is odd) in a checkerboard pattern. It - * allows for more efficient processing in SOR iterations, more natural SIMD vectorization and parallelization - * (Red-Black SOR). Additionally, it simplifies border handling by adding repeated borders to both red and - * black buffers. - */ - struct RedBlackBuffer - { - Mat_ red; //!< (i+j)%2==0 - Mat_ black; //!< (i+j)%2==1 - - /* Width of even and odd rows may be different */ - int red_even_len, red_odd_len; - int black_even_len, black_odd_len; - - void create(Size s); - void release(); - }; - - Mat_ Ix, Iy, Iz, Ixx, Ixy, Iyy, Ixz, Iyz; //!< image derivative buffers - RedBlackBuffer Ix_rb, Iy_rb, Iz_rb, Ixx_rb, Ixy_rb, Iyy_rb, Ixz_rb, Iyz_rb; //!< corresponding red-black buffers - - RedBlackBuffer A11, A12, A22, b1, b2; //!< main linear system coefficients - RedBlackBuffer weights; //!< smoothness term weights in the current fixed point iteration - - Mat_ mapX, mapY; //!< auxiliary buffers for remapping - - RedBlackBuffer tempW_u, tempW_v; //!< flow buffers that are modified in each fixed point iteration - RedBlackBuffer dW_u, dW_v; //!< optical flow increment - RedBlackBuffer W_u_rb, W_v_rb; //!< red-black-buffer version of the input flow - - private: //!< private methods and parallel sections - void splitCheckerboard(RedBlackBuffer &dst, Mat &src); - void mergeCheckerboard(Mat &dst, RedBlackBuffer &src); - void updateRepeatedBorders(RedBlackBuffer &dst); - void warpImage(Mat &dst, Mat &src, Mat &flow_u, Mat &flow_v); - void prepareBuffers(Mat &I0, Mat &I1, Mat &W_u, Mat &W_v); - - /* Parallelizing arbitrary operations with 3 input/output arguments */ - typedef void (VariationalRefinementImpl::*Op)(void *op1, void *op2, void *op3); - struct ParallelOp_ParBody : public ParallelLoopBody - { - VariationalRefinementImpl *var; - vector ops; - vector op1s; - vector op2s; - vector op3s; - - ParallelOp_ParBody(VariationalRefinementImpl &_var, vector _ops, vector &_op1s, - vector &_op2s, vector &_op3s); - void operator()(const Range &range) const CV_OVERRIDE; - }; - void gradHorizAndSplitOp(void *src, void *dst, void *dst_split) - { - Sobel(*(Mat *)src, *(Mat *)dst, -1, 1, 0, 1, 1, 0.00, BORDER_REPLICATE); - splitCheckerboard(*(RedBlackBuffer *)dst_split, *(Mat *)dst); - } - void gradVertAndSplitOp(void *src, void *dst, void *dst_split) - { - Sobel(*(Mat *)src, *(Mat *)dst, -1, 0, 1, 1, 1, 0.00, BORDER_REPLICATE); - splitCheckerboard(*(RedBlackBuffer *)dst_split, *(Mat *)dst); - } - void averageOp(void *src1, void *src2, void *dst) - { - addWeighted(*(Mat *)src1, 0.5, *(Mat *)src2, 0.5, 0.0, *(Mat *)dst, CV_32F); - } - void subtractOp(void *src1, void *src2, void *dst) - { - subtract(*(Mat *)src1, *(Mat *)src2, *(Mat *)dst, noArray(), CV_32F); - } - - struct ComputeDataTerm_ParBody : public ParallelLoopBody - { - VariationalRefinementImpl *var; - int nstripes, stripe_sz; - int h; - RedBlackBuffer *dW_u, *dW_v; - bool red_pass; - - ComputeDataTerm_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_dW_u, - RedBlackBuffer &_dW_v, bool _red_pass); - void operator()(const Range &range) const CV_OVERRIDE; - }; - - struct ComputeSmoothnessTermHorPass_ParBody : public ParallelLoopBody - { - VariationalRefinementImpl *var; - int nstripes, stripe_sz; - int h; - RedBlackBuffer *W_u, *W_v, *curW_u, *curW_v; - bool red_pass; - - ComputeSmoothnessTermHorPass_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h, - RedBlackBuffer &_W_u, RedBlackBuffer &_W_v, RedBlackBuffer &_tempW_u, - RedBlackBuffer &_tempW_v, bool _red_pass); - void operator()(const Range &range) const CV_OVERRIDE; - }; - - struct ComputeSmoothnessTermVertPass_ParBody : public ParallelLoopBody - { - VariationalRefinementImpl *var; - int nstripes, stripe_sz; - int h; - RedBlackBuffer *W_u, *W_v; - bool red_pass; - - ComputeSmoothnessTermVertPass_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h, - RedBlackBuffer &W_u, RedBlackBuffer &_W_v, bool _red_pass); - void operator()(const Range &range) const CV_OVERRIDE; - }; - - struct RedBlackSOR_ParBody : public ParallelLoopBody - { - VariationalRefinementImpl *var; - int nstripes, stripe_sz; - int h; - RedBlackBuffer *dW_u, *dW_v; - bool red_pass; - - RedBlackSOR_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_dW_u, - RedBlackBuffer &_dW_v, bool _red_pass); - void operator()(const Range &range) const CV_OVERRIDE; - }; -}; - -VariationalRefinementImpl::VariationalRefinementImpl() -{ - fixedPointIterations = 5; - sorIterations = 5; - alpha = 20.0f; - delta = 5.0f; - gamma = 10.0f; - omega = 1.6f; - zeta = 0.1f; - epsilon = 0.001f; -} - -/* This function converts an input Mat into the RedBlackBuffer format, which involves - * splitting the input buffer into two and adding repeated borders. Assumes that enough - * memory in dst is already allocated. - */ -void VariationalRefinementImpl::splitCheckerboard(RedBlackBuffer &dst, Mat &src) -{ - int buf_j, j; - int buf_w = (int)ceil(src.cols / 2.0) + 2; //!< max width of red/black buffers with borders - - /* Rows of red and black buffers can have different actual width, some extra repeated values are - * added for padding in such cases. - */ - for (int i = 0; i < src.rows; i++) - { - float *src_buf = src.ptr(i); - float *r_buf = dst.red.ptr(i + 1); - float *b_buf = dst.black.ptr(i + 1); - r_buf[0] = b_buf[0] = src_buf[0]; - buf_j = 1; - if (i % 2 == 0) - { - for (j = 0; j < src.cols - 1; j += 2) - { - r_buf[buf_j] = src_buf[j]; - b_buf[buf_j] = src_buf[j + 1]; - buf_j++; - } - if (j < src.cols) - r_buf[buf_j] = b_buf[buf_j] = src_buf[j]; - else - j--; - } - else - { - for (j = 0; j < src.cols - 1; j += 2) - { - b_buf[buf_j] = src_buf[j]; - r_buf[buf_j] = src_buf[j + 1]; - buf_j++; - } - if (j < src.cols) - r_buf[buf_j] = b_buf[buf_j] = src_buf[j]; - else - j--; - } - r_buf[buf_w - 1] = b_buf[buf_w - 1] = src_buf[j]; - } - - /* Fill top and bottom borders: */ - { - float *r_buf_border = dst.red.ptr(dst.red.rows - 1); - float *b_buf_border = dst.black.ptr(dst.black.rows - 1); - float *r_buf = dst.red.ptr(dst.red.rows - 2); - float *b_buf = dst.black.ptr(dst.black.rows - 2); - memcpy(r_buf_border, b_buf, buf_w * sizeof(float)); - memcpy(b_buf_border, r_buf, buf_w * sizeof(float)); - } - { - float *r_buf_border = dst.red.ptr(0); - float *b_buf_border = dst.black.ptr(0); - float *r_buf = dst.red.ptr(1); - float *b_buf = dst.black.ptr(1); - memcpy(r_buf_border, b_buf, buf_w * sizeof(float)); - memcpy(b_buf_border, r_buf, buf_w * sizeof(float)); - } -} - -/* The inverse of splitCheckerboard, i.e. converting the RedBlackBuffer back into Mat. - * Assumes that enough memory in dst is already allocated. - */ -void VariationalRefinementImpl::mergeCheckerboard(Mat &dst, RedBlackBuffer &src) -{ - int buf_j, j; - for (int i = 0; i < dst.rows; i++) - { - float *src_r_buf = src.red.ptr(i + 1); - float *src_b_buf = src.black.ptr(i + 1); - float *dst_buf = dst.ptr(i); - buf_j = 1; - - if (i % 2 == 0) - { - for (j = 0; j < dst.cols - 1; j += 2) - { - dst_buf[j] = src_r_buf[buf_j]; - dst_buf[j + 1] = src_b_buf[buf_j]; - buf_j++; - } - if (j < dst.cols) - dst_buf[j] = src_r_buf[buf_j]; - } - else - { - for (j = 0; j < dst.cols - 1; j += 2) - { - dst_buf[j] = src_b_buf[buf_j]; - dst_buf[j + 1] = src_r_buf[buf_j]; - buf_j++; - } - if (j < dst.cols) - dst_buf[j] = src_b_buf[buf_j]; - } - } -} - -/* An auxiliary function that updates the borders. Used to enforce that border values repeat - * the ones adjacent to the border. - */ -void VariationalRefinementImpl::updateRepeatedBorders(RedBlackBuffer &dst) -{ - int buf_w = dst.red.cols; - for (int i = 0; i < dst.red.rows - 2; i++) - { - float *r_buf = dst.red.ptr(i + 1); - float *b_buf = dst.black.ptr(i + 1); - - if (i % 2 == 0) - { - b_buf[0] = r_buf[1]; - if (dst.red_even_len > dst.black_even_len) - b_buf[dst.black_even_len + 1] = r_buf[dst.red_even_len]; - else - r_buf[dst.red_even_len + 1] = b_buf[dst.black_even_len]; - } - else - { - r_buf[0] = b_buf[1]; - if (dst.red_odd_len < dst.black_odd_len) - r_buf[dst.red_odd_len + 1] = b_buf[dst.black_odd_len]; - else - b_buf[dst.black_odd_len + 1] = r_buf[dst.red_odd_len]; - } - } - { - float *r_buf_border = dst.red.ptr(dst.red.rows - 1); - float *b_buf_border = dst.black.ptr(dst.black.rows - 1); - float *r_buf = dst.red.ptr(dst.red.rows - 2); - float *b_buf = dst.black.ptr(dst.black.rows - 2); - memcpy(r_buf_border, b_buf, buf_w * sizeof(float)); - memcpy(b_buf_border, r_buf, buf_w * sizeof(float)); - } - { - float *r_buf_border = dst.red.ptr(0); - float *b_buf_border = dst.black.ptr(0); - float *r_buf = dst.red.ptr(1); - float *b_buf = dst.black.ptr(1); - memcpy(r_buf_border, b_buf, buf_w * sizeof(float)); - memcpy(b_buf_border, r_buf, buf_w * sizeof(float)); - } -} - -void VariationalRefinementImpl::RedBlackBuffer::create(Size s) -{ - /* Allocate enough memory to include borders */ - int w = (int)ceil(s.width / 2.0) + 2; - red.create(s.height + 2, w); - black.create(s.height + 2, w); - - if (s.width % 2 == 0) - red_even_len = red_odd_len = black_even_len = black_odd_len = w - 2; - else - { - red_even_len = black_odd_len = w - 2; - red_odd_len = black_even_len = w - 3; - } -} - -void VariationalRefinementImpl::RedBlackBuffer::release() -{ - red.release(); - black.release(); -} - -VariationalRefinementImpl::ParallelOp_ParBody::ParallelOp_ParBody(VariationalRefinementImpl &_var, vector _ops, - vector &_op1s, vector &_op2s, - vector &_op3s) - : var(&_var), ops(_ops), op1s(_op1s), op2s(_op2s), op3s(_op3s) -{ -} - -void VariationalRefinementImpl::ParallelOp_ParBody::operator()(const Range &range) const -{ - for (int i = range.start; i < range.end; i++) - (var->*ops[i])(op1s[i], op2s[i], op3s[i]); -} - -void VariationalRefinementImpl::warpImage(Mat &dst, Mat &src, Mat &flow_u, Mat &flow_v) -{ - for (int i = 0; i < flow_u.rows; i++) - { - float *pFlowU = flow_u.ptr(i); - float *pFlowV = flow_v.ptr(i); - float *pMapX = mapX.ptr(i); - float *pMapY = mapY.ptr(i); - for (int j = 0; j < flow_u.cols; j++) - { - pMapX[j] = j + pFlowU[j]; - pMapY[j] = i + pFlowV[j]; - } - } - remap(src, dst, mapX, mapY, INTER_LINEAR, BORDER_REPLICATE); -} - -void VariationalRefinementImpl::prepareBuffers(Mat &I0, Mat &I1, Mat &W_u, Mat &W_v) -{ - Size s = I0.size(); - A11.create(s); - A12.create(s); - A22.create(s); - b1.create(s); - b2.create(s); - weights.create(s); - weights.red.setTo(0.0f); - weights.black.setTo(0.0f); - tempW_u.create(s); - tempW_v.create(s); - dW_u.create(s); - dW_v.create(s); - W_u_rb.create(s); - W_v_rb.create(s); - - Ix.create(s); - Iy.create(s); - Iz.create(s); - Ixx.create(s); - Ixy.create(s); - Iyy.create(s); - Ixz.create(s); - Iyz.create(s); - - Ix_rb.create(s); - Iy_rb.create(s); - Iz_rb.create(s); - Ixx_rb.create(s); - Ixy_rb.create(s); - Iyy_rb.create(s); - Ixz_rb.create(s); - Iyz_rb.create(s); - - mapX.create(s); - mapY.create(s); - - /* Floating point warps work significantly better than fixed-point */ - Mat I1flt, warpedI; - I1.convertTo(I1flt, CV_32F); - warpImage(warpedI, I1flt, W_u, W_v); - - /* Computing an average of the current and warped next frames (to compute the derivatives on) and - * temporal derivative Iz - */ - Mat averagedI; - { - vector op1s; - op1s.push_back((void *)&I0); - op1s.push_back((void *)&warpedI); - vector op2s; - op2s.push_back((void *)&warpedI); - op2s.push_back((void *)&I0); - vector op3s; - op3s.push_back((void *)&averagedI); - op3s.push_back((void *)&Iz); - vector ops; - ops.push_back(&VariationalRefinementImpl::averageOp); - ops.push_back(&VariationalRefinementImpl::subtractOp); - parallel_for_(Range(0, 2), ParallelOp_ParBody(*this, ops, op1s, op2s, op3s)); - } - splitCheckerboard(Iz_rb, Iz); - - /* Computing first-order derivatives */ - { - vector op1s; - op1s.push_back((void *)&averagedI); - op1s.push_back((void *)&averagedI); - op1s.push_back((void *)&Iz); - op1s.push_back((void *)&Iz); - vector op2s; - op2s.push_back((void *)&Ix); - op2s.push_back((void *)&Iy); - op2s.push_back((void *)&Ixz); - op2s.push_back((void *)&Iyz); - vector op3s; - op3s.push_back((void *)&Ix_rb); - op3s.push_back((void *)&Iy_rb); - op3s.push_back((void *)&Ixz_rb); - op3s.push_back((void *)&Iyz_rb); - vector ops; - ops.push_back(&VariationalRefinementImpl::gradHorizAndSplitOp); - ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp); - ops.push_back(&VariationalRefinementImpl::gradHorizAndSplitOp); - ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp); - parallel_for_(Range(0, 4), ParallelOp_ParBody(*this, ops, op1s, op2s, op3s)); - } - - /* Computing second-order derivatives */ - { - vector op1s; - op1s.push_back((void *)&Ix); - op1s.push_back((void *)&Ix); - op1s.push_back((void *)&Iy); - vector op2s; - op2s.push_back((void *)&Ixx); - op2s.push_back((void *)&Ixy); - op2s.push_back((void *)&Iyy); - vector op3s; - op3s.push_back((void *)&Ixx_rb); - op3s.push_back((void *)&Ixy_rb); - op3s.push_back((void *)&Iyy_rb); - vector ops; - ops.push_back(&VariationalRefinementImpl::gradHorizAndSplitOp); - ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp); - ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp); - parallel_for_(Range(0, 3), ParallelOp_ParBody(*this, ops, op1s, op2s, op3s)); - } -} - -VariationalRefinementImpl::ComputeDataTerm_ParBody::ComputeDataTerm_ParBody(VariationalRefinementImpl &_var, - int _nstripes, int _h, - RedBlackBuffer &_dW_u, - RedBlackBuffer &_dW_v, bool _red_pass) - : var(&_var), nstripes(_nstripes), h(_h), dW_u(&_dW_u), dW_v(&_dW_v), red_pass(_red_pass) -{ - stripe_sz = (int)ceil(h / (double)nstripes); -} - -/* This function computes parts of the main linear system coefficients A11,A12,A22,b1,b1 - * that correspond to the data term, which includes color and gradient constancy assumptions. - */ -void VariationalRefinementImpl::ComputeDataTerm_ParBody::operator()(const Range &range) const -{ - int start_i = min(range.start * stripe_sz, h); - int end_i = min(range.end * stripe_sz, h); - - float zeta_squared = var->zeta * var->zeta; - float epsilon_squared = var->epsilon * var->epsilon; - float gamma2 = var->gamma / 2; - float delta2 = var->delta / 2; - - float *pIx, *pIy, *pIz; - float *pIxx, *pIxy, *pIyy, *pIxz, *pIyz; - float *pdU, *pdV; - float *pa11, *pa12, *pa22, *pb1, *pb2; - - float derivNorm, derivNorm2; - float Ik1z, Ik1zx, Ik1zy; - float weight; - int len; - for (int i = start_i; i < end_i; i++) - { -#define INIT_ROW_POINTERS(color) \ - pIx = var->Ix_rb.color.ptr(i + 1) + 1; \ - pIy = var->Iy_rb.color.ptr(i + 1) + 1; \ - pIz = var->Iz_rb.color.ptr(i + 1) + 1; \ - pIxx = var->Ixx_rb.color.ptr(i + 1) + 1; \ - pIxy = var->Ixy_rb.color.ptr(i + 1) + 1; \ - pIyy = var->Iyy_rb.color.ptr(i + 1) + 1; \ - pIxz = var->Ixz_rb.color.ptr(i + 1) + 1; \ - pIyz = var->Iyz_rb.color.ptr(i + 1) + 1; \ - pa11 = var->A11.color.ptr(i + 1) + 1; \ - pa12 = var->A12.color.ptr(i + 1) + 1; \ - pa22 = var->A22.color.ptr(i + 1) + 1; \ - pb1 = var->b1.color.ptr(i + 1) + 1; \ - pb2 = var->b2.color.ptr(i + 1) + 1; \ - pdU = dW_u->color.ptr(i + 1) + 1; \ - pdV = dW_v->color.ptr(i + 1) + 1; \ - if (i % 2 == 0) \ - len = var->Ix_rb.color##_even_len; \ - else \ - len = var->Ix_rb.color##_odd_len; - - if (red_pass) - { - INIT_ROW_POINTERS(red); - } - else - { - INIT_ROW_POINTERS(black); - } -#undef INIT_ROW_POINTERS - - int j = 0; -#ifdef CV_SIMD128 - v_float32x4 zeta_vec = v_setall_f32(zeta_squared); - v_float32x4 eps_vec = v_setall_f32(epsilon_squared); - v_float32x4 delta_vec = v_setall_f32(delta2); - v_float32x4 gamma_vec = v_setall_f32(gamma2); - v_float32x4 zero_vec = v_setall_f32(0.0f); - v_float32x4 pIx_vec, pIy_vec, pIz_vec, pdU_vec, pdV_vec; - v_float32x4 pIxx_vec, pIxy_vec, pIyy_vec, pIxz_vec, pIyz_vec; - v_float32x4 derivNorm_vec, derivNorm2_vec, weight_vec; - v_float32x4 Ik1z_vec, Ik1zx_vec, Ik1zy_vec; - v_float32x4 pa11_vec, pa12_vec, pa22_vec, pb1_vec, pb2_vec; - - for (; j < len - 3; j += 4) - { - pIx_vec = v_load(pIx + j); - pIy_vec = v_load(pIy + j); - pIz_vec = v_load(pIz + j); - pdU_vec = v_load(pdU + j); - pdV_vec = v_load(pdV + j); - - derivNorm_vec = pIx_vec * pIx_vec + pIy_vec * pIy_vec + zeta_vec; - Ik1z_vec = pIz_vec + pIx_vec * pdU_vec + pIy_vec * pdV_vec; - weight_vec = (delta_vec / v_sqrt(Ik1z_vec * Ik1z_vec / derivNorm_vec + eps_vec)) / derivNorm_vec; - - pa11_vec = weight_vec * (pIx_vec * pIx_vec) + zeta_vec; - pa12_vec = weight_vec * (pIx_vec * pIy_vec); - pa22_vec = weight_vec * (pIy_vec * pIy_vec) + zeta_vec; - pb1_vec = zero_vec - weight_vec * (pIz_vec * pIx_vec); - pb2_vec = zero_vec - weight_vec * (pIz_vec * pIy_vec); - - pIxx_vec = v_load(pIxx + j); - pIxy_vec = v_load(pIxy + j); - pIyy_vec = v_load(pIyy + j); - pIxz_vec = v_load(pIxz + j); - pIyz_vec = v_load(pIyz + j); - - derivNorm_vec = pIxx_vec * pIxx_vec + pIxy_vec * pIxy_vec + zeta_vec; - derivNorm2_vec = pIyy_vec * pIyy_vec + pIxy_vec * pIxy_vec + zeta_vec; - Ik1zx_vec = pIxz_vec + pIxx_vec * pdU_vec + pIxy_vec * pdV_vec; - Ik1zy_vec = pIyz_vec + pIxy_vec * pdU_vec + pIyy_vec * pdV_vec; - weight_vec = gamma_vec / v_sqrt(Ik1zx_vec * Ik1zx_vec / derivNorm_vec + - Ik1zy_vec * Ik1zy_vec / derivNorm2_vec + eps_vec); - - pa11_vec += weight_vec * (pIxx_vec * pIxx_vec / derivNorm_vec + pIxy_vec * pIxy_vec / derivNorm2_vec); - pa12_vec += weight_vec * (pIxx_vec * pIxy_vec / derivNorm_vec + pIxy_vec * pIyy_vec / derivNorm2_vec); - pa22_vec += weight_vec * (pIxy_vec * pIxy_vec / derivNorm_vec + pIyy_vec * pIyy_vec / derivNorm2_vec); - pb1_vec -= weight_vec * (pIxx_vec * pIxz_vec / derivNorm_vec + pIxy_vec * pIyz_vec / derivNorm2_vec); - pb2_vec -= weight_vec * (pIxy_vec * pIxz_vec / derivNorm_vec + pIyy_vec * pIyz_vec / derivNorm2_vec); - - v_store(pa11 + j, pa11_vec); - v_store(pa12 + j, pa12_vec); - v_store(pa22 + j, pa22_vec); - v_store(pb1 + j, pb1_vec); - v_store(pb2 + j, pb2_vec); - } -#endif - for (; j < len; j++) - { - /* Step 1: Compute color constancy terms */ - /* Normalization factor:*/ - derivNorm = pIx[j] * pIx[j] + pIy[j] * pIy[j] + zeta_squared; - /* Color constancy penalty (computed by Taylor expansion):*/ - Ik1z = pIz[j] + pIx[j] * pdU[j] + pIy[j] * pdV[j]; - /* Weight of the color constancy term in the current fixed-point iteration divided by derivNorm: */ - weight = (delta2 / sqrt(Ik1z * Ik1z / derivNorm + epsilon_squared)) / derivNorm; - /* Add respective color constancy terms to the linear system coefficients: */ - pa11[j] = weight * (pIx[j] * pIx[j]) + zeta_squared; - pa12[j] = weight * (pIx[j] * pIy[j]); - pa22[j] = weight * (pIy[j] * pIy[j]) + zeta_squared; - pb1[j] = -weight * (pIz[j] * pIx[j]); - pb2[j] = -weight * (pIz[j] * pIy[j]); - - /* Step 2: Compute gradient constancy terms */ - /* Normalization factor for x gradient: */ - derivNorm = pIxx[j] * pIxx[j] + pIxy[j] * pIxy[j] + zeta_squared; - /* Normalization factor for y gradient: */ - derivNorm2 = pIyy[j] * pIyy[j] + pIxy[j] * pIxy[j] + zeta_squared; - /* Gradient constancy penalties (computed by Taylor expansion): */ - Ik1zx = pIxz[j] + pIxx[j] * pdU[j] + pIxy[j] * pdV[j]; - Ik1zy = pIyz[j] + pIxy[j] * pdU[j] + pIyy[j] * pdV[j]; - /* Weight of the gradient constancy term in the current fixed-point iteration: */ - weight = gamma2 / sqrt(Ik1zx * Ik1zx / derivNorm + Ik1zy * Ik1zy / derivNorm2 + epsilon_squared); - /* Add respective gradient constancy components to the linear system coefficients: */ - pa11[j] += weight * (pIxx[j] * pIxx[j] / derivNorm + pIxy[j] * pIxy[j] / derivNorm2); - pa12[j] += weight * (pIxx[j] * pIxy[j] / derivNorm + pIxy[j] * pIyy[j] / derivNorm2); - pa22[j] += weight * (pIxy[j] * pIxy[j] / derivNorm + pIyy[j] * pIyy[j] / derivNorm2); - pb1[j] += -weight * (pIxx[j] * pIxz[j] / derivNorm + pIxy[j] * pIyz[j] / derivNorm2); - pb2[j] += -weight * (pIxy[j] * pIxz[j] / derivNorm + pIyy[j] * pIyz[j] / derivNorm2); - } - } -} - -VariationalRefinementImpl::ComputeSmoothnessTermHorPass_ParBody::ComputeSmoothnessTermHorPass_ParBody( - VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_W_u, RedBlackBuffer &_W_v, - RedBlackBuffer &_tempW_u, RedBlackBuffer &_tempW_v, bool _red_pass) - : var(&_var), nstripes(_nstripes), h(_h), W_u(&_W_u), W_v(&_W_v), curW_u(&_tempW_u), curW_v(&_tempW_v), - red_pass(_red_pass) -{ - stripe_sz = (int)ceil(h / (double)nstripes); -} - -/* This function updates the linear system coefficients A11,A22,b1,b1 according to the - * flow smoothness term and computes corresponding weights for the current fixed point iteration. - * A11,A22,b1,b1 are updated only partially (horizontal pass). Doing both horizontal and vertical - * passes in one loop complicates parallelization (different threads write to the same elements). - */ -void VariationalRefinementImpl::ComputeSmoothnessTermHorPass_ParBody::operator()(const Range &range) const -{ - int start_i = min(range.start * stripe_sz, h); - int end_i = min(range.end * stripe_sz, h); - - float epsilon_squared = var->epsilon * var->epsilon; - float alpha2 = var->alpha / 2; - float *pWeight; - float *pA_u, *pA_u_next, *pA_v, *pA_v_next; - float *pB_u, *pB_u_next, *pB_v, *pB_v_next; - float *cW_u, *cW_u_next, *cW_u_next_row; - float *cW_v, *cW_v_next, *cW_v_next_row; - float *pW_u, *pW_u_next; - float *pW_v, *pW_v_next; - float ux, uy, vx, vy; - int len; - bool touches_right_border = true; - -#define INIT_ROW_POINTERS(cur_color, next_color, next_offs_even, next_offs_odd, bool_default) \ - pWeight = var->weights.cur_color.ptr(i + 1) + 1; \ - pA_u = var->A11.cur_color.ptr(i + 1) + 1; \ - pB_u = var->b1.cur_color.ptr(i + 1) + 1; \ - cW_u = curW_u->cur_color.ptr(i + 1) + 1; \ - pW_u = W_u->cur_color.ptr(i + 1) + 1; \ - pA_v = var->A22.cur_color.ptr(i + 1) + 1; \ - pB_v = var->b2.cur_color.ptr(i + 1) + 1; \ - cW_v = curW_v->cur_color.ptr(i + 1) + 1; \ - pW_v = W_v->cur_color.ptr(i + 1) + 1; \ - \ - cW_u_next_row = curW_u->next_color.ptr(i + 2) + 1; \ - cW_v_next_row = curW_v->next_color.ptr(i + 2) + 1; \ - \ - if (i % 2 == 0) \ - { \ - pA_u_next = var->A11.next_color.ptr(i + 1) + next_offs_even; \ - pB_u_next = var->b1.next_color.ptr(i + 1) + next_offs_even; \ - cW_u_next = curW_u->next_color.ptr(i + 1) + next_offs_even; \ - pW_u_next = W_u->next_color.ptr(i + 1) + next_offs_even; \ - pA_v_next = var->A22.next_color.ptr(i + 1) + next_offs_even; \ - pB_v_next = var->b2.next_color.ptr(i + 1) + next_offs_even; \ - cW_v_next = curW_v->next_color.ptr(i + 1) + next_offs_even; \ - pW_v_next = W_v->next_color.ptr(i + 1) + next_offs_even; \ - len = var->A11.cur_color##_even_len; \ - if (var->A11.cur_color##_even_len != var->A11.cur_color##_odd_len) \ - touches_right_border = bool_default; \ - else \ - touches_right_border = !bool_default; \ - } \ - else \ - { \ - pA_u_next = var->A11.next_color.ptr(i + 1) + next_offs_odd; \ - pB_u_next = var->b1.next_color.ptr(i + 1) + next_offs_odd; \ - cW_u_next = curW_u->next_color.ptr(i + 1) + next_offs_odd; \ - pW_u_next = W_u->next_color.ptr(i + 1) + next_offs_odd; \ - pA_v_next = var->A22.next_color.ptr(i + 1) + next_offs_odd; \ - pB_v_next = var->b2.next_color.ptr(i + 1) + next_offs_odd; \ - cW_v_next = curW_v->next_color.ptr(i + 1) + next_offs_odd; \ - pW_v_next = W_v->next_color.ptr(i + 1) + next_offs_odd; \ - len = var->A11.cur_color##_odd_len; \ - if (var->A11.cur_color##_even_len != var->A11.cur_color##_odd_len) \ - touches_right_border = !bool_default; \ - else \ - touches_right_border = bool_default; \ - } - - for (int i = start_i; i < end_i; i++) - { - if (red_pass) - { - INIT_ROW_POINTERS(red, black, 1, 2, true); - } - else - { - INIT_ROW_POINTERS(black, red, 2, 1, false); - } -#undef INIT_ROW_POINTERS - -#define COMPUTE \ - /* Gradients for the flow on the current fixed-point iteration: */ \ - ux = cW_u_next[j] - cW_u[j]; \ - vx = cW_v_next[j] - cW_v[j]; \ - uy = cW_u_next_row[j] - cW_u[j]; \ - vy = cW_v_next_row[j] - cW_v[j]; \ - /* Weight of the smoothness term in the current fixed-point iteration: */ \ - pWeight[j] = alpha2 / sqrt(ux * ux + vx * vx + uy * uy + vy * vy + epsilon_squared); \ - /* Gradients for initial raw flow multiplied by weight:*/ \ - ux = pWeight[j] * (pW_u_next[j] - pW_u[j]); \ - vx = pWeight[j] * (pW_v_next[j] - pW_v[j]); - -#define UPDATE \ - pB_u[j] += ux; \ - pA_u[j] += pWeight[j]; \ - pB_v[j] += vx; \ - pA_v[j] += pWeight[j]; \ - pB_u_next[j] -= ux; \ - pA_u_next[j] += pWeight[j]; \ - pB_v_next[j] -= vx; \ - pA_v_next[j] += pWeight[j]; - - int j = 0; -#ifdef CV_SIMD128 - v_float32x4 alpha2_vec = v_setall_f32(alpha2); - v_float32x4 eps_vec = v_setall_f32(epsilon_squared); - v_float32x4 cW_u_vec, cW_v_vec; - v_float32x4 pWeight_vec, ux_vec, vx_vec, uy_vec, vy_vec; - - for (; j < len - 4; j += 4) - { - cW_u_vec = v_load(cW_u + j); - cW_v_vec = v_load(cW_v + j); - - ux_vec = v_load(cW_u_next + j) - cW_u_vec; - vx_vec = v_load(cW_v_next + j) - cW_v_vec; - uy_vec = v_load(cW_u_next_row + j) - cW_u_vec; - vy_vec = v_load(cW_v_next_row + j) - cW_v_vec; - pWeight_vec = - alpha2_vec / v_sqrt(ux_vec * ux_vec + vx_vec * vx_vec + uy_vec * uy_vec + vy_vec * vy_vec + eps_vec); - v_store(pWeight + j, pWeight_vec); - - ux_vec = pWeight_vec * (v_load(pW_u_next + j) - v_load(pW_u + j)); - vx_vec = pWeight_vec * (v_load(pW_v_next + j) - v_load(pW_v + j)); - - v_store(pA_u + j, v_load(pA_u + j) + pWeight_vec); - v_store(pA_v + j, v_load(pA_v + j) + pWeight_vec); - v_store(pB_u + j, v_load(pB_u + j) + ux_vec); - v_store(pB_v + j, v_load(pB_v + j) + vx_vec); - - v_store(pA_u_next + j, v_load(pA_u_next + j) + pWeight_vec); - v_store(pA_v_next + j, v_load(pA_v_next + j) + pWeight_vec); - v_store(pB_u_next + j, v_load(pB_u_next + j) - ux_vec); - v_store(pB_v_next + j, v_load(pB_v_next + j) - vx_vec); - } -#endif - for (; j < len - 1; j++) - { - COMPUTE; - UPDATE; - } - - /* Omit the update on the rightmost elements */ - if (touches_right_border) - { - COMPUTE; - } - else - { - COMPUTE; - UPDATE; - } - } -#undef COMPUTE -#undef UPDATE -} - -VariationalRefinementImpl::ComputeSmoothnessTermVertPass_ParBody::ComputeSmoothnessTermVertPass_ParBody( - VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_W_u, RedBlackBuffer &_W_v, bool _red_pass) - : var(&_var), nstripes(_nstripes), W_u(&_W_u), W_v(&_W_v), red_pass(_red_pass) -{ - /* Omit the last row in the vertical pass */ - h = _h - 1; - stripe_sz = (int)ceil(h / (double)nstripes); -} - -/* This function adds the last remaining terms to the linear system coefficients A11,A22,b1,b1. */ -void VariationalRefinementImpl::ComputeSmoothnessTermVertPass_ParBody::operator()(const Range &range) const -{ - int start_i = min(range.start * stripe_sz, h); - int end_i = min(range.end * stripe_sz, h); - - float *pWeight; - float *pA_u, *pA_u_next_row, *pA_v, *pA_v_next_row; - float *pB_u, *pB_u_next_row, *pB_v, *pB_v_next_row; - float *pW_u, *pW_u_next_row, *pW_v, *pW_v_next_row; - float vy, uy; - int len; - - for (int i = start_i; i < end_i; i++) - { -#define INIT_ROW_POINTERS(cur_color, next_color) \ - pWeight = var->weights.cur_color.ptr(i + 1) + 1; \ - pA_u = var->A11.cur_color.ptr(i + 1) + 1; \ - pB_u = var->b1.cur_color.ptr(i + 1) + 1; \ - pW_u = W_u->cur_color.ptr(i + 1) + 1; \ - pA_v = var->A22.cur_color.ptr(i + 1) + 1; \ - pB_v = var->b2.cur_color.ptr(i + 1) + 1; \ - pW_v = W_v->cur_color.ptr(i + 1) + 1; \ - \ - pA_u_next_row = var->A11.next_color.ptr(i + 2) + 1; \ - pB_u_next_row = var->b1.next_color.ptr(i + 2) + 1; \ - pW_u_next_row = W_u->next_color.ptr(i + 2) + 1; \ - pA_v_next_row = var->A22.next_color.ptr(i + 2) + 1; \ - pB_v_next_row = var->b2.next_color.ptr(i + 2) + 1; \ - pW_v_next_row = W_v->next_color.ptr(i + 2) + 1; \ - \ - if (i % 2 == 0) \ - len = var->A11.cur_color##_even_len; \ - else \ - len = var->A11.cur_color##_odd_len; - - if (red_pass) - { - INIT_ROW_POINTERS(red, black); - } - else - { - INIT_ROW_POINTERS(black, red); - } -#undef INIT_ROW_POINTERS - - int j = 0; -#ifdef CV_SIMD128 - v_float32x4 pWeight_vec, uy_vec, vy_vec; - for (; j < len - 3; j += 4) - { - pWeight_vec = v_load(pWeight + j); - uy_vec = pWeight_vec * (v_load(pW_u_next_row + j) - v_load(pW_u + j)); - vy_vec = pWeight_vec * (v_load(pW_v_next_row + j) - v_load(pW_v + j)); - - v_store(pA_u + j, v_load(pA_u + j) + pWeight_vec); - v_store(pA_v + j, v_load(pA_v + j) + pWeight_vec); - v_store(pB_u + j, v_load(pB_u + j) + uy_vec); - v_store(pB_v + j, v_load(pB_v + j) + vy_vec); - - v_store(pA_u_next_row + j, v_load(pA_u_next_row + j) + pWeight_vec); - v_store(pA_v_next_row + j, v_load(pA_v_next_row + j) + pWeight_vec); - v_store(pB_u_next_row + j, v_load(pB_u_next_row + j) - uy_vec); - v_store(pB_v_next_row + j, v_load(pB_v_next_row + j) - vy_vec); - } -#endif - for (; j < len; j++) - { - uy = pWeight[j] * (pW_u_next_row[j] - pW_u[j]); - vy = pWeight[j] * (pW_v_next_row[j] - pW_v[j]); - pB_u[j] += uy; - pA_u[j] += pWeight[j]; - pB_v[j] += vy; - pA_v[j] += pWeight[j]; - pB_u_next_row[j] -= uy; - pA_u_next_row[j] += pWeight[j]; - pB_v_next_row[j] -= vy; - pA_v_next_row[j] += pWeight[j]; - } - } -} - -VariationalRefinementImpl::RedBlackSOR_ParBody::RedBlackSOR_ParBody(VariationalRefinementImpl &_var, int _nstripes, - int _h, RedBlackBuffer &_dW_u, - RedBlackBuffer &_dW_v, bool _red_pass) - : var(&_var), nstripes(_nstripes), h(_h), dW_u(&_dW_u), dW_v(&_dW_v), red_pass(_red_pass) -{ - stripe_sz = (int)ceil(h / (double)nstripes); -} - -/* This function implements the Red-Black SOR (successive-over relaxation) method for solving the main - * linear system in the current fixed-point iteration. - */ -void VariationalRefinementImpl::RedBlackSOR_ParBody::operator()(const Range &range) const -{ - int start = min(range.start * stripe_sz, h); - int end = min(range.end * stripe_sz, h); - - float *pa11, *pa12, *pa22, *pb1, *pb2, *pW, *pdu, *pdv; - float *pW_next, *pdu_next, *pdv_next; - float *pW_prev_row, *pdu_prev_row, *pdv_prev_row; - float *pdu_next_row, *pdv_next_row; - - float sigmaU, sigmaV; - int j, len; - for (int i = start; i < end; i++) - { -#define INIT_ROW_POINTERS(cur_color, next_color, next_offs_even, next_offs_odd) \ - pW = var->weights.cur_color.ptr(i + 1) + 1; \ - pa11 = var->A11.cur_color.ptr(i + 1) + 1; \ - pa12 = var->A12.cur_color.ptr(i + 1) + 1; \ - pa22 = var->A22.cur_color.ptr(i + 1) + 1; \ - pb1 = var->b1.cur_color.ptr(i + 1) + 1; \ - pb2 = var->b2.cur_color.ptr(i + 1) + 1; \ - pdu = dW_u->cur_color.ptr(i + 1) + 1; \ - pdv = dW_v->cur_color.ptr(i + 1) + 1; \ - \ - pdu_next_row = dW_u->next_color.ptr(i + 2) + 1; \ - pdv_next_row = dW_v->next_color.ptr(i + 2) + 1; \ - \ - pW_prev_row = var->weights.next_color.ptr(i) + 1; \ - pdu_prev_row = dW_u->next_color.ptr(i) + 1; \ - pdv_prev_row = dW_v->next_color.ptr(i) + 1; \ - \ - if (i % 2 == 0) \ - { \ - pW_next = var->weights.next_color.ptr(i + 1) + next_offs_even; \ - pdu_next = dW_u->next_color.ptr(i + 1) + next_offs_even; \ - pdv_next = dW_v->next_color.ptr(i + 1) + next_offs_even; \ - len = var->A11.cur_color##_even_len; \ - } \ - else \ - { \ - pW_next = var->weights.next_color.ptr(i + 1) + next_offs_odd; \ - pdu_next = dW_u->next_color.ptr(i + 1) + next_offs_odd; \ - pdv_next = dW_v->next_color.ptr(i + 1) + next_offs_odd; \ - len = var->A11.cur_color##_odd_len; \ - } - if (red_pass) - { - INIT_ROW_POINTERS(red, black, 1, 2); - } - else - { - INIT_ROW_POINTERS(black, red, 2, 1); - } -#undef INIT_ROW_POINTERS - - j = 0; -#ifdef CV_SIMD128 - v_float32x4 pW_prev_vec = v_setall_f32(pW_next[-1]); - v_float32x4 pdu_prev_vec = v_setall_f32(pdu_next[-1]); - v_float32x4 pdv_prev_vec = v_setall_f32(pdv_next[-1]); - v_float32x4 omega_vec = v_setall_f32(var->omega); - v_float32x4 pW_vec, pW_next_vec, pW_prev_row_vec; - v_float32x4 pdu_next_vec, pdu_prev_row_vec, pdu_next_row_vec; - v_float32x4 pdv_next_vec, pdv_prev_row_vec, pdv_next_row_vec; - v_float32x4 pW_shifted_vec, pdu_shifted_vec, pdv_shifted_vec; - v_float32x4 pa12_vec, sigmaU_vec, sigmaV_vec, pdu_vec, pdv_vec; - for (; j < len - 3; j += 4) - { - pW_vec = v_load(pW + j); - pW_next_vec = v_load(pW_next + j); - pW_prev_row_vec = v_load(pW_prev_row + j); - pdu_next_vec = v_load(pdu_next + j); - pdu_prev_row_vec = v_load(pdu_prev_row + j); - pdu_next_row_vec = v_load(pdu_next_row + j); - pdv_next_vec = v_load(pdv_next + j); - pdv_prev_row_vec = v_load(pdv_prev_row + j); - pdv_next_row_vec = v_load(pdv_next_row + j); - pa12_vec = v_load(pa12 + j); - pW_shifted_vec = v_reinterpret_as_f32( - v_extract<3>(v_reinterpret_as_s32(pW_prev_vec), v_reinterpret_as_s32(pW_next_vec))); - pdu_shifted_vec = v_reinterpret_as_f32( - v_extract<3>(v_reinterpret_as_s32(pdu_prev_vec), v_reinterpret_as_s32(pdu_next_vec))); - pdv_shifted_vec = v_reinterpret_as_f32( - v_extract<3>(v_reinterpret_as_s32(pdv_prev_vec), v_reinterpret_as_s32(pdv_next_vec))); - - sigmaU_vec = pW_shifted_vec * pdu_shifted_vec + pW_vec * pdu_next_vec + pW_prev_row_vec * pdu_prev_row_vec + - pW_vec * pdu_next_row_vec; - sigmaV_vec = pW_shifted_vec * pdv_shifted_vec + pW_vec * pdv_next_vec + pW_prev_row_vec * pdv_prev_row_vec + - pW_vec * pdv_next_row_vec; - - pdu_vec = v_load(pdu + j); - pdv_vec = v_load(pdv + j); - pdu_vec += omega_vec * ((sigmaU_vec + v_load(pb1 + j) - pdv_vec * pa12_vec) / v_load(pa11 + j) - pdu_vec); - pdv_vec += omega_vec * ((sigmaV_vec + v_load(pb2 + j) - pdu_vec * pa12_vec) / v_load(pa22 + j) - pdv_vec); - v_store(pdu + j, pdu_vec); - v_store(pdv + j, pdv_vec); - - pW_prev_vec = pW_next_vec; - pdu_prev_vec = pdu_next_vec; - pdv_prev_vec = pdv_next_vec; - } -#endif - for (; j < len; j++) - { - sigmaU = pW_next[j - 1] * pdu_next[j - 1] + pW[j] * pdu_next[j] + pW_prev_row[j] * pdu_prev_row[j] + - pW[j] * pdu_next_row[j]; - sigmaV = pW_next[j - 1] * pdv_next[j - 1] + pW[j] * pdv_next[j] + pW_prev_row[j] * pdv_prev_row[j] + - pW[j] * pdv_next_row[j]; - pdu[j] += var->omega * ((sigmaU + pb1[j] - pdv[j] * pa12[j]) / pa11[j] - pdu[j]); - pdv[j] += var->omega * ((sigmaV + pb2[j] - pdu[j] * pa12[j]) / pa22[j] - pdv[j]); - } - } -} - -void VariationalRefinementImpl::calc(InputArray I0, InputArray I1, InputOutputArray flow) -{ - CV_Assert(!I0.empty() && I0.channels() == 1); - CV_Assert(!I1.empty() && I1.channels() == 1); - CV_Assert(I0.sameSize(I1)); - CV_Assert((I0.depth() == CV_8U && I1.depth() == CV_8U) || (I0.depth() == CV_32F && I1.depth() == CV_32F)); - CV_Assert(!flow.empty() && flow.depth() == CV_32F && flow.channels() == 2); - CV_Assert(I0.sameSize(flow)); - - Mat uv[2]; - Mat &flowMat = flow.getMatRef(); - split(flowMat, uv); - calcUV(I0, I1, uv[0], uv[1]); - merge(uv, 2, flowMat); -} - -void VariationalRefinementImpl::calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v) -{ - CV_Assert(!I0.empty() && I0.channels() == 1); - CV_Assert(!I1.empty() && I1.channels() == 1); - CV_Assert(I0.sameSize(I1)); - CV_Assert((I0.depth() == CV_8U && I1.depth() == CV_8U) || (I0.depth() == CV_32F && I1.depth() == CV_32F)); - CV_Assert(!flow_u.empty() && flow_u.depth() == CV_32F && flow_u.channels() == 1); - CV_Assert(!flow_v.empty() && flow_v.depth() == CV_32F && flow_v.channels() == 1); - CV_Assert(I0.sameSize(flow_u)); - CV_Assert(flow_u.sameSize(flow_v)); - - int num_stripes = getNumThreads(); - Mat I0Mat = I0.getMat(); - Mat I1Mat = I1.getMat(); - Mat &W_u = flow_u.getMatRef(); - Mat &W_v = flow_v.getMatRef(); - prepareBuffers(I0Mat, I1Mat, W_u, W_v); - - splitCheckerboard(W_u_rb, W_u); - splitCheckerboard(W_v_rb, W_v); - W_u_rb.red.copyTo(tempW_u.red); - W_u_rb.black.copyTo(tempW_u.black); - W_v_rb.red.copyTo(tempW_v.red); - W_v_rb.black.copyTo(tempW_v.black); - dW_u.red.setTo(0.0f); - dW_u.black.setTo(0.0f); - dW_v.red.setTo(0.0f); - dW_v.black.setTo(0.0f); - - for (int i = 0; i < fixedPointIterations; i++) - { - parallel_for_(Range(0, num_stripes), ComputeDataTerm_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, true)); - parallel_for_(Range(0, num_stripes), ComputeDataTerm_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, false)); - - parallel_for_(Range(0, num_stripes), ComputeSmoothnessTermHorPass_ParBody( - *this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, tempW_u, tempW_v, true)); - parallel_for_(Range(0, num_stripes), ComputeSmoothnessTermHorPass_ParBody( - *this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, tempW_u, tempW_v, false)); - - parallel_for_(Range(0, num_stripes), - ComputeSmoothnessTermVertPass_ParBody(*this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, true)); - parallel_for_(Range(0, num_stripes), - ComputeSmoothnessTermVertPass_ParBody(*this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, false)); - - for (int j = 0; j < sorIterations; j++) - { - parallel_for_(Range(0, num_stripes), RedBlackSOR_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, true)); - parallel_for_(Range(0, num_stripes), RedBlackSOR_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, false)); - } - - tempW_u.red = W_u_rb.red + dW_u.red; - tempW_u.black = W_u_rb.black + dW_u.black; - updateRepeatedBorders(tempW_u); - tempW_v.red = W_v_rb.red + dW_v.red; - tempW_v.black = W_v_rb.black + dW_v.black; - updateRepeatedBorders(tempW_v); - } - mergeCheckerboard(W_u, tempW_u); - mergeCheckerboard(W_v, tempW_v); -} -void VariationalRefinementImpl::collectGarbage() -{ - Ix.release(); - Iy.release(); - Iz.release(); - Ixx.release(); - Ixy.release(); - Iyy.release(); - Ixz.release(); - Iyz.release(); - - Ix_rb.release(); - Iy_rb.release(); - Iz_rb.release(); - Ixx_rb.release(); - Ixy_rb.release(); - Iyy_rb.release(); - Ixz_rb.release(); - Iyz_rb.release(); - - A11.release(); - A12.release(); - A22.release(); - b1.release(); - b2.release(); - weights.release(); - - mapX.release(); - mapY.release(); - - tempW_u.release(); - tempW_v.release(); - dW_u.release(); - dW_v.release(); - W_u_rb.release(); - W_v_rb.release(); -} - -Ptr createVariationalFlowRefinement() { return makePtr(); } -} -} diff --git a/modules/optflow/test/ocl/test_dis.cpp b/modules/optflow/test/ocl/test_optflow_tvl1flow.cpp similarity index 50% rename from modules/optflow/test/ocl/test_dis.cpp rename to modules/optflow/test/ocl/test_optflow_tvl1flow.cpp index 57f2969e1..4c8dca430 100644 --- a/modules/optflow/test/ocl/test_dis.cpp +++ b/modules/optflow/test/ocl/test_optflow_tvl1flow.cpp @@ -7,10 +7,12 @@ // copy or use the software. // // -// Intel License Agreement +// License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, @@ -23,7 +25,7 @@ // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // -// * The name of Intel Corporation may not be used to endorse or promote products +// * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and @@ -44,53 +46,72 @@ #ifdef HAVE_OPENCL -namespace opencv_test { namespace { +namespace opencv_test { +namespace ocl { -PARAM_TEST_CASE(OCL_DenseOpticalFlow_DIS, int) +///////////////////////////////////////////////////////////////////////////////////////////////// +// Optical_flow_tvl1 +namespace { - int preset; + IMPLEMENT_PARAM_CLASS(UseInitFlow, bool) + IMPLEMENT_PARAM_CLASS(MedianFiltering, int) + IMPLEMENT_PARAM_CLASS(ScaleStep, double) +} +PARAM_TEST_CASE(OpticalFlowTVL1, UseInitFlow, MedianFiltering, ScaleStep) +{ + bool useInitFlow; + int medianFiltering; + double scaleStep; virtual void SetUp() { - preset = GET_PARAM(0); + useInitFlow = GET_PARAM(0); + medianFiltering = GET_PARAM(1); + scaleStep = GET_PARAM(2); } }; -OCL_TEST_P(OCL_DenseOpticalFlow_DIS, Mat) +OCL_TEST_P(OpticalFlowTVL1, Mat) { - Mat frame1, frame2, GT; + Mat frame0 = readImage("optflow/RubberWhale1.png", IMREAD_GRAYSCALE); + ASSERT_FALSE(frame0.empty()); - frame1 = imread(TS::ptr()->get_data_path() + "optflow/RubberWhale1.png"); - frame2 = imread(TS::ptr()->get_data_path() + "optflow/RubberWhale2.png"); + Mat frame1 = readImage("optflow/RubberWhale2.png", IMREAD_GRAYSCALE); + ASSERT_FALSE(frame1.empty()); - CV_Assert(!frame1.empty() && !frame2.empty()); + Mat flow; UMat uflow; - cvtColor(frame1, frame1, COLOR_BGR2GRAY); - cvtColor(frame2, frame2, COLOR_BGR2GRAY); + //create algorithm + Ptr alg = createOptFlow_DualTVL1(); - Ptr algo; + //set parameters + alg->setScaleStep(scaleStep); + alg->setMedianFiltering(medianFiltering); - // iterate over presets: - for (int i = 0; i < cvtest::ocl::test_loop_times; i++) + //create initial flow as result of algorithm calculation + if (useInitFlow) { - Mat flow; - UMat ocl_flow; - - algo = createOptFlow_DIS(preset); - OCL_OFF(algo->calc(frame1, frame2, flow)); - OCL_ON(algo->calc(frame1, frame2, ocl_flow)); - ASSERT_EQ(flow.rows, ocl_flow.rows); - ASSERT_EQ(flow.cols, ocl_flow.cols); - - EXPECT_MAT_SIMILAR(flow, ocl_flow, 6e-3); + OCL_ON(alg->calc(frame0, frame1, uflow)); + uflow.copyTo(flow); } + + //set flag to use initial flow as it is ready to use + alg->setUseInitialFlow(useInitFlow); + + OCL_OFF(alg->calc(frame0, frame1, flow)); + OCL_ON(alg->calc(frame0, frame1, uflow)); + + EXPECT_MAT_SIMILAR(flow, uflow, 1e-2); } -OCL_INSTANTIATE_TEST_CASE_P(Contrib, OCL_DenseOpticalFlow_DIS, - Values(DISOpticalFlow::PRESET_ULTRAFAST, - DISOpticalFlow::PRESET_FAST, - DISOpticalFlow::PRESET_MEDIUM)); +OCL_INSTANTIATE_TEST_CASE_P(Contrib, OpticalFlowTVL1, + Combine( + Values(UseInitFlow(false), UseInitFlow(true)), + Values(MedianFiltering(3), MedianFiltering(-1)), + Values(ScaleStep(0.3),ScaleStep(0.5)) + ) + ); -}} // namespace +} } // namespace opencv_test::ocl #endif // HAVE_OPENCL diff --git a/modules/optflow/test/test_OF_accuracy.cpp b/modules/optflow/test/test_OF_accuracy.cpp index bf83e5052..6a5ed702e 100644 --- a/modules/optflow/test/test_OF_accuracy.cpp +++ b/modules/optflow/test/test_OF_accuracy.cpp @@ -172,52 +172,6 @@ TEST(DenseOpticalFlow_SparseToDenseFlow, ReferenceAccuracy) EXPECT_LE(calcRMSE(GT, flow), target_RMSE); } -TEST(DenseOpticalFlow_DIS, ReferenceAccuracy) -{ - Mat frame1, frame2, GT; - ASSERT_TRUE(readRubberWhale(frame1, frame2, GT)); - int presets[] = {DISOpticalFlow::PRESET_ULTRAFAST, DISOpticalFlow::PRESET_FAST, DISOpticalFlow::PRESET_MEDIUM}; - float target_RMSE[] = {0.86f, 0.74f, 0.49f}; - cvtColor(frame1, frame1, COLOR_BGR2GRAY); - cvtColor(frame2, frame2, COLOR_BGR2GRAY); - - Ptr algo; - - // iterate over presets: - for (int i = 0; i < 3; i++) - { - Mat flow; - algo = createOptFlow_DIS(presets[i]); - algo->calc(frame1, frame2, flow); - ASSERT_EQ(GT.rows, flow.rows); - ASSERT_EQ(GT.cols, flow.cols); - EXPECT_LE(calcRMSE(GT, flow), target_RMSE[i]); - } -} - -TEST(DenseOpticalFlow_VariationalRefinement, ReferenceAccuracy) -{ - Mat frame1, frame2, GT; - ASSERT_TRUE(readRubberWhale(frame1, frame2, GT)); - float target_RMSE = 0.86f; - cvtColor(frame1, frame1, COLOR_BGR2GRAY); - cvtColor(frame2, frame2, COLOR_BGR2GRAY); - - Ptr var_ref; - var_ref = createVariationalFlowRefinement(); - var_ref->setAlpha(20.0f); - var_ref->setDelta(5.0f); - var_ref->setGamma(10.0f); - var_ref->setSorIterations(25); - var_ref->setFixedPointIterations(25); - Mat flow(frame1.size(), CV_32FC2); - flow.setTo(0.0f); - var_ref->calc(frame1, frame2, flow); - ASSERT_EQ(GT.rows, flow.rows); - ASSERT_EQ(GT.cols, flow.cols); - EXPECT_LE(calcRMSE(GT, flow), target_RMSE); -} - TEST(DenseOpticalFlow_PCAFlow, ReferenceAccuracy) { Mat frame1, frame2, GT; diff --git a/modules/optflow/test/test_OF_reproducibility.cpp b/modules/optflow/test/test_OF_reproducibility.cpp deleted file mode 100644 index 21fed1d5c..000000000 --- a/modules/optflow/test/test_OF_reproducibility.cpp +++ /dev/null @@ -1,160 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "test_precomp.hpp" - -namespace opencv_test { namespace { - -typedef tuple OFParams; -typedef TestWithParam DenseOpticalFlow_DIS; -typedef TestWithParam DenseOpticalFlow_VariationalRefinement; - -TEST_P(DenseOpticalFlow_DIS, MultithreadReproducibility) -{ - double MAX_DIF = 0.01; - double MAX_MEAN_DIF = 0.001; - int loopsCount = 2; - RNG rng(0); - - OFParams params = GetParam(); - Size size = get<0>(params); - - int nThreads = cv::getNumThreads(); - if (nThreads == 1) - throw SkipTestException("Single thread environment"); - for (int iter = 0; iter <= loopsCount; iter++) - { - Mat frame1(size, CV_8U); - randu(frame1, 0, 255); - Mat frame2(size, CV_8U); - randu(frame2, 0, 255); - - Ptr algo = createOptFlow_DIS(); - int psz = rng.uniform(4, 16); - int pstr = rng.uniform(1, psz - 1); - int grad_iter = rng.uniform(1, 64); - int var_iter = rng.uniform(0, 10); - bool use_mean_normalization = !!rng.uniform(0, 2); - bool use_spatial_propagation = !!rng.uniform(0, 2); - algo->setFinestScale(0); - algo->setPatchSize(psz); - algo->setPatchStride(pstr); - algo->setGradientDescentIterations(grad_iter); - algo->setVariationalRefinementIterations(var_iter); - algo->setUseMeanNormalization(use_mean_normalization); - algo->setUseSpatialPropagation(use_spatial_propagation); - - cv::setNumThreads(nThreads); - Mat resMultiThread; - algo->calc(frame1, frame2, resMultiThread); - - cv::setNumThreads(1); - Mat resSingleThread; - algo->calc(frame1, frame2, resSingleThread); - - EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_INF), MAX_DIF); - EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_L1), MAX_MEAN_DIF * frame1.total()); - - // resulting flow should be within the frame bounds: - double min_val, max_val; - minMaxLoc(resMultiThread, &min_val, &max_val); - EXPECT_LE(abs(min_val), sqrt( static_cast(size.height * size.height + size.width * size.width)) ); - EXPECT_LE(abs(max_val), sqrt( static_cast(size.height * size.height + size.width * size.width)) ); - } -} - -INSTANTIATE_TEST_CASE_P(FullSet, DenseOpticalFlow_DIS, Values(szODD, szQVGA)); - -TEST_P(DenseOpticalFlow_VariationalRefinement, MultithreadReproducibility) -{ - double MAX_DIF = 0.01; - double MAX_MEAN_DIF = 0.001; - float input_flow_rad = 5.0; - int loopsCount = 2; - RNG rng(0); - - OFParams params = GetParam(); - Size size = get<0>(params); - - int nThreads = cv::getNumThreads(); - if (nThreads == 1) - throw SkipTestException("Single thread environment"); - for (int iter = 0; iter <= loopsCount; iter++) - { - Mat frame1(size, CV_8U); - randu(frame1, 0, 255); - Mat frame2(size, CV_8U); - randu(frame2, 0, 255); - Mat flow(size, CV_32FC2); - randu(flow, -input_flow_rad, input_flow_rad); - - Ptr var = createVariationalFlowRefinement(); - var->setAlpha(rng.uniform(1.0f, 100.0f)); - var->setGamma(rng.uniform(0.1f, 10.0f)); - var->setDelta(rng.uniform(0.1f, 10.0f)); - var->setSorIterations(rng.uniform(1, 20)); - var->setFixedPointIterations(rng.uniform(1, 20)); - var->setOmega(rng.uniform(1.01f, 1.99f)); - - cv::setNumThreads(nThreads); - Mat resMultiThread; - flow.copyTo(resMultiThread); - var->calc(frame1, frame2, resMultiThread); - - cv::setNumThreads(1); - Mat resSingleThread; - flow.copyTo(resSingleThread); - var->calc(frame1, frame2, resSingleThread); - - EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_INF), MAX_DIF); - EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_L1), MAX_MEAN_DIF * frame1.total()); - - // resulting flow should be within the frame bounds: - double min_val, max_val; - minMaxLoc(resMultiThread, &min_val, &max_val); - EXPECT_LE(abs(min_val), sqrt( static_cast(size.height * size.height + size.width * size.width)) ); - EXPECT_LE(abs(max_val), sqrt( static_cast(size.height * size.height + size.width * size.width)) ); - } -} - -INSTANTIATE_TEST_CASE_P(FullSet, DenseOpticalFlow_VariationalRefinement, Values(szODD, szQVGA)); - -}} // namespace diff --git a/modules/optflow/test/test_tvl1optflow.cpp b/modules/optflow/test/test_tvl1optflow.cpp new file mode 100644 index 000000000..c4274ffbe --- /dev/null +++ b/modules/optflow/test/test_tvl1optflow.cpp @@ -0,0 +1,173 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "test_precomp.hpp" + +namespace opencv_test { namespace { + +//#define DUMP + + // first four bytes, should be the same in little endian + const float FLO_TAG_FLOAT = 202021.25f; // check for this when READING the file + +#ifdef DUMP + // binary file format for flow data specified here: + // http://vision.middlebury.edu/flow/data/ + void writeOpticalFlowToFile(const Mat_& flow, const string& fileName) + { + const char FLO_TAG_STRING[] = "PIEH"; // use this when WRITING the file + ofstream file(fileName.c_str(), ios_base::binary); + + file << FLO_TAG_STRING; + + file.write((const char*) &flow.cols, sizeof(int)); + file.write((const char*) &flow.rows, sizeof(int)); + + for (int i = 0; i < flow.rows; ++i) + { + for (int j = 0; j < flow.cols; ++j) + { + const Point2f u = flow(i, j); + + file.write((const char*) &u.x, sizeof(float)); + file.write((const char*) &u.y, sizeof(float)); + } + } + } +#endif + + // binary file format for flow data specified here: + // http://vision.middlebury.edu/flow/data/ + void readOpticalFlowFromFile(Mat_& flow, const string& fileName) + { + std::ifstream file(fileName.c_str(), std::ios_base::binary); + + float tag; + file.read((char*) &tag, sizeof(float)); + CV_Assert( tag == FLO_TAG_FLOAT ); + + Size size; + + file.read((char*) &size.width, sizeof(int)); + file.read((char*) &size.height, sizeof(int)); + + flow.create(size); + + for (int i = 0; i < flow.rows; ++i) + { + for (int j = 0; j < flow.cols; ++j) + { + Point2f u; + + file.read((char*) &u.x, sizeof(float)); + file.read((char*) &u.y, sizeof(float)); + + flow(i, j) = u; + } + } + file.close(); + } + + bool isFlowCorrect(Point2f u) + { + return !cvIsNaN(u.x) && !cvIsNaN(u.y) && (fabs(u.x) < 1e9) && (fabs(u.y) < 1e9); + } + + void check(const Mat_& gold, const Mat_& flow, double threshold = 0.1, double expectedAccuracy = 0.95) + { + threshold = threshold*threshold; + + size_t gold_counter = 0; + size_t valid_counter = 0; + + for (int i = 0; i < gold.rows; ++i) + { + for (int j = 0; j < gold.cols; ++j) + { + const Point2f u1 = gold(i, j); + const Point2f u2 = flow(i, j); + + if (isFlowCorrect(u1)) + { + gold_counter++; + if (isFlowCorrect(u2)) + { + const Point2f diff = u1 - u2; + double err = diff.ddot(diff); + if (err <= threshold) + valid_counter++; + } + } + } + } + EXPECT_GE(valid_counter, expectedAccuracy * gold_counter); + } + +TEST(Contrib_calcOpticalFlowDual_TVL1, Regression) +{ + const string frame1_path = TS::ptr()->get_data_path() + "optflow/RubberWhale1.png"; + const string frame2_path = TS::ptr()->get_data_path() + "optflow/RubberWhale2.png"; + const string gold_flow_path = TS::ptr()->get_data_path() + "optflow/tvl1_flow.flo"; + + Mat frame1 = imread(frame1_path, IMREAD_GRAYSCALE); + Mat frame2 = imread(frame2_path, IMREAD_GRAYSCALE); + ASSERT_FALSE(frame1.empty()); + ASSERT_FALSE(frame2.empty()); + + Mat_ flow; + Ptr tvl1 = cv::optflow::DualTVL1OpticalFlow::create(); + + tvl1->calc(frame1, frame2, flow); + +#ifdef DUMP + writeOpticalFlowToFile(flow, gold_flow_path); +#else + Mat_ gold; + readOpticalFlowFromFile(gold, gold_flow_path); + + ASSERT_EQ(gold.rows, flow.rows); + ASSERT_EQ(gold.cols, flow.cols); + + check(gold, flow); +#endif +} + +}} // namespace diff --git a/modules/superres/CMakeLists.txt b/modules/superres/CMakeLists.txt index 028274b8e..2979ef0fb 100644 --- a/modules/superres/CMakeLists.txt +++ b/modules/superres/CMakeLists.txt @@ -6,6 +6,6 @@ set(the_description "Super Resolution") if(HAVE_CUDA) ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wshadow) endif() -ocv_define_module(superres opencv_imgproc opencv_video +ocv_define_module(superres opencv_imgproc opencv_video opencv_optflow OPTIONAL opencv_videoio opencv_cudaarithm opencv_cudafilters opencv_cudawarping opencv_cudaimgproc opencv_cudaoptflow opencv_cudacodec WRAP python) diff --git a/modules/superres/src/optical_flow.cpp b/modules/superres/src/optical_flow.cpp index 4b13e94ba..50e5b0486 100644 --- a/modules/superres/src/optical_flow.cpp +++ b/modules/superres/src/optical_flow.cpp @@ -41,6 +41,7 @@ //M*/ #include "precomp.hpp" +#include "opencv2/optflow.hpp" #include "opencv2/core/opencl/ocl_defs.hpp" using namespace cv; @@ -371,12 +372,12 @@ namespace void impl(InputArray input0, InputArray input1, OutputArray dst) CV_OVERRIDE; private: - Ptr alg_; + Ptr alg_; }; DualTVL1::DualTVL1() : CpuOpticalFlow(CV_8UC1) { - alg_ = cv::createOptFlow_DualTVL1(); + alg_ = optflow::createOptFlow_DualTVL1(); } void DualTVL1::calc(InputArray frame0, InputArray frame1, OutputArray flow1, OutputArray flow2) diff --git a/samples/python2/dis_opt_flow.py b/samples/python2/dis_opt_flow.py deleted file mode 100644 index 171e6a235..000000000 --- a/samples/python2/dis_opt_flow.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env python - -''' -example to show optical flow estimation using DISOpticalFlow - -USAGE: dis_opt_flow.py [] - -Keys: - 1 - toggle HSV flow visualization - 2 - toggle glitch - 3 - toggle spatial propagation of flow vectors - 4 - toggle temporal propagation of flow vectors -ESC - exit -''' - -# Python 2/3 compatibility -from __future__ import print_function - -import numpy as np -import cv2 as cv -import video - - -def draw_flow(img, flow, step=16): - h, w = img.shape[:2] - y, x = np.mgrid[step/2:h:step, step/2:w:step].reshape(2,-1).astype(int) - fx, fy = flow[y,x].T - lines = np.vstack([x, y, x+fx, y+fy]).T.reshape(-1, 2, 2) - lines = np.int32(lines + 0.5) - vis = cv.cvtColor(img, cv.COLOR_GRAY2BGR) - cv.polylines(vis, lines, 0, (0, 255, 0)) - for (x1, y1), (x2, y2) in lines: - cv.circle(vis, (x1, y1), 1, (0, 255, 0), -1) - return vis - - -def draw_hsv(flow): - h, w = flow.shape[:2] - fx, fy = flow[:,:,0], flow[:,:,1] - ang = np.arctan2(fy, fx) + np.pi - v = np.sqrt(fx*fx+fy*fy) - hsv = np.zeros((h, w, 3), np.uint8) - hsv[...,0] = ang*(180/np.pi/2) - hsv[...,1] = 255 - hsv[...,2] = np.minimum(v*4, 255) - bgr = cv.cvtColor(hsv, cv.COLOR_HSV2BGR) - return bgr - - -def warp_flow(img, flow): - h, w = flow.shape[:2] - flow = -flow - flow[:,:,0] += np.arange(w) - flow[:,:,1] += np.arange(h)[:,np.newaxis] - res = cv.remap(img, flow, None, cv.INTER_LINEAR) - return res - - -if __name__ == '__main__': - import sys - print(__doc__) - try: - fn = sys.argv[1] - except IndexError: - fn = 0 - - cam = video.create_capture(fn) - ret, prev = cam.read() - prevgray = cv.cvtColor(prev, cv.COLOR_BGR2GRAY) - show_hsv = False - show_glitch = False - use_spatial_propagation = False - use_temporal_propagation = True - cur_glitch = prev.copy() - inst = cv.optflow.createOptFlow_DIS(cv.optflow.DISOPTICAL_FLOW_PRESET_MEDIUM) - inst.setUseSpatialPropagation(use_spatial_propagation) - - flow = None - while True: - ret, img = cam.read() - gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY) - if flow is not None and use_temporal_propagation: - #warp previous flow to get an initial approximation for the current flow: - flow = inst.calc(prevgray, gray, warp_flow(flow,flow)) - else: - flow = inst.calc(prevgray, gray, None) - prevgray = gray - - cv.imshow('flow', draw_flow(gray, flow)) - if show_hsv: - cv.imshow('flow HSV', draw_hsv(flow)) - if show_glitch: - cur_glitch = warp_flow(cur_glitch, flow) - cv.imshow('glitch', cur_glitch) - - ch = 0xFF & cv.waitKey(5) - if ch == 27: - break - if ch == ord('1'): - show_hsv = not show_hsv - print('HSV flow visualization is', ['off', 'on'][show_hsv]) - if ch == ord('2'): - show_glitch = not show_glitch - if show_glitch: - cur_glitch = img.copy() - print('glitch is', ['off', 'on'][show_glitch]) - if ch == ord('3'): - use_spatial_propagation = not use_spatial_propagation - inst.setUseSpatialPropagation(use_spatial_propagation) - print('spatial propagation is', ['off', 'on'][use_spatial_propagation]) - if ch == ord('4'): - use_temporal_propagation = not use_temporal_propagation - print('temporal propagation is', ['off', 'on'][use_temporal_propagation]) - cv.destroyAllWindows()