diff --git a/modules/optflow/doc/optflow.bib b/modules/optflow/doc/optflow.bib index 5cc73d83e..a7b218698 100644 --- a/modules/optflow/doc/optflow.bib +++ b/modules/optflow/doc/optflow.bib @@ -44,3 +44,11 @@ journal={arXiv preprint arXiv:1603.03590}, year={2016} } + +@inproceedings{Brox2004, + title={High accuracy optical flow estimation based on a theory for warping}, + author={Brox, Thomas and Bruhn, Andr{\'e}s and Papenberg, Nils and Weickert, Joachim}, + booktitle={European Conference on Computer Vision (ECCV)}, + pages={25--36}, + year={2004} +} diff --git a/modules/optflow/include/opencv2/optflow.hpp b/modules/optflow/include/opencv2/optflow.hpp index 6d87c627f..23c40ddaf 100644 --- a/modules/optflow/include/opencv2/optflow.hpp +++ b/modules/optflow/include/opencv2/optflow.hpp @@ -154,6 +154,64 @@ to the flow in the horizontal direction (u), second - vertical (v). */ CV_EXPORTS_W bool writeOpticalFlow( const String& path, InputArray flow ); +/** @brief Variational optical flow refinement + +This class implements variational refinement of the input flow field, i.e. +it uses input flow to initialize the minimization of the following functional: +\f$E(U) = \int_{\Omega} \delta \Psi(E_I) + \gamma \Psi(E_G) + \alpha \Psi(E_S) \f$, +where \f$E_I,E_G,E_S\f$ are color constancy, gradient constancy and smoothness terms +respectively. \f$\Psi(s^2)=\sqrt{s^2+\epsilon^2}\f$ is a robust penalizer to limit the +influence of outliers. A complete formulation and a description of the minimization +procedure can be found in @cite Brox2004 +*/ +class CV_EXPORTS_W VariationalRefinement : public DenseOpticalFlow +{ +public: + /** @brief calc function overload to handle separate horizontal (u) and vertical (v) flow components + (to avoid extra splits/merges) */ + CV_WRAP virtual void calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v) = 0; + + /** @brief Number of outer (fixed-point) iterations in the minimization procedure. + @see setFixedPointIterations */ + CV_WRAP virtual int getFixedPointIterations() const = 0; + /** @copybrief getFixedPointIterations @see getFixedPointIterations */ + CV_WRAP virtual void setFixedPointIterations(int val) = 0; + + /** @brief Number of inner successive over-relaxation (SOR) iterations + in the minimization procedure to solve the respective linear system. + @see setSorIterations */ + CV_WRAP virtual int getSorIterations() const = 0; + /** @copybrief getSorIterations @see getSorIterations */ + CV_WRAP virtual void setSorIterations(int val) = 0; + + /** @brief Relaxation factor in SOR + @see setOmega */ + CV_WRAP virtual float getOmega() const = 0; + /** @copybrief getOmega @see getOmega */ + CV_WRAP virtual void setOmega(float val) = 0; + + /** @brief Weight of the smoothness term + @see setAlpha */ + CV_WRAP virtual float getAlpha() const = 0; + /** @copybrief getAlpha @see getAlpha */ + CV_WRAP virtual void setAlpha(float val) = 0; + + /** @brief Weight of the color constancy term + @see setDelta */ + CV_WRAP virtual float getDelta() const = 0; + /** @copybrief getDelta @see getDelta */ + CV_WRAP virtual void setDelta(float val) = 0; + + /** @brief Weight of the gradient constancy term + @see setGamma */ + CV_WRAP virtual float getGamma() const = 0; + /** @copybrief getGamma @see getGamma */ + CV_WRAP virtual void setGamma(float val) = 0; +}; + +/** @brief Creates an instance of VariationalRefinement +*/ +CV_EXPORTS_W Ptr createVariationalFlowRefinement(); /** @brief DeepFlow optical flow algorithm implementation. @@ -194,40 +252,80 @@ CV_EXPORTS_W Ptr createOptFlow_SparseToDense(); /** @brief DIS optical flow algorithm. This class implements the Dense Inverse Search (DIS) optical flow algorithm. More -details about the algorithm can be found at @cite Kroeger2016 . +details about the algorithm can be found at @cite Kroeger2016 . Includes three presets with preselected +parameters to provide reasonable trade-off between speed and quality. However, even the slowest preset is +still relatively fast, use DeepFlow if you need better quality and don't care about speed. */ class CV_EXPORTS_W DISOpticalFlow : public DenseOpticalFlow { public: - /** @brief Finest level of the gaussian pyramid on which the flow is computed (zero level - corresponds to the original image resolution).The final flow is obtained by bilinear upscaling. + enum + { + PRESET_ULTRAFAST = 0, + PRESET_FAST = 1, + PRESET_MEDIUM = 2 + }; + + /** @brief Finest level of the Gaussian pyramid on which the flow is computed (zero level + corresponds to the original image resolution). The final flow is obtained by bilinear upscaling. @see setFinestScale */ - virtual int getFinestScale() const = 0; + CV_WRAP virtual int getFinestScale() const = 0; /** @copybrief getFinestScale @see getFinestScale */ - virtual void setFinestScale(int val) = 0; + CV_WRAP virtual void setFinestScale(int val) = 0; - /** @brief Size of an image patch for matching (in pixels) + /** @brief Size of an image patch for matching (in pixels). Normally, default 8x8 patches work well + enough in most cases. @see setPatchSize */ - virtual int getPatchSize() const = 0; + CV_WRAP virtual int getPatchSize() const = 0; /** @copybrief getPatchSize @see getPatchSize */ - virtual void setPatchSize(int val) = 0; + CV_WRAP virtual void setPatchSize(int val) = 0; - /** @brief Stride between neighbor patches. Must be less than patch size. + /** @brief Stride between neighbor patches. Must be less than patch size. Lower values correspond + to higher flow quality. @see setPatchStride */ - virtual int getPatchStride() const = 0; + CV_WRAP virtual int getPatchStride() const = 0; /** @copybrief getPatchStride @see getPatchStride */ - virtual void setPatchStride(int val) = 0; + CV_WRAP virtual void setPatchStride(int val) = 0; - /** @brief number of gradient descent iterations in the patch inverse search stage + /** @brief Maximum number of gradient descent iterations in the patch inverse search stage. Higher values + may improve quality in some cases. @see setGradientDescentIterations */ - virtual int getGradientDescentIterations() const = 0; + CV_WRAP virtual int getGradientDescentIterations() const = 0; /** @copybrief getGradientDescentIterations @see getGradientDescentIterations */ - virtual void setGradientDescentIterations(int val) = 0; + CV_WRAP virtual void setGradientDescentIterations(int val) = 0; + + /** @brief Number of fixed point iterations of variational refinement per scale. Set to zero to + disable variational refinement completely. Higher values will typically result in more smooth and + high-quality flow. + @see setGradientDescentIterations */ + CV_WRAP virtual int getVariationalRefinementIterations() const = 0; + /** @copybrief getGradientDescentIterations @see getGradientDescentIterations */ + CV_WRAP virtual void setVariationalRefinementIterations(int val) = 0; + + /** @brief Whether to use mean-normalization of patches when computing patch distance. It is turned on + by default as it typically provides a noticeable quality boost because of increased robustness to + illumanition variations. Turn it off if you are certain that your sequence does't contain any changes + in illumination. + @see setUseMeanNormalization */ + CV_WRAP virtual bool getUseMeanNormalization() const = 0; + /** @copybrief getUseMeanNormalization @see getUseMeanNormalization */ + CV_WRAP virtual void setUseMeanNormalization(bool val) = 0; + + /** @brief Whether to use spatial propagation of good optical flow vectors. This option is turned on by + default, as it tends to work better on average and can sometimes help recover from major errors + introduced by the coarse-to-fine scheme employed by the DIS optical flow algorithm. Turning this + option off can make the output flow field a bit smoother, however. + @see setUseSpatialPropagation */ + CV_WRAP virtual bool getUseSpatialPropagation() const = 0; + /** @copybrief getUseSpatialPropagation @see getUseSpatialPropagation */ + CV_WRAP virtual void setUseSpatialPropagation(bool val) = 0; }; /** @brief Creates an instance of DISOpticalFlow + +@param preset one of PRESET_ULTRAFAST, PRESET_FAST and PRESET_MEDIUM */ -CV_EXPORTS_W Ptr createOptFlow_DIS(); +CV_EXPORTS_W Ptr createOptFlow_DIS(int preset = DISOpticalFlow::PRESET_FAST); //! @} diff --git a/modules/optflow/perf/perf_deepflow.cpp b/modules/optflow/perf/perf_deepflow.cpp new file mode 100644 index 000000000..7df5bb2ae --- /dev/null +++ b/modules/optflow/perf/perf_deepflow.cpp @@ -0,0 +1,69 @@ +/* + * By downloading, copying, installing or using the software you agree to this license. + * If you do not agree to this license, do not download, install, + * copy or use the software. + * + * + * License Agreement + * For Open Source Computer Vision Library + * (3 - clause BSD License) + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met : + * + * *Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and / or other materials provided with the distribution. + * + * * Neither the names of the copyright holders nor the names of the contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * This software is provided by the copyright holders and contributors "as is" and + * any express or implied warranties, including, but not limited to, the implied + * warranties of merchantability and fitness for a particular purpose are disclaimed. + * In no event shall copyright holders or contributors be liable for any direct, + * indirect, incidental, special, exemplary, or consequential damages + * (including, but not limited to, procurement of substitute goods or services; + * loss of use, data, or profits; or business interruption) however caused + * and on any theory of liability, whether in contract, strict liability, + * or tort(including negligence or otherwise) arising in any way out of + * the use of this software, even if advised of the possibility of such damage. + */ + +#include "perf_precomp.hpp" + +using std::tr1::tuple; +using std::tr1::get; +using namespace perf; +using namespace testing; +using namespace cv; +using namespace cv::optflow; + +typedef tuple DFParams; +typedef TestBaseWithParam DenseOpticalFlow_DeepFlow; + +PERF_TEST_P(DenseOpticalFlow_DeepFlow, perf, Values(szVGA, sz720p)) +{ + DFParams params = GetParam(); + Size sz = get<0>(params); + + Mat frame1(sz, CV_8U); + Mat frame2(sz, CV_8U); + Mat flow; + + randu(frame1, 0, 255); + randu(frame2, 0, 255); + + cv::setNumThreads(cv::getNumberOfCPUs()); + TEST_CYCLE_N(1) + { + Ptr algo = createOptFlow_DeepFlow(); + algo->calc(frame1, frame2, flow); + } + + SANITY_CHECK_NOTHING(); +} diff --git a/modules/optflow/perf/perf_disflow.cpp b/modules/optflow/perf/perf_disflow.cpp new file mode 100644 index 000000000..ad05420be --- /dev/null +++ b/modules/optflow/perf/perf_disflow.cpp @@ -0,0 +1,103 @@ +/* + * By downloading, copying, installing or using the software you agree to this license. + * If you do not agree to this license, do not download, install, + * copy or use the software. + * + * + * License Agreement + * For Open Source Computer Vision Library + * (3 - clause BSD License) + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met : + * + * *Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and / or other materials provided with the distribution. + * + * * Neither the names of the copyright holders nor the names of the contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * This software is provided by the copyright holders and contributors "as is" and + * any express or implied warranties, including, but not limited to, the implied + * warranties of merchantability and fitness for a particular purpose are disclaimed. + * In no event shall copyright holders or contributors be liable for any direct, + * indirect, incidental, special, exemplary, or consequential damages + * (including, but not limited to, procurement of substitute goods or services; + * loss of use, data, or profits; or business interruption) however caused + * and on any theory of liability, whether in contract, strict liability, + * or tort(including negligence or otherwise) arising in any way out of + * the use of this software, even if advised of the possibility of such damage. + */ + +#include "perf_precomp.hpp" + +using std::tr1::tuple; +using std::tr1::get; +using namespace perf; +using namespace testing; +using namespace cv; +using namespace cv::optflow; + +void MakeArtificialExample(Mat &dst_frame1, Mat &dst_frame2); + +typedef tuple DISParams; +typedef TestBaseWithParam DenseOpticalFlow_DIS; + +PERF_TEST_P(DenseOpticalFlow_DIS, perf, + Combine(Values("PRESET_ULTRAFAST", "PRESET_FAST", "PRESET_MEDIUM"), Values(szVGA, sz720p, sz1080p))) +{ + DISParams params = GetParam(); + + // use strings to print preset names in the perf test results: + String preset_string = get<0>(params); + int preset = DISOpticalFlow::PRESET_FAST; + if (preset_string == "PRESET_ULTRAFAST") + preset = DISOpticalFlow::PRESET_ULTRAFAST; + else if (preset_string == "PRESET_FAST") + preset = DISOpticalFlow::PRESET_FAST; + else if (preset_string == "PRESET_MEDIUM") + preset = DISOpticalFlow::PRESET_MEDIUM; + Size sz = get<1>(params); + + Mat frame1(sz, CV_8U); + Mat frame2(sz, CV_8U); + Mat flow; + + MakeArtificialExample(frame1, frame2); + + cv::setNumThreads(cv::getNumberOfCPUs()); + TEST_CYCLE_N(10) + { + Ptr algo = createOptFlow_DIS(preset); + algo->calc(frame1, frame2, flow); + } + + SANITY_CHECK_NOTHING(); +} + +void MakeArtificialExample(Mat &dst_frame1, Mat &dst_frame2) +{ + int src_scale = 2; + int OF_scale = 6; + double sigma = dst_frame1.cols / 300; + + Mat tmp(Size(dst_frame1.cols / (int)pow(2, src_scale), dst_frame1.rows / (int)pow(2, src_scale)), CV_8U); + randu(tmp, 0, 255); + resize(tmp, dst_frame1, dst_frame1.size(), 0.0, 0.0, INTER_LINEAR); + resize(tmp, dst_frame2, dst_frame2.size(), 0.0, 0.0, INTER_LINEAR); + + Mat displacement_field(Size(dst_frame1.cols / (int)pow(2, OF_scale), dst_frame1.rows / (int)pow(2, OF_scale)), + CV_32FC2); + randn(displacement_field, 0.0, sigma); + resize(displacement_field, displacement_field, dst_frame2.size(), 0.0, 0.0, INTER_CUBIC); + for (int i = 0; i < displacement_field.rows; i++) + for (int j = 0; j < displacement_field.cols; j++) + displacement_field.at(i, j) += Vec2f((float)j, (float)i); + + remap(dst_frame2, dst_frame2, displacement_field, Mat(), INTER_LINEAR, BORDER_REPLICATE); +} diff --git a/modules/optflow/perf/perf_main.cpp b/modules/optflow/perf/perf_main.cpp new file mode 100644 index 000000000..0fbf990c3 --- /dev/null +++ b/modules/optflow/perf/perf_main.cpp @@ -0,0 +1,3 @@ +#include "perf_precomp.hpp" + +CV_PERF_TEST_MAIN(optflow) diff --git a/modules/optflow/perf/perf_precomp.hpp b/modules/optflow/perf/perf_precomp.hpp new file mode 100644 index 000000000..79e2f05f5 --- /dev/null +++ b/modules/optflow/perf/perf_precomp.hpp @@ -0,0 +1,17 @@ +#ifdef __GNUC__ +# pragma GCC diagnostic ignored "-Wmissing-declarations" +# if defined __clang__ || defined __APPLE__ +# pragma GCC diagnostic ignored "-Wmissing-prototypes" +# pragma GCC diagnostic ignored "-Wextra" +# endif +#endif + +#ifndef __OPENCV_PERF_PRECOMP_HPP__ +#define __OPENCV_PERF_PRECOMP_HPP__ + +#include "opencv2/ts.hpp" +#include "opencv2/imgproc.hpp" +#include "opencv2/optflow.hpp" +#include "opencv2/highgui.hpp" + +#endif diff --git a/modules/optflow/perf/perf_variational_refinement.cpp b/modules/optflow/perf/perf_variational_refinement.cpp new file mode 100644 index 000000000..e9a4ca1b8 --- /dev/null +++ b/modules/optflow/perf/perf_variational_refinement.cpp @@ -0,0 +1,77 @@ +/* +* By downloading, copying, installing or using the software you agree to this license. +* If you do not agree to this license, do not download, install, +* copy or use the software. +* +* +* License Agreement +* For Open Source Computer Vision Library +* (3 - clause BSD License) +* +* Redistribution and use in source and binary forms, with or without modification, +* are permitted provided that the following conditions are met : +* +* *Redistributions of source code must retain the above copyright notice, +* this list of conditions and the following disclaimer. +* +* * Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and / or other materials provided with the distribution. +* +* * Neither the names of the copyright holders nor the names of the contributors +* may be used to endorse or promote products derived from this software +* without specific prior written permission. +* +* This software is provided by the copyright holders and contributors "as is" and +* any express or implied warranties, including, but not limited to, the implied +* warranties of merchantability and fitness for a particular purpose are disclaimed. +* In no event shall copyright holders or contributors be liable for any direct, +* indirect, incidental, special, exemplary, or consequential damages +* (including, but not limited to, procurement of substitute goods or services; +* loss of use, data, or profits; or business interruption) however caused +* and on any theory of liability, whether in contract, strict liability, +* or tort(including negligence or otherwise) arising in any way out of +* the use of this software, even if advised of the possibility of such damage. +*/ + +#include "perf_precomp.hpp" + +using std::tr1::tuple; +using std::tr1::get; +using namespace perf; +using namespace testing; +using namespace cv; +using namespace cv::optflow; + +typedef tuple VarRefParams; +typedef TestBaseWithParam DenseOpticalFlow_VariationalRefinement; + +PERF_TEST_P(DenseOpticalFlow_VariationalRefinement, perf, Combine(Values(szQVGA, szVGA), Values(5, 10), Values(5, 10))) +{ + VarRefParams params = GetParam(); + Size sz = get<0>(params); + int sorIter = get<1>(params); + int fixedPointIter = get<2>(params); + + Mat frame1(sz, CV_8U); + Mat frame2(sz, CV_8U); + Mat flow(sz, CV_32FC2); + + randu(frame1, 0, 255); + randu(frame2, 0, 255); + flow.setTo(0.0f); + + cv::setNumThreads(cv::getNumberOfCPUs()); + TEST_CYCLE_N(10) + { + Ptr var = createVariationalFlowRefinement(); + var->setAlpha(20.0f); + var->setGamma(10.0f); + var->setDelta(5.0f); + var->setSorIterations(sorIter); + var->setFixedPointIterations(fixedPointIter); + var->calc(frame1, frame2, flow); + } + + SANITY_CHECK_NOTHING(); +} diff --git a/modules/optflow/samples/optical_flow_evaluation.cpp b/modules/optflow/samples/optical_flow_evaluation.cpp index 5d57511e5..def0f19fb 100644 --- a/modules/optflow/samples/optical_flow_evaluation.cpp +++ b/modules/optflow/samples/optical_flow_evaluation.cpp @@ -11,7 +11,7 @@ using namespace optflow; const String keys = "{help h usage ? | | print this message }" "{@image1 | | image1 }" "{@image2 | | image2 }" - "{@algorithm | | [farneback, simpleflow, tvl1, deepflow, sparsetodenseflow or DISflow] }" + "{@algorithm | | [farneback, simpleflow, tvl1, deepflow, sparsetodenseflow, DISflow_ultrafast, DISflow_fast, DISflow_medium] }" "{@groundtruth | | path to the .flo file (optional), Middlebury format }" "{m measure |endpoint| error measure - [endpoint or angular] }" "{r region |all | region to compute stats about [all, discontinuities, untextured] }" @@ -229,7 +229,7 @@ int main( int argc, char** argv ) if ( i2.depth() != CV_8U ) i2.convertTo(i2, CV_8U); - if ( (method == "farneback" || method == "tvl1" || method == "deepflow" || method == "DISflow") && i1.channels() == 3 ) + if ( (method == "farneback" || method == "tvl1" || method == "deepflow" || method == "DISflow_ultrafast" || method == "DISflow_fast" || method == "DISflow_medium") && i1.channels() == 3 ) { // 1-channel images are expected cvtColor(i1, i1, COLOR_BGR2GRAY); cvtColor(i2, i2, COLOR_BGR2GRAY); @@ -252,8 +252,12 @@ int main( int argc, char** argv ) algorithm = createOptFlow_DeepFlow(); else if ( method == "sparsetodenseflow" ) algorithm = createOptFlow_SparseToDense(); - else if ( method == "DISflow" ) - algorithm = createOptFlow_DIS(); + else if ( method == "DISflow_ultrafast" ) + algorithm = createOptFlow_DIS(DISOpticalFlow::PRESET_ULTRAFAST); + else if (method == "DISflow_fast") + algorithm = createOptFlow_DIS(DISOpticalFlow::PRESET_FAST); + else if (method == "DISflow_medium") + algorithm = createOptFlow_DIS(DISOpticalFlow::PRESET_MEDIUM); else { printf("Wrong method!\n"); diff --git a/modules/optflow/src/dis_flow.cpp b/modules/optflow/src/dis_flow.cpp index a18d2c644..fed97439f 100644 --- a/modules/optflow/src/dis_flow.cpp +++ b/modules/optflow/src/dis_flow.cpp @@ -40,10 +40,11 @@ // //M*/ +#include "opencv2/core/hal/intrin.hpp" #include "precomp.hpp" -#include "opencv2/imgproc.hpp" using namespace std; #define EPS 0.001F +#define INF 1E+10F namespace cv { @@ -58,13 +59,21 @@ class DISOpticalFlowImpl : public DISOpticalFlow void calc(InputArray I0, InputArray I1, InputOutputArray flow); void collectGarbage(); - protected: // algorithm parameters + protected: //!< algorithm parameters int finest_scale, coarsest_scale; int patch_size; int patch_stride; int grad_descent_iter; + int variational_refinement_iter; + bool use_mean_normalization; + bool use_spatial_propagation; - public: // getters and setters + protected: //!< some auxiliary variables + int border_size; + int w, h; //!< flow buffer width and height on the current scale + int ws, hs; //!< sparse flow buffer width and height on the current scale + + public: int getFinestScale() const { return finest_scale; } void setFinestScale(int val) { finest_scale = val; } int getPatchSize() const { return patch_size; } @@ -73,60 +82,111 @@ class DISOpticalFlowImpl : public DISOpticalFlow void setPatchStride(int val) { patch_stride = val; } int getGradientDescentIterations() const { return grad_descent_iter; } void setGradientDescentIterations(int val) { grad_descent_iter = val; } + int getVariationalRefinementIterations() const { return variational_refinement_iter; } + void setVariationalRefinementIterations(int val) { variational_refinement_iter = val; } + bool getUseMeanNormalization() const { return use_mean_normalization; } + void setUseMeanNormalization(bool val) { use_mean_normalization = val; } + bool getUseSpatialPropagation() const { return use_spatial_propagation; } + void setUseSpatialPropagation(bool val) { use_spatial_propagation = val; } - private: // internal buffers - vector< Mat_ > I0s; // gaussian pyramid for the current frame - vector< Mat_ > I1s; // gaussian pyramid for the next frame + protected: //!< internal buffers + vector > I0s; //!< Gaussian pyramid for the current frame + vector > I1s; //!< Gaussian pyramid for the next frame + vector > I1s_ext; //!< I1s with borders - vector< Mat_ > I0xs; // gaussian pyramid for the x gradient of the current frame - vector< Mat_ > I0ys; // gaussian pyramid for the y gradient of the current frame + vector > I0xs; //!< Gaussian pyramid for the x gradient of the current frame + vector > I0ys; //!< Gaussian pyramid for the y gradient of the current frame - vector< Mat_ > Ux; // x component of the flow vectors - vector< Mat_ > Uy; // y component of the flow vectors + vector > Ux; //!< x component of the flow vectors + vector > Uy; //!< y component of the flow vectors - Mat_ U; // buffers for the merged flow + Mat_ U; //!< a buffer for the merged flow - Mat_ Sx; // x component of the sparse flow vectors (before densification) - Mat_ Sy; // y component of the sparse flow vectors (before densification) + Mat_ Sx; //!< intermediate sparse flow representation (x component) + Mat_ Sy; //!< intermediate sparse flow representation (y component) - // structure tensor components and auxiliary buffers: - Mat_ I0xx_buf; // sum of squares of x gradient values - Mat_ I0yy_buf; // sum of squares of y gradient values - Mat_ I0xy_buf; // sum of x and y gradient products + /* Structure tensor components: */ + Mat_ I0xx_buf; //!< sum of squares of x gradient values + Mat_ I0yy_buf; //!< sum of squares of y gradient values + Mat_ I0xy_buf; //!< sum of x and y gradient products - Mat_ I0xx_buf_aux; // for computing sums using the summed area table + /* Extra buffers that are useful if patch mean-normalization is used: */ + Mat_ I0x_buf; //!< sum of of x gradient values + Mat_ I0y_buf; //!< sum of of y gradient values + + /* Auxiliary buffers used in structure tensor computation: */ + Mat_ I0xx_buf_aux; Mat_ I0yy_buf_aux; Mat_ I0xy_buf_aux; - //////////////////////////////////////////////////////////// + Mat_ I0x_buf_aux; + Mat_ I0y_buf_aux; - private: // private methods + vector > variational_refinement_processors; + + private: //!< private methods and parallel sections void prepareBuffers(Mat &I0, Mat &I1); - void precomputeStructureTensor(Mat &dst_I0xx, Mat &dst_I0yy, Mat &dst_I0xy, Mat &I0x, Mat &I0y); - void patchInverseSearch(Mat &dst_Sx, Mat &dst_Sy, Mat &src_Ux, Mat &src_Uy, Mat &I0, Mat &I0x, Mat &I0y, Mat &I1); - void densification(Mat &dst_Ux, Mat &dst_Uy, Mat &src_Sx, Mat &src_Sy, Mat &I0, Mat &I1); + void precomputeStructureTensor(Mat &dst_I0xx, Mat &dst_I0yy, Mat &dst_I0xy, Mat &dst_I0x, Mat &dst_I0y, Mat &I0x, + Mat &I0y); + + struct PatchInverseSearch_ParBody : public ParallelLoopBody + { + DISOpticalFlowImpl *dis; + int nstripes, stripe_sz; + int hs; + Mat *Sx, *Sy, *Ux, *Uy, *I0, *I1, *I0x, *I0y; + int num_iter; + + PatchInverseSearch_ParBody(DISOpticalFlowImpl &_dis, int _nstripes, int _hs, Mat &dst_Sx, Mat &dst_Sy, + Mat &src_Ux, Mat &src_Uy, Mat &_I0, Mat &_I1, Mat &_I0x, Mat &_I0y, int _num_iter); + void operator()(const Range &range) const; + }; + + struct Densification_ParBody : public ParallelLoopBody + { + DISOpticalFlowImpl *dis; + int nstripes, stripe_sz; + int h; + Mat *Ux, *Uy, *Sx, *Sy, *I0, *I1; + + Densification_ParBody(DISOpticalFlowImpl &_dis, int _nstripes, int _h, Mat &dst_Ux, Mat &dst_Uy, Mat &src_Sx, + Mat &src_Sy, Mat &_I0, Mat &_I1); + void operator()(const Range &range) const; + }; }; DISOpticalFlowImpl::DISOpticalFlowImpl() { - finest_scale = 3; - patch_size = 9; + finest_scale = 2; + patch_size = 8; patch_stride = 4; - grad_descent_iter = 12; + grad_descent_iter = 16; + variational_refinement_iter = 5; + border_size = 16; + use_mean_normalization = true; + use_spatial_propagation = true; + + /* Use separate variational refinement instances for different scales to avoid repeated memory allocation: */ + int max_possible_scales = 10; + for (int i = 0; i < max_possible_scales; i++) + variational_refinement_processors.push_back(createVariationalFlowRefinement()); } void DISOpticalFlowImpl::prepareBuffers(Mat &I0, Mat &I1) { I0s.resize(coarsest_scale + 1); I1s.resize(coarsest_scale + 1); + I1s_ext.resize(coarsest_scale + 1); I0xs.resize(coarsest_scale + 1); I0ys.resize(coarsest_scale + 1); Ux.resize(coarsest_scale + 1); Uy.resize(coarsest_scale + 1); + int fraction = 1; int cur_rows = 0, cur_cols = 0; for (int i = 0; i <= coarsest_scale; i++) { + /* Avoid initializing the pyramid levels above the finest scale, as they won't be used anyway */ if (i == finest_scale) { cur_rows = I0.rows / fraction; @@ -136,14 +196,21 @@ void DISOpticalFlowImpl::prepareBuffers(Mat &I0, Mat &I1) I1s[i].create(cur_rows, cur_cols); resize(I1, I1s[i], I1s[i].size(), 0.0, 0.0, INTER_AREA); + /* These buffers are reused in each scale so we initialize them once on the finest scale: */ Sx.create(cur_rows / patch_stride, cur_cols / patch_stride); Sy.create(cur_rows / patch_stride, cur_cols / patch_stride); I0xx_buf.create(cur_rows / patch_stride, cur_cols / patch_stride); I0yy_buf.create(cur_rows / patch_stride, cur_cols / patch_stride); I0xy_buf.create(cur_rows / patch_stride, cur_cols / patch_stride); + I0x_buf.create(cur_rows / patch_stride, cur_cols / patch_stride); + I0y_buf.create(cur_rows / patch_stride, cur_cols / patch_stride); + I0xx_buf_aux.create(cur_rows, cur_cols / patch_stride); I0yy_buf_aux.create(cur_rows, cur_cols / patch_stride); I0xy_buf_aux.create(cur_rows, cur_cols / patch_stride); + I0x_buf_aux.create(cur_rows, cur_cols / patch_stride); + I0y_buf_aux.create(cur_rows, cur_cols / patch_stride); + U.create(cur_rows, cur_cols); } else if (i > finest_scale) @@ -160,89 +227,112 @@ void DISOpticalFlowImpl::prepareBuffers(Mat &I0, Mat &I1) if (i >= finest_scale) { + I1s_ext[i].create(cur_rows + 2 * border_size, cur_cols + 2 * border_size); + copyMakeBorder(I1s[i], I1s_ext[i], border_size, border_size, border_size, border_size, BORDER_REPLICATE); I0xs[i].create(cur_rows, cur_cols); I0ys[i].create(cur_rows, cur_cols); spatialGradient(I0s[i], I0xs[i], I0ys[i]); Ux[i].create(cur_rows, cur_cols); Uy[i].create(cur_rows, cur_cols); + variational_refinement_processors[i]->setAlpha(20.0f); + variational_refinement_processors[i]->setDelta(5.0f); + variational_refinement_processors[i]->setGamma(10.0f); + variational_refinement_processors[i]->setSorIterations(5); + variational_refinement_processors[i]->setFixedPointIterations(variational_refinement_iter); } } } -void DISOpticalFlowImpl::precomputeStructureTensor(Mat &dst_I0xx, Mat &dst_I0yy, Mat &dst_I0xy, Mat &I0x, Mat &I0y) +/* This function computes the structure tensor elements (local sums of I0x^2, I0x*I0y and I0y^2). + * A simple box filter is not used instead because we need to compute these sums on a sparse grid + * and store them densely in the output buffers. + */ +void DISOpticalFlowImpl::precomputeStructureTensor(Mat &dst_I0xx, Mat &dst_I0yy, Mat &dst_I0xy, Mat &dst_I0x, + Mat &dst_I0y, Mat &I0x, Mat &I0y) { - short *I0x_ptr = I0x.ptr(); - short *I0y_ptr = I0y.ptr(); - float *I0xx_ptr = dst_I0xx.ptr(); float *I0yy_ptr = dst_I0yy.ptr(); float *I0xy_ptr = dst_I0xy.ptr(); + float *I0x_ptr = dst_I0x.ptr(); + float *I0y_ptr = dst_I0y.ptr(); float *I0xx_aux_ptr = I0xx_buf_aux.ptr(); float *I0yy_aux_ptr = I0yy_buf_aux.ptr(); float *I0xy_aux_ptr = I0xy_buf_aux.ptr(); + float *I0x_aux_ptr = I0x_buf_aux.ptr(); + float *I0y_aux_ptr = I0y_buf_aux.ptr(); - int w = I0x.cols; - int h = I0x.rows; - int psz2 = patch_size / 2; - int psz = 2 * psz2; - // width of the sparse OF fields: - int ws = 1 + (w - patch_size) / patch_stride; - - // separable box filter for computing patch sums on a sparse - // grid (determined by patch_stride) + /* Separable box filter: horizontal pass */ for (int i = 0; i < h; i++) { - float sum_xx = 0.0f, sum_yy = 0.0f, sum_xy = 0.0f; - short *x_row = I0x_ptr + i * w, *y_row = I0y_ptr + i * w; + float sum_xx = 0.0f, sum_yy = 0.0f, sum_xy = 0.0f, sum_x = 0.0f, sum_y = 0.0f; + short *x_row = I0x.ptr(i); + short *y_row = I0y.ptr(i); for (int j = 0; j < patch_size; j++) { sum_xx += x_row[j] * x_row[j]; sum_yy += y_row[j] * y_row[j]; sum_xy += x_row[j] * y_row[j]; + sum_x += x_row[j]; + sum_y += y_row[j]; } I0xx_aux_ptr[i * ws] = sum_xx; I0yy_aux_ptr[i * ws] = sum_yy; I0xy_aux_ptr[i * ws] = sum_xy; + I0x_aux_ptr[i * ws] = sum_x; + I0y_aux_ptr[i * ws] = sum_y; int js = 1; for (int j = patch_size; j < w; j++) { sum_xx += (x_row[j] * x_row[j] - x_row[j - patch_size] * x_row[j - patch_size]); sum_yy += (y_row[j] * y_row[j] - y_row[j - patch_size] * y_row[j - patch_size]); sum_xy += (x_row[j] * y_row[j] - x_row[j - patch_size] * y_row[j - patch_size]); - if ((j - psz) % patch_stride == 0) + sum_x += (x_row[j] - x_row[j - patch_size]); + sum_y += (y_row[j] - y_row[j - patch_size]); + if ((j - patch_size + 1) % patch_stride == 0) { I0xx_aux_ptr[i * ws + js] = sum_xx; I0yy_aux_ptr[i * ws + js] = sum_yy; I0xy_aux_ptr[i * ws + js] = sum_xy; + I0x_aux_ptr[i * ws + js] = sum_x; + I0y_aux_ptr[i * ws + js] = sum_y; js++; } } } - AutoBuffer sum_xx_buf(ws), sum_yy_buf(ws), sum_xy_buf(ws); + AutoBuffer sum_xx_buf(ws), sum_yy_buf(ws), sum_xy_buf(ws), sum_x_buf(ws), sum_y_buf(ws); float *sum_xx = (float *)sum_xx_buf; float *sum_yy = (float *)sum_yy_buf; float *sum_xy = (float *)sum_xy_buf; + float *sum_x = (float *)sum_x_buf; + float *sum_y = (float *)sum_y_buf; for (int j = 0; j < ws; j++) { sum_xx[j] = 0.0f; sum_yy[j] = 0.0f; sum_xy[j] = 0.0f; + sum_x[j] = 0.0f; + sum_y[j] = 0.0f; } + /* Separable box filter: vertical pass */ for (int i = 0; i < patch_size; i++) for (int j = 0; j < ws; j++) { sum_xx[j] += I0xx_aux_ptr[i * ws + j]; sum_yy[j] += I0yy_aux_ptr[i * ws + j]; sum_xy[j] += I0xy_aux_ptr[i * ws + j]; + sum_x[j] += I0x_aux_ptr[i * ws + j]; + sum_y[j] += I0y_aux_ptr[i * ws + j]; } for (int j = 0; j < ws; j++) { I0xx_ptr[j] = sum_xx[j]; I0yy_ptr[j] = sum_yy[j]; I0xy_ptr[j] = sum_xy[j]; + I0x_ptr[j] = sum_x[j]; + I0y_ptr[j] = sum_y[j]; } int is = 1; for (int i = patch_size; i < h; i++) @@ -252,166 +342,605 @@ void DISOpticalFlowImpl::precomputeStructureTensor(Mat &dst_I0xx, Mat &dst_I0yy, sum_xx[j] += (I0xx_aux_ptr[i * ws + j] - I0xx_aux_ptr[(i - patch_size) * ws + j]); sum_yy[j] += (I0yy_aux_ptr[i * ws + j] - I0yy_aux_ptr[(i - patch_size) * ws + j]); sum_xy[j] += (I0xy_aux_ptr[i * ws + j] - I0xy_aux_ptr[(i - patch_size) * ws + j]); + sum_x[j] += (I0x_aux_ptr[i * ws + j] - I0x_aux_ptr[(i - patch_size) * ws + j]); + sum_y[j] += (I0y_aux_ptr[i * ws + j] - I0y_aux_ptr[(i - patch_size) * ws + j]); } - if ((i - psz) % patch_stride == 0) + if ((i - patch_size + 1) % patch_stride == 0) { for (int j = 0; j < ws; j++) { I0xx_ptr[is * ws + j] = sum_xx[j]; I0yy_ptr[is * ws + j] = sum_yy[j]; I0xy_ptr[is * ws + j] = sum_xy[j]; + I0x_ptr[is * ws + j] = sum_x[j]; + I0y_ptr[is * ws + j] = sum_y[j]; } is++; } } } -void DISOpticalFlowImpl::patchInverseSearch(Mat &dst_Sx, Mat &dst_Sy, Mat &src_Ux, Mat &src_Uy, Mat &I0, Mat &I0x, - Mat &I0y, Mat &I1) +DISOpticalFlowImpl::PatchInverseSearch_ParBody::PatchInverseSearch_ParBody(DISOpticalFlowImpl &_dis, int _nstripes, + int _hs, Mat &dst_Sx, Mat &dst_Sy, + Mat &src_Ux, Mat &src_Uy, Mat &_I0, Mat &_I1, + Mat &_I0x, Mat &_I0y, int _num_iter) + : dis(&_dis), nstripes(_nstripes), hs(_hs), Sx(&dst_Sx), Sy(&dst_Sy), Ux(&src_Ux), Uy(&src_Uy), I0(&_I0), I1(&_I1), + I0x(&_I0x), I0y(&_I0y), num_iter(_num_iter) { - float *Ux_ptr = src_Ux.ptr(); - float *Uy_ptr = src_Uy.ptr(); - float *Sx_ptr = dst_Sx.ptr(); - float *Sy_ptr = dst_Sy.ptr(); - uchar *I0_ptr = I0.ptr(); - uchar *I1_ptr = I1.ptr(); - short *I0x_ptr = I0x.ptr(); - short *I0y_ptr = I0y.ptr(); - int w = I0.cols; - int h = I1.rows; - int psz2 = patch_size / 2; - // width and height of the sparse OF fields: - int ws = 1 + (w - patch_size) / patch_stride; - int hs = 1 + (h - patch_size) / patch_stride; - - precomputeStructureTensor(I0xx_buf, I0yy_buf, I0xy_buf, I0x, I0y); - float *xx_ptr = I0xx_buf.ptr(); - float *yy_ptr = I0yy_buf.ptr(); - float *xy_ptr = I0xy_buf.ptr(); - - // perform a fixed number of gradient descent iterations for each patch: - int i = psz2; - for (int is = 0; is < hs; is++) - { - int j = psz2; - for (int js = 0; js < ws; js++) - { - float cur_Ux = Ux_ptr[i * w + j]; - float cur_Uy = Uy_ptr[i * w + j]; - float detH = xx_ptr[is * ws + js] * yy_ptr[is * ws + js] - xy_ptr[is * ws + js] * xy_ptr[is * ws + js]; - if (abs(detH) < EPS) - detH = EPS; - float invH11 = yy_ptr[is * ws + js] / detH; - float invH12 = -xy_ptr[is * ws + js] / detH; - float invH22 = xx_ptr[is * ws + js] / detH; - float prev_sum_diff = 100000000.0f; - for (int t = 0; t < grad_descent_iter; t++) - { - float dUx = 0, dUy = 0; - float diff = 0; - float sum_diff = 0.0f; - for (int pos_y = i - psz2; pos_y <= i + psz2; pos_y++) - for (int pos_x = j - psz2; pos_x <= j + psz2; pos_x++) - { - float pos_x_shifted = min(max(pos_x + cur_Ux, 0.0f), w - 1.0f - EPS); - float pos_y_shifted = min(max(pos_y + cur_Uy, 0.0f), h - 1.0f - EPS); - int pos_x_lower = (int)pos_x_shifted; - int pos_x_upper = pos_x_lower + 1; - int pos_y_lower = (int)pos_y_shifted; - int pos_y_upper = pos_y_lower + 1; - diff = (pos_x_shifted - pos_x_lower) * (pos_y_shifted - pos_y_lower) * - I1_ptr[pos_y_upper * w + pos_x_upper] + - (pos_x_upper - pos_x_shifted) * (pos_y_shifted - pos_y_lower) * - I1_ptr[pos_y_upper * w + pos_x_lower] + - (pos_x_shifted - pos_x_lower) * (pos_y_upper - pos_y_shifted) * - I1_ptr[pos_y_lower * w + pos_x_upper] + - (pos_x_upper - pos_x_shifted) * (pos_y_upper - pos_y_shifted) * - I1_ptr[pos_y_lower * w + pos_x_lower] - - I0_ptr[pos_y * w + pos_x]; - sum_diff += diff * diff; - dUx += I0x_ptr[pos_y * w + pos_x] * diff; - dUy += I0y_ptr[pos_y * w + pos_x] * diff; - } - cur_Ux -= invH11 * dUx + invH12 * dUy; - cur_Uy -= invH12 * dUx + invH22 * dUy; - if (sum_diff > prev_sum_diff) - break; - prev_sum_diff = sum_diff; - } - if (norm(Vec2f(cur_Ux - Ux_ptr[i * w + j], cur_Uy - Uy_ptr[i * w + j])) <= patch_size) - { - Sx_ptr[is * ws + js] = cur_Ux; - Sy_ptr[is * ws + js] = cur_Uy; - } - else - { - Sx_ptr[is * ws + js] = Ux_ptr[i * w + j]; - Sy_ptr[is * ws + js] = Uy_ptr[i * w + j]; - } - j += patch_stride; - } - i += patch_stride; - } + stripe_sz = (int)ceil(hs / (double)nstripes); } -void DISOpticalFlowImpl::densification(Mat &dst_Ux, Mat &dst_Uy, Mat &src_Sx, Mat &src_Sy, Mat &I0, Mat &I1) +/////////////////////////////////////////////* Patch processing functions *///////////////////////////////////////////// + +/* Some auxiliary macros */ +#define HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION \ + v_float32x4 w00v = v_setall_f32(w00); \ + v_float32x4 w01v = v_setall_f32(w01); \ + v_float32x4 w10v = v_setall_f32(w10); \ + v_float32x4 w11v = v_setall_f32(w11); \ + \ + v_uint8x16 I0_row_16, I1_row_16, I1_row_shifted_16, I1_row_next_16, I1_row_next_shifted_16; \ + v_uint16x8 I0_row_8, I1_row_8, I1_row_shifted_8, I1_row_next_8, I1_row_next_shifted_8, tmp; \ + v_uint32x4 I0_row_4_left, I1_row_4_left, I1_row_shifted_4_left, I1_row_next_4_left, I1_row_next_shifted_4_left; \ + v_uint32x4 I0_row_4_right, I1_row_4_right, I1_row_shifted_4_right, I1_row_next_4_right, \ + I1_row_next_shifted_4_right; \ + v_float32x4 I_diff_left, I_diff_right; \ + \ + /* Preload and expand the first row of I1: */ \ + I1_row_16 = v_load(I1_ptr); \ + I1_row_shifted_16 = v_extract<1>(I1_row_16, I1_row_16); \ + v_expand(I1_row_16, I1_row_8, tmp); \ + v_expand(I1_row_shifted_16, I1_row_shifted_8, tmp); \ + v_expand(I1_row_8, I1_row_4_left, I1_row_4_right); \ + v_expand(I1_row_shifted_8, I1_row_shifted_4_left, I1_row_shifted_4_right); \ + I1_ptr += I1_stride; + +#define HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION \ + /* Load the next row of I1: */ \ + I1_row_next_16 = v_load(I1_ptr); \ + /* Circular shift left by 1 element: */ \ + I1_row_next_shifted_16 = v_extract<1>(I1_row_next_16, I1_row_next_16); \ + /* Expand to 8 ushorts (we only need the first 8 values): */ \ + v_expand(I1_row_next_16, I1_row_next_8, tmp); \ + v_expand(I1_row_next_shifted_16, I1_row_next_shifted_8, tmp); \ + /* Separate the left and right halves: */ \ + v_expand(I1_row_next_8, I1_row_next_4_left, I1_row_next_4_right); \ + v_expand(I1_row_next_shifted_8, I1_row_next_shifted_4_left, I1_row_next_shifted_4_right); \ + \ + /* Load current row of I0: */ \ + I0_row_16 = v_load(I0_ptr); \ + v_expand(I0_row_16, I0_row_8, tmp); \ + v_expand(I0_row_8, I0_row_4_left, I0_row_4_right); \ + \ + /* Compute diffs between I0 and bilinearly interpolated I1: */ \ + I_diff_left = w00v * v_cvt_f32(v_reinterpret_as_s32(I1_row_4_left)) + \ + w01v * v_cvt_f32(v_reinterpret_as_s32(I1_row_shifted_4_left)) + \ + w10v * v_cvt_f32(v_reinterpret_as_s32(I1_row_next_4_left)) + \ + w11v * v_cvt_f32(v_reinterpret_as_s32(I1_row_next_shifted_4_left)) - \ + v_cvt_f32(v_reinterpret_as_s32(I0_row_4_left)); \ + I_diff_right = w00v * v_cvt_f32(v_reinterpret_as_s32(I1_row_4_right)) + \ + w01v * v_cvt_f32(v_reinterpret_as_s32(I1_row_shifted_4_right)) + \ + w10v * v_cvt_f32(v_reinterpret_as_s32(I1_row_next_4_right)) + \ + w11v * v_cvt_f32(v_reinterpret_as_s32(I1_row_next_shifted_4_right)) - \ + v_cvt_f32(v_reinterpret_as_s32(I0_row_4_right)); + +#define HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW \ + I0_ptr += I0_stride; \ + I1_ptr += I1_stride; \ + \ + I1_row_4_left = I1_row_next_4_left; \ + I1_row_4_right = I1_row_next_4_right; \ + I1_row_shifted_4_left = I1_row_next_shifted_4_left; \ + I1_row_shifted_4_right = I1_row_next_shifted_4_right; + +/* This function essentially performs one iteration of gradient descent when finding the most similar patch in I1 for a + * given one in I0. It assumes that I0_ptr and I1_ptr already point to the corresponding patches and w00, w01, w10, w11 + * are precomputed bilinear interpolation weights. It returns the SSD (sum of squared differences) between these patches + * and computes the values (dst_dUx, dst_dUy) that are used in the flow vector update. HAL acceleration is implemented + * only for the default patch size (8x8). Everything is processed in floats as using fixed-point approximations harms + * the quality significantly. + */ +inline float processPatch(float &dst_dUx, float &dst_dUy, uchar *I0_ptr, uchar *I1_ptr, short *I0x_ptr, short *I0y_ptr, + int I0_stride, int I1_stride, float w00, float w01, float w10, float w11, int patch_sz) { - float *Ux_ptr = dst_Ux.ptr(); - float *Uy_ptr = dst_Uy.ptr(); - float *Sx_ptr = src_Sx.ptr(); - float *Sy_ptr = src_Sy.ptr(); - uchar *I0_ptr = I0.ptr(); - uchar *I1_ptr = I1.ptr(); - int w = I0.cols; - int h = I0.rows; - int psz2 = patch_size / 2; - // width of the sparse OF fields: - int ws = 1 + (w - patch_size) / patch_stride; - - int start_x; - int start_y; - - start_y = psz2; - for (int i = 0; i < h; i++) + float SSD = 0.0f; +#ifdef CV_SIMD128 + if (patch_sz == 8) { - if (i - psz2 > start_y && start_y + patch_stride < h - psz2) - start_y += patch_stride; - start_x = psz2; - for (int j = 0; j < w; j++) + /* Variables to accumulate the sums */ + v_float32x4 Ux_vec = v_setall_f32(0); + v_float32x4 Uy_vec = v_setall_f32(0); + v_float32x4 SSD_vec = v_setall_f32(0); + + v_int16x8 I0x_row, I0y_row; + v_int32x4 I0x_row_4_left, I0x_row_4_right, I0y_row_4_left, I0y_row_4_right; + + HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION; + for (int row = 0; row < 8; row++) { + HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION; + I0x_row = v_load(I0x_ptr); + v_expand(I0x_row, I0x_row_4_left, I0x_row_4_right); + I0y_row = v_load(I0y_ptr); + v_expand(I0y_row, I0y_row_4_left, I0y_row_4_right); + + /* Update the sums: */ + Ux_vec += I_diff_left * v_cvt_f32(I0x_row_4_left) + I_diff_right * v_cvt_f32(I0x_row_4_right); + Uy_vec += I_diff_left * v_cvt_f32(I0y_row_4_left) + I_diff_right * v_cvt_f32(I0y_row_4_right); + SSD_vec += I_diff_left * I_diff_left + I_diff_right * I_diff_right; + + I0x_ptr += I0_stride; + I0y_ptr += I0_stride; + HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW; + } + + /* Final reduce operations: */ + dst_dUx = v_reduce_sum(Ux_vec); + dst_dUy = v_reduce_sum(Uy_vec); + SSD = v_reduce_sum(SSD_vec); + } + else + { +#endif + dst_dUx = 0.0f; + dst_dUy = 0.0f; + float diff; + for (int i = 0; i < patch_sz; i++) + for (int j = 0; j < patch_sz; j++) + { + diff = w00 * I1_ptr[i * I1_stride + j] + w01 * I1_ptr[i * I1_stride + j + 1] + + w10 * I1_ptr[(i + 1) * I1_stride + j] + w11 * I1_ptr[(i + 1) * I1_stride + j + 1] - + I0_ptr[i * I0_stride + j]; + + SSD += diff * diff; + dst_dUx += diff * I0x_ptr[i * I0_stride + j]; + dst_dUy += diff * I0y_ptr[i * I0_stride + j]; + } +#ifdef CV_SIMD128 + } +#endif + return SSD; +} + +/* Same as processPatch, but with patch mean normalization, which improves robustness under changing + * lighting conditions + */ +inline float processPatchMeanNorm(float &dst_dUx, float &dst_dUy, uchar *I0_ptr, uchar *I1_ptr, short *I0x_ptr, + short *I0y_ptr, int I0_stride, int I1_stride, float w00, float w01, float w10, + float w11, int patch_sz, float x_grad_sum, float y_grad_sum) +{ + float sum_diff = 0.0, sum_diff_sq = 0.0; + float sum_I0x_mul = 0.0, sum_I0y_mul = 0.0; + float n = (float)patch_sz * patch_sz; + +#ifdef CV_SIMD128 + if (patch_sz == 8) + { + /* Variables to accumulate the sums */ + v_float32x4 sum_I0x_mul_vec = v_setall_f32(0); + v_float32x4 sum_I0y_mul_vec = v_setall_f32(0); + v_float32x4 sum_diff_vec = v_setall_f32(0); + v_float32x4 sum_diff_sq_vec = v_setall_f32(0); + + v_int16x8 I0x_row, I0y_row; + v_int32x4 I0x_row_4_left, I0x_row_4_right, I0y_row_4_left, I0y_row_4_right; + + HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION; + for (int row = 0; row < 8; row++) + { + HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION; + I0x_row = v_load(I0x_ptr); + v_expand(I0x_row, I0x_row_4_left, I0x_row_4_right); + I0y_row = v_load(I0y_ptr); + v_expand(I0y_row, I0y_row_4_left, I0y_row_4_right); + + /* Update the sums: */ + sum_I0x_mul_vec += I_diff_left * v_cvt_f32(I0x_row_4_left) + I_diff_right * v_cvt_f32(I0x_row_4_right); + sum_I0y_mul_vec += I_diff_left * v_cvt_f32(I0y_row_4_left) + I_diff_right * v_cvt_f32(I0y_row_4_right); + sum_diff_sq_vec += I_diff_left * I_diff_left + I_diff_right * I_diff_right; + sum_diff_vec += I_diff_left + I_diff_right; + + I0x_ptr += I0_stride; + I0y_ptr += I0_stride; + HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW; + } + + /* Final reduce operations: */ + sum_I0x_mul = v_reduce_sum(sum_I0x_mul_vec); + sum_I0y_mul = v_reduce_sum(sum_I0y_mul_vec); + sum_diff = v_reduce_sum(sum_diff_vec); + sum_diff_sq = v_reduce_sum(sum_diff_sq_vec); + } + else + { +#endif + float diff; + for (int i = 0; i < patch_sz; i++) + for (int j = 0; j < patch_sz; j++) + { + diff = w00 * I1_ptr[i * I1_stride + j] + w01 * I1_ptr[i * I1_stride + j + 1] + + w10 * I1_ptr[(i + 1) * I1_stride + j] + w11 * I1_ptr[(i + 1) * I1_stride + j + 1] - + I0_ptr[i * I0_stride + j]; + + sum_diff += diff; + sum_diff_sq += diff * diff; + + sum_I0x_mul += diff * I0x_ptr[i * I0_stride + j]; + sum_I0y_mul += diff * I0y_ptr[i * I0_stride + j]; + } +#ifdef CV_SIMD128 + } +#endif + dst_dUx = sum_I0x_mul - sum_diff * x_grad_sum / n; + dst_dUy = sum_I0y_mul - sum_diff * y_grad_sum / n; + return sum_diff_sq - sum_diff * sum_diff / n; +} + +/* Similar to processPatch, but compute only the sum of squared differences (SSD) between the patches */ +inline float computeSSD(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int I1_stride, float w00, float w01, float w10, + float w11, int patch_sz) +{ + float SSD = 0.0f; +#ifdef CV_SIMD128 + if (patch_sz == 8) + { + v_float32x4 SSD_vec = v_setall_f32(0); + HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION; + for (int row = 0; row < 8; row++) + { + HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION; + SSD_vec += I_diff_left * I_diff_left + I_diff_right * I_diff_right; + HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW; + } + SSD = v_reduce_sum(SSD_vec); + } + else + { +#endif + float diff; + for (int i = 0; i < patch_sz; i++) + for (int j = 0; j < patch_sz; j++) + { + diff = w00 * I1_ptr[i * I1_stride + j] + w01 * I1_ptr[i * I1_stride + j + 1] + + w10 * I1_ptr[(i + 1) * I1_stride + j] + w11 * I1_ptr[(i + 1) * I1_stride + j + 1] - + I0_ptr[i * I0_stride + j]; + SSD += diff * diff; + } +#ifdef CV_SIMD128 + } +#endif + return SSD; +} + +/* Same as computeSSD, but with patch mean normalization */ +inline float computeSSDMeanNorm(uchar *I0_ptr, uchar *I1_ptr, int I0_stride, int I1_stride, float w00, float w01, + float w10, float w11, int patch_sz) +{ + float sum_diff = 0.0f, sum_diff_sq = 0.0f; + float n = (float)patch_sz * patch_sz; +#ifdef CV_SIMD128 + if (patch_sz == 8) + { + v_float32x4 sum_diff_vec = v_setall_f32(0); + v_float32x4 sum_diff_sq_vec = v_setall_f32(0); + HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION; + for (int row = 0; row < 8; row++) + { + HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION; + sum_diff_sq_vec += I_diff_left * I_diff_left + I_diff_right * I_diff_right; + sum_diff_vec += I_diff_left + I_diff_right; + HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW; + } + sum_diff = v_reduce_sum(sum_diff_vec); + sum_diff_sq = v_reduce_sum(sum_diff_sq_vec); + } + else + { +#endif + float diff; + for (int i = 0; i < patch_sz; i++) + for (int j = 0; j < patch_sz; j++) + { + diff = w00 * I1_ptr[i * I1_stride + j] + w01 * I1_ptr[i * I1_stride + j + 1] + + w10 * I1_ptr[(i + 1) * I1_stride + j] + w11 * I1_ptr[(i + 1) * I1_stride + j + 1] - + I0_ptr[i * I0_stride + j]; + + sum_diff += diff; + sum_diff_sq += diff * diff; + } +#ifdef CV_SIMD128 + } +#endif + return sum_diff_sq - sum_diff * sum_diff / n; +} + +#undef HAL_INIT_BILINEAR_8x8_PATCH_EXTRACTION +#undef HAL_PROCESS_BILINEAR_8x8_PATCH_EXTRACTION +#undef HAL_BILINEAR_8x8_PATCH_EXTRACTION_NEXT_ROW +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +void DISOpticalFlowImpl::PatchInverseSearch_ParBody::operator()(const Range &range) const +{ + // force separate processing of stripes if we are using spatial propagation: + if(dis->use_spatial_propagation && range.end>range.start+1) + { + for(int n=range.start;npatch_size; + int psz2 = psz / 2; + int w_ext = dis->w + 2 * dis->border_size; //!< width of I1_ext + int bsz = dis->border_size; + + /* Input dense flow */ + float *Ux_ptr = Ux->ptr(); + float *Uy_ptr = Uy->ptr(); + + /* Output sparse flow */ + float *Sx_ptr = Sx->ptr(); + float *Sy_ptr = Sy->ptr(); + + uchar *I0_ptr = I0->ptr(); + uchar *I1_ptr = I1->ptr(); + short *I0x_ptr = I0x->ptr(); + short *I0y_ptr = I0y->ptr(); + + /* Precomputed structure tensor */ + float *xx_ptr = dis->I0xx_buf.ptr(); + float *yy_ptr = dis->I0yy_buf.ptr(); + float *xy_ptr = dis->I0xy_buf.ptr(); + /* And extra buffers for mean-normalization: */ + float *x_ptr = dis->I0x_buf.ptr(); + float *y_ptr = dis->I0y_buf.ptr(); + + int i, j, dir; + int start_is, end_is, start_js, end_js; + int start_i, start_j; + float i_lower_limit = bsz - psz + 1.0f; + float i_upper_limit = bsz + dis->h - 1.0f; + float j_lower_limit = bsz - psz + 1.0f; + float j_upper_limit = bsz + dis->w - 1.0f; + float dUx, dUy, i_I1, j_I1, w00, w01, w10, w11, dx, dy; + +#define INIT_BILINEAR_WEIGHTS(Ux, Uy) \ + i_I1 = min(max(i + Uy + bsz, i_lower_limit), i_upper_limit); \ + j_I1 = min(max(j + Ux + bsz, j_lower_limit), j_upper_limit); \ + \ + w11 = (i_I1 - floor(i_I1)) * (j_I1 - floor(j_I1)); \ + w10 = (i_I1 - floor(i_I1)) * (floor(j_I1) + 1 - j_I1); \ + w01 = (floor(i_I1) + 1 - i_I1) * (j_I1 - floor(j_I1)); \ + w00 = (floor(i_I1) + 1 - i_I1) * (floor(j_I1) + 1 - j_I1); + +#define COMPUTE_SSD(dst, Ux, Uy) \ + INIT_BILINEAR_WEIGHTS(Ux, Uy); \ + if (dis->use_mean_normalization) \ + dst = computeSSDMeanNorm(I0_ptr + i * dis->w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, dis->w, w_ext, w00, \ + w01, w10, w11, psz); \ + else \ + dst = computeSSD(I0_ptr + i * dis->w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, dis->w, w_ext, w00, w01, \ + w10, w11, psz); + + int num_inner_iter = (int)floor(dis->grad_descent_iter / (float)num_iter); + for (int iter = 0; iter < num_iter; iter++) + { + if (iter % 2 == 0) + { + dir = 1; + start_is = min(range.start * stripe_sz, hs); + end_is = min(range.end * stripe_sz, hs); + start_js = 0; + end_js = dis->ws; + start_i = start_is * dis->patch_stride; + start_j = 0; + } + else + { + dir = -1; + start_is = min(range.end * stripe_sz, hs) - 1; + end_is = min(range.start * stripe_sz, hs) - 1; + start_js = dis->ws - 1; + end_js = -1; + start_i = start_is * dis->patch_stride; + start_j = (dis->ws - 1) * dis->patch_stride; + } + + i = start_i; + for (int is = start_is; dir * is < dir * end_is; is += dir) + { + j = start_j; + for (int js = start_js; dir * js < dir * end_js; js += dir) + { + if (iter == 0) + { + /* Using result form the previous pyramid level as the very first approximation: */ + Sx_ptr[is * dis->ws + js] = Ux_ptr[(i + psz2) * dis->w + j + psz2]; + Sy_ptr[is * dis->ws + js] = Uy_ptr[(i + psz2) * dis->w + j + psz2]; + } + + if (dis->use_spatial_propagation) + { + /* Updating the current Sx_ptr, Sy_ptr to the best candidate: */ + float min_SSD, cur_SSD; + COMPUTE_SSD(min_SSD, Sx_ptr[is * dis->ws + js], Sy_ptr[is * dis->ws + js]); + if (dir * js > dir * start_js) + { + COMPUTE_SSD(cur_SSD, Sx_ptr[is * dis->ws + js - dir], Sy_ptr[is * dis->ws + js - dir]); + if (cur_SSD < min_SSD) + { + min_SSD = cur_SSD; + Sx_ptr[is * dis->ws + js] = Sx_ptr[is * dis->ws + js - dir]; + Sy_ptr[is * dis->ws + js] = Sy_ptr[is * dis->ws + js - dir]; + } + } + /* Flow vectors won't actually propagate across different stripes, which is the reason for keeping + * the number of stripes constant. It works well enough in practice and doesn't introduce any + * visible seams. + */ + if (dir * is > dir * start_is) + { + COMPUTE_SSD(cur_SSD, Sx_ptr[(is - dir) * dis->ws + js], Sy_ptr[(is - dir) * dis->ws + js]); + if (cur_SSD < min_SSD) + { + min_SSD = cur_SSD; + Sx_ptr[is * dis->ws + js] = Sx_ptr[(is - dir) * dis->ws + js]; + Sy_ptr[is * dis->ws + js] = Sy_ptr[(is - dir) * dis->ws + js]; + } + } + } + + /* Use the best candidate as a starting point for the gradient descent: */ + float cur_Ux = Sx_ptr[is * dis->ws + js]; + float cur_Uy = Sy_ptr[is * dis->ws + js]; + + /* Computing the inverse of the structure tensor: */ + float detH = xx_ptr[is * dis->ws + js] * yy_ptr[is * dis->ws + js] - + xy_ptr[is * dis->ws + js] * xy_ptr[is * dis->ws + js]; + if (abs(detH) < EPS) + detH = EPS; + float invH11 = yy_ptr[is * dis->ws + js] / detH; + float invH12 = -xy_ptr[is * dis->ws + js] / detH; + float invH22 = xx_ptr[is * dis->ws + js] / detH; + float prev_SSD = INF, SSD; + float x_grad_sum = x_ptr[is * dis->ws + js]; + float y_grad_sum = y_ptr[is * dis->ws + js]; + + for (int t = 0; t < num_inner_iter; t++) + { + INIT_BILINEAR_WEIGHTS(cur_Ux, cur_Uy); + if (dis->use_mean_normalization) + SSD = processPatchMeanNorm(dUx, dUy, I0_ptr + i * dis->w + j, + I1_ptr + (int)i_I1 * w_ext + (int)j_I1, I0x_ptr + i * dis->w + j, + I0y_ptr + i * dis->w + j, dis->w, w_ext, w00, w01, w10, w11, psz, + x_grad_sum, y_grad_sum); + else + SSD = processPatch(dUx, dUy, I0_ptr + i * dis->w + j, I1_ptr + (int)i_I1 * w_ext + (int)j_I1, + I0x_ptr + i * dis->w + j, I0y_ptr + i * dis->w + j, dis->w, w_ext, w00, w01, + w10, w11, psz); + + dx = invH11 * dUx + invH12 * dUy; + dy = invH12 * dUx + invH22 * dUy; + cur_Ux -= dx; + cur_Uy -= dy; + + /* Break when patch distance stops decreasing */ + if (SSD >= prev_SSD) + break; + prev_SSD = SSD; + } + + /* If gradient descent converged to a flow vector that is very far from the initial approximation + * (more than patch size) then we don't use it. Noticeably improves the robustness. + */ + if (norm(Vec2f(cur_Ux - Sx_ptr[is * dis->ws + js], cur_Uy - Sy_ptr[is * dis->ws + js])) <= psz) + { + Sx_ptr[is * dis->ws + js] = cur_Ux; + Sy_ptr[is * dis->ws + js] = cur_Uy; + } + j += dir * dis->patch_stride; + } + i += dir * dis->patch_stride; + } + } +#undef INIT_BILINEAR_WEIGHTS +#undef COMPUTE_SSD +} + +DISOpticalFlowImpl::Densification_ParBody::Densification_ParBody(DISOpticalFlowImpl &_dis, int _nstripes, int _h, + Mat &dst_Ux, Mat &dst_Uy, Mat &src_Sx, Mat &src_Sy, + Mat &_I0, Mat &_I1) + : dis(&_dis), nstripes(_nstripes), h(_h), Ux(&dst_Ux), Uy(&dst_Uy), Sx(&src_Sx), Sy(&src_Sy), I0(&_I0), I1(&_I1) +{ + stripe_sz = (int)ceil(h / (double)nstripes); +} + +/* This function transforms a sparse optical flow field obtained by PatchInverseSearch (which computes flow values + * on a sparse grid defined by patch_stride) into a dense optical flow field by weighted averaging of values from the + * overlapping patches. + */ +void DISOpticalFlowImpl::Densification_ParBody::operator()(const Range &range) const +{ + int start_i = min(range.start * stripe_sz, h); + int end_i = min(range.end * stripe_sz, h); + + /* Input sparse flow */ + float *Sx_ptr = Sx->ptr(); + float *Sy_ptr = Sy->ptr(); + + /* Output dense flow */ + float *Ux_ptr = Ux->ptr(); + float *Uy_ptr = Uy->ptr(); + + uchar *I0_ptr = I0->ptr(); + uchar *I1_ptr = I1->ptr(); + + int psz = dis->patch_size; + int pstr = dis->patch_stride; + int i_l, i_u; + int j_l, j_u; + float i_m, j_m, diff; + + /* These values define the set of sparse grid locations that contain patches overlapping with the current dense flow + * location */ + int start_is, end_is; + int start_js, end_js; + +/* Some helper macros for updating this set of sparse grid locations */ +#define UPDATE_SPARSE_I_COORDINATES \ + if (i % pstr == 0 && i + psz <= h) \ + end_is++; \ + if (i - psz >= 0 && (i - psz) % pstr == 0 && start_is < end_is) \ + start_is++; + +#define UPDATE_SPARSE_J_COORDINATES \ + if (j % pstr == 0 && j + psz <= dis->w) \ + end_js++; \ + if (j - psz >= 0 && (j - psz) % pstr == 0 && start_js < end_js) \ + start_js++; + + start_is = 0; + end_is = -1; + for (int i = 0; i < start_i; i++) + { + UPDATE_SPARSE_I_COORDINATES; + } + for (int i = start_i; i < end_i; i++) + { + UPDATE_SPARSE_I_COORDINATES; + start_js = 0; + end_js = -1; + for (int j = 0; j < dis->w; j++) + { + UPDATE_SPARSE_J_COORDINATES; float coef, sum_coef = 0.0f; float sum_Ux = 0.0f; float sum_Uy = 0.0f; - if (j - psz2 > start_x && start_x + patch_stride < w - psz2) - start_x += patch_stride; - - for (int pos_y = start_y; pos_y <= min(i + psz2, h - psz2 - 1); pos_y += patch_stride) - for (int pos_x = start_x; pos_x <= min(j + psz2, w - psz2 - 1); pos_x += patch_stride) + /* Iterate through all the patches that overlap the current location (i,j) */ + for (int is = start_is; is <= end_is; is++) + for (int js = start_js; js <= end_js; js++) { - float diff; - int is = (pos_y - psz2) / patch_stride; - int js = (pos_x - psz2) / patch_stride; - float j_shifted = min(max(j + Sx_ptr[is * ws + js], 0.0f), w - 1.0f - EPS); - float i_shifted = min(max(i + Sy_ptr[is * ws + js], 0.0f), h - 1.0f - EPS); - int j_lower = (int)j_shifted; - int j_upper = j_lower + 1; - int i_lower = (int)i_shifted; - int i_upper = i_lower + 1; - diff = (j_shifted - j_lower) * (i_shifted - i_lower) * I1_ptr[i_upper * w + j_upper] + - (j_upper - j_shifted) * (i_shifted - i_lower) * I1_ptr[i_upper * w + j_lower] + - (j_shifted - j_lower) * (i_upper - i_shifted) * I1_ptr[i_lower * w + j_upper] + - (j_upper - j_shifted) * (i_upper - i_shifted) * I1_ptr[i_lower * w + j_lower] - - I0_ptr[i * w + j]; - coef = 1 / max(1.0f, diff * diff); - sum_Ux += coef * Sx_ptr[is * ws + js]; - sum_Uy += coef * Sy_ptr[is * ws + js]; + j_m = min(max(j + Sx_ptr[is * dis->ws + js], 0.0f), dis->w - 1.0f - EPS); + i_m = min(max(i + Sy_ptr[is * dis->ws + js], 0.0f), dis->h - 1.0f - EPS); + j_l = (int)j_m; + j_u = j_l + 1; + i_l = (int)i_m; + i_u = i_l + 1; + diff = (j_m - j_l) * (i_m - i_l) * I1_ptr[i_u * dis->w + j_u] + + (j_u - j_m) * (i_m - i_l) * I1_ptr[i_u * dis->w + j_l] + + (j_m - j_l) * (i_u - i_m) * I1_ptr[i_l * dis->w + j_u] + + (j_u - j_m) * (i_u - i_m) * I1_ptr[i_l * dis->w + j_l] - I0_ptr[i * dis->w + j]; + coef = 1 / max(1.0f, abs(diff)); + sum_Ux += coef * Sx_ptr[is * dis->ws + js]; + sum_Uy += coef * Sy_ptr[is * dis->ws + js]; sum_coef += coef; } - Ux_ptr[i * w + j] = sum_Ux / sum_coef; - Uy_ptr[i * w + j] = sum_Uy / sum_coef; + Ux_ptr[i * dis->w + j] = sum_Ux / sum_coef; + Uy_ptr[i * dis->w + j] = sum_Uy / sum_coef; } } +#undef UPDATE_SPARSE_I_COORDINATES +#undef UPDATE_SPARSE_J_COORDINATES } void DISOpticalFlowImpl::calc(InputArray I0, InputArray I1, InputOutputArray flow) @@ -419,12 +948,15 @@ void DISOpticalFlowImpl::calc(InputArray I0, InputArray I1, InputOutputArray flo CV_Assert(!I0.empty() && I0.depth() == CV_8U && I0.channels() == 1); CV_Assert(!I1.empty() && I1.depth() == CV_8U && I1.channels() == 1); CV_Assert(I0.sameSize(I1)); + CV_Assert(I0.isContinuous()); + CV_Assert(I1.isContinuous()); Mat I0Mat = I0.getMat(); Mat I1Mat = I1.getMat(); flow.create(I1Mat.size(), CV_32FC2); Mat &flowMat = flow.getMatRef(); - coarsest_scale = (int)(log((2 * I0Mat.cols) / (4.0 * patch_size))/log(2.0) + 0.5) - 1; + coarsest_scale = (int)(log((2 * I0Mat.cols) / (4.0 * patch_size)) / log(2.0) + 0.5) - 1; + int num_stripes = getNumThreads(); prepareBuffers(I0Mat, I1Mat); Ux[coarsest_scale].setTo(0.0f); @@ -432,9 +964,31 @@ void DISOpticalFlowImpl::calc(InputArray I0, InputArray I1, InputOutputArray flo for (int i = coarsest_scale; i >= finest_scale; i--) { - patchInverseSearch(Sx, Sy, Ux[i], Uy[i], I0s[i], I0xs[i], I0ys[i], I1s[i]); - densification(Ux[i], Uy[i], Sx, Sy, I0s[i], I1s[i]); - // TODO: variational refinement step + w = I0s[i].cols; + h = I0s[i].rows; + ws = 1 + (w - patch_size) / patch_stride; + hs = 1 + (h - patch_size) / patch_stride; + + precomputeStructureTensor(I0xx_buf, I0yy_buf, I0xy_buf, I0x_buf, I0y_buf, I0xs[i], I0ys[i]); + if (use_spatial_propagation) + { + /* Use a fixed number of stripes regardless the number of threads to make inverse search + * with spatial propagation reproducible + */ + parallel_for_(Range(0, 8), PatchInverseSearch_ParBody(*this, 8, hs, Sx, Sy, Ux[i], Uy[i], I0s[i], + I1s_ext[i], I0xs[i], I0ys[i], 2)); + } + else + { + parallel_for_(Range(0, num_stripes), + PatchInverseSearch_ParBody(*this, num_stripes, hs, Sx, Sy, Ux[i], Uy[i], I0s[i], I1s_ext[i], + I0xs[i], I0ys[i], 1)); + } + + parallel_for_(Range(0, num_stripes), + Densification_ParBody(*this, num_stripes, I0s[i].rows, Ux[i], Uy[i], Sx, Sy, I0s[i], I1s[i])); + if (variational_refinement_iter > 0) + variational_refinement_processors[i]->calcUV(I0s[i], I1s[i], Ux[i], Uy[i]); if (i > finest_scale) { @@ -454,6 +1008,7 @@ void DISOpticalFlowImpl::collectGarbage() { I0s.clear(); I1s.clear(); + I1s_ext.clear(); I0xs.clear(); I0ys.clear(); Ux.clear(); @@ -467,8 +1022,39 @@ void DISOpticalFlowImpl::collectGarbage() I0xx_buf_aux.release(); I0yy_buf_aux.release(); I0xy_buf_aux.release(); + + for (int i = finest_scale; i <= coarsest_scale; i++) + variational_refinement_processors[i]->collectGarbage(); + variational_refinement_processors.clear(); } -Ptr createOptFlow_DIS() { return makePtr(); } +Ptr createOptFlow_DIS(int preset) +{ + Ptr dis = makePtr(); + dis->setPatchSize(8); + if (preset == DISOpticalFlow::PRESET_ULTRAFAST) + { + dis->setFinestScale(2); + dis->setPatchStride(4); + dis->setGradientDescentIterations(12); + dis->setVariationalRefinementIterations(0); + } + else if (preset == DISOpticalFlow::PRESET_FAST) + { + dis->setFinestScale(2); + dis->setPatchStride(4); + dis->setGradientDescentIterations(16); + dis->setVariationalRefinementIterations(5); + } + else if (preset == DISOpticalFlow::PRESET_MEDIUM) + { + dis->setFinestScale(1); + dis->setPatchStride(3); + dis->setGradientDescentIterations(25); + dis->setVariationalRefinementIterations(5); + } + + return dis; +} } } diff --git a/modules/optflow/src/variational_refinement.cpp b/modules/optflow/src/variational_refinement.cpp new file mode 100644 index 000000000..e7eccf318 --- /dev/null +++ b/modules/optflow/src/variational_refinement.cpp @@ -0,0 +1,1189 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "opencv2/core/hal/intrin.hpp" +#include "precomp.hpp" +using namespace std; + +namespace cv +{ +namespace optflow +{ + +class VariationalRefinementImpl : public VariationalRefinement +{ + public: + VariationalRefinementImpl(); + + void calc(InputArray I0, InputArray I1, InputOutputArray flow); + void calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v); + void collectGarbage(); + + protected: //!< algorithm parameters + int fixedPointIterations, sorIterations; + float omega; + float alpha, delta, gamma; + float zeta, epsilon; + + public: + int getFixedPointIterations() const { return fixedPointIterations; } + void setFixedPointIterations(int val) { fixedPointIterations = val; } + int getSorIterations() const { return sorIterations; } + void setSorIterations(int val) { sorIterations = val; } + float getOmega() const { return omega; } + void setOmega(float val) { omega = val; } + float getAlpha() const { return alpha; } + void setAlpha(float val) { alpha = val; } + float getDelta() const { return delta; } + void setDelta(float val) { delta = val; } + float getGamma() const { return gamma; } + void setGamma(float val) { gamma = val; } + + protected: //!< internal buffers + /* This struct defines a special data layout for Mat_. Original buffer is split into two: one for "red" + * elements (sum of indices is even) and one for "black" (sum of indices is odd) in a checkerboard pattern. It + * allows for more efficient processing in SOR iterations, more natural SIMD vectorization and parallelization + * (Red-Black SOR). Additionally, it simplifies border handling by adding repeated borders to both red and + * black buffers. + */ + struct RedBlackBuffer + { + Mat_ red; //!< (i+j)%2==0 + Mat_ black; //!< (i+j)%2==1 + + /* Width of even and odd rows may be different */ + int red_even_len, red_odd_len; + int black_even_len, black_odd_len; + + void create(Size s); + void release(); + }; + + Mat_ Ix, Iy, Iz, Ixx, Ixy, Iyy, Ixz, Iyz; //!< image derivative buffers + RedBlackBuffer Ix_rb, Iy_rb, Iz_rb, Ixx_rb, Ixy_rb, Iyy_rb, Ixz_rb, Iyz_rb; //!< corresponding red-black buffers + + RedBlackBuffer A11, A12, A22, b1, b2; //!< main linear system coefficients + RedBlackBuffer weights; //!< smoothness term weights in the current fixed point iteration + + Mat_ mapX, mapY; //!< auxiliary buffers for remapping + + RedBlackBuffer tempW_u, tempW_v; //!< flow buffers that are modified in each fixed point iteration + RedBlackBuffer dW_u, dW_v; //!< optical flow increment + RedBlackBuffer W_u_rb, W_v_rb; //!< red-black-buffer version of the input flow + + private: //!< private methods and parallel sections + void splitCheckerboard(RedBlackBuffer &dst, Mat &src); + void mergeCheckerboard(Mat &dst, RedBlackBuffer &src); + void updateRepeatedBorders(RedBlackBuffer &dst); + void warpImage(Mat &dst, Mat &src, Mat &flow_u, Mat &flow_v); + void prepareBuffers(Mat &I0, Mat &I1, Mat &W_u, Mat &W_v); + + /* Parallelizing arbitrary operations with 3 input/output arguments */ + typedef void (VariationalRefinementImpl::*Op)(void *op1, void *op2, void *op3); + struct ParallelOp_ParBody : public ParallelLoopBody + { + VariationalRefinementImpl *var; + vector ops; + vector op1s; + vector op2s; + vector op3s; + + ParallelOp_ParBody(VariationalRefinementImpl &_var, vector _ops, vector &_op1s, + vector &_op2s, vector &_op3s); + void operator()(const Range &range) const; + }; + void gradHorizAndSplitOp(void *src, void *dst, void *dst_split) + { + Sobel(*(Mat *)src, *(Mat *)dst, -1, 1, 0, 1, 1, 0.00, BORDER_REPLICATE); + splitCheckerboard(*(RedBlackBuffer *)dst_split, *(Mat *)dst); + } + void gradVertAndSplitOp(void *src, void *dst, void *dst_split) + { + Sobel(*(Mat *)src, *(Mat *)dst, -1, 0, 1, 1, 1, 0.00, BORDER_REPLICATE); + splitCheckerboard(*(RedBlackBuffer *)dst_split, *(Mat *)dst); + } + void averageOp(void *src1, void *src2, void *dst) + { + addWeighted(*(Mat *)src1, 0.5, *(Mat *)src2, 0.5, 0.0, *(Mat *)dst, CV_32F); + } + void subtractOp(void *src1, void *src2, void *dst) + { + subtract(*(Mat *)src1, *(Mat *)src2, *(Mat *)dst, noArray(), CV_32F); + } + + struct ComputeDataTerm_ParBody : public ParallelLoopBody + { + VariationalRefinementImpl *var; + int nstripes, stripe_sz; + int h; + RedBlackBuffer *dW_u, *dW_v; + bool red_pass; + + ComputeDataTerm_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_dW_u, + RedBlackBuffer &_dW_v, bool _red_pass); + void operator()(const Range &range) const; + }; + + struct ComputeSmoothnessTermHorPass_ParBody : public ParallelLoopBody + { + VariationalRefinementImpl *var; + int nstripes, stripe_sz; + int h; + RedBlackBuffer *W_u, *W_v, *curW_u, *curW_v; + bool red_pass; + + ComputeSmoothnessTermHorPass_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h, + RedBlackBuffer &_W_u, RedBlackBuffer &_W_v, RedBlackBuffer &_tempW_u, + RedBlackBuffer &_tempW_v, bool _red_pass); + void operator()(const Range &range) const; + }; + + struct ComputeSmoothnessTermVertPass_ParBody : public ParallelLoopBody + { + VariationalRefinementImpl *var; + int nstripes, stripe_sz; + int h; + RedBlackBuffer *W_u, *W_v; + bool red_pass; + + ComputeSmoothnessTermVertPass_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h, + RedBlackBuffer &W_u, RedBlackBuffer &_W_v, bool _red_pass); + void operator()(const Range &range) const; + }; + + struct RedBlackSOR_ParBody : public ParallelLoopBody + { + VariationalRefinementImpl *var; + int nstripes, stripe_sz; + int h; + RedBlackBuffer *dW_u, *dW_v; + bool red_pass; + + RedBlackSOR_ParBody(VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_dW_u, + RedBlackBuffer &_dW_v, bool _red_pass); + void operator()(const Range &range) const; + }; +}; + +VariationalRefinementImpl::VariationalRefinementImpl() +{ + fixedPointIterations = 5; + sorIterations = 5; + alpha = 20.0f; + delta = 5.0f; + gamma = 10.0f; + omega = 1.6f; + zeta = 0.1f; + epsilon = 0.001f; +} + +/* This function converts an input Mat into the RedBlackBuffer format, which involves + * splitting the input buffer into two and adding repeated borders. Assumes that enough + * memory in dst is already allocated. + */ +void VariationalRefinementImpl::splitCheckerboard(RedBlackBuffer &dst, Mat &src) +{ + int buf_j, j; + int buf_w = (int)ceil(src.cols / 2.0) + 2; //!< max width of red/black buffers with borders + + /* Rows of red and black buffers can have different actual width, some extra repeated values are + * added for padding in such cases. + */ + for (int i = 0; i < src.rows; i++) + { + float *src_buf = src.ptr(i); + float *r_buf = dst.red.ptr(i + 1); + float *b_buf = dst.black.ptr(i + 1); + r_buf[0] = b_buf[0] = src_buf[0]; + buf_j = 1; + if (i % 2 == 0) + { + for (j = 0; j < src.cols - 1; j += 2) + { + r_buf[buf_j] = src_buf[j]; + b_buf[buf_j] = src_buf[j + 1]; + buf_j++; + } + if (j < src.cols) + r_buf[buf_j] = b_buf[buf_j] = src_buf[j]; + else + j--; + } + else + { + for (j = 0; j < src.cols - 1; j += 2) + { + b_buf[buf_j] = src_buf[j]; + r_buf[buf_j] = src_buf[j + 1]; + buf_j++; + } + if (j < src.cols) + r_buf[buf_j] = b_buf[buf_j] = src_buf[j]; + else + j--; + } + r_buf[buf_w - 1] = b_buf[buf_w - 1] = src_buf[j]; + } + + /* Fill top and bottom borders: */ + { + float *r_buf_border = dst.red.ptr(dst.red.rows - 1); + float *b_buf_border = dst.black.ptr(dst.black.rows - 1); + float *r_buf = dst.red.ptr(dst.red.rows - 2); + float *b_buf = dst.black.ptr(dst.black.rows - 2); + memcpy(r_buf_border, b_buf, buf_w * sizeof(float)); + memcpy(b_buf_border, r_buf, buf_w * sizeof(float)); + } + { + float *r_buf_border = dst.red.ptr(0); + float *b_buf_border = dst.black.ptr(0); + float *r_buf = dst.red.ptr(1); + float *b_buf = dst.black.ptr(1); + memcpy(r_buf_border, b_buf, buf_w * sizeof(float)); + memcpy(b_buf_border, r_buf, buf_w * sizeof(float)); + } +} + +/* The inverse of splitCheckerboard, i.e. converting the RedBlackBuffer back into Mat. + * Assumes that enough memory in dst is already allocated. + */ +void VariationalRefinementImpl::mergeCheckerboard(Mat &dst, RedBlackBuffer &src) +{ + int buf_j, j; + for (int i = 0; i < dst.rows; i++) + { + float *src_r_buf = src.red.ptr(i + 1); + float *src_b_buf = src.black.ptr(i + 1); + float *dst_buf = dst.ptr(i); + buf_j = 1; + + if (i % 2 == 0) + { + for (j = 0; j < dst.cols - 1; j += 2) + { + dst_buf[j] = src_r_buf[buf_j]; + dst_buf[j + 1] = src_b_buf[buf_j]; + buf_j++; + } + if (j < dst.cols) + dst_buf[j] = src_r_buf[buf_j]; + } + else + { + for (j = 0; j < dst.cols - 1; j += 2) + { + dst_buf[j] = src_b_buf[buf_j]; + dst_buf[j + 1] = src_r_buf[buf_j]; + buf_j++; + } + if (j < dst.cols) + dst_buf[j] = src_b_buf[buf_j]; + } + } +} + +/* An auxiliary function that updates the borders. Used to enforce that border values repeat + * the ones adjacent to the border. + */ +void VariationalRefinementImpl::updateRepeatedBorders(RedBlackBuffer &dst) +{ + int buf_w = dst.red.cols; + for (int i = 0; i < dst.red.rows - 2; i++) + { + float *r_buf = dst.red.ptr(i + 1); + float *b_buf = dst.black.ptr(i + 1); + + if (i % 2 == 0) + { + b_buf[0] = r_buf[1]; + if (dst.red_even_len > dst.black_even_len) + b_buf[dst.black_even_len + 1] = r_buf[dst.red_even_len]; + else + r_buf[dst.red_even_len + 1] = b_buf[dst.black_even_len]; + } + else + { + r_buf[0] = b_buf[1]; + if (dst.red_odd_len < dst.black_odd_len) + r_buf[dst.red_odd_len + 1] = b_buf[dst.black_odd_len]; + else + b_buf[dst.black_odd_len + 1] = r_buf[dst.red_odd_len]; + } + } + { + float *r_buf_border = dst.red.ptr(dst.red.rows - 1); + float *b_buf_border = dst.black.ptr(dst.black.rows - 1); + float *r_buf = dst.red.ptr(dst.red.rows - 2); + float *b_buf = dst.black.ptr(dst.black.rows - 2); + memcpy(r_buf_border, b_buf, buf_w * sizeof(float)); + memcpy(b_buf_border, r_buf, buf_w * sizeof(float)); + } + { + float *r_buf_border = dst.red.ptr(0); + float *b_buf_border = dst.black.ptr(0); + float *r_buf = dst.red.ptr(1); + float *b_buf = dst.black.ptr(1); + memcpy(r_buf_border, b_buf, buf_w * sizeof(float)); + memcpy(b_buf_border, r_buf, buf_w * sizeof(float)); + } +} + +void VariationalRefinementImpl::RedBlackBuffer::create(Size s) +{ + /* Allocate enough memory to include borders */ + int w = (int)ceil(s.width / 2.0) + 2; + red.create(s.height + 2, w); + black.create(s.height + 2, w); + + if (s.width % 2 == 0) + red_even_len = red_odd_len = black_even_len = black_odd_len = w - 2; + else + { + red_even_len = black_odd_len = w - 2; + red_odd_len = black_even_len = w - 3; + } +} + +void VariationalRefinementImpl::RedBlackBuffer::release() +{ + red.release(); + black.release(); +} + +VariationalRefinementImpl::ParallelOp_ParBody::ParallelOp_ParBody(VariationalRefinementImpl &_var, vector _ops, + vector &_op1s, vector &_op2s, + vector &_op3s) + : var(&_var), ops(_ops), op1s(_op1s), op2s(_op2s), op3s(_op3s) +{ +} + +void VariationalRefinementImpl::ParallelOp_ParBody::operator()(const Range &range) const +{ + for (int i = range.start; i < range.end; i++) + (var->*ops[i])(op1s[i], op2s[i], op3s[i]); +} + +void VariationalRefinementImpl::warpImage(Mat &dst, Mat &src, Mat &flow_u, Mat &flow_v) +{ + for (int i = 0; i < flow_u.rows; i++) + { + float *pFlowU = flow_u.ptr(i); + float *pFlowV = flow_v.ptr(i); + float *pMapX = mapX.ptr(i); + float *pMapY = mapY.ptr(i); + for (int j = 0; j < flow_u.cols; j++) + { + pMapX[j] = j + pFlowU[j]; + pMapY[j] = i + pFlowV[j]; + } + } + remap(src, dst, mapX, mapY, INTER_LINEAR, BORDER_REPLICATE); +} + +void VariationalRefinementImpl::prepareBuffers(Mat &I0, Mat &I1, Mat &W_u, Mat &W_v) +{ + Size s = I0.size(); + A11.create(s); + A12.create(s); + A22.create(s); + b1.create(s); + b2.create(s); + weights.create(s); + weights.red.setTo(0.0f); + weights.black.setTo(0.0f); + tempW_u.create(s); + tempW_v.create(s); + dW_u.create(s); + dW_v.create(s); + W_u_rb.create(s); + W_v_rb.create(s); + + Ix.create(s); + Iy.create(s); + Iz.create(s); + Ixx.create(s); + Ixy.create(s); + Iyy.create(s); + Ixz.create(s); + Iyz.create(s); + + Ix_rb.create(s); + Iy_rb.create(s); + Iz_rb.create(s); + Ixx_rb.create(s); + Ixy_rb.create(s); + Iyy_rb.create(s); + Ixz_rb.create(s); + Iyz_rb.create(s); + + mapX.create(s); + mapY.create(s); + + /* Floating point warps work significantly better than fixed-point */ + Mat I1flt, warpedI; + I1.convertTo(I1flt, CV_32F); + warpImage(warpedI, I1flt, W_u, W_v); + + /* Computing an average of the current and warped next frames (to compute the derivatives on) and + * temporal derivative Iz + */ + Mat averagedI; + { + vector op1s; + op1s.push_back((void *)&I0); + op1s.push_back((void *)&warpedI); + vector op2s; + op2s.push_back((void *)&warpedI); + op2s.push_back((void *)&I0); + vector op3s; + op3s.push_back((void *)&averagedI); + op3s.push_back((void *)&Iz); + vector ops; + ops.push_back(&VariationalRefinementImpl::averageOp); + ops.push_back(&VariationalRefinementImpl::subtractOp); + parallel_for_(Range(0, 2), ParallelOp_ParBody(*this, ops, op1s, op2s, op3s)); + } + splitCheckerboard(Iz_rb, Iz); + + /* Computing first-order derivatives */ + { + vector op1s; + op1s.push_back((void *)&averagedI); + op1s.push_back((void *)&averagedI); + op1s.push_back((void *)&Iz); + op1s.push_back((void *)&Iz); + vector op2s; + op2s.push_back((void *)&Ix); + op2s.push_back((void *)&Iy); + op2s.push_back((void *)&Ixz); + op2s.push_back((void *)&Iyz); + vector op3s; + op3s.push_back((void *)&Ix_rb); + op3s.push_back((void *)&Iy_rb); + op3s.push_back((void *)&Ixz_rb); + op3s.push_back((void *)&Iyz_rb); + vector ops; + ops.push_back(&VariationalRefinementImpl::gradHorizAndSplitOp); + ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp); + ops.push_back(&VariationalRefinementImpl::gradHorizAndSplitOp); + ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp); + parallel_for_(Range(0, 4), ParallelOp_ParBody(*this, ops, op1s, op2s, op3s)); + } + + /* Computing second-order derivatives */ + { + vector op1s; + op1s.push_back((void *)&Ix); + op1s.push_back((void *)&Ix); + op1s.push_back((void *)&Iy); + vector op2s; + op2s.push_back((void *)&Ixx); + op2s.push_back((void *)&Ixy); + op2s.push_back((void *)&Iyy); + vector op3s; + op3s.push_back((void *)&Ixx_rb); + op3s.push_back((void *)&Ixy_rb); + op3s.push_back((void *)&Iyy_rb); + vector ops; + ops.push_back(&VariationalRefinementImpl::gradHorizAndSplitOp); + ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp); + ops.push_back(&VariationalRefinementImpl::gradVertAndSplitOp); + parallel_for_(Range(0, 3), ParallelOp_ParBody(*this, ops, op1s, op2s, op3s)); + } +} + +VariationalRefinementImpl::ComputeDataTerm_ParBody::ComputeDataTerm_ParBody(VariationalRefinementImpl &_var, + int _nstripes, int _h, + RedBlackBuffer &_dW_u, + RedBlackBuffer &_dW_v, bool _red_pass) + : var(&_var), nstripes(_nstripes), h(_h), dW_u(&_dW_u), dW_v(&_dW_v), red_pass(_red_pass) +{ + stripe_sz = (int)ceil(h / (double)nstripes); +} + +/* This function computes parts of the main linear system coefficients A11,A12,A22,b1,b1 + * that correspond to the data term, which includes color and gradient constancy assumptions. + */ +void VariationalRefinementImpl::ComputeDataTerm_ParBody::operator()(const Range &range) const +{ + int start_i = min(range.start * stripe_sz, h); + int end_i = min(range.end * stripe_sz, h); + + float zeta_squared = var->zeta * var->zeta; + float epsilon_squared = var->epsilon * var->epsilon; + float gamma2 = var->gamma / 2; + float delta2 = var->delta / 2; + + float *pIx, *pIy, *pIz; + float *pIxx, *pIxy, *pIyy, *pIxz, *pIyz; + float *pdU, *pdV; + float *pa11, *pa12, *pa22, *pb1, *pb2; + + float derivNorm, derivNorm2; + float Ik1z, Ik1zx, Ik1zy; + float weight; + int len; + for (int i = start_i; i < end_i; i++) + { +#define INIT_ROW_POINTERS(color) \ + pIx = var->Ix_rb.color.ptr(i + 1) + 1; \ + pIy = var->Iy_rb.color.ptr(i + 1) + 1; \ + pIz = var->Iz_rb.color.ptr(i + 1) + 1; \ + pIxx = var->Ixx_rb.color.ptr(i + 1) + 1; \ + pIxy = var->Ixy_rb.color.ptr(i + 1) + 1; \ + pIyy = var->Iyy_rb.color.ptr(i + 1) + 1; \ + pIxz = var->Ixz_rb.color.ptr(i + 1) + 1; \ + pIyz = var->Iyz_rb.color.ptr(i + 1) + 1; \ + pa11 = var->A11.color.ptr(i + 1) + 1; \ + pa12 = var->A12.color.ptr(i + 1) + 1; \ + pa22 = var->A22.color.ptr(i + 1) + 1; \ + pb1 = var->b1.color.ptr(i + 1) + 1; \ + pb2 = var->b2.color.ptr(i + 1) + 1; \ + pdU = dW_u->color.ptr(i + 1) + 1; \ + pdV = dW_v->color.ptr(i + 1) + 1; \ + if (i % 2 == 0) \ + len = var->Ix_rb.color##_even_len; \ + else \ + len = var->Ix_rb.color##_odd_len; + + if (red_pass) + { + INIT_ROW_POINTERS(red); + } + else + { + INIT_ROW_POINTERS(black); + } +#undef INIT_ROW_POINTERS + + int j = 0; +#ifdef CV_SIMD128 + v_float32x4 zeta_vec = v_setall_f32(zeta_squared); + v_float32x4 eps_vec = v_setall_f32(epsilon_squared); + v_float32x4 delta_vec = v_setall_f32(delta2); + v_float32x4 gamma_vec = v_setall_f32(gamma2); + v_float32x4 zero_vec = v_setall_f32(0.0f); + v_float32x4 pIx_vec, pIy_vec, pIz_vec, pdU_vec, pdV_vec; + v_float32x4 pIxx_vec, pIxy_vec, pIyy_vec, pIxz_vec, pIyz_vec; + v_float32x4 derivNorm_vec, derivNorm2_vec, weight_vec; + v_float32x4 Ik1z_vec, Ik1zx_vec, Ik1zy_vec; + v_float32x4 pa11_vec, pa12_vec, pa22_vec, pb1_vec, pb2_vec; + + for (; j < len - 3; j += 4) + { + pIx_vec = v_load(pIx + j); + pIy_vec = v_load(pIy + j); + pIz_vec = v_load(pIz + j); + pdU_vec = v_load(pdU + j); + pdV_vec = v_load(pdV + j); + + derivNorm_vec = pIx_vec * pIx_vec + pIy_vec * pIy_vec + zeta_vec; + Ik1z_vec = pIz_vec + pIx_vec * pdU_vec + pIy_vec * pdV_vec; + weight_vec = (delta_vec / v_sqrt(Ik1z_vec * Ik1z_vec / derivNorm_vec + eps_vec)) / derivNorm_vec; + + pa11_vec = weight_vec * (pIx_vec * pIx_vec) + zeta_vec; + pa12_vec = weight_vec * (pIx_vec * pIy_vec); + pa22_vec = weight_vec * (pIy_vec * pIy_vec) + zeta_vec; + pb1_vec = zero_vec - weight_vec * (pIz_vec * pIx_vec); + pb2_vec = zero_vec - weight_vec * (pIz_vec * pIy_vec); + + pIxx_vec = v_load(pIxx + j); + pIxy_vec = v_load(pIxy + j); + pIyy_vec = v_load(pIyy + j); + pIxz_vec = v_load(pIxz + j); + pIyz_vec = v_load(pIyz + j); + + derivNorm_vec = pIxx_vec * pIxx_vec + pIxy_vec * pIxy_vec + zeta_vec; + derivNorm2_vec = pIyy_vec * pIyy_vec + pIxy_vec * pIxy_vec + zeta_vec; + Ik1zx_vec = pIxz_vec + pIxx_vec * pdU_vec + pIxy_vec * pdV_vec; + Ik1zy_vec = pIyz_vec + pIxy_vec * pdU_vec + pIyy_vec * pdV_vec; + weight_vec = gamma_vec / v_sqrt(Ik1zx_vec * Ik1zx_vec / derivNorm_vec + + Ik1zy_vec * Ik1zy_vec / derivNorm2_vec + eps_vec); + + pa11_vec += weight_vec * (pIxx_vec * pIxx_vec / derivNorm_vec + pIxy_vec * pIxy_vec / derivNorm2_vec); + pa12_vec += weight_vec * (pIxx_vec * pIxy_vec / derivNorm_vec + pIxy_vec * pIyy_vec / derivNorm2_vec); + pa22_vec += weight_vec * (pIxy_vec * pIxy_vec / derivNorm_vec + pIyy_vec * pIyy_vec / derivNorm2_vec); + pb1_vec -= weight_vec * (pIxx_vec * pIxz_vec / derivNorm_vec + pIxy_vec * pIyz_vec / derivNorm2_vec); + pb2_vec -= weight_vec * (pIxy_vec * pIxz_vec / derivNorm_vec + pIyy_vec * pIyz_vec / derivNorm2_vec); + + v_store(pa11 + j, pa11_vec); + v_store(pa12 + j, pa12_vec); + v_store(pa22 + j, pa22_vec); + v_store(pb1 + j, pb1_vec); + v_store(pb2 + j, pb2_vec); + } +#endif + for (; j < len; j++) + { + /* Step 1: Compute color constancy terms */ + /* Normalization factor:*/ + derivNorm = pIx[j] * pIx[j] + pIy[j] * pIy[j] + zeta_squared; + /* Color constancy penalty (computed by Taylor expansion):*/ + Ik1z = pIz[j] + pIx[j] * pdU[j] + pIy[j] * pdV[j]; + /* Weight of the color constancy term in the current fixed-point iteration divided by derivNorm: */ + weight = (delta2 / sqrt(Ik1z * Ik1z / derivNorm + epsilon_squared)) / derivNorm; + /* Add respective color constancy terms to the linear system coefficients: */ + pa11[j] = weight * (pIx[j] * pIx[j]) + zeta_squared; + pa12[j] = weight * (pIx[j] * pIy[j]); + pa22[j] = weight * (pIy[j] * pIy[j]) + zeta_squared; + pb1[j] = -weight * (pIz[j] * pIx[j]); + pb2[j] = -weight * (pIz[j] * pIy[j]); + + /* Step 2: Compute gradient constancy terms */ + /* Normalization factor for x gradient: */ + derivNorm = pIxx[j] * pIxx[j] + pIxy[j] * pIxy[j] + zeta_squared; + /* Normalization factor for y gradient: */ + derivNorm2 = pIyy[j] * pIyy[j] + pIxy[j] * pIxy[j] + zeta_squared; + /* Gradient constancy penalties (computed by Taylor expansion): */ + Ik1zx = pIxz[j] + pIxx[j] * pdU[j] + pIxy[j] * pdV[j]; + Ik1zy = pIyz[j] + pIxy[j] * pdU[j] + pIyy[j] * pdV[j]; + /* Weight of the gradient constancy term in the current fixed-point iteration: */ + weight = gamma2 / sqrt(Ik1zx * Ik1zx / derivNorm + Ik1zy * Ik1zy / derivNorm2 + epsilon_squared); + /* Add respective gradient constancy components to the linear system coefficients: */ + pa11[j] += weight * (pIxx[j] * pIxx[j] / derivNorm + pIxy[j] * pIxy[j] / derivNorm2); + pa12[j] += weight * (pIxx[j] * pIxy[j] / derivNorm + pIxy[j] * pIyy[j] / derivNorm2); + pa22[j] += weight * (pIxy[j] * pIxy[j] / derivNorm + pIyy[j] * pIyy[j] / derivNorm2); + pb1[j] += -weight * (pIxx[j] * pIxz[j] / derivNorm + pIxy[j] * pIyz[j] / derivNorm2); + pb2[j] += -weight * (pIxy[j] * pIxz[j] / derivNorm + pIyy[j] * pIyz[j] / derivNorm2); + } + } +} + +VariationalRefinementImpl::ComputeSmoothnessTermHorPass_ParBody::ComputeSmoothnessTermHorPass_ParBody( + VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_W_u, RedBlackBuffer &_W_v, + RedBlackBuffer &_tempW_u, RedBlackBuffer &_tempW_v, bool _red_pass) + : var(&_var), nstripes(_nstripes), h(_h), W_u(&_W_u), W_v(&_W_v), curW_u(&_tempW_u), curW_v(&_tempW_v), + red_pass(_red_pass) +{ + stripe_sz = (int)ceil(h / (double)nstripes); +} + +/* This function updates the linear system coefficients A11,A22,b1,b1 according to the + * flow smoothness term and computes corresponding weights for the current fixed point iteration. + * A11,A22,b1,b1 are updated only partially (horizontal pass). Doing both horizontal and vertical + * passes in one loop complicates parallelization (different threads write to the same elements). + */ +void VariationalRefinementImpl::ComputeSmoothnessTermHorPass_ParBody::operator()(const Range &range) const +{ + int start_i = min(range.start * stripe_sz, h); + int end_i = min(range.end * stripe_sz, h); + + float epsilon_squared = var->epsilon * var->epsilon; + float alpha2 = var->alpha / 2; + float *pWeight; + float *pA_u, *pA_u_next, *pA_v, *pA_v_next; + float *pB_u, *pB_u_next, *pB_v, *pB_v_next; + float *cW_u, *cW_u_next, *cW_u_next_row; + float *cW_v, *cW_v_next, *cW_v_next_row; + float *pW_u, *pW_u_next; + float *pW_v, *pW_v_next; + float ux, uy, vx, vy; + int len; + bool touches_right_border = true; + +#define INIT_ROW_POINTERS(cur_color, next_color, next_offs_even, next_offs_odd, bool_default) \ + pWeight = var->weights.cur_color.ptr(i + 1) + 1; \ + pA_u = var->A11.cur_color.ptr(i + 1) + 1; \ + pB_u = var->b1.cur_color.ptr(i + 1) + 1; \ + cW_u = curW_u->cur_color.ptr(i + 1) + 1; \ + pW_u = W_u->cur_color.ptr(i + 1) + 1; \ + pA_v = var->A22.cur_color.ptr(i + 1) + 1; \ + pB_v = var->b2.cur_color.ptr(i + 1) + 1; \ + cW_v = curW_v->cur_color.ptr(i + 1) + 1; \ + pW_v = W_v->cur_color.ptr(i + 1) + 1; \ + \ + cW_u_next_row = curW_u->next_color.ptr(i + 2) + 1; \ + cW_v_next_row = curW_v->next_color.ptr(i + 2) + 1; \ + \ + if (i % 2 == 0) \ + { \ + pA_u_next = var->A11.next_color.ptr(i + 1) + next_offs_even; \ + pB_u_next = var->b1.next_color.ptr(i + 1) + next_offs_even; \ + cW_u_next = curW_u->next_color.ptr(i + 1) + next_offs_even; \ + pW_u_next = W_u->next_color.ptr(i + 1) + next_offs_even; \ + pA_v_next = var->A22.next_color.ptr(i + 1) + next_offs_even; \ + pB_v_next = var->b2.next_color.ptr(i + 1) + next_offs_even; \ + cW_v_next = curW_v->next_color.ptr(i + 1) + next_offs_even; \ + pW_v_next = W_v->next_color.ptr(i + 1) + next_offs_even; \ + len = var->A11.cur_color##_even_len; \ + if (var->A11.cur_color##_even_len != var->A11.cur_color##_odd_len) \ + touches_right_border = bool_default; \ + else \ + touches_right_border = !bool_default; \ + } \ + else \ + { \ + pA_u_next = var->A11.next_color.ptr(i + 1) + next_offs_odd; \ + pB_u_next = var->b1.next_color.ptr(i + 1) + next_offs_odd; \ + cW_u_next = curW_u->next_color.ptr(i + 1) + next_offs_odd; \ + pW_u_next = W_u->next_color.ptr(i + 1) + next_offs_odd; \ + pA_v_next = var->A22.next_color.ptr(i + 1) + next_offs_odd; \ + pB_v_next = var->b2.next_color.ptr(i + 1) + next_offs_odd; \ + cW_v_next = curW_v->next_color.ptr(i + 1) + next_offs_odd; \ + pW_v_next = W_v->next_color.ptr(i + 1) + next_offs_odd; \ + len = var->A11.cur_color##_odd_len; \ + if (var->A11.cur_color##_even_len != var->A11.cur_color##_odd_len) \ + touches_right_border = !bool_default; \ + else \ + touches_right_border = bool_default; \ + } + + for (int i = start_i; i < end_i; i++) + { + if (red_pass) + { + INIT_ROW_POINTERS(red, black, 1, 2, true); + } + else + { + INIT_ROW_POINTERS(black, red, 2, 1, false); + } +#undef INIT_ROW_POINTERS + +#define COMPUTE \ + /* Gradients for the flow on the current fixed-point iteration: */ \ + ux = cW_u_next[j] - cW_u[j]; \ + vx = cW_v_next[j] - cW_v[j]; \ + uy = cW_u_next_row[j] - cW_u[j]; \ + vy = cW_v_next_row[j] - cW_v[j]; \ + /* Weight of the smoothness term in the current fixed-point iteration: */ \ + pWeight[j] = alpha2 / sqrt(ux * ux + vx * vx + uy * uy + vy * vy + epsilon_squared); \ + /* Gradients for initial raw flow multiplied by weight:*/ \ + ux = pWeight[j] * (pW_u_next[j] - pW_u[j]); \ + vx = pWeight[j] * (pW_v_next[j] - pW_v[j]); + +#define UPDATE \ + pB_u[j] += ux; \ + pA_u[j] += pWeight[j]; \ + pB_v[j] += vx; \ + pA_v[j] += pWeight[j]; \ + pB_u_next[j] -= ux; \ + pA_u_next[j] += pWeight[j]; \ + pB_v_next[j] -= vx; \ + pA_v_next[j] += pWeight[j]; + + int j = 0; +#ifdef CV_SIMD128 + v_float32x4 alpha2_vec = v_setall_f32(alpha2); + v_float32x4 eps_vec = v_setall_f32(epsilon_squared); + v_float32x4 cW_u_vec, cW_v_vec; + v_float32x4 pWeight_vec, ux_vec, vx_vec, uy_vec, vy_vec; + + for (; j < len - 4; j += 4) + { + cW_u_vec = v_load(cW_u + j); + cW_v_vec = v_load(cW_v + j); + + ux_vec = v_load(cW_u_next + j) - cW_u_vec; + vx_vec = v_load(cW_v_next + j) - cW_v_vec; + uy_vec = v_load(cW_u_next_row + j) - cW_u_vec; + vy_vec = v_load(cW_v_next_row + j) - cW_v_vec; + pWeight_vec = + alpha2_vec / v_sqrt(ux_vec * ux_vec + vx_vec * vx_vec + uy_vec * uy_vec + vy_vec * vy_vec + eps_vec); + v_store(pWeight + j, pWeight_vec); + + ux_vec = pWeight_vec * (v_load(pW_u_next + j) - v_load(pW_u + j)); + vx_vec = pWeight_vec * (v_load(pW_v_next + j) - v_load(pW_v + j)); + + v_store(pA_u + j, v_load(pA_u + j) + pWeight_vec); + v_store(pA_v + j, v_load(pA_v + j) + pWeight_vec); + v_store(pB_u + j, v_load(pB_u + j) + ux_vec); + v_store(pB_v + j, v_load(pB_v + j) + vx_vec); + + v_store(pA_u_next + j, v_load(pA_u_next + j) + pWeight_vec); + v_store(pA_v_next + j, v_load(pA_v_next + j) + pWeight_vec); + v_store(pB_u_next + j, v_load(pB_u_next + j) - ux_vec); + v_store(pB_v_next + j, v_load(pB_v_next + j) - vx_vec); + } +#endif + for (; j < len - 1; j++) + { + COMPUTE; + UPDATE; + } + + /* Omit the update on the rightmost elements */ + if (touches_right_border) + { + COMPUTE; + } + else + { + COMPUTE; + UPDATE; + } + } +#undef COMPUTE +#undef UPDATE +} + +VariationalRefinementImpl::ComputeSmoothnessTermVertPass_ParBody::ComputeSmoothnessTermVertPass_ParBody( + VariationalRefinementImpl &_var, int _nstripes, int _h, RedBlackBuffer &_W_u, RedBlackBuffer &_W_v, bool _red_pass) + : var(&_var), nstripes(_nstripes), W_u(&_W_u), W_v(&_W_v), red_pass(_red_pass) +{ + /* Omit the last row in the vertical pass */ + h = _h - 1; + stripe_sz = (int)ceil(h / (double)nstripes); +} + +/* This function adds the last remaining terms to the linear system coefficients A11,A22,b1,b1. */ +void VariationalRefinementImpl::ComputeSmoothnessTermVertPass_ParBody::operator()(const Range &range) const +{ + int start_i = min(range.start * stripe_sz, h); + int end_i = min(range.end * stripe_sz, h); + + float *pWeight; + float *pA_u, *pA_u_next_row, *pA_v, *pA_v_next_row; + float *pB_u, *pB_u_next_row, *pB_v, *pB_v_next_row; + float *pW_u, *pW_u_next_row, *pW_v, *pW_v_next_row; + float vy, uy; + int len; + + for (int i = start_i; i < end_i; i++) + { +#define INIT_ROW_POINTERS(cur_color, next_color) \ + pWeight = var->weights.cur_color.ptr(i + 1) + 1; \ + pA_u = var->A11.cur_color.ptr(i + 1) + 1; \ + pB_u = var->b1.cur_color.ptr(i + 1) + 1; \ + pW_u = W_u->cur_color.ptr(i + 1) + 1; \ + pA_v = var->A22.cur_color.ptr(i + 1) + 1; \ + pB_v = var->b2.cur_color.ptr(i + 1) + 1; \ + pW_v = W_v->cur_color.ptr(i + 1) + 1; \ + \ + pA_u_next_row = var->A11.next_color.ptr(i + 2) + 1; \ + pB_u_next_row = var->b1.next_color.ptr(i + 2) + 1; \ + pW_u_next_row = W_u->next_color.ptr(i + 2) + 1; \ + pA_v_next_row = var->A22.next_color.ptr(i + 2) + 1; \ + pB_v_next_row = var->b2.next_color.ptr(i + 2) + 1; \ + pW_v_next_row = W_v->next_color.ptr(i + 2) + 1; \ + \ + if (i % 2 == 0) \ + len = var->A11.cur_color##_even_len; \ + else \ + len = var->A11.cur_color##_odd_len; + + if (red_pass) + { + INIT_ROW_POINTERS(red, black); + } + else + { + INIT_ROW_POINTERS(black, red); + } +#undef INIT_ROW_POINTERS + + int j = 0; +#ifdef CV_SIMD128 + v_float32x4 pWeight_vec, uy_vec, vy_vec; + for (; j < len - 3; j += 4) + { + pWeight_vec = v_load(pWeight + j); + uy_vec = pWeight_vec * (v_load(pW_u_next_row + j) - v_load(pW_u + j)); + vy_vec = pWeight_vec * (v_load(pW_v_next_row + j) - v_load(pW_v + j)); + + v_store(pA_u + j, v_load(pA_u + j) + pWeight_vec); + v_store(pA_v + j, v_load(pA_v + j) + pWeight_vec); + v_store(pB_u + j, v_load(pB_u + j) + uy_vec); + v_store(pB_v + j, v_load(pB_v + j) + vy_vec); + + v_store(pA_u_next_row + j, v_load(pA_u_next_row + j) + pWeight_vec); + v_store(pA_v_next_row + j, v_load(pA_v_next_row + j) + pWeight_vec); + v_store(pB_u_next_row + j, v_load(pB_u_next_row + j) - uy_vec); + v_store(pB_v_next_row + j, v_load(pB_v_next_row + j) - vy_vec); + } +#endif + for (; j < len; j++) + { + uy = pWeight[j] * (pW_u_next_row[j] - pW_u[j]); + vy = pWeight[j] * (pW_v_next_row[j] - pW_v[j]); + pB_u[j] += uy; + pA_u[j] += pWeight[j]; + pB_v[j] += vy; + pA_v[j] += pWeight[j]; + pB_u_next_row[j] -= uy; + pA_u_next_row[j] += pWeight[j]; + pB_v_next_row[j] -= vy; + pA_v_next_row[j] += pWeight[j]; + } + } +} + +VariationalRefinementImpl::RedBlackSOR_ParBody::RedBlackSOR_ParBody(VariationalRefinementImpl &_var, int _nstripes, + int _h, RedBlackBuffer &_dW_u, + RedBlackBuffer &_dW_v, bool _red_pass) + : var(&_var), nstripes(_nstripes), h(_h), dW_u(&_dW_u), dW_v(&_dW_v), red_pass(_red_pass) +{ + stripe_sz = (int)ceil(h / (double)nstripes); +} + +/* This function implements the Red-Black SOR (successive-over relaxation) method for solving the main + * linear system in the current fixed-point iteration. + */ +void VariationalRefinementImpl::RedBlackSOR_ParBody::operator()(const Range &range) const +{ + int start = min(range.start * stripe_sz, h); + int end = min(range.end * stripe_sz, h); + + float *pa11, *pa12, *pa22, *pb1, *pb2, *pW, *pdu, *pdv; + float *pW_next, *pdu_next, *pdv_next; + float *pW_prev_row, *pdu_prev_row, *pdv_prev_row; + float *pdu_next_row, *pdv_next_row; + + float sigmaU, sigmaV; + int j, len; + for (int i = start; i < end; i++) + { +#define INIT_ROW_POINTERS(cur_color, next_color, next_offs_even, next_offs_odd) \ + pW = var->weights.cur_color.ptr(i + 1) + 1; \ + pa11 = var->A11.cur_color.ptr(i + 1) + 1; \ + pa12 = var->A12.cur_color.ptr(i + 1) + 1; \ + pa22 = var->A22.cur_color.ptr(i + 1) + 1; \ + pb1 = var->b1.cur_color.ptr(i + 1) + 1; \ + pb2 = var->b2.cur_color.ptr(i + 1) + 1; \ + pdu = dW_u->cur_color.ptr(i + 1) + 1; \ + pdv = dW_v->cur_color.ptr(i + 1) + 1; \ + \ + pdu_next_row = dW_u->next_color.ptr(i + 2) + 1; \ + pdv_next_row = dW_v->next_color.ptr(i + 2) + 1; \ + \ + pW_prev_row = var->weights.next_color.ptr(i) + 1; \ + pdu_prev_row = dW_u->next_color.ptr(i) + 1; \ + pdv_prev_row = dW_v->next_color.ptr(i) + 1; \ + \ + if (i % 2 == 0) \ + { \ + pW_next = var->weights.next_color.ptr(i + 1) + next_offs_even; \ + pdu_next = dW_u->next_color.ptr(i + 1) + next_offs_even; \ + pdv_next = dW_v->next_color.ptr(i + 1) + next_offs_even; \ + len = var->A11.cur_color##_even_len; \ + } \ + else \ + { \ + pW_next = var->weights.next_color.ptr(i + 1) + next_offs_odd; \ + pdu_next = dW_u->next_color.ptr(i + 1) + next_offs_odd; \ + pdv_next = dW_v->next_color.ptr(i + 1) + next_offs_odd; \ + len = var->A11.cur_color##_odd_len; \ + } + if (red_pass) + { + INIT_ROW_POINTERS(red, black, 1, 2); + } + else + { + INIT_ROW_POINTERS(black, red, 2, 1); + } +#undef INIT_ROW_POINTERS + + j = 0; +#ifdef CV_SIMD128 + v_float32x4 pW_prev_vec = v_setall_f32(pW_next[-1]); + v_float32x4 pdu_prev_vec = v_setall_f32(pdu_next[-1]); + v_float32x4 pdv_prev_vec = v_setall_f32(pdv_next[-1]); + v_float32x4 omega_vec = v_setall_f32(var->omega); + v_float32x4 pW_vec, pW_next_vec, pW_prev_row_vec; + v_float32x4 pdu_next_vec, pdu_prev_row_vec, pdu_next_row_vec; + v_float32x4 pdv_next_vec, pdv_prev_row_vec, pdv_next_row_vec; + v_float32x4 pW_shifted_vec, pdu_shifted_vec, pdv_shifted_vec; + v_float32x4 pa12_vec, sigmaU_vec, sigmaV_vec, pdu_vec, pdv_vec; + for (; j < len - 3; j += 4) + { + pW_vec = v_load(pW + j); + pW_next_vec = v_load(pW_next + j); + pW_prev_row_vec = v_load(pW_prev_row + j); + pdu_next_vec = v_load(pdu_next + j); + pdu_prev_row_vec = v_load(pdu_prev_row + j); + pdu_next_row_vec = v_load(pdu_next_row + j); + pdv_next_vec = v_load(pdv_next + j); + pdv_prev_row_vec = v_load(pdv_prev_row + j); + pdv_next_row_vec = v_load(pdv_next_row + j); + pa12_vec = v_load(pa12 + j); + pW_shifted_vec = v_reinterpret_as_f32( + v_extract<3>(v_reinterpret_as_s32(pW_prev_vec), v_reinterpret_as_s32(pW_next_vec))); + pdu_shifted_vec = v_reinterpret_as_f32( + v_extract<3>(v_reinterpret_as_s32(pdu_prev_vec), v_reinterpret_as_s32(pdu_next_vec))); + pdv_shifted_vec = v_reinterpret_as_f32( + v_extract<3>(v_reinterpret_as_s32(pdv_prev_vec), v_reinterpret_as_s32(pdv_next_vec))); + + sigmaU_vec = pW_shifted_vec * pdu_shifted_vec + pW_vec * pdu_next_vec + pW_prev_row_vec * pdu_prev_row_vec + + pW_vec * pdu_next_row_vec; + sigmaV_vec = pW_shifted_vec * pdv_shifted_vec + pW_vec * pdv_next_vec + pW_prev_row_vec * pdv_prev_row_vec + + pW_vec * pdv_next_row_vec; + + pdu_vec = v_load(pdu + j); + pdv_vec = v_load(pdv + j); + pdu_vec += omega_vec * ((sigmaU_vec + v_load(pb1 + j) - pdv_vec * pa12_vec) / v_load(pa11 + j) - pdu_vec); + pdv_vec += omega_vec * ((sigmaV_vec + v_load(pb2 + j) - pdu_vec * pa12_vec) / v_load(pa22 + j) - pdv_vec); + v_store(pdu + j, pdu_vec); + v_store(pdv + j, pdv_vec); + + pW_prev_vec = pW_next_vec; + pdu_prev_vec = pdu_next_vec; + pdv_prev_vec = pdv_next_vec; + } +#endif + for (; j < len; j++) + { + sigmaU = pW_next[j - 1] * pdu_next[j - 1] + pW[j] * pdu_next[j] + pW_prev_row[j] * pdu_prev_row[j] + + pW[j] * pdu_next_row[j]; + sigmaV = pW_next[j - 1] * pdv_next[j - 1] + pW[j] * pdv_next[j] + pW_prev_row[j] * pdv_prev_row[j] + + pW[j] * pdv_next_row[j]; + pdu[j] += var->omega * ((sigmaU + pb1[j] - pdv[j] * pa12[j]) / pa11[j] - pdu[j]); + pdv[j] += var->omega * ((sigmaV + pb2[j] - pdu[j] * pa12[j]) / pa22[j] - pdv[j]); + } + } +} + +void VariationalRefinementImpl::calc(InputArray I0, InputArray I1, InputOutputArray flow) +{ + CV_Assert(!I0.empty() && I0.depth() == CV_8U && I0.channels() == 1); + CV_Assert(!I1.empty() && I1.depth() == CV_8U && I1.channels() == 1); + CV_Assert(I0.sameSize(I1)); + CV_Assert(!flow.empty() && flow.depth() == CV_32F && flow.channels() == 2); + CV_Assert(I0.sameSize(flow)); + + Mat uv[2]; + Mat &flowMat = flow.getMatRef(); + split(flowMat, uv); + calcUV(I0, I1, uv[0], uv[1]); + merge(uv, 2, flowMat); +} + +void VariationalRefinementImpl::calcUV(InputArray I0, InputArray I1, InputOutputArray flow_u, InputOutputArray flow_v) +{ + CV_Assert(!I0.empty() && I0.depth() == CV_8U && I0.channels() == 1); + CV_Assert(!I1.empty() && I1.depth() == CV_8U && I1.channels() == 1); + CV_Assert(I0.sameSize(I1)); + CV_Assert(!flow_u.empty() && flow_u.depth() == CV_32F && flow_u.channels() == 1); + CV_Assert(!flow_v.empty() && flow_v.depth() == CV_32F && flow_v.channels() == 1); + CV_Assert(I0.sameSize(flow_u)); + CV_Assert(flow_u.sameSize(flow_v)); + + int num_stripes = getNumThreads(); + Mat I0Mat = I0.getMat(); + Mat I1Mat = I1.getMat(); + Mat &W_u = flow_u.getMatRef(); + Mat &W_v = flow_v.getMatRef(); + prepareBuffers(I0Mat, I1Mat, W_u, W_v); + + splitCheckerboard(W_u_rb, W_u); + splitCheckerboard(W_v_rb, W_v); + W_u_rb.red.copyTo(tempW_u.red); + W_u_rb.black.copyTo(tempW_u.black); + W_v_rb.red.copyTo(tempW_v.red); + W_v_rb.black.copyTo(tempW_v.black); + dW_u.red.setTo(0.0f); + dW_u.black.setTo(0.0f); + dW_v.red.setTo(0.0f); + dW_v.black.setTo(0.0f); + + for (int i = 0; i < fixedPointIterations; i++) + { + parallel_for_(Range(0, num_stripes), ComputeDataTerm_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, true)); + parallel_for_(Range(0, num_stripes), ComputeDataTerm_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, false)); + + parallel_for_(Range(0, num_stripes), ComputeSmoothnessTermHorPass_ParBody( + *this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, tempW_u, tempW_v, true)); + parallel_for_(Range(0, num_stripes), ComputeSmoothnessTermHorPass_ParBody( + *this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, tempW_u, tempW_v, false)); + + parallel_for_(Range(0, num_stripes), + ComputeSmoothnessTermVertPass_ParBody(*this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, true)); + parallel_for_(Range(0, num_stripes), + ComputeSmoothnessTermVertPass_ParBody(*this, num_stripes, I0Mat.rows, W_u_rb, W_v_rb, false)); + + for (int j = 0; j < sorIterations; j++) + { + parallel_for_(Range(0, num_stripes), RedBlackSOR_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, true)); + parallel_for_(Range(0, num_stripes), RedBlackSOR_ParBody(*this, num_stripes, I0Mat.rows, dW_u, dW_v, false)); + } + + tempW_u.red = W_u_rb.red + dW_u.red; + tempW_u.black = W_u_rb.black + dW_u.black; + updateRepeatedBorders(tempW_u); + tempW_v.red = W_v_rb.red + dW_v.red; + tempW_v.black = W_v_rb.black + dW_v.black; + updateRepeatedBorders(tempW_v); + } + mergeCheckerboard(W_u, tempW_u); + mergeCheckerboard(W_v, tempW_v); +} +void VariationalRefinementImpl::collectGarbage() +{ + Ix.release(); + Iy.release(); + Iz.release(); + Ixx.release(); + Ixy.release(); + Iyy.release(); + Ixz.release(); + Iyz.release(); + + Ix_rb.release(); + Iy_rb.release(); + Iz_rb.release(); + Ixx_rb.release(); + Ixy_rb.release(); + Iyy_rb.release(); + Ixz_rb.release(); + Iyz_rb.release(); + + A11.release(); + A12.release(); + A22.release(); + b1.release(); + b2.release(); + weights.release(); + + mapX.release(); + mapY.release(); + + tempW_u.release(); + tempW_v.release(); + dW_u.release(); + dW_v.release(); + W_u_rb.release(); + W_v_rb.release(); +} + +Ptr createVariationalFlowRefinement() { return makePtr(); } +} +} diff --git a/modules/optflow/test/test_OF_accuracy.cpp b/modules/optflow/test/test_OF_accuracy.cpp new file mode 100644 index 000000000..2125f3fd1 --- /dev/null +++ b/modules/optflow/test/test_OF_accuracy.cpp @@ -0,0 +1,190 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "test_precomp.hpp" +#include + +using namespace std; +using namespace cv; +using namespace cvtest; +using namespace optflow; + +static string getDataDir() { return TS::ptr()->get_data_path(); } + +static bool isFlowCorrect(float u) { return !cvIsNaN(u) && (fabs(u) < 1e9); } + +static float calcRMSE(Mat flow1, Mat flow2) +{ + float sum = 0; + int counter = 0; + const int rows = flow1.rows; + const int cols = flow1.cols; + + for (int y = 0; y < rows; ++y) + { + for (int x = 0; x < cols; ++x) + { + Vec2f flow1_at_point = flow1.at(y, x); + Vec2f flow2_at_point = flow2.at(y, x); + + float u1 = flow1_at_point[0]; + float v1 = flow1_at_point[1]; + float u2 = flow2_at_point[0]; + float v2 = flow2_at_point[1]; + + if (isFlowCorrect(u1) && isFlowCorrect(u2) && isFlowCorrect(v1) && isFlowCorrect(v2)) + { + sum += (u1 - u2) * (u1 - u2) + (v1 - v2) * (v1 - v2); + counter++; + } + } + } + return (float)sqrt(sum / (1e-9 + counter)); +} + +bool readRubberWhale(Mat &dst_frame_1, Mat &dst_frame_2, Mat &dst_GT) +{ + const string frame1_path = getDataDir() + "optflow/RubberWhale1.png"; + const string frame2_path = getDataDir() + "optflow/RubberWhale2.png"; + const string gt_flow_path = getDataDir() + "optflow/RubberWhale.flo"; + + dst_frame_1 = imread(frame1_path); + dst_frame_2 = imread(frame2_path); + dst_GT = readOpticalFlow(gt_flow_path); + + if (dst_frame_1.empty() || dst_frame_2.empty() || dst_GT.empty()) + return false; + else + return true; +} + +TEST(DenseOpticalFlow_SimpleFlow, ReferenceAccuracy) +{ + Mat frame1, frame2, GT; + ASSERT_TRUE(readRubberWhale(frame1, frame2, GT)); + float target_RMSE = 0.37f; + + Mat flow; + Ptr algo; + algo = createOptFlow_SimpleFlow(); + algo->calc(frame1, frame2, flow); + ASSERT_EQ(GT.rows, flow.rows); + ASSERT_EQ(GT.cols, flow.cols); + EXPECT_LE(calcRMSE(GT, flow), target_RMSE); +} + +TEST(DenseOpticalFlow_DeepFlow, ReferenceAccuracy) +{ + Mat frame1, frame2, GT; + ASSERT_TRUE(readRubberWhale(frame1, frame2, GT)); + float target_RMSE = 0.35f; + cvtColor(frame1, frame1, COLOR_BGR2GRAY); + cvtColor(frame2, frame2, COLOR_BGR2GRAY); + + Mat flow; + Ptr algo; + algo = createOptFlow_DeepFlow(); + algo->calc(frame1, frame2, flow); + ASSERT_EQ(GT.rows, flow.rows); + ASSERT_EQ(GT.cols, flow.cols); + EXPECT_LE(calcRMSE(GT, flow), target_RMSE); +} + +TEST(DenseOpticalFlow_SparseToDenseFlow, ReferenceAccuracy) +{ + Mat frame1, frame2, GT; + ASSERT_TRUE(readRubberWhale(frame1, frame2, GT)); + float target_RMSE = 0.52f; + + Mat flow; + Ptr algo; + algo = createOptFlow_SparseToDense(); + algo->calc(frame1, frame2, flow); + ASSERT_EQ(GT.rows, flow.rows); + ASSERT_EQ(GT.cols, flow.cols); + EXPECT_LE(calcRMSE(GT, flow), target_RMSE); +} + +TEST(DenseOpticalFlow_DIS, ReferenceAccuracy) +{ + Mat frame1, frame2, GT; + ASSERT_TRUE(readRubberWhale(frame1, frame2, GT)); + int presets[] = {DISOpticalFlow::PRESET_ULTRAFAST, DISOpticalFlow::PRESET_FAST, DISOpticalFlow::PRESET_MEDIUM}; + float target_RMSE[] = {0.86f, 0.74f, 0.49f}; + cvtColor(frame1, frame1, COLOR_BGR2GRAY); + cvtColor(frame2, frame2, COLOR_BGR2GRAY); + + Ptr algo; + + // iterate over presets: + for (int i = 0; i < 3; i++) + { + Mat flow; + algo = createOptFlow_DIS(presets[i]); + algo->calc(frame1, frame2, flow); + ASSERT_EQ(GT.rows, flow.rows); + ASSERT_EQ(GT.cols, flow.cols); + EXPECT_LE(calcRMSE(GT, flow), target_RMSE[i]); + } +} + +TEST(DenseOpticalFlow_VariationalRefinement, ReferenceAccuracy) +{ + Mat frame1, frame2, GT; + ASSERT_TRUE(readRubberWhale(frame1, frame2, GT)); + float target_RMSE = 0.86f; + cvtColor(frame1, frame1, COLOR_BGR2GRAY); + cvtColor(frame2, frame2, COLOR_BGR2GRAY); + + Ptr var_ref; + var_ref = createVariationalFlowRefinement(); + var_ref->setAlpha(20.0f); + var_ref->setDelta(5.0f); + var_ref->setGamma(10.0f); + var_ref->setSorIterations(25); + var_ref->setFixedPointIterations(25); + Mat flow(frame1.size(), CV_32FC2); + flow.setTo(0.0f); + var_ref->calc(frame1, frame2, flow); + ASSERT_EQ(GT.rows, flow.rows); + ASSERT_EQ(GT.cols, flow.cols); + EXPECT_LE(calcRMSE(GT, flow), target_RMSE); +} diff --git a/modules/optflow/test/test_OF_reproducibility.cpp b/modules/optflow/test/test_OF_reproducibility.cpp new file mode 100644 index 000000000..0cdbd50fd --- /dev/null +++ b/modules/optflow/test/test_OF_reproducibility.cpp @@ -0,0 +1,159 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "test_precomp.hpp" +#include + +using namespace std; +using namespace std::tr1; +using namespace cv; +using namespace cvtest; +using namespace perf; +using namespace testing; +using namespace optflow; + +typedef tuple OFParams; +typedef TestWithParam DenseOpticalFlow_DIS; +typedef TestWithParam DenseOpticalFlow_VariationalRefinement; + +TEST_P(DenseOpticalFlow_DIS, MultithreadReproducibility) +{ + double MAX_DIF = 0.01; + double MAX_MEAN_DIF = 0.001; + int loopsCount = 2; + RNG rng(0); + + OFParams params = GetParam(); + Size size = get<0>(params); + + for (int iter = 0; iter <= loopsCount; iter++) + { + Mat frame1(size, CV_8U); + randu(frame1, 0, 255); + Mat frame2(size, CV_8U); + randu(frame2, 0, 255); + + Ptr algo = createOptFlow_DIS(); + int psz = rng.uniform(4, 16); + int pstr = rng.uniform(1, psz - 1); + int grad_iter = rng.uniform(1, 64); + int var_iter = rng.uniform(0, 10); + bool use_mean_normalization = !!rng.uniform(0, 2); + bool use_spatial_propagation = !!rng.uniform(0, 2); + algo->setFinestScale(0); + algo->setPatchSize(psz); + algo->setPatchStride(pstr); + algo->setGradientDescentIterations(grad_iter); + algo->setVariationalRefinementIterations(var_iter); + algo->setUseMeanNormalization(use_mean_normalization); + algo->setUseSpatialPropagation(use_spatial_propagation); + + cv::setNumThreads(cv::getNumberOfCPUs()); + Mat resMultiThread; + algo->calc(frame1, frame2, resMultiThread); + + cv::setNumThreads(1); + Mat resSingleThread; + algo->calc(frame1, frame2, resSingleThread); + + EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_INF), MAX_DIF); + EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_L1), MAX_MEAN_DIF * frame1.total()); + + // resulting flow should be within the frame bounds: + double min_val, max_val; + minMaxLoc(resMultiThread, &min_val, &max_val); + EXPECT_LE(abs(min_val), sqrt(size.height * size.height + size.width * size.width)); + EXPECT_LE(abs(max_val), sqrt(size.height * size.height + size.width * size.width)); + } +} + +INSTANTIATE_TEST_CASE_P(FullSet, DenseOpticalFlow_DIS, Values(szODD, szQVGA)); + +TEST_P(DenseOpticalFlow_VariationalRefinement, MultithreadReproducibility) +{ + double MAX_DIF = 0.01; + double MAX_MEAN_DIF = 0.001; + float input_flow_rad = 5.0; + int loopsCount = 2; + RNG rng(0); + + OFParams params = GetParam(); + Size size = get<0>(params); + + for (int iter = 0; iter <= loopsCount; iter++) + { + Mat frame1(size, CV_8U); + randu(frame1, 0, 255); + Mat frame2(size, CV_8U); + randu(frame2, 0, 255); + Mat flow(size, CV_32FC2); + randu(flow, -input_flow_rad, input_flow_rad); + + Ptr var = createVariationalFlowRefinement(); + var->setAlpha(rng.uniform(1.0f, 100.0f)); + var->setGamma(rng.uniform(0.1f, 10.0f)); + var->setDelta(rng.uniform(0.1f, 10.0f)); + var->setSorIterations(rng.uniform(1, 20)); + var->setFixedPointIterations(rng.uniform(1, 20)); + var->setOmega(rng.uniform(1.01f, 1.99f)); + + cv::setNumThreads(cv::getNumberOfCPUs()); + Mat resMultiThread; + flow.copyTo(resMultiThread); + var->calc(frame1, frame2, resMultiThread); + + cv::setNumThreads(1); + Mat resSingleThread; + flow.copyTo(resSingleThread); + var->calc(frame1, frame2, resSingleThread); + + EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_INF), MAX_DIF); + EXPECT_LE(cv::norm(resSingleThread, resMultiThread, NORM_L1), MAX_MEAN_DIF * frame1.total()); + + // resulting flow should be within the frame bounds: + double min_val, max_val; + minMaxLoc(resMultiThread, &min_val, &max_val); + EXPECT_LE(abs(min_val), sqrt(size.height * size.height + size.width * size.width)); + EXPECT_LE(abs(max_val), sqrt(size.height * size.height + size.width * size.width)); + } +} + +INSTANTIATE_TEST_CASE_P(FullSet, DenseOpticalFlow_VariationalRefinement, Values(szODD, szQVGA)); diff --git a/modules/optflow/test/test_simpleflow.cpp b/modules/optflow/test/test_simpleflow.cpp deleted file mode 100644 index b04cae223..000000000 --- a/modules/optflow/test/test_simpleflow.cpp +++ /dev/null @@ -1,190 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "test_precomp.hpp" - -#include - -using namespace std; - -/* ///////////////////// simpleflow_test ///////////////////////// */ - -class CV_SimpleFlowTest : public cvtest::BaseTest -{ -public: - CV_SimpleFlowTest(); -protected: - void run(int); -}; - - -CV_SimpleFlowTest::CV_SimpleFlowTest() {} - -static bool readOpticalFlowFromFile(FILE* file, cv::Mat& flow) { - char header[5]; - if (fread(header, 1, 4, file) < 4 && (string)header != "PIEH") { - return false; - } - - int cols, rows; - if (fread(&cols, sizeof(int), 1, file) != 1|| - fread(&rows, sizeof(int), 1, file) != 1) { - return false; - } - - flow = cv::Mat::zeros(rows, cols, CV_32FC2); - - for (int i = 0; i < rows; ++i) { - for (int j = 0; j < cols; ++j) { - cv::Vec2f flow_at_point; - if (fread(&(flow_at_point[0]), sizeof(float), 1, file) != 1 || - fread(&(flow_at_point[1]), sizeof(float), 1, file) != 1) { - return false; - } - flow.at(i, j) = flow_at_point; - } - } - - return true; -} - -static bool isFlowCorrect(float u) { - return !cvIsNaN(u) && (fabs(u) < 1e9); -} - -static float calc_rmse(cv::Mat flow1, cv::Mat flow2) { - float sum = 0; - int counter = 0; - const int rows = flow1.rows; - const int cols = flow1.cols; - - for (int y = 0; y < rows; ++y) { - for (int x = 0; x < cols; ++x) { - cv::Vec2f flow1_at_point = flow1.at(y, x); - cv::Vec2f flow2_at_point = flow2.at(y, x); - - float u1 = flow1_at_point[0]; - float v1 = flow1_at_point[1]; - float u2 = flow2_at_point[0]; - float v2 = flow2_at_point[1]; - - if (isFlowCorrect(u1) && isFlowCorrect(u2) && isFlowCorrect(v1) && isFlowCorrect(v2)) { - sum += (u1-u2)*(u1-u2) + (v1-v2)*(v1-v2); - counter++; - } - } - } - return (float)sqrt(sum / (1e-9 + counter)); -} - -void CV_SimpleFlowTest::run(int) { - const float MAX_RMSE = 0.6f; - const string frame1_path = ts->get_data_path() + "optflow/RubberWhale1.png"; - const string frame2_path = ts->get_data_path() + "optflow/RubberWhale2.png"; - const string gt_flow_path = ts->get_data_path() + "optflow/RubberWhale.flo"; - - cv::Mat frame1 = cv::imread(frame1_path); - cv::Mat frame2 = cv::imread(frame2_path); - - if (frame1.empty()) { - ts->printf(cvtest::TS::LOG, "could not read image %s\n", frame2_path.c_str()); - ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA); - return; - } - - if (frame2.empty()) { - ts->printf(cvtest::TS::LOG, "could not read image %s\n", frame2_path.c_str()); - ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA); - return; - } - - if (frame1.rows != frame2.rows && frame1.cols != frame2.cols) { - ts->printf(cvtest::TS::LOG, "images should be of equal sizes (%s and %s)", - frame1_path.c_str(), frame2_path.c_str()); - ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA); - return; - } - - if (frame1.type() != 16 || frame2.type() != 16) { - ts->printf(cvtest::TS::LOG, "images should be of equal type CV_8UC3 (%s and %s)", - frame1_path.c_str(), frame2_path.c_str()); - ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA); - return; - } - - cv::Mat flow_gt; - - FILE* gt_flow_file = fopen(gt_flow_path.c_str(), "rb"); - if (gt_flow_file == NULL) { - ts->printf(cvtest::TS::LOG, "could not read ground-thuth flow from file %s", - gt_flow_path.c_str()); - ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA); - return; - } - - if (!readOpticalFlowFromFile(gt_flow_file, flow_gt)) { - ts->printf(cvtest::TS::LOG, "error while reading flow data from file %s", - gt_flow_path.c_str()); - ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA); - return; - } - fclose(gt_flow_file); - - cv::Mat flow; - cv::optflow::calcOpticalFlowSF(frame1, frame2, flow, 3, 2, 4); - - float rmse = calc_rmse(flow_gt, flow); - - ts->printf(cvtest::TS::LOG, "Optical flow estimation RMSE for SimpleFlow algorithm : %lf\n", - rmse); - - if (rmse > MAX_RMSE) { - ts->printf( cvtest::TS::LOG, - "Too big rmse error : %lf ( >= %lf )\n", rmse, MAX_RMSE); - ts->set_failed_test_info(cvtest::TS::FAIL_BAD_ACCURACY); - return; - } -} - - -TEST(Video_OpticalFlowSimpleFlow, accuracy) { CV_SimpleFlowTest test; test.safe_run(); } - -/* End of file. */ diff --git a/modules/optflow/test/test_sparsetodenseflow.cpp b/modules/optflow/test/test_sparsetodenseflow.cpp deleted file mode 100644 index a8d645d30..000000000 --- a/modules/optflow/test/test_sparsetodenseflow.cpp +++ /dev/null @@ -1,146 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "test_precomp.hpp" - -#include - -using namespace std; -using namespace cv; - -/* ///////////////////// sparsetodenseflow_test ///////////////////////// */ - -class CV_SparseToDenseFlowTest : public cvtest::BaseTest -{ -protected: - void run(int); -}; - -static bool isFlowCorrect(float u) { - return !cvIsNaN(u) && (fabs(u) < 1e9); -} - -static float calc_rmse(Mat flow1, Mat flow2) { - float sum = 0; - int counter = 0; - const int rows = flow1.rows; - const int cols = flow1.cols; - - for (int y = 0; y < rows; ++y) { - for (int x = 0; x < cols; ++x) { - Vec2f flow1_at_point = flow1.at(y, x); - Vec2f flow2_at_point = flow2.at(y, x); - - float u1 = flow1_at_point[0]; - float v1 = flow1_at_point[1]; - float u2 = flow2_at_point[0]; - float v2 = flow2_at_point[1]; - - if (isFlowCorrect(u1) && isFlowCorrect(u2) && isFlowCorrect(v1) && isFlowCorrect(v2)) { - sum += (u1-u2)*(u1-u2) + (v1-v2)*(v1-v2); - counter++; - } - } - } - return (float)sqrt(sum / (1e-9 + counter)); -} - -void CV_SparseToDenseFlowTest::run(int) { - const float MAX_RMSE = 0.6f; - const string frame1_path = ts->get_data_path() + "optflow/RubberWhale1.png"; - const string frame2_path = ts->get_data_path() + "optflow/RubberWhale2.png"; - const string gt_flow_path = ts->get_data_path() + "optflow/RubberWhale.flo"; - - Mat frame1 = imread(frame1_path); - Mat frame2 = imread(frame2_path); - - if (frame1.empty()) { - ts->printf(cvtest::TS::LOG, "could not read image %s\n", frame2_path.c_str()); - ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA); - return; - } - - if (frame2.empty()) { - ts->printf(cvtest::TS::LOG, "could not read image %s\n", frame2_path.c_str()); - ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA); - return; - } - - if (frame1.rows != frame2.rows && frame1.cols != frame2.cols) { - ts->printf(cvtest::TS::LOG, "images should be of equal sizes (%s and %s)", - frame1_path.c_str(), frame2_path.c_str()); - ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA); - return; - } - - if (frame1.type() != 16 || frame2.type() != 16) { - ts->printf(cvtest::TS::LOG, "images should be of equal type CV_8UC3 (%s and %s)", - frame1_path.c_str(), frame2_path.c_str()); - ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA); - return; - } - - Mat flow_gt = optflow::readOpticalFlow(gt_flow_path); - if(flow_gt.empty()) { - ts->printf(cvtest::TS::LOG, "error while reading flow data from file %s", - gt_flow_path.c_str()); - ts->set_failed_test_info(cvtest::TS::FAIL_MISSING_TEST_DATA); - return; - } - - Mat flow; - optflow::calcOpticalFlowSparseToDense(frame1, frame2, flow); - - float rmse = calc_rmse(flow_gt, flow); - - ts->printf(cvtest::TS::LOG, "Optical flow estimation RMSE for SparseToDenseFlow algorithm : %lf\n", - rmse); - - if (rmse > MAX_RMSE) { - ts->printf( cvtest::TS::LOG, - "Too big rmse error : %lf ( >= %lf )\n", rmse, MAX_RMSE); - ts->set_failed_test_info(cvtest::TS::FAIL_BAD_ACCURACY); - return; - } -} - - -TEST(Video_OpticalFlowSparseToDenseFlow, accuracy) { CV_SparseToDenseFlowTest test; test.safe_run(); }