[GSoC] Implementation of the Global Patch Collider and demo for PCAFlow (#752)

* Minor fixes * Start adding correspondence finding * Added finding of correspondences using GPC * New evaluation tool for GPC * Changed default parameters * Display ground truth in the evaluation tool * Added training tool for MPI Sintel dataset * Added the training tool for Middlebury dataset * Added some OpenCL optimization * Added explanatory notes * Minor improvements: time measurements + little ocl optimization * Added demos * Fixed warnings * Make parameter struct assignable * Fix warning * Proper command line argument usage * Prettified training tool, added parameters * Fixed VS warning * Fixed VS warning * Use of compressed forest.yml.gz files by default to save space * Added OpenCL flag to the evaluation tool * Updated documentation * Major speed and memory improvements: 1) Added new (optional) type of patch descriptors which are much faster. Retraining with option --descriptor-type=1 is required. 2) Got rid of hash table for descriptors, less memory usage. * Fixed various floating point errors related to precision. SIMD for dot product, forest traversing is a little bit faster now. * Tolerant floating point comparison * Triplets * Added comment * Choosing negative sample among nearest neighbors * Fix warning * Usage of parallel_for_() in critical places. Performance improvements. * Simulated annealing heuristic * Moved OpenCL kernel to separate file * Moved implementation to source file * Added basic accuracy tests for GPC and PCAFlow * Fixing warnings * Test accuracy constraints were too strict * Test accuracy constraints were too strict * Make tests more lightweight
2025-10-20 04:25:42 +08:00 · 2016-10-17 18:15:22 +03:00
parent 25575af653
commit ac62d70f97
12 changed files with 1483 additions and 162 deletions
--- a/modules/optflow/include/opencv2/optflow.hpp
+++ b/modules/optflow/include/opencv2/optflow.hpp
@@ -43,9 +43,6 @@ the use of this software, even if advised of the possibility of such damage.
 #include "opencv2/core.hpp"
 #include "opencv2/video.hpp"

-#include "opencv2/optflow/pcaflow.hpp"
-#include "opencv2/optflow/sparse_matching_gpc.hpp"
-
 /**
@defgroup optflow Optical Flow Algorithms

@@ -69,6 +66,9 @@ Functions reading and writing .flo files in "Middlebury" format, see: <http://vi

 */

+#include "opencv2/optflow/pcaflow.hpp"
+#include "opencv2/optflow/sparse_matching_gpc.hpp"
+
 namespace cv
 {
 namespace optflow
--- a/modules/optflow/include/opencv2/optflow/pcaflow.hpp
+++ b/modules/optflow/include/opencv2/optflow/pcaflow.hpp
@@ -37,23 +37,19 @@ or tort (including negligence or otherwise) arising in any way out of
 the use of this software, even if advised of the possibility of such damage.
 */

-/*
-Implementation of the PCAFlow algorithm from the following paper:
-http://files.is.tue.mpg.de/black/papers/cvpr2015_pcaflow.pdf
-
-@inproceedings{Wulff:CVPR:2015,
-  title = {Efficient Sparse-to-Dense Optical Flow Estimation using a Learned Basis and Layers},
-  author = {Wulff, Jonas and Black, Michael J.},
-  booktitle = { IEEE Conf. on Computer Vision and Pattern Recognition (CVPR) 2015},
-  month = jun,
-  year = {2015}
-}
-
-There are some key differences which distinguish this algorithm from the original PCAFlow (see paper):
-  - Discrete Cosine Transform basis is used instead of basis extracted with PCA.
-    Reasoning: DCT basis has comparable performance and it doesn't require additional storage space.
-    Also, this decision helps to avoid overloading the algorithm with a lot of external input.
-  - Usage of built-in OpenCV feature tracking instead of libviso.
+/**
+ * @file   pcaflow.hpp
+ * @author Vladislav Samsonov <vvladxx@gmail.com>
+ * @brief  Implementation of the PCAFlow algorithm from the following paper:
+ * http://files.is.tue.mpg.de/black/papers/cvpr2015_pcaflow.pdf
+ *
+ * @cite Wulff:CVPR:2015
+ *
+ * There are some key differences which distinguish this algorithm from the original PCAFlow (see paper):
+ * - Discrete Cosine Transform basis is used instead of basis extracted with PCA.
+ *   Reasoning: DCT basis has comparable performance and it doesn't require additional storage space.
+ *   Also, this decision helps to avoid overloading the algorithm with a lot of external input.
+ * - Usage of built-in OpenCV feature tracking instead of libviso.
 */

 #ifndef __OPENCV_OPTFLOW_PCAFLOW_HPP__
@@ -67,7 +63,10 @@ namespace cv
 namespace optflow
 {

-/*
+//! @addtogroup optflow
+//! @{
+
+/** @brief
 * This class can be used for imposing a learned prior on the resulting optical flow.
 * Solution will be regularized according to this prior.
 * You need to generate appropriate prior file with "learn_prior.py" script beforehand.
@@ -90,6 +89,8 @@ public:
  void fillConstraints( float *A1, float *A2, float *b1, float *b2 ) const;
 };

+/** @brief PCAFlow algorithm.
+ */
 class CV_EXPORTS_W OpticalFlowPCAFlow : public DenseOpticalFlow
 {
 protected:
@@ -103,6 +104,15 @@ protected:
  bool useOpenCL;

 public:
+  /** @brief Creates an instance of PCAFlow algorithm.
+   * @param _prior Learned prior or no prior (default). @see cv::optflow::PCAPrior
+   * @param _basisSize Number of basis vectors.
+   * @param _sparseRate Controls density of sparse matches.
+   * @param _retainedCornersFraction Retained corners fraction.
+   * @param _occlusionsThreshold Occlusion threshold.
+   * @param _dampingFactor Regularization term for solving least-squares. It is not related to the prior regularization.
+   * @param _claheClip Clip parameter for CLAHE.
+   */
  OpticalFlowPCAFlow( Ptr<const PCAPrior> _prior = Ptr<const PCAPrior>(), const Size _basisSize = Size( 18, 14 ),
                      float _sparseRate = 0.024, float _retainedCornersFraction = 0.2,
                      float _occlusionsThreshold = 0.0003, float _dampingFactor = 0.00002, float _claheClip = 14 );
@@ -127,7 +137,12 @@ private:
  OpticalFlowPCAFlow& operator=( const OpticalFlowPCAFlow& ); // make it non-assignable
 };

+/** @brief Creates an instance of PCAFlow
+*/
 CV_EXPORTS_W Ptr<DenseOpticalFlow> createOptFlow_PCAFlow();
+
+//! @}
+
 }
 }

--- a/modules/optflow/include/opencv2/optflow/sparse_matching_gpc.hpp
+++ b/modules/optflow/include/opencv2/optflow/sparse_matching_gpc.hpp
@@ -37,68 +37,135 @@ or tort (including negligence or otherwise) arising in any way out of
 the use of this software, even if advised of the possibility of such damage.
 */

-/*
-Implementation of the Global Patch Collider algorithm from the following paper:
-http://research.microsoft.com/en-us/um/people/pkohli/papers/wfrik_cvpr2016.pdf
-
-@InProceedings{Wang_2016_CVPR,
- author = {Wang, Shenlong and Ryan Fanello, Sean and Rhemann, Christoph and Izadi, Shahram and Kohli, Pushmeet},
- title = {The Global Patch Collider},
- booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- month = {June},
- year = {2016}
-}
-*/
+/**
+ * @file   sparse_matching_gpc.hpp
+ * @author Vladislav Samsonov <vvladxx@gmail.com>
+ * @brief  Implementation of the Global Patch Collider.
+ *
+ * Implementation of the Global Patch Collider algorithm from the following paper:
+ * http://research.microsoft.com/en-us/um/people/pkohli/papers/wfrik_cvpr2016.pdf
+ *
+ * @cite Wang_2016_CVPR
+ */

 #ifndef __OPENCV_OPTFLOW_SPARSE_MATCHING_GPC_HPP__
 #define __OPENCV_OPTFLOW_SPARSE_MATCHING_GPC_HPP__

 #include "opencv2/core.hpp"
+#include "opencv2/core/hal/intrin.hpp"
+#include "opencv2/imgproc.hpp"

 namespace cv
 {
 namespace optflow
 {

+//! @addtogroup optflow
+//! @{
+
 struct CV_EXPORTS_W GPCPatchDescriptor
 {
-  static const unsigned nFeatures = 18; // number of features in a patch descriptor
+  static const unsigned nFeatures = 18; //!< number of features in a patch descriptor
  Vec< double, nFeatures > feature;

-  GPCPatchDescriptor( const Mat *imgCh, int i, int j );
+  double dot( const Vec< double, nFeatures > &coef ) const; //!< Presumably the dot product of `feature` with `coef`; defined out of line -- confirm there.
+
+  void markAsSeparated() { feature[0] = std::numeric_limits< double >::quiet_NaN(); } //!< Flags the descriptor by overwriting feature[0] with NaN (the original value is lost).
+
+  bool isSeparated() const { return cvIsNaN( feature[0] ) != 0; } //!< True when feature[0] is NaN, the marker written by markAsSeparated().
+};
+
+struct CV_EXPORTS_W GPCPatchSample
+{
+  GPCPatchDescriptor ref; //!< Presumably the reference patch of the triplet -- confirm against the training code.
+  GPCPatchDescriptor pos; //!< Presumably the patch that matches `ref` (positive example) -- confirm.
+  GPCPatchDescriptor neg; //!< Presumably a patch that does not match `ref` (negative example) -- confirm.
+
+  void getDirections( bool &refdir, bool &posdir, bool &negdir, const Vec< double, GPCPatchDescriptor::nFeatures > &coef, double rhs ) const; //!< Writes one flag per descriptor for the hyperplane given by (coef, rhs); exact meaning is defined out of line.
+};

-typedef std::pair< GPCPatchDescriptor, GPCPatchDescriptor > GPCPatchSample;
 typedef std::vector< GPCPatchSample > GPCSamplesVector;

+/** @brief Descriptor types for the Global Patch Collider.
+ */
+enum GPCDescType
+{
+  GPC_DESCRIPTOR_DCT = 0, //!< Better quality but slow
+  GPC_DESCRIPTOR_WHT      //!< Worse quality but much faster (implicit value 1)
+};
+
 /** @brief Class encapsulating training samples.
 */
 class CV_EXPORTS_W GPCTrainingSamples
 {
 private:
  GPCSamplesVector samples;
+  int descriptorType; //!< Descriptor type tag reported by type().

 public:
  /** @brief This function can be used to extract samples from a pair of images and a ground truth flow.
   * Sizes of all the provided vectors must be equal.
   */
  static Ptr< GPCTrainingSamples > create( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo,
-                                           const std::vector< String > &gt );
+                                           const std::vector< String > &gt, int descriptorType );
+
+  static Ptr< GPCTrainingSamples > create( InputArrayOfArrays imagesFrom, InputArrayOfArrays imagesTo, InputArrayOfArrays gt,
+                                           int descriptorType ); // Overload taking InputArrayOfArrays instead of vectors of String.

  size_t size() const { return samples.size(); }

-  operator GPCSamplesVector() const { return samples; }
+  int type() const { return descriptorType; } //!< Returns the stored descriptor type.

  operator GPCSamplesVector &() { return samples; }
 };

+/** @brief Class encapsulating training parameters.
+ */
+struct GPCTrainingParams
+{
+  unsigned maxTreeDepth;  //!< Maximum tree depth to stop partitioning.
+  int minNumberOfSamples; //!< Minimum number of samples in the node to stop partitioning.
+  int descriptorType;     //!< Type of descriptors to use (a GPCDescType value stored as int).
+  bool printProgress;     //!< Print progress to stdout.
+
+  GPCTrainingParams( unsigned _maxTreeDepth = 20, int _minNumberOfSamples = 3, GPCDescType _descriptorType = GPC_DESCRIPTOR_DCT,
+                     bool _printProgress = true )
+      : maxTreeDepth( _maxTreeDepth ), minNumberOfSamples( _minNumberOfSamples ), descriptorType( _descriptorType ),
+        printProgress( _printProgress )
+  {
+    CV_Assert( check() ); // Reject invalid settings at construction time.
+  }
+
+  GPCTrainingParams( const GPCTrainingParams &params )
+      : maxTreeDepth( params.maxTreeDepth ), minNumberOfSamples( params.minNumberOfSamples ), descriptorType( params.descriptorType ),
+        printProgress( params.printProgress )
+  {
+    CV_Assert( check() ); // Fields are public and mutable, so the copy is validated again.
+  }
+
+  bool check() const { return maxTreeDepth > 1 && minNumberOfSamples > 1; } //!< True when maxTreeDepth > 1 and minNumberOfSamples > 1.
+};
+
+/** @brief Class encapsulating matching parameters.
+ */
+struct GPCMatchingParams
+{
+  bool useOpenCL; //!< Whether to use OpenCL to speed up the matching.
+
+  GPCMatchingParams( bool _useOpenCL = false ) : useOpenCL( _useOpenCL ) {} // OpenCL is off unless explicitly requested.
+
+  GPCMatchingParams( const GPCMatchingParams &params ) : useOpenCL( params.useOpenCL ) {} // Member-wise copy.
+};
+
+/** @brief Class for individual tree.
+ */
 class CV_EXPORTS_W GPCTree : public Algorithm
 {
 public:
  struct Node
  {
-    Vec< double, GPCPatchDescriptor::nFeatures > coef; // hyperplane coefficients
-    double rhs;
+    Vec< double, GPCPatchDescriptor::nFeatures > coef; //!< Hyperplane coefficients
+    double rhs;                                        //!< Bias term of the hyperplane
    unsigned left;
    unsigned right;

@@ -109,45 +176,100 @@ private:
  typedef GPCSamplesVector::iterator SIter;

  std::vector< Node > nodes;
+  GPCTrainingParams params;

  bool trainNode( size_t nodeId, SIter begin, SIter end, unsigned depth );

 public:
-  void train( GPCSamplesVector &samples );
+  void train( GPCTrainingSamples &samples, const GPCTrainingParams params = GPCTrainingParams() );

  void write( FileStorage &fs ) const;

  void read( const FileNode &fn );

+  unsigned findLeafForPatch( const GPCPatchDescriptor &descr ) const; //!< Returns the identifier of the leaf that `descr` falls into; GPCForest stores it in Trail::leaf.
+
  static Ptr< GPCTree > create() { return makePtr< GPCTree >(); }

  bool operator==( const GPCTree &t ) const { return nodes == t.nodes; }
+
+  int getDescriptorType() const { return params.descriptorType; } //!< Descriptor type taken from the stored training parameters.
 };

 template < int T > class CV_EXPORTS_W GPCForest : public Algorithm
 {
 private:
+  struct Trail
+  {
+    unsigned leaf[T]; //!< leaf[t] is the leaf the patch reached in tree t, for t in [0, T).
+    Point2i coord;    //!< Patch coordinates.
+
+    bool operator==( const Trail &trail ) const { return memcmp( leaf, trail.leaf, sizeof( leaf ) ) == 0; } // Compares the leaf arrays only; coord is ignored.
+
+    bool operator<( const Trail &trail ) const // Lexicographic order over leaf[0..T-1]; coord is ignored.
+    {
+      for ( int i = 0; i < T - 1; ++i )
+        if ( leaf[i] != trail.leaf[i] )
+          return leaf[i] < trail.leaf[i];
+      return leaf[T - 1] < trail.leaf[T - 1];
+    }
+  };
+
+  class ParallelTrailsFilling : public ParallelLoopBody // Loop body that fills Trail::leaf; the range passed in is a range of tree indices.
+  {
+  private:
+    const GPCForest *forest;
+    const std::vector< GPCPatchDescriptor > *descr;
+    std::vector< Trail > *trails;
+
+    ParallelTrailsFilling &operator=( const ParallelTrailsFilling & ); // Private and without a body in this header, so assignment is not usable.
+
+  public:
+    ParallelTrailsFilling( const GPCForest *_forest, const std::vector< GPCPatchDescriptor > *_descr, std::vector< Trail > *_trails )
+        : forest( _forest ), descr( _descr ), trails( _trails ){}; // Stores the pointers only; the pointees must outlive this object.
+
+    void operator()( const Range &range ) const
+    {
+      for ( int t = range.start; t < range.end; ++t ) // t is a tree index; this iteration writes only leaf[t] of each trail.
+        for ( size_t i = 0; i < descr->size(); ++i )
+          trails->at( i ).leaf[t] = forest->tree[t].findLeafForPatch( descr->at( i ) ); // at() throws if trails has fewer entries than descr.
+    }
+  };
+
  GPCTree tree[T];

 public:
  /** @brief Train the forest using one sample set for every tree.
   * Please, consider using the next method instead of this one for better quality.
   */
-  void train( GPCSamplesVector &samples )
+  void train( GPCTrainingSamples &samples, const GPCTrainingParams params = GPCTrainingParams() )
  {
    for ( int i = 0; i < T; ++i )
-      tree[i].train( samples );
+      tree[i].train( samples, params ); // The same samples object and parameters are handed to every tree.
  }

  /** @brief Train the forest using individual samples for each tree.
   * It is generally better to use this instead of the first method.
   */
-  void train( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo, const std::vector< String > &gt )
+  void train( const std::vector< String > &imagesFrom, const std::vector< String > &imagesTo, const std::vector< String > &gt,
+              const GPCTrainingParams params = GPCTrainingParams() )
  {
    for ( int i = 0; i < T; ++i )
    {
-      Ptr< GPCTrainingSamples > samples = GPCTrainingSamples::create( imagesFrom, imagesTo, gt ); // Create training set for the tree
-      tree[i].train( *samples );
+      Ptr< GPCTrainingSamples > samples =
+        GPCTrainingSamples::create( imagesFrom, imagesTo, gt, params.descriptorType ); // Create a separate training set for tree i
+      tree[i].train( *samples, params );
+    }
+  }
+
+  void train( InputArrayOfArrays imagesFrom, InputArrayOfArrays imagesTo, InputArrayOfArrays gt,
+              const GPCTrainingParams params = GPCTrainingParams() ) // Per-tree training like the String overload, but for InputArrayOfArrays inputs.
+  {
+    for ( int i = 0; i < T; ++i )
+    {
+      Ptr< GPCTrainingSamples > samples =
+        GPCTrainingSamples::create( imagesFrom, imagesTo, gt, params.descriptorType ); // Create a separate training set for tree i
+      tree[i].train( *samples, params );
    }
  }

@@ -166,19 +288,93 @@ public:

  void read( const FileNode &fn )
  {
-    CV_Assert( T == (int)fn["ntrees"] );
+    CV_Assert( T <= (int)fn["ntrees"] ); // The stored forest may hold more than T trees; only the first T are loaded below.
    FileNodeIterator it = fn["trees"].begin();
    for ( int i = 0; i < T; ++i, ++it )
      tree[i].read( *it );
  }

+  /** @brief Find correspondences between two images.
+   * @param[in] imgFrom First image in a sequence.
+   * @param[in] imgTo Second image in a sequence.
+   * @param[out] corr Output vector with pairs of corresponding points.
+   * @param[in] params Additional matching parameters for fine-tuning.
+   */
+  void findCorrespondences( InputArray imgFrom, InputArray imgTo, std::vector< std::pair< Point2i, Point2i > > &corr,
+                            const GPCMatchingParams params = GPCMatchingParams() ) const;
+
  static Ptr< GPCForest > create() { return makePtr< GPCForest >(); }
 };
+
+class CV_EXPORTS_W GPCDetails // Static helpers called from GPCForest::findCorrespondences; implemented out of line.
+{
+public:
+  static void dropOutliers( std::vector< std::pair< Point2i, Point2i > > &corr ); // Takes corr by mutable reference; presumably removes inconsistent matches in place -- confirm in the source file.
+
+  static void getAllDescriptorsForImage( const Mat *imgCh, std::vector< GPCPatchDescriptor > &descr, const GPCMatchingParams &mp,
+                                         int type ); // findCorrespondences passes an array of 3 split channels as imgCh and a tree's descriptor type as type.
+
+  static void getCoordinatesFromIndex( size_t index, Size sz, int &x, int &y ); // Outputs x/y for descriptor number `index` of an image of size sz; the exact mapping is defined out of line.
+};
+
+// Finds correspondences between two 3-channel images.
+//
+// Both images are converted to float YCrCb and split into channels; every patch descriptor is pushed
+// through all T trees, and two patches are matched when they share the same leaf in every tree and
+// that leaf combination occurs exactly once in each image. Matches are appended to corr (existing
+// entries are kept) and the whole vector is then passed to GPCDetails::dropOutliers().
+// Both inputs must have exactly 3 channels, otherwise CV_Assert fails.
+template < int T >
+void GPCForest< T >::findCorrespondences( InputArray imgFrom, InputArray imgTo, std::vector< std::pair< Point2i, Point2i > > &corr,
+                                          const GPCMatchingParams params ) const
+{
+  CV_Assert( imgFrom.channels() == 3 );
+  CV_Assert( imgTo.channels() == 3 );
+
+  Mat from, to;
+  imgFrom.getMat().convertTo( from, CV_32FC3 );
+  imgTo.getMat().convertTo( to, CV_32FC3 );
+  cvtColor( from, from, COLOR_BGR2YCrCb );
+  cvtColor( to, to, COLOR_BGR2YCrCb );
+
+  Mat fromCh[3], toCh[3];
+  split( from, fromCh );
+  split( to, toCh );
+
+  std::vector< GPCPatchDescriptor > descr;
+  GPCDetails::getAllDescriptorsForImage( fromCh, descr, params, tree[0].getDescriptorType() );
+  std::vector< Trail > trailsFrom( descr.size() );
+
+  for ( size_t i = 0; i < descr.size(); ++i )
+    GPCDetails::getCoordinatesFromIndex( i, from.size(), trailsFrom[i].coord.x, trailsFrom[i].coord.y );
+  parallel_for_( Range( 0, T ), ParallelTrailsFilling( this, &descr, &trailsFrom ) );
+
+  descr.clear();
+  GPCDetails::getAllDescriptorsForImage( toCh, descr, params, tree[0].getDescriptorType() );
+  // Size trailsTo from the second image's own descriptor count. Reusing the first image's count
+  // indexed past the end when imgTo produced more descriptors, and left stale zero-filled trails
+  // in the matching when it produced fewer.
+  std::vector< Trail > trailsTo( descr.size() );
+
+  for ( size_t i = 0; i < descr.size(); ++i )
+    GPCDetails::getCoordinatesFromIndex( i, to.size(), trailsTo[i].coord.x, trailsTo[i].coord.y );
+  parallel_for_( Range( 0, T ), ParallelTrailsFilling( this, &descr, &trailsTo ) );
+
+  // Sorting groups identical leaf combinations together so uniqueness can be tested on neighbours.
+  std::sort( trailsFrom.begin(), trailsFrom.end() );
+  std::sort( trailsTo.begin(), trailsTo.end() );
+
+  for ( size_t i = 0; i < trailsFrom.size(); ++i )
+  {
+    // Skip over a run of equal trails; a trail that repeats in the first image is ambiguous.
+    bool uniq = true;
+    while ( i + 1 < trailsFrom.size() && trailsFrom[i] == trailsFrom[i + 1] )
+      ++i, uniq = false;
+    if ( uniq )
+    {
+      // lower_bound yields the first equal trail, so only its successor needs checking for a duplicate.
+      typename std::vector< Trail >::const_iterator lb = std::lower_bound( trailsTo.begin(), trailsTo.end(), trailsFrom[i] );
+      if ( lb != trailsTo.end() && *lb == trailsFrom[i] && ( ( lb + 1 ) == trailsTo.end() || !( *lb == *( lb + 1 ) ) ) )
+        corr.push_back( std::make_pair( trailsFrom[i].coord, lb->coord ) );
+    }
+  }
+
+  GPCDetails::dropOutliers( corr );
 }

+//! @}
+
+} // namespace optflow
+
 CV_EXPORTS void write( FileStorage &fs, const String &name, const optflow::GPCTree::Node &node );

 CV_EXPORTS void read( const FileNode &fn, optflow::GPCTree::Node &node, optflow::GPCTree::Node );
-}
+} // namespace cv

 #endif