Modified the class hierarchy

2025-10-20 12:55:15 +08:00 · 2017-07-19 16:58:11 +02:00
parent 2b8ed124f2
commit be395e5981
5 changed files with 509 additions and 56 deletions
--- a/modules/text/include/opencv2/text/ocr.hpp
+++ b/modules/text/include/opencv2/text/ocr.hpp
@@ -716,10 +716,6 @@ public:
    /** @brief produces a class confidence row-vector given an image
     */
    CV_WRAP virtual void classify(InputArray image, OutputArray classProbabilities) = 0;
-    /** @brief produces a list of bounding box given an image
-     */
-
-    CV_WRAP virtual void detect(InputArray image, OutputArray classProbabilities) = 0;

    /** @brief produces a matrix containing class confidence row-vectors given an collection of images
     */
--- a/modules/text/include/opencv2/text/textDetector.hpp
+++ b/modules/text/include/opencv2/text/textDetector.hpp
@@ -65,19 +65,131 @@ namespace text
 //detection scenario
 class CV_EXPORTS_W BaseDetector
 {
- public:
+public:
    virtual ~BaseDetector() {};

    virtual void run(Mat& image,
-                     std::vector<Rect>* component_rects=NULL,                     
+                     std::vector<Rect>* component_rects=NULL,
                     std::vector<float>* component_confidences=NULL,
                     int component_level=0) = 0;

    virtual void run(Mat& image, Mat& mask,
-                     std::vector<Rect>* component_rects=NULL,                     
+                     std::vector<Rect>* component_rects=NULL,
                     std::vector<float>* component_confidences=NULL,
                     int component_level=0) = 0;

+};
+/** A virtual class for different models of text detection (including CNN based deep models)
+ */
+
+class CV_EXPORTS_W TextRegionDetector
+{
+protected:
+    /** Stores input and output size
+     */
+    //netGeometry inputGeometry_;
+    //netGeometry outputGeometry_;
+    Size inputGeometry_;
+    Size outputGeometry_;
+    int inputChannelCount_;
+    int outputChannelCount_;
+
+public:
+    virtual ~TextRegionDetector() {}
+
+    /** @brief produces a list of Bounding boxes and an estimate of text-ness confidence of Bounding Boxes
+     */
+    CV_WRAP virtual void detect(InputArray image, OutputArray bboxProb ) = 0;
+
+
+    /** @brief simple getter method returning the size (height, width) of the input sample
+     */
+    CV_WRAP virtual Size  getInputGeometry(){return this->inputGeometry_;}
+
+    /** @brief simple getter method returning the shape of the output
+     *   Any text detector should output a number of text regions along with a score of text-ness
+     *   From the shape it can be inferred the number of text regions and number of returned value
+     *   for each region
+     */
+    CV_WRAP virtual Size getOutputGeometry(){return this->outputGeometry_;}
+
+
+
+};
+
+/** Generic structure of Deep CNN based Text Detectors
+ * */
+class CV_EXPORTS_W  DeepCNNTextDetector : public TextRegionDetector
+{
+    /** @brief Class that uses a pretrained caffe model for text detection.
+     * Any text detection should
+     * This network is described in detail in:
+     * Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network
+     * https://arxiv.org/abs/1611.06779
+     */
+protected:
+    /** all deep CNN based text detectors have a preprocessor (normally)
+         */
+    Ptr<ImagePreprocessor> preprocessor_;
+    /** @brief all image preprocessing is handled here including whitening etc.
+         *
+         *  @param input the image to be preprocessed for the classifier. If the depth
+         * is CV_8U values should be in [0,255] otherwise values are assumed to be in [0,1]
+         *
+         * @param output reference to the image to be fed to the classifier, the preprocessor will
+         * resize the image to the appropriate size and convert it to the appropriate depth
+         *
+         * The method preprocess should never be used externally, it is up to classify and classifyBatch
+         * methods to employ it.
+         */
+    virtual void preprocess(const Mat& input,Mat& output);
+public:
+    virtual ~DeepCNNTextDetector() {};
+
+    /** @brief Constructs a DeepCNNTextDetector object from a caffe pretrained model
+     *
+     * @param archFilename is the path to the prototxt file containing the deployment model architecture description.
+     *
+     * @param weightsFilename is the path to the pretrained weights of the model in binary form.
+     *
+     * @param preprocessor is a pointer to the instance of an ImagePreprocessor implementing the preprocess_ protected method;
+     *
+     * @param minibatchSz the maximum number of samples that can be processed in parallel. In practice this parameter
+     * has an effect only when computing in the GPU and should be set with respect to the memory available in the GPU.
+     *
+     * @param backEnd integer parameter selecting the computation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
+     * the only option
+     */
+    CV_WRAP static Ptr<DeepCNNTextDetector> create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE);
+
+    /** @brief Constructs a DeepCNNTextDetector intended to be used for text area detection.
+     *
+     * This method loads a pretrained classifier and couples it with a preprocessor that preprocesses the image with mean subtraction of ()
+     * The architecture and models weights can be downloaded from:
+     * https://github.com/sghoshcvc/TextBox-Models.git (size is around 100 MB)
+
+     * @param archFilename is the path to the prototxt file containing the deployment model architecture description.
+     * When employing OCR_HOLISTIC_BACKEND_CAFFE this is the path to the deploy ".prototxt".
+     *
+     * @param weightsFilename is the path to the pretrained weights of the model. When employing
+     * OCR_HOLISTIC_BACKEND_CAFFE this is the path to the ".caffemodel" file.
+     *
+     * @param backEnd integer parameter selecting the computation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
+     * the only option
+     */
+    CV_WRAP static Ptr<DeepCNNTextDetector> createTextBoxNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE);
+    friend class ImagePreprocessor;
+
+};
+
+/** @brief textDetector class provides the functionality of text bounding box detection.
+ * A TextRegionDetector is employed to find bounding boxes of text
+ * words given an input image.
+ *
+ * This class implements the logic of providing text bounding boxes in a vector of rects given a TextRegionDetector
+ * The TextRegionDetector can be any text detector
+ *
+ */

 class CV_EXPORTS_W textDetector : public BaseDetector
 {
@@ -125,9 +237,9 @@ public:



-   /** @brief simple getter for the preprocessing functor
+    /** @brief simple getter for the underlying text region detector
     */
-    CV_WRAP virtual Ptr<TextImageClassifier> getClassifier()=0;
+    CV_WRAP virtual Ptr<TextRegionDetector> getClassifier()=0;

    /** @brief Creates an instance of the textDetector class.

@@ -135,7 +247,7 @@ public:


     */
-    CV_WRAP static Ptr<textDetector> create(Ptr<TextImageClassifier> classifierPtr);
+    CV_WRAP static Ptr<textDetector> create(Ptr<TextRegionDetector> classifierPtr);


    /** @brief Creates an instance of the textDetector class and implicitly also a DeepCNN classifier.