text python bindings
@@ -1,4 +1,4 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
/*M//////////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
@@ -62,7 +62,7 @@ enum
};

//base class BaseOCR declares a common API that would be used in a typical text recognition scenario
class CV_EXPORTS BaseOCR
class CV_EXPORTS_W BaseOCR
{
public:
virtual ~BaseOCR() {};
@@ -86,7 +86,7 @@ Notice that it is compiled only when tesseract-ocr is correctly installed.
found at the webcam_demo:
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
*/
class CV_EXPORTS OCRTesseract : public BaseOCR
class CV_EXPORTS_W OCRTesseract : public BaseOCR
{
public:
/** @brief Recognize text using the tesseract-ocr API.
@@ -113,6 +113,14 @@ public:
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0);

// aliases for scripting
CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);

CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);

CV_WRAP virtual void setWhiteList(const String& char_whitelist) = 0;


/** @brief Creates an instance of the OCRTesseract class. Initializes Tesseract.

@param datapath the name of the parent directory of tessdata ended with "/", or NULL to use the
@@ -127,7 +135,7 @@ public:
(fully automatic layout analysis) is used. See the tesseract-ocr API documentation for other
possible values.
*/
static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL,
CV_WRAP static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL,
const char* char_whitelist=NULL, int oem=3, int psmode=3);
};
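With BaseOCR and OCRTesseract now marked CV_EXPORTS_W and the String-based run()/create() overloads marked CV_WRAP, the binding generator can expose the class to Python. A minimal usage sketch, assuming the wrappers land in the cv2.text submodule with the usual ClassName_create naming, that tesseract-ocr is available, and a hypothetical test image name:

    import cv2

    # Assumed generated binding: cv2.text.OCRTesseract_create(); with no arguments
    # it falls back to the default tessdata path and the "eng" language.
    ocr = cv2.text.OCRTesseract_create()

    img = cv2.imread("scenetext01.jpg")  # hypothetical 8-bit colour or grayscale image

    # New scripting alias run(image, min_confidence[, component_level]): returns the
    # concatenated text of all components whose confidence exceeds min_confidence.
    print(ocr.run(img, 50))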

@@ -146,7 +154,7 @@ enum decoder_mode
be found at the webcam_demo sample:
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
*/
class CV_EXPORTS OCRHMMDecoder : public BaseOCR
class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR
{
public:

@@ -159,7 +167,7 @@ public:
loadOCRHMMClassifierNM and KNN model provided in
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_knn_model_data.xml.gz>.
*/
class CV_EXPORTS ClassifierCallback
class CV_EXPORTS_W ClassifierCallback
{
public:
virtual ~ClassifierCallback() { }
@@ -227,6 +235,11 @@ public:
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0);

// aliases for scripting
CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);

CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);

/** @brief Creates an instance of the OCRHMMDecoder class. Initializes HMMDecoder.

@param classifier The character classifier with built in feature extractor.
@@ -252,6 +265,15 @@ public:
// cols == rows == vocabulari.size()
decoder_mode mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment)

CV_WRAP static Ptr<OCRHMMDecoder> create(const Ptr<OCRHMMDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
const String& vocabulary, // The language vocabulary (chars when ascii english text)
// size() must be equal to the number of classes
InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
// cols == rows == vocabulari.size()
InputArray emission_probabilities_table, // Table with observation emission probabilities
// cols == rows == vocabulari.size()
int mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment)

protected:

Ptr<OCRHMMDecoder::ClassifierCallback> classifier;
@@ -272,7 +294,8 @@ based on gradient orientations along the chain-code of its perimeter. Then, the
using a KNN model trained with synthetic data of rendered characters with different standard font
types.
*/
CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string& filename);

CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename);

/** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object.

@@ -283,7 +306,7 @@ Andrew NG in [Coates11a]. The character classifier consists in a Single Layer Co
a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions
at each window location.
*/
CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const std::string& filename);
CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const String& filename);

//! @}

@@ -299,9 +322,11 @@ CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const
* @note
* - (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) :
* <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
* */
**/
CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vector<std::string>& lexicon, OutputArray transition_probabilities_table);

CV_EXPORTS_W Mat createOCRHMMTransitionsTable(const String& vocabulary, std::vector<cv::String>& lexicon);
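Together with the CV_WRAP create() overload taking an int mode and the wrapped loadOCRHMMClassifierNM/CNN loaders, the Mat-returning createOCRHMMTransitionsTable() makes the whole OCRHMMDecoder pipeline scriptable. A sketch under the same cv2.text assumption; the vocabulary and toy lexicon are illustrative, the word image name is hypothetical, and OCRHMM_knn_model_data.xml.gz is the sample model referenced above:

    import cv2
    import numpy as np

    vocabulary = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    lexicon = ["opencv", "text", "binding"]  # toy word list for the language model

    # New wrapped overload: returns the transition matrix instead of filling an OutputArray.
    transition_p = cv2.text.createOCRHMMTransitionsTable(vocabulary, lexicon)
    emission_p = np.eye(len(vocabulary), dtype=np.float64)  # identity emissions, as in the module samples

    classifier = cv2.text.loadOCRHMMClassifierNM("OCRHMM_knn_model_data.xml.gz")
    decoder = cv2.text.OCRHMMDecoder_create(classifier, vocabulary, transition_p, emission_p)

    # A binarized single-word crop works best for the NM/KNN classifier.
    word = cv2.imread("scenetext_word01.jpg", cv2.IMREAD_GRAYSCALE)  # hypothetical image
    print(decoder.run(word, 0))  # min_confidence=0 keeps every recognized component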

/* OCR BeamSearch Decoder */

@@ -312,7 +337,7 @@ CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vecto
be found at the demo sample:
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/word_recognition.cpp>
*/
class CV_EXPORTS OCRBeamSearchDecoder : public BaseOCR
class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR
{
public:

@@ -325,7 +350,7 @@ public:
loadOCRBeamSearchClassifierCNN with all its parameters provided in
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz>.
*/
class CV_EXPORTS ClassifierCallback
class CV_EXPORTS_W ClassifierCallback
{
public:
virtual ~ClassifierCallback() { }
@@ -350,7 +375,7 @@ public:
provides also the Rects for individual text elements found (e.g. words), and the list of those
text elements with their confidence values.

@param image Input image CV_8UC1 with a single text line (or word).
@param image Input binary image CV_8UC1 with a single text line (or word).

@param output_text Output text. Most likely character sequence found by the HMM decoder.

@@ -373,6 +398,11 @@ public:
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0);

// aliases for scripting
CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);

CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);

/** @brief Creates an instance of the OCRBeamSearchDecoder class. Initializes HMMDecoder.

@param classifier The character classifier with built in feature extractor.
@@ -401,6 +431,16 @@ public:
decoder_mode mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment)
int beam_size = 500); // Size of the beam in Beam Search algorithm

CV_WRAP static Ptr<OCRBeamSearchDecoder> create(const Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier, // The character classifier with built in feature extractor
const String& vocabulary, // The language vocabulary (chars when ascii english text)
// size() must be equal to the number of classes
InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
// cols == rows == vocabulari.size()
InputArray emission_probabilities_table, // Table with observation emission probabilities
// cols == rows == vocabulari.size()
int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment)
int beam_size = 500); // Size of the beam in Beam Search algorithm

protected:

Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier;
@@ -420,7 +460,8 @@ Andrew NG in [Coates11a]. The character classifier consists in a Single Layer Co
a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions
at each window location.
*/
CV_EXPORTS Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const std::string& filename);

CV_EXPORTS_W Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const String& filename);
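OCRBeamSearchDecoder gets the same treatment (CV_EXPORTS_W class, run() aliases, int-mode create(), String-based classifier loader). A possible Python round trip, again assuming cv2.text naming; the uniform transition table is only a placeholder for a smoke test, the image name is hypothetical, and OCRBeamSearch_CNN_model_data.xml.gz is the sample model mentioned above:

    import cv2
    import numpy as np

    vocabulary = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    n = len(vocabulary)

    classifier = cv2.text.loadOCRBeamSearchClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz")
    transition_p = np.full((n, n), 1.0 / n, dtype=np.float64)  # uniform placeholder transitions
    emission_p = np.eye(n, dtype=np.float64)

    # Defaults per the declaration above: mode=OCR_DECODER_VITERBI, beam_size=500.
    beam = cv2.text.OCRBeamSearchDecoder_create(classifier, vocabulary,
                                                transition_p, emission_p)

    word = cv2.imread("scenetext_word01.jpg")  # hypothetical word image, CV_8UC1 or CV_8UC3
    print(beam.run(word, 30))  # keep components scoring above 30% confidence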

//! @}

@@ -88,6 +88,45 @@ void OCRBeamSearchDecoder::run(Mat& image, Mat& mask, string& output_text, vecto
component_confidences->clear();
}

CV_WRAP String OCRBeamSearchDecoder::run(InputArray image, int min_confidence, int component_level)
{
std::string output1;
std::string output2;
vector<string> component_texts;
vector<float> component_confidences;
Mat image_m = image.getMat();
run(image_m, output1, NULL, &component_texts, &component_confidences, component_level);
for(unsigned int i = 0; i < component_texts.size(); i++)
{
//cout << "confidence: " << component_confidences[i] << " text:" << component_texts[i] << endl;
if(component_confidences[i] > min_confidence)
{
output2 += component_texts[i];
}
}
return String(output2);
}

CV_WRAP String OCRBeamSearchDecoder::run(InputArray image, InputArray mask, int min_confidence, int component_level)
{
std::string output1;
std::string output2;
vector<string> component_texts;
vector<float> component_confidences;
Mat image_m = image.getMat();
Mat mask_m = mask.getMat();
run(image_m, mask_m, output1, NULL, &component_texts, &component_confidences, component_level);
for(unsigned int i = 0; i < component_texts.size(); i++)
{
//cout << "confidence: " << component_confidences[i] << " text:" << component_texts[i] << endl;
if(component_confidences[i] > min_confidence)
{
output2 += component_texts[i];
}
}
return String(output2);
}


void OCRBeamSearchDecoder::ClassifierCallback::eval( InputArray image, vector< vector<double> >& recognition_probabilities, vector<int>& oversegmentation)
{
@@ -460,6 +499,16 @@ Ptr<OCRBeamSearchDecoder> OCRBeamSearchDecoder::create( Ptr<OCRBeamSearchDecoder
return makePtr<OCRBeamSearchDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, _mode, _beam_size);
}

CV_EXPORTS_W Ptr<OCRBeamSearchDecoder> OCRBeamSearchDecoder::create(Ptr<OCRBeamSearchDecoder::ClassifierCallback> _classifier,
const String& _vocabulary,
InputArray transition_p,
InputArray emission_p,
int _mode,
int _beam_size)
{
return makePtr<OCRBeamSearchDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, (decoder_mode)_mode, _beam_size);
}


class CV_EXPORTS OCRBeamSearchClassifierCNN : public OCRBeamSearchDecoder::ClassifierCallback
{
@@ -727,11 +776,10 @@ double OCRBeamSearchClassifierCNN::eval_feature(Mat& feature, double* prob_estim
return dec_max_idx;
}


Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const std::string& filename)
Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const String& filename)

{
return makePtr<OCRBeamSearchClassifierCNN>(filename);
return makePtr<OCRBeamSearchClassifierCNN>(std::string(filename));
}

}

@@ -90,6 +90,46 @@ void OCRHMMDecoder::run(Mat& image, Mat& mask, string& output_text, vector<Rect>
component_confidences->clear();
}

CV_WRAP String OCRHMMDecoder::run(InputArray image, int min_confidence, int component_level)
{
std::string output1;
std::string output2;
vector<string> component_texts;
vector<float> component_confidences;
Mat image_m = image.getMat();
run(image_m, output1, NULL, &component_texts, &component_confidences, component_level);
for(unsigned int i = 0; i < component_texts.size(); i++)
{
//cout << "confidence: " << component_confidences[i] << " text:" << component_texts[i] << endl;
if(component_confidences[i] > min_confidence)
{
output2 += component_texts[i];
}
}
return String(output2);
}

CV_WRAP cv::String OCRHMMDecoder::run(InputArray image, InputArray mask, int min_confidence, int component_level)
{
std::string output1;
std::string output2;
vector<string> component_texts;
vector<float> component_confidences;
Mat image_m = image.getMat();
Mat mask_m = mask.getMat();
run(image_m, mask_m, output1, NULL, &component_texts, &component_confidences, component_level);
for(unsigned int i = 0; i < component_texts.size(); i++)
{
cout << "confidence: " << component_confidences[i] << " text:" << component_texts[i] << endl;

if(component_confidences[i] > min_confidence)
{
output2 += component_texts[i];
}
}
return String(output2);
}

void OCRHMMDecoder::ClassifierCallback::eval( InputArray image, vector<int>& out_class, vector<double>& out_confidence)
{
CV_Assert(( image.getMat().type() == CV_8UC3 ) || ( image.getMat().type() == CV_8UC1 ));
@@ -635,6 +675,16 @@ Ptr<OCRHMMDecoder> OCRHMMDecoder::create( Ptr<OCRHMMDecoder::ClassifierCallback>
}


Ptr<OCRHMMDecoder> OCRHMMDecoder::create( Ptr<OCRHMMDecoder::ClassifierCallback> _classifier,
const String& _vocabulary,
InputArray transition_p,
InputArray emission_p,
int _mode)
{
return makePtr<OCRHMMDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, (decoder_mode)_mode);
}


class CV_EXPORTS OCRHMMClassifierKNN : public OCRHMMDecoder::ClassifierCallback
{
public:
@@ -867,14 +917,12 @@ void OCRHMMClassifierKNN::eval( InputArray _mask, vector<int>& out_class, vector
}


Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string& filename)
Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename)

{
return makePtr<OCRHMMClassifierKNN>(filename);
return makePtr<OCRHMMClassifierKNN>(std::string(filename));
}



class CV_EXPORTS OCRHMMClassifierCNN : public OCRHMMDecoder::ClassifierCallback
{
public:
@@ -1139,10 +1187,10 @@ double OCRHMMClassifierCNN::eval_feature(Mat& feature, double* prob_estimates)
}


Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const std::string& filename)
Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const String& filename)

{
return makePtr<OCRHMMClassifierCNN>(filename);
return makePtr<OCRHMMClassifierCNN>(std::string(filename));
}

/** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon).
@@ -1201,5 +1249,17 @@ void createOCRHMMTransitionsTable(string& vocabulary, vector<string>& lexicon, O
return;
}

Mat createOCRHMMTransitionsTable(const String& vocabulary, vector<cv::String>& lexicon)
{
std::string voc(vocabulary);
vector<string> lex;
for(vector<cv::String>::iterator l = lexicon.begin(); l != lexicon.end(); l++)
lex.push_back(std::string(*l));

Mat _transitions;
createOCRHMMTransitionsTable(voc, lex, _transitions);
return _transitions;
}

}
}

@@ -86,6 +86,47 @@ void OCRTesseract::run(Mat& image, Mat& mask, string& output_text, vector<Rect>*
component_confidences->clear();
}

CV_WRAP String OCRTesseract::run(InputArray image, int min_confidence, int component_level)
{
std::string output1;
std::string output2;
vector<string> component_texts;
vector<float> component_confidences;
Mat image_m = image.getMat();
run(image_m, output1, NULL, &component_texts, &component_confidences, component_level);
for(unsigned int i = 0; i < component_texts.size(); i++)
{
// cout << "confidence: " << component_confidences[i] << " text:" << component_texts[i] << endl;
if(component_confidences[i] > min_confidence)
{
output2 += component_texts[i];
}
}
return String(output2);
}

CV_WRAP String OCRTesseract::run(InputArray image, InputArray mask, int min_confidence, int component_level)
{
std::string output1;
std::string output2;
vector<string> component_texts;
vector<float> component_confidences;
Mat image_m = image.getMat();
Mat mask_m = mask.getMat();
run(image_m, mask_m, output1, NULL, &component_texts, &component_confidences, component_level);
for(unsigned int i = 0; i < component_texts.size(); i++)
{
cout << "confidence: " << component_confidences[i] << " text:" << component_texts[i] << endl;

if(component_confidences[i] > min_confidence)
{
output2 += component_texts[i];
}
}
return String(output2);
}


class OCRTesseractImpl : public OCRTesseract
{
private:
@@ -215,13 +256,20 @@ public:
run( mask, output, component_rects, component_texts, component_confidences, component_level);
}


void setWhiteList(const String& char_whitelist)
{
#ifdef HAVE_TESSERACT
tess.SetVariable("tessedit_char_whitelist", char_whitelist.c_str());
#else
(void)char_whitelist;
#endif
}
};

Ptr<OCRTesseract> OCRTesseract::create(const char* datapath, const char* language,
const char* char_whitelist, int oem, int psmode)
{
return makePtr<OCRTesseractImpl>(datapath,language,char_whitelist,oem,psmode);
return makePtr<OCRTesseractImpl>(datapath, language, char_whitelist, oem, psmode);
}
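The setWhiteList() implementation above simply forwards to Tesseract's tessedit_char_whitelist variable (and is a no-op when built without HAVE_TESSERACT). From Python this could look like the following sketch, under the same cv2.text assumption and with a hypothetical image name:

    import cv2

    ocr = cv2.text.OCRTesseract_create()
    ocr.setWhiteList("0123456789")         # restrict recognition to digits
    img = cv2.imread("meter_reading.jpg")  # hypothetical image of a numeric display
    print(ocr.run(img, 0))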