Adds example on segmented word recognition. Shows the use of the OCRHMMDecoder with the NM and CNN default classifiers.

2025-10-21 23:01:45 +08:00 · 2015-08-03 20:50:33 +02:00
parent ee677a255b
commit 2538bf74a6
11 changed files with 116 additions and 0 deletions
--- a/modules/text/samples/segmented_word_recognition.cpp
+++ b/modules/text/samples/segmented_word_recognition.cpp
@@ -0,0 +1,116 @@
+/*
+ * segmented_word_recognition.cpp
+ *
+ * A demo program on segmented word recognition.
+ * Shows the use of the OCRHMMDecoder API with the two provided default character classifiers.
+ *
+ * Created on: Jul 31, 2015
+ *     Author: Lluis Gomez i Bigorda <lgomez AT cvc.uab.es>
+ */
+
+#include "opencv2/text.hpp"
+#include "opencv2/core/utility.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/imgproc.hpp"
+
+#include <iostream>
+
+using namespace std;
+using namespace cv;
+using namespace text;
+
+
+int main(int argc, char* argv[]) {
+
+    const String keys =
+      "{help h usage ? |      | print this message.}"
+      "{@image         |      | source image for recognition.}"
+      "{@mask          |      | binary segmentation mask where each contour is a character.}"
+      "{lexicon lex l  |      | (optional) lexicon provided as a list of comma separated words.}"
+      ;
+    CommandLineParser parser(argc, argv, keys);
+
+    parser.about("\nSegmented word recognition.\nA demo program on segmented word recognition. Shows the use of the OCRHMMDecoder API with the two provided default character classifiers.\n");
+
+    String filename1 = parser.get<String>(0);
+    String filename2 = parser.get<String>(1);
+
+    parser.printMessage();
+    cout << endl << endl;
+    if ((parser.has("help")) || (filename1.size()==0))
+    {
+        return 0;
+    }
+    if (!parser.check())
+    {
+        parser.printErrors();
+        return 0;
+    }
+
+    Mat image = imread(filename1);
+    Mat mask;
+    if (filename2.size() > 0)
+      mask = imread(filename2);
+    else
+      image.copyTo(mask);
+
+    // be sure the mask is a binry image
+    cvtColor(mask, mask, COLOR_BGR2GRAY);
+    threshold(mask, mask, 128., 255, THRESH_BINARY);
+
+    // character recognition vocabulary
+    string voc = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+    // Emission probabilities for the HMM language model (identity matrix by default)
+    Mat emissionProbabilities = Mat::eye((int)voc.size(), (int)voc.size(), CV_64FC1);
+    // Bigram transition probabilities for the HMM language model
+    Mat transitionProbabilities;
+
+    string lex = parser.get<string>("lex");
+    if (lex.size()>0)
+    {
+        // Build tailored language model for the provided lexicon
+        vector<string> lexicon;
+        size_t pos = 0;
+        string delimiter = ",";
+        std::string token;
+        while ((pos = lex.find(delimiter)) != std::string::npos) {
+            token = lex.substr(0, pos);
+            lexicon.push_back(token);
+            lex.erase(0, pos + delimiter.length());
+        }
+        lexicon.push_back(lex);
+        createOCRHMMTransitionsTable(voc,lexicon,transitionProbabilities);
+    } else {
+        // Or load the generic language model (from Aspell English dictionary)
+        FileStorage fs("./OCRHMM_transitions_table.xml", FileStorage::READ);
+        fs["transition_probabilities"] >> transitionProbabilities;
+        fs.release();
+    }
+
+    Ptr<OCRTesseract>  ocrTes = OCRTesseract::create();
+
+    Ptr<OCRHMMDecoder> ocrNM  = OCRHMMDecoder::create(
+                                 loadOCRHMMClassifierNM("./OCRHMM_knn_model_data.xml.gz"),
+                                 voc, transitionProbabilities, emissionProbabilities);
+
+    Ptr<OCRHMMDecoder> ocrCNN = OCRHMMDecoder::create(
+                                 loadOCRHMMClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz"),
+                                 voc, transitionProbabilities, emissionProbabilities);
+
+    std::string output;
+    double t_r = getTickCount();
+    ocrTes->run(mask, output);
+    output.erase(remove(output.begin(), output.end(), '\n'), output.end());
+    cout << " OCR_Tesseract  output \"" << output << "\". Done in "
+         << ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl;
+
+    t_r = getTickCount();
+    ocrNM->run(mask, output);
+    cout << " OCR_NM         output \"" << output << "\". Done in "
+         << ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl;
+
+    t_r = getTickCount();
+    ocrCNN->run(image, mask, output);
+    cout << " OCR_CNN        output \"" << output << "\". Done in "
+         << ((double)getTickCount() - t_r)*1000/getTickFrequency() << " ms." << endl;
+}