text python bindings
@@ -1,4 +1,4 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
/*M//////////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
@@ -62,7 +62,7 @@ enum
};

//base class BaseOCR declares a common API that would be used in a typical text recognition scenario
class CV_EXPORTS BaseOCR
class CV_EXPORTS_W BaseOCR
{
public:
virtual ~BaseOCR() {};
@@ -86,7 +86,7 @@ Notice that it is compiled only when tesseract-ocr is correctly installed.
found at the webcam_demo:
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
*/
class CV_EXPORTS OCRTesseract : public BaseOCR
class CV_EXPORTS_W OCRTesseract : public BaseOCR
{
public:
/** @brief Recognize text using the tesseract-ocr API.
@@ -113,6 +113,14 @@ public:
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0);

// aliases for scripting
CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);

CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);

CV_WRAP virtual void setWhiteList(const String& char_whitelist) = 0;


/** @brief Creates an instance of the OCRTesseract class. Initializes Tesseract.

@param datapath the name of the parent directory of tessdata ended with "/", or NULL to use the
@@ -127,7 +135,7 @@ public:
(fully automatic layout analysis) is used. See the tesseract-ocr API documentation for other
possible values.
*/
static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL,
CV_WRAP static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL,
const char* char_whitelist=NULL, int oem=3, int psmode=3);
};
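With BaseOCR and OCRTesseract now marked CV_EXPORTS_W and the String-based run()/create() overloads marked CV_WRAP, the binding generator can expose the class to Python. A minimal usage sketch, assuming the wrappers land in the cv2.text submodule with the usual ClassName_create naming, that tesseract-ocr is available, and a hypothetical test image name:

    import cv2

    # Assumed generated binding: cv2.text.OCRTesseract_create(); with no arguments
    # it falls back to the default tessdata path and the "eng" language.
    ocr = cv2.text.OCRTesseract_create()

    img = cv2.imread("scenetext01.jpg")  # hypothetical 8-bit colour or grayscale image

    # New scripting alias run(image, min_confidence[, component_level]): returns the
    # concatenated text of all components whose confidence exceeds min_confidence.
    print(ocr.run(img, 50))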

@@ -146,7 +154,7 @@ enum decoder_mode
be found at the webcam_demo sample:
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/webcam_demo.cpp>
*/
class CV_EXPORTS OCRHMMDecoder : public BaseOCR
class CV_EXPORTS_W OCRHMMDecoder : public BaseOCR
{
public:

@@ -159,7 +167,7 @@ public:
loadOCRHMMClassifierNM and KNN model provided in
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_knn_model_data.xml.gz>.
*/
class CV_EXPORTS ClassifierCallback
class CV_EXPORTS_W ClassifierCallback
{
public:
virtual ~ClassifierCallback() { }
@@ -227,6 +235,11 @@ public:
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0);

// aliases for scripting
CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);

CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);

/** @brief Creates an instance of the OCRHMMDecoder class. Initializes HMMDecoder.

@param classifier The character classifier with built in feature extractor.
@@ -252,6 +265,15 @@ public:
// cols == rows == vocabulari.size()
decoder_mode mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment)

CV_WRAP static Ptr<OCRHMMDecoder> create(const Ptr<OCRHMMDecoder::ClassifierCallback> classifier,// The character classifier with built in feature extractor
const String& vocabulary, // The language vocabulary (chars when ascii english text)
// size() must be equal to the number of classes
InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
// cols == rows == vocabulari.size()
InputArray emission_probabilities_table, // Table with observation emission probabilities
// cols == rows == vocabulari.size()
int mode = OCR_DECODER_VITERBI); // HMM Decoding algorithm (only Viterbi for the moment)

protected:

Ptr<OCRHMMDecoder::ClassifierCallback> classifier;
@@ -272,7 +294,8 @@ based on gradient orientations along the chain-code of its perimeter. Then, the
using a KNN model trained with synthetic data of rendered characters with different standard font
types.
*/
CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string& filename);

CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename);

/** @brief Allow to implicitly load the default character classifier when creating an OCRHMMDecoder object.

@@ -283,7 +306,7 @@ Andrew NG in [Coates11a]. The character classifier consists in a Single Layer Co
a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions
at each window location.
*/
CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const std::string& filename);
CV_EXPORTS_W Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const String& filename);

//! @}

@@ -299,9 +322,11 @@ CV_EXPORTS Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const
* @note
* - (C++) An alternative would be to load the default generic language transition table provided in the text module samples folder (created from ispell 42869 english words list) :
* <https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRHMM_transitions_table.xml>
* */
**/
CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vector<std::string>& lexicon, OutputArray transition_probabilities_table);

CV_EXPORTS_W Mat createOCRHMMTransitionsTable(const String& vocabulary, std::vector<cv::String>& lexicon);
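Together with the CV_WRAP create() overload taking an int mode and the wrapped loadOCRHMMClassifierNM/CNN loaders, the Mat-returning createOCRHMMTransitionsTable() makes the whole OCRHMMDecoder pipeline scriptable. A sketch under the same cv2.text assumption; the vocabulary and toy lexicon are illustrative, the word image name is hypothetical, and OCRHMM_knn_model_data.xml.gz is the sample model referenced above:

    import cv2
    import numpy as np

    vocabulary = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    lexicon = ["opencv", "text", "binding"]  # toy word list for the language model

    # New wrapped overload: returns the transition matrix instead of filling an OutputArray.
    transition_p = cv2.text.createOCRHMMTransitionsTable(vocabulary, lexicon)
    emission_p = np.eye(len(vocabulary), dtype=np.float64)  # identity emissions, as in the module samples

    classifier = cv2.text.loadOCRHMMClassifierNM("OCRHMM_knn_model_data.xml.gz")
    decoder = cv2.text.OCRHMMDecoder_create(classifier, vocabulary, transition_p, emission_p)

    # A binarized single-word crop works best for the NM/KNN classifier.
    word = cv2.imread("scenetext_word01.jpg", cv2.IMREAD_GRAYSCALE)  # hypothetical image
    print(decoder.run(word, 0))  # min_confidence=0 keeps every recognized component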

/* OCR BeamSearch Decoder */

@@ -312,7 +337,7 @@ CV_EXPORTS void createOCRHMMTransitionsTable(std::string& vocabulary, std::vecto
be found at the demo sample:
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/word_recognition.cpp>
*/
class CV_EXPORTS OCRBeamSearchDecoder : public BaseOCR
class CV_EXPORTS_W OCRBeamSearchDecoder : public BaseOCR
{
public:

@@ -325,7 +350,7 @@ public:
loadOCRBeamSearchClassifierCNN with all its parameters provided in
<https://github.com/Itseez/opencv_contrib/blob/master/modules/text/samples/OCRBeamSearch_CNN_model_data.xml.gz>.
*/
class CV_EXPORTS ClassifierCallback
class CV_EXPORTS_W ClassifierCallback
{
public:
virtual ~ClassifierCallback() { }
@@ -350,7 +375,7 @@ public:
provides also the Rects for individual text elements found (e.g. words), and the list of those
text elements with their confidence values.

@param image Input image CV_8UC1 with a single text line (or word).
@param image Input binary image CV_8UC1 with a single text line (or word).

@param output_text Output text. Most likely character sequence found by the HMM decoder.

@@ -373,6 +398,11 @@ public:
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
int component_level=0);

// aliases for scripting
CV_WRAP String run(InputArray image, int min_confidence, int component_level=0);

CV_WRAP String run(InputArray image, InputArray mask, int min_confidence, int component_level=0);

/** @brief Creates an instance of the OCRBeamSearchDecoder class. Initializes HMMDecoder.

@param classifier The character classifier with built in feature extractor.
@@ -401,6 +431,16 @@ public:
decoder_mode mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment)
int beam_size = 500); // Size of the beam in Beam Search algorithm

CV_WRAP static Ptr<OCRBeamSearchDecoder> create(const Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier, // The character classifier with built in feature extractor
const String& vocabulary, // The language vocabulary (chars when ascii english text)
// size() must be equal to the number of classes
InputArray transition_probabilities_table, // Table with transition probabilities between character pairs
// cols == rows == vocabulari.size()
InputArray emission_probabilities_table, // Table with observation emission probabilities
// cols == rows == vocabulari.size()
int mode = OCR_DECODER_VITERBI, // HMM Decoding algorithm (only Viterbi for the moment)
int beam_size = 500); // Size of the beam in Beam Search algorithm

protected:

Ptr<OCRBeamSearchDecoder::ClassifierCallback> classifier;
@@ -420,7 +460,8 @@ Andrew NG in [Coates11a]. The character classifier consists in a Single Layer Co
a linear classifier. It is applied to the input image in a sliding window fashion, providing a set of recognitions
at each window location.
*/
CV_EXPORTS Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const std::string& filename);

CV_EXPORTS_W Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const String& filename);
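OCRBeamSearchDecoder gets the same treatment (CV_EXPORTS_W class, run() aliases, int-mode create(), String-based classifier loader). A possible Python round trip, again assuming cv2.text naming; the uniform transition table is only a placeholder for a smoke test, the image name is hypothetical, and OCRBeamSearch_CNN_model_data.xml.gz is the sample model mentioned above:

    import cv2
    import numpy as np

    vocabulary = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    n = len(vocabulary)

    classifier = cv2.text.loadOCRBeamSearchClassifierCNN("OCRBeamSearch_CNN_model_data.xml.gz")
    transition_p = np.full((n, n), 1.0 / n, dtype=np.float64)  # uniform placeholder transitions
    emission_p = np.eye(n, dtype=np.float64)

    # Defaults per the declaration above: mode=OCR_DECODER_VITERBI, beam_size=500.
    beam = cv2.text.OCRBeamSearchDecoder_create(classifier, vocabulary,
                                                transition_p, emission_p)

    word = cv2.imread("scenetext_word01.jpg")  # hypothetical word image, CV_8UC1 or CV_8UC3
    print(beam.run(word, 30))  # keep components scoring above 30% confidence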

//! @}

@@ -88,6 +88,45 @@ void OCRBeamSearchDecoder::run(Mat& image, Mat& mask, string& output_text, vecto
component_confidences->clear();
}

CV_WRAP String OCRBeamSearchDecoder::run(InputArray image, int min_confidence, int component_level)
{
std::string output1;
std::string output2;
vector<string> component_texts;
vector<float> component_confidences;
Mat image_m = image.getMat();
run(image_m, output1, NULL, &component_texts, &component_confidences, component_level);
for(unsigned int i = 0; i < component_texts.size(); i++)
{
//cout << "confidence: " << component_confidences[i] << " text:" << component_texts[i] << endl;
if(component_confidences[i] > min_confidence)
{
output2 += component_texts[i];
}
}
return String(output2);
}

CV_WRAP String OCRBeamSearchDecoder::run(InputArray image, InputArray mask, int min_confidence, int component_level)
{
std::string output1;
std::string output2;
vector<string> component_texts;
vector<float> component_confidences;
Mat image_m = image.getMat();
Mat mask_m = mask.getMat();
run(image_m, mask_m, output1, NULL, &component_texts, &component_confidences, component_level);
for(unsigned int i = 0; i < component_texts.size(); i++)
{
//cout << "confidence: " << component_confidences[i] << " text:" << component_texts[i] << endl;
if(component_confidences[i] > min_confidence)
{
output2 += component_texts[i];
}
}
return String(output2);
}


void OCRBeamSearchDecoder::ClassifierCallback::eval( InputArray image, vector< vector<double> >& recognition_probabilities, vector<int>& oversegmentation)
{
@@ -460,6 +499,16 @@ Ptr<OCRBeamSearchDecoder> OCRBeamSearchDecoder::create( Ptr<OCRBeamSearchDecoder
return makePtr<OCRBeamSearchDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, _mode, _beam_size);
}

CV_EXPORTS_W Ptr<OCRBeamSearchDecoder> OCRBeamSearchDecoder::create(Ptr<OCRBeamSearchDecoder::ClassifierCallback> _classifier,
const String& _vocabulary,
InputArray transition_p,
InputArray emission_p,
int _mode,
int _beam_size)
{
return makePtr<OCRBeamSearchDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, (decoder_mode)_mode, _beam_size);
}


class CV_EXPORTS OCRBeamSearchClassifierCNN : public OCRBeamSearchDecoder::ClassifierCallback
{
@@ -727,11 +776,10 @@ double OCRBeamSearchClassifierCNN::eval_feature(Mat& feature, double* prob_estim
return dec_max_idx;
}


Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const std::string& filename)
Ptr<OCRBeamSearchDecoder::ClassifierCallback> loadOCRBeamSearchClassifierCNN(const String& filename)

{
return makePtr<OCRBeamSearchClassifierCNN>(filename);
return makePtr<OCRBeamSearchClassifierCNN>(std::string(filename));
}

}

@@ -90,6 +90,46 @@ void OCRHMMDecoder::run(Mat& image, Mat& mask, string& output_text, vector<Rect>
component_confidences->clear();
}

CV_WRAP String OCRHMMDecoder::run(InputArray image, int min_confidence, int component_level)
{
std::string output1;
std::string output2;
vector<string> component_texts;
vector<float> component_confidences;
Mat image_m = image.getMat();
run(image_m, output1, NULL, &component_texts, &component_confidences, component_level);
for(unsigned int i = 0; i < component_texts.size(); i++)
{
//cout << "confidence: " << component_confidences[i] << " text:" << component_texts[i] << endl;
if(component_confidences[i] > min_confidence)
{
output2 += component_texts[i];
}
}
return String(output2);
}

CV_WRAP cv::String OCRHMMDecoder::run(InputArray image, InputArray mask, int min_confidence, int component_level)
{
std::string output1;
std::string output2;
vector<string> component_texts;
vector<float> component_confidences;
Mat image_m = image.getMat();
Mat mask_m = mask.getMat();
run(image_m, mask_m, output1, NULL, &component_texts, &component_confidences, component_level);
for(unsigned int i = 0; i < component_texts.size(); i++)
{
cout << "confidence: " << component_confidences[i] << " text:" << component_texts[i] << endl;

if(component_confidences[i] > min_confidence)
{
output2 += component_texts[i];
}
}
return String(output2);
}

void OCRHMMDecoder::ClassifierCallback::eval( InputArray image, vector<int>& out_class, vector<double>& out_confidence)
{
CV_Assert(( image.getMat().type() == CV_8UC3 ) || ( image.getMat().type() == CV_8UC1 ));
@@ -635,6 +675,16 @@ Ptr<OCRHMMDecoder> OCRHMMDecoder::create( Ptr<OCRHMMDecoder::ClassifierCallback>
}


Ptr<OCRHMMDecoder> OCRHMMDecoder::create( Ptr<OCRHMMDecoder::ClassifierCallback> _classifier,
const String& _vocabulary,
InputArray transition_p,
InputArray emission_p,
int _mode)
{
return makePtr<OCRHMMDecoderImpl>(_classifier, _vocabulary, transition_p, emission_p, (decoder_mode)_mode);
}


class CV_EXPORTS OCRHMMClassifierKNN : public OCRHMMDecoder::ClassifierCallback
{
public:
@@ -867,14 +917,12 @@ void OCRHMMClassifierKNN::eval( InputArray _mask, vector<int>& out_class, vector
}


Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const std::string& filename)
Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierNM(const String& filename)

{
return makePtr<OCRHMMClassifierKNN>(filename);
return makePtr<OCRHMMClassifierKNN>(std::string(filename));
}



class CV_EXPORTS OCRHMMClassifierCNN : public OCRHMMDecoder::ClassifierCallback
{
public:
@@ -1139,10 +1187,10 @@ double OCRHMMClassifierCNN::eval_feature(Mat& feature, double* prob_estimates)
}


Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const std::string& filename)
Ptr<OCRHMMDecoder::ClassifierCallback> loadOCRHMMClassifierCNN(const String& filename)

{
return makePtr<OCRHMMClassifierCNN>(filename);
return makePtr<OCRHMMClassifierCNN>(std::string(filename));
}

/** @brief Utility function to create a tailored language model transitions table from a given list of words (lexicon).
@@ -1201,5 +1249,17 @@ void createOCRHMMTransitionsTable(string& vocabulary, vector<string>& lexicon, O
return;
}

Mat createOCRHMMTransitionsTable(const String& vocabulary, vector<cv::String>& lexicon)
{
std::string voc(vocabulary);
vector<string> lex;
for(vector<cv::String>::iterator l = lexicon.begin(); l != lexicon.end(); l++)
lex.push_back(std::string(*l));

Mat _transitions;
createOCRHMMTransitionsTable(voc, lex, _transitions);
return _transitions;
}

}
}

@@ -86,6 +86,47 @@ void OCRTesseract::run(Mat& image, Mat& mask, string& output_text, vector<Rect>*
component_confidences->clear();
}

CV_WRAP String OCRTesseract::run(InputArray image, int min_confidence, int component_level)
{
std::string output1;
std::string output2;
vector<string> component_texts;
vector<float> component_confidences;
Mat image_m = image.getMat();
run(image_m, output1, NULL, &component_texts, &component_confidences, component_level);
for(unsigned int i = 0; i < component_texts.size(); i++)
{
// cout << "confidence: " << component_confidences[i] << " text:" << component_texts[i] << endl;
if(component_confidences[i] > min_confidence)
{
output2 += component_texts[i];
}
}
return String(output2);
}

CV_WRAP String OCRTesseract::run(InputArray image, InputArray mask, int min_confidence, int component_level)
{
std::string output1;
std::string output2;
vector<string> component_texts;
vector<float> component_confidences;
Mat image_m = image.getMat();
Mat mask_m = mask.getMat();
run(image_m, mask_m, output1, NULL, &component_texts, &component_confidences, component_level);
for(unsigned int i = 0; i < component_texts.size(); i++)
{
cout << "confidence: " << component_confidences[i] << " text:" << component_texts[i] << endl;

if(component_confidences[i] > min_confidence)
{
output2 += component_texts[i];
}
}
return String(output2);
}


class OCRTesseractImpl : public OCRTesseract
{
private:
@@ -215,13 +256,20 @@ public:
run( mask, output, component_rects, component_texts, component_confidences, component_level);
}


void setWhiteList(const String& char_whitelist)
{
#ifdef HAVE_TESSERACT
tess.SetVariable("tessedit_char_whitelist", char_whitelist.c_str());
#else
(void)char_whitelist;
#endif
}
};

Ptr<OCRTesseract> OCRTesseract::create(const char* datapath, const char* language,
const char* char_whitelist, int oem, int psmode)
{
return makePtr<OCRTesseractImpl>(datapath,language,char_whitelist,oem,psmode);
return makePtr<OCRTesseractImpl>(datapath, language, char_whitelist, oem, psmode);
}
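The setWhiteList() implementation above simply forwards to Tesseract's tessedit_char_whitelist variable (and is a no-op when built without HAVE_TESSERACT). From Python this could look like the following sketch, under the same cv2.text assumption and with a hypothetical image name:

    import cv2

    ocr = cv2.text.OCRTesseract_create()
    ocr.setWhiteList("0123456789")         # restrict recognition to digits
    img = cv2.imread("meter_reading.jpg")  # hypothetical image of a numeric display
    print(ocr.run(img, 0))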