mirror of
https://github.com/opencv/opencv_contrib.git
synced 2025-10-19 19:44:14 +08:00
Update doc for text module
This commit is contained in:
34
modules/text/doc/text.bib
Normal file
34
modules/text/doc/text.bib
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
@inproceedings{Neumann12,
|
||||||
|
title={Real-Time Scene Text Localization and Recognition},
|
||||||
|
author={Neumann, L. and Matas, J.},
|
||||||
|
booktitle={Computer Vision and Pattern Recognition (CVPR), 2012 IEEE Conference on},
|
||||||
|
pages={3538--3545},
|
||||||
|
year={2012},
|
||||||
|
organization={IEEE}
|
||||||
|
}
|
||||||
|
@inproceedings{Neumann11,
|
||||||
|
author = {Lukáš Neumann and Jiří Matas},
|
||||||
|
title = {Text localization in real-world images using efficiently pruned exhaustive search},
|
||||||
|
booktitle = {Document Analysis and Recognition, 2011 International Conference on. IEEE, 2011},
|
||||||
|
year = {2011},
|
||||||
|
pages = {687--691}
|
||||||
|
}
|
||||||
|
@inproceedings{Gomez13,
|
||||||
|
author = {G\'{o}mez, Llu\'{\i}s and Karatzas, Dimosthenis},
|
||||||
|
title={Multi-script Text Extraction from Natural Scenes},
|
||||||
|
booktitle = {Proceedings of the 2013 12th International Conference on Document Analysis and Recognition},
|
||||||
|
series = {ICDAR '13},
|
||||||
|
year = {2013},
|
||||||
|
isbn = {978-0-7695-4999-6},
|
||||||
|
pages = {467--471},
|
||||||
|
publisher = {IEEE Computer Society}
|
||||||
|
}
|
||||||
|
@article{Gomez14,
|
||||||
|
author = {Lluis Gomez i Bigorda and
|
||||||
|
Dimosthenis Karatzas},
|
||||||
|
title = {A Fast Hierarchical Method for Multi-script and Arbitrary Oriented
|
||||||
|
Scene Text Extraction},
|
||||||
|
journal = {CoRR},
|
||||||
|
volume = {abs/1407.7504},
|
||||||
|
year = {2014},
|
||||||
|
}
|
@@ -54,7 +54,7 @@ Class-specific Extremal Regions for Scene Text Detection
|
|||||||
--------------------------------------------------------
|
--------------------------------------------------------
|
||||||
|
|
||||||
The scene text detection algorithm described below has been initially proposed by Lukáš Neumann &
|
The scene text detection algorithm described below has been initially proposed by Lukáš Neumann &
|
||||||
Jiri Matas [Neumann12]. The main idea behind Class-specific Extremal Regions is similar to the MSER
|
Jiří Matas @cite Neumann12. The main idea behind Class-specific Extremal Regions is similar to the MSER
|
||||||
in that suitable Extremal Regions (ERs) are selected from the whole component tree of the image.
|
in that suitable Extremal Regions (ERs) are selected from the whole component tree of the image.
|
||||||
However, this technique differs from MSER in that selection of suitable ERs is done by a sequential
|
However, this technique differs from MSER in that selection of suitable ERs is done by a sequential
|
||||||
classifier trained for character detection, i.e. dropping the stability requirement of MSERs and
|
classifier trained for character detection, i.e. dropping the stability requirement of MSERs and
|
||||||
@@ -87,9 +87,9 @@ order to increase the character localization recall.
|
|||||||
|
|
||||||
After the ER filtering is done on each input channel, character candidates must be grouped in
|
After the ER filtering is done on each input channel, character candidates must be grouped in
|
||||||
high-level text blocks (i.e. words, text lines, paragraphs, ...). The opencv_text module implements
|
high-level text blocks (i.e. words, text lines, paragraphs, ...). The opencv_text module implements
|
||||||
two different grouping algorithms: the Exhaustive Search algorithm proposed in [Neumann11] for
|
two different grouping algorithms: the Exhaustive Search algorithm proposed in @cite Neumann11 for
|
||||||
grouping horizontally aligned text, and the method proposed by Lluis Gomez and Dimosthenis Karatzas
|
grouping horizontally aligned text, and the method proposed by Lluis Gomez and Dimosthenis Karatzas
|
||||||
in [Gomez13][Gomez14] for grouping arbitrary oriented text (see erGrouping).
|
in @cite Gomez13 @cite Gomez14 for grouping arbitrary oriented text (see erGrouping).
|
||||||
|
|
||||||
To see the text detector at work, have a look at the textdetection demo:
|
To see the text detector at work, have a look at the textdetection demo:
|
||||||
<https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>
|
<https://github.com/opencv/opencv_contrib/blob/master/modules/text/samples/textdetection.cpp>
|
||||||
|
@@ -111,7 +111,7 @@ public:
|
|||||||
ERStat* min_probability_ancestor;
|
ERStat* min_probability_ancestor;
|
||||||
};
|
};
|
||||||
|
|
||||||
/** @brief Base class for 1st and 2nd stages of Neumann and Matas scene text detection algorithm [Neumann12]. :
|
/** @brief Base class for 1st and 2nd stages of Neumann and Matas scene text detection algorithm @cite Neumann12. :
|
||||||
|
|
||||||
Extracts the component tree (if needed) and filter the extremal regions (ER's) by using a given classifier.
|
Extracts the component tree (if needed) and filter the extremal regions (ER's) by using a given classifier.
|
||||||
*/
|
*/
|
||||||
@@ -163,31 +163,8 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
/*!
|
|
||||||
Create an Extremal Region Filter for the 1st stage classifier of N&M algorithm
|
|
||||||
Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012
|
|
||||||
|
|
||||||
The component tree of the image is extracted by a threshold increased step by step
|
/** @brief Create an Extremal Region Filter for the 1st stage classifier of N&M algorithm @cite Neumann12.
|
||||||
from 0 to 255, incrementally computable descriptors (aspect_ratio, compactness,
|
|
||||||
number of holes, and number of horizontal crossings) are computed for each ER
|
|
||||||
and used as features for a classifier which estimates the class-conditional
|
|
||||||
probability P(er|character). The value of P(er|character) is tracked using the inclusion
|
|
||||||
relation of ER across all thresholds and only the ERs which correspond to local maximum
|
|
||||||
of the probability P(er|character) are selected (if the local maximum of the
|
|
||||||
probability is above a global limit pmin and the difference between local maximum and
|
|
||||||
local minimum is greater than minProbabilityDiff).
|
|
||||||
|
|
||||||
@param cb – Callback with the classifier. Default classifier can be implicitly load with function
|
|
||||||
loadClassifierNM1(), e.g. from file in samples/cpp/trained_classifierNM1.xml
|
|
||||||
@param thresholdDelta – Threshold step in subsequent thresholds when extracting the component tree
|
|
||||||
@param minArea – The minimum area (% of image size) allowed for retreived ER’s
|
|
||||||
@param maxArea – The maximum area (% of image size) allowed for retreived ER’s
|
|
||||||
@param minProbability – The minimum probability P(er|character) allowed for retreived ER’s
|
|
||||||
@param nonMaxSuppression – Whenever non-maximum suppression is done over the branch probabilities
|
|
||||||
@param minProbabilityDiff – The minimum probability difference between local maxima and local minima ERs
|
|
||||||
*/
|
|
||||||
|
|
||||||
/** @brief Create an Extremal Region Filter for the 1st stage classifier of N&M algorithm [Neumann12].
|
|
||||||
|
|
||||||
@param cb : Callback with the classifier. Default classifier can be implicitly loaded with function
|
@param cb : Callback with the classifier. Default classifier can be implicitly loaded with function
|
||||||
loadClassifierNM1, e.g. from file in samples/cpp/trained_classifierNM1.xml
|
loadClassifierNM1, e.g. from file in samples/cpp/trained_classifierNM1.xml
|
||||||
@@ -213,7 +190,7 @@ CV_EXPORTS_W Ptr<ERFilter> createERFilterNM1(const Ptr<ERFilter::Callback>& cb,
|
|||||||
bool nonMaxSuppression = true,
|
bool nonMaxSuppression = true,
|
||||||
float minProbabilityDiff = (float)0.1);
|
float minProbabilityDiff = (float)0.1);
|
||||||
|
|
||||||
/** @brief Create an Extremal Region Filter for the 2nd stage classifier of N&M algorithm [Neumann12].
|
/** @brief Create an Extremal Region Filter for the 2nd stage classifier of N&M algorithm @cite Neumann12.
|
||||||
|
|
||||||
@param cb : Callback with the classifier. Default classifier can be implicitly loaded with function
|
@param cb : Callback with the classifier. Default classifier can be implicitly loaded with function
|
||||||
loadClassifierNM2, e.g. from file in samples/cpp/trained_classifierNM2.xml
|
loadClassifierNM2, e.g. from file in samples/cpp/trained_classifierNM2.xml
|
||||||
@@ -268,7 +245,7 @@ enum { ERFILTER_NM_RGBLGrad,
|
|||||||
ERFILTER_NM_IHSGrad
|
ERFILTER_NM_IHSGrad
|
||||||
};
|
};
|
||||||
|
|
||||||
/** @brief Compute the different channels to be processed independently in the N&M algorithm [Neumann12].
|
/** @brief Compute the different channels to be processed independently in the N&M algorithm @cite Neumann12.
|
||||||
|
|
||||||
@param _src Source image. Must be RGB CV_8UC3.
|
@param _src Source image. Must be RGB CV_8UC3.
|
||||||
|
|
||||||
@@ -289,7 +266,7 @@ CV_EXPORTS_W void computeNMChannels(InputArray _src, CV_OUT OutputArrayOfArrays
|
|||||||
//! text::erGrouping operation modes
|
//! text::erGrouping operation modes
|
||||||
enum erGrouping_Modes {
|
enum erGrouping_Modes {
|
||||||
|
|
||||||
/** Exhaustive Search algorithm proposed in [Neumann11] for grouping horizontally aligned text.
|
/** Exhaustive Search algorithm proposed in @cite Neumann11 for grouping horizontally aligned text.
|
||||||
The algorithm models a verification function for all the possible ER sequences. The
|
The algorithm models a verification function for all the possible ER sequences. The
|
||||||
verification function for ER pairs consists of a set of threshold-based pairwise rules which
|
verification function for ER pairs consists of a set of threshold-based pairwise rules which
|
||||||
compare measurements of two regions (height ratio, centroid angle, and region distance). The
|
compare measurements of two regions (height ratio, centroid angle, and region distance). The
|
||||||
@@ -300,7 +277,7 @@ enum erGrouping_Modes {
|
|||||||
consistent.
|
consistent.
|
||||||
*/
|
*/
|
||||||
ERGROUPING_ORIENTATION_HORIZ,
|
ERGROUPING_ORIENTATION_HORIZ,
|
||||||
/** Text grouping method proposed in [Gomez13][Gomez14] for grouping arbitrary oriented text. Regions
|
/** Text grouping method proposed in @cite Gomez13 @cite Gomez14 for grouping arbitrary oriented text. Regions
|
||||||
are agglomerated by Single Linkage Clustering in a weighted feature space that combines proximity
|
are agglomerated by Single Linkage Clustering in a weighted feature space that combines proximity
|
||||||
(x,y coordinates) and similarity measures (color, size, gradient magnitude, stroke width, etc.).
|
(x,y coordinates) and similarity measures (color, size, gradient magnitude, stroke width, etc.).
|
||||||
SLC provides a dendrogram where each node represents a text group hypothesis. Then the algorithm
|
SLC provides a dendrogram where each node represents a text group hypothesis. Then the algorithm
|
||||||
@@ -375,8 +352,8 @@ CV_EXPORTS_W void detectRegions(InputArray image, const Ptr<ERFilter>& er_filter
|
|||||||
/** @brief Extracts text regions from image.
|
/** @brief Extracts text regions from image.
|
||||||
|
|
||||||
@param image Source image from which text blocks need to be extracted. Should be CV_8UC3 (color).
|
@param image Source image from which text blocks need to be extracted. Should be CV_8UC3 (color).
|
||||||
@param er_filter1 Extremal Region Filter for the 1st stage classifier of N&M algorithm [Neumann12]
|
@param er_filter1 Extremal Region Filter for the 1st stage classifier of N&M algorithm @cite Neumann12
|
||||||
@param er_filter2 Extremal Region Filter for the 2nd stage classifier of N&M algorithm [Neumann12]
|
@param er_filter2 Extremal Region Filter for the 2nd stage classifier of N&M algorithm @cite Neumann12
|
||||||
@param groups_rects Output list of rectangle blocks with text
|
@param groups_rects Output list of rectangle blocks with text
|
||||||
@param method Grouping method (see text::erGrouping_Modes). Can be one of ERGROUPING_ORIENTATION_HORIZ, ERGROUPING_ORIENTATION_ANY.
|
@param method Grouping method (see text::erGrouping_Modes). Can be one of ERGROUPING_ORIENTATION_HORIZ, ERGROUPING_ORIENTATION_ANY.
|
||||||
@param filename The XML or YAML file with the classifier model (e.g. samples/trained_classifier_erGrouping.xml). Only to use when grouping method is ERGROUPING_ORIENTATION_ANY.
|
@param filename The XML or YAML file with the classifier model (e.g. samples/trained_classifier_erGrouping.xml). Only to use when grouping method is ERGROUPING_ORIENTATION_ANY.
|
||||||
|
@@ -128,7 +128,7 @@ public:
|
|||||||
recognition of individual text elements found (e.g. words or text lines).
|
recognition of individual text elements found (e.g. words or text lines).
|
||||||
@param component_confidences If provided the method will output a list of confidence values
|
@param component_confidences If provided the method will output a list of confidence values
|
||||||
for the recognition of individual text elements found (e.g. words or text lines).
|
for the recognition of individual text elements found (e.g. words or text lines).
|
||||||
@param component_level OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXT_LINE.
|
@param component_level OCR_LEVEL_WORD (by default), or OCR_LEVEL_TEXTLINE.
|
||||||
*/
|
*/
|
||||||
virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
|
virtual void run(Mat& image, std::string& output_text, std::vector<Rect>* component_rects=NULL,
|
||||||
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
|
std::vector<std::string>* component_texts=NULL, std::vector<float>* component_confidences=NULL,
|
||||||
|
Reference in New Issue
Block a user