mirror of
https://github.com/opencv/opencv_contrib.git
synced 2025-10-20 12:55:15 +08:00
Modified the class hierarchy
This commit is contained in:
@@ -716,10 +716,6 @@ public:
|
||||
/** @brief produces a class confidence row-vector given an image
|
||||
*/
|
||||
CV_WRAP virtual void classify(InputArray image, OutputArray classProbabilities) = 0;
|
||||
/** @brief produces a list of bounding boxes given an image
|
||||
*/
|
||||
|
||||
CV_WRAP virtual void detect(InputArray image, OutputArray classProbabilities) = 0;
|
||||
|
||||
/** @brief produces a matrix containing class confidence row-vectors given a collection of images
|
||||
*/
|
||||
|
@@ -65,19 +65,131 @@ namespace text
|
||||
//detection scenario
|
||||
// Abstract detector interface: declares a virtual destructor and two pure
// virtual run() overloads (one without and one with a mask argument).
// NOTE(review): this excerpt comes from a rendered diff; the bare `|` / `||||`
// rows and the doubled `public:` / `component_rects` lines look like viewer
// residue (removed and added sides shown together) — confirm against the
// actual header before relying on it.
class CV_EXPORTS_W BaseDetector
|
||||
{
|
||||
public:
|
||||
public:
|
||||
// Virtual destructor so that derived detectors can be destroyed through a
// BaseDetector pointer.
virtual ~BaseDetector() {};
|
||||
|
||||
// Pure virtual entry point taking only the image.
// component_rects and component_confidences are optional pointers (default
// NULL); presumably they receive the detected boxes and their scores —
// verify against an implementation. component_level defaults to 0; its
// meaning is not shown in this excerpt.
virtual void run(Mat& image,
|
||||
std::vector<Rect>* component_rects=NULL,
|
||||
// NOTE(review): the next line repeats the parameter above — most likely the
// other side of the diff; confirm.
std::vector<Rect>* component_rects=NULL,
|
||||
std::vector<float>* component_confidences=NULL,
|
||||
int component_level=0) = 0;
|
||||
|
||||
// Pure virtual overload that additionally takes a mask; the remaining
// parameters and their defaults match the overload above.
virtual void run(Mat& image, Mat& mask,
|
||||
std::vector<Rect>* component_rects=NULL,
|
||||
// NOTE(review): repeated parameter line, as in the first overload — confirm.
std::vector<Rect>* component_rects=NULL,
|
||||
std::vector<float>* component_confidences=NULL,
|
||||
int component_level=0) = 0;
|
||||
|
||||
};
|
||||
/** A virtual class for different models of text detection (including CNN based deep models)
|
||||
*/
|
||||
|
||||
// Abstract interface for text region detectors: stores input/output geometry
// and channel counts, declares a pure virtual detect(), and provides getters
// for the two stored geometries.
class CV_EXPORTS_W TextRegionDetector
|
||||
{
|
||||
protected:
|
||||
/** Stores the input and output size, plus the input and output channel counts.
|
||||
*/
|
||||
// The two commented-out declarations below are an earlier netGeometry-typed
// variant of the Size members that follow.
//netGeometry inputGeometry_;
|
||||
//netGeometry outputGeometry_;
|
||||
Size inputGeometry_;
|
||||
Size outputGeometry_;
|
||||
int inputChannelCount_;
|
||||
int outputChannelCount_;
|
||||
|
||||
public:
|
||||
// Virtual destructor so that derived detectors can be destroyed through a
// TextRegionDetector pointer.
virtual ~TextRegionDetector() {}
|
||||
|
||||
/** @brief produces a list of bounding boxes and an estimate of the text-ness confidence of each bounding box
|
||||
*
* @param image the input image.
* @param bboxProb the output array; presumably holds the boxes together with
* their confidences — the exact layout is not shown here, confirm with an
* implementation.
*/
|
||||
CV_WRAP virtual void detect(InputArray image, OutputArray bboxProb ) = 0;
|
||||
|
||||
|
||||
/** @brief simple getter method returning the size (height, width) of the input sample
|
||||
*
* Returns the stored inputGeometry_ member.
* NOTE(review): cv::Size holds width and height members; confirm which order
* "(height, width)" above is meant to describe.
*/
|
||||
CV_WRAP virtual Size getInputGeometry(){return this->inputGeometry_;}
|
||||
|
||||
/** @brief simple getter method returning the shape of the output
|
||||
* Any text detector should output a number of text regions along with a score of text-ness.
|
||||
* From the shape, the number of text regions and the number of returned values
|
||||
* for each region can be inferred.
|
||||
*
* Returns the stored outputGeometry_ member.
*/
|
||||
CV_WRAP virtual Size getOutputGeometry(){return this->outputGeometry_;}
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
/** Generic structure of Deep CNN based Text Detectors
|
||||
* */
|
||||
// Text detector interface derived from TextRegionDetector: holds an
// ImagePreprocessor, declares a protected preprocess() hook, and exposes two
// static factory functions (create and createTextBoxNet).
class CV_EXPORTS_W DeepCNNTextDetector : public TextRegionDetector
|
||||
{
|
||||
/** @brief Class that uses a pretrained caffe model for text detection.
|
||||
* NOTE(review): the original comment contained an unfinished sentence here
* ("Any text detection should"); complete or remove it.
|
||||
* This network is described in detail in:
|
||||
* Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network
|
||||
* https://arxiv.org/abs/1611.06779
|
||||
*/
|
||||
protected:
|
||||
/** all deep CNN based text detectors have a preprocessor (normally)
|
||||
*/
|
||||
Ptr<ImagePreprocessor> preprocessor_;
|
||||
/** @brief all image preprocessing is handled here including whitening etc.
|
||||
*
|
||||
* @param input the image to be preprocessed for the detector. If the depth
|
||||
* is CV_8U values should be in [0,255] otherwise values are assumed to be in [0,1]
|
||||
*
|
||||
* @param output reference to the image to be fed to the detector, the preprocessor will
|
||||
* resize the image to the appropriate size and convert it to the appropriate depth
|
||||
*
|
||||
* The method preprocess should never be used externally; it is up to the class's own
|
||||
* methods to employ it.
* NOTE(review): the original text named classify and classifyBatch, which are
* not declared by this class in the visible excerpt — presumably detect() is
* meant; confirm.
|
||||
*/
|
||||
virtual void preprocess(const Mat& input,Mat& output);
|
||||
public:
|
||||
// Virtual destructor so that derived detectors can be destroyed through a
// DeepCNNTextDetector pointer.
virtual ~DeepCNNTextDetector() {};
|
||||
|
||||
/** @brief Constructs a DeepCNNTextDetector object from a caffe pretrained model
|
||||
*
|
||||
* @param archFilename is the path to the prototxt file containing the deployment model architecture description.
|
||||
*
|
||||
* @param weightsFilename is the path to the pretrained weights of the model in binary form.
|
||||
*
|
||||
* @param preprocessor is a pointer to the instance of an ImagePreprocessor implementing the preprocess_ protected method.
|
||||
*
|
||||
* @param minibatchSz the maximum number of samples that can be processed in parallel (default 100). In practice this parameter
|
||||
* has an effect only when computing in the GPU and should be set with respect to the memory available in the GPU.
|
||||
*
|
||||
* @param backEnd integer parameter selecting the computation framework. For now OCR_HOLISTIC_BACKEND_CAFFE (the default) is
|
||||
* the only option
|
||||
*/
|
||||
CV_WRAP static Ptr<DeepCNNTextDetector> create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE);
|
||||
|
||||
/** @brief Constructs a DeepCNNTextDetector intended to be used for text area detection.
|
||||
*
|
||||
* This method loads a pretrained model and couples it with a preprocessor that preprocesses the image with mean subtraction.
* NOTE(review): the original comment left the mean values blank ("mean subtraction of ()"); fill them in.
|
||||
* The architecture and models weights can be downloaded from:
|
||||
* https://github.com/sghoshcvc/TextBox-Models.git (size is around 100 MB)
|
||||
|
||||
* @param archFilename is the path to the prototxt file containing the deployment model architecture description.
|
||||
* When employing OCR_HOLISTIC_BACKEND_CAFFE this is the path to the deploy ".prototxt".
|
||||
*
|
||||
* @param weightsFilename is the path to the pretrained weights of the model. When employing
|
||||
* OCR_HOLISTIC_BACKEND_CAFFE this is the path to the ".caffemodel" file.
|
||||
*
|
||||
* @param backEnd integer parameter selecting the computation framework. For now OCR_HOLISTIC_BACKEND_CAFFE (the default) is
|
||||
* the only option
|
||||
*/
|
||||
CV_WRAP static Ptr<DeepCNNTextDetector> createTextBoxNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE);
|
||||
// Grants ImagePreprocessor access to this class's non-public members.
friend class ImagePreprocessor;
|
||||
|
||||
};
|
||||
|
||||
/** @brief textDetector class provides the functionality of text bounding box detection.
|
||||
* A TextRegionDetector is employed to find bounding boxes of text
|
||||
* words given an input image.
|
||||
*
|
||||
* This class implements the logic of providing text bounding boxes in a vector of rects given a TextRegionDetector
|
||||
* The TextRegionDetector can be any text detector
|
||||
*
|
||||
*/
|
||||
|
||||
class CV_EXPORTS_W textDetector : public BaseDetector
|
||||
{
|
||||
@@ -125,9 +237,9 @@ public:
|
||||
|
||||
|
||||
|
||||
/** @brief simple getter for the preprocessing functor
|
||||
/** @brief simple getter for the preprocessing functor
|
||||
*/
|
||||
CV_WRAP virtual Ptr<TextImageClassifier> getClassifier()=0;
|
||||
CV_WRAP virtual Ptr<TextRegionDetector> getClassifier()=0;
|
||||
|
||||
/** @brief Creates an instance of the textDetector class.
|
||||
|
||||
@@ -135,7 +247,7 @@ public:
|
||||
|
||||
|
||||
*/
|
||||
CV_WRAP static Ptr<textDetector> create(Ptr<TextImageClassifier> classifierPtr);
|
||||
CV_WRAP static Ptr<textDetector> create(Ptr<TextRegionDetector> classifierPtr);
|
||||
|
||||
|
||||
/** @brief Creates an instance of the textDetector class and implicitly also a DeepCNN classifier.
|
||||
|
Reference in New Issue
Block a user