mirror of
https://github.com/opencv/opencv_contrib.git
synced 2025-10-21 06:11:09 +08:00
Modified the class hierarchy
This commit is contained in:
@@ -716,10 +716,6 @@ public:
|
|||||||
/** @brief produces a class confidence row-vector given an image
|
/** @brief produces a class confidence row-vector given an image
|
||||||
*/
|
*/
|
||||||
CV_WRAP virtual void classify(InputArray image, OutputArray classProbabilities) = 0;
|
CV_WRAP virtual void classify(InputArray image, OutputArray classProbabilities) = 0;
|
||||||
/** @brief produces a list of bounding box given an image
|
|
||||||
*/
|
|
||||||
|
|
||||||
CV_WRAP virtual void detect(InputArray image, OutputArray classProbabilities) = 0;
|
|
||||||
|
|
||||||
/** @brief produces a matrix containing class confidence row-vectors given an collection of images
|
/** @brief produces a matrix containing class confidence row-vectors given an collection of images
|
||||||
*/
|
*/
|
||||||
|
@@ -65,7 +65,7 @@ namespace text
|
|||||||
//detection scenario
|
//detection scenario
|
||||||
class CV_EXPORTS_W BaseDetector
|
class CV_EXPORTS_W BaseDetector
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
virtual ~BaseDetector() {};
|
virtual ~BaseDetector() {};
|
||||||
|
|
||||||
virtual void run(Mat& image,
|
virtual void run(Mat& image,
|
||||||
@@ -78,6 +78,118 @@ class CV_EXPORTS_W BaseDetector
|
|||||||
std::vector<float>* component_confidences=NULL,
|
std::vector<float>* component_confidences=NULL,
|
||||||
int component_level=0) = 0;
|
int component_level=0) = 0;
|
||||||
|
|
||||||
|
};
|
||||||
|
/** A virtual class for different models of text detection (including CNN based deep models)
|
||||||
|
*/
|
||||||
|
|
||||||
|
class CV_EXPORTS_W TextRegionDetector
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
/** Stores input and output size
|
||||||
|
*/
|
||||||
|
//netGeometry inputGeometry_;
|
||||||
|
//netGeometry outputGeometry_;
|
||||||
|
Size inputGeometry_;
|
||||||
|
Size outputGeometry_;
|
||||||
|
int inputChannelCount_;
|
||||||
|
int outputChannelCount_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
virtual ~TextRegionDetector() {}
|
||||||
|
|
||||||
|
/** @brief produces a list of Bounding boxes and an estimate of text-ness confidence of Bounding Boxes
|
||||||
|
*/
|
||||||
|
CV_WRAP virtual void detect(InputArray image, OutputArray bboxProb ) = 0;
|
||||||
|
|
||||||
|
|
||||||
|
/** @brief simple getter method returning the size (height, width) of the input sample
|
||||||
|
*/
|
||||||
|
CV_WRAP virtual Size getInputGeometry(){return this->inputGeometry_;}
|
||||||
|
|
||||||
|
/** @brief simple getter method returning the shape of the oputput
|
||||||
|
* Any text detector should output a number of text regions alongwith a score of text-ness
|
||||||
|
* From the shape it can be inferred the number of text regions and number of returned value
|
||||||
|
* for each region
|
||||||
|
*/
|
||||||
|
CV_WRAP virtual Size getOutputGeometry(){return this->outputGeometry_;}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Generic structure of Deep CNN based Text Detectors
|
||||||
|
* */
|
||||||
|
class CV_EXPORTS_W DeepCNNTextDetector : public TextRegionDetector
|
||||||
|
{
|
||||||
|
/** @brief Class that uses a pretrained caffe model for text detection.
|
||||||
|
* Any text detection should
|
||||||
|
* This network is described in detail in:
|
||||||
|
* Minghui Liao et al.: TextBoxes: A Fast Text Detector with a Single Deep Neural Network
|
||||||
|
* https://arxiv.org/abs/1611.06779
|
||||||
|
*/
|
||||||
|
protected:
|
||||||
|
/** all deep CNN based text detectors have a preprocessor (normally)
|
||||||
|
*/
|
||||||
|
Ptr<ImagePreprocessor> preprocessor_;
|
||||||
|
/** @brief all image preprocessing is handled here including whitening etc.
|
||||||
|
*
|
||||||
|
* @param input the image to be preprocessed for the classifier. If the depth
|
||||||
|
* is CV_U8 values should be in [0,255] otherwise values are assumed to be in [0,1]
|
||||||
|
*
|
||||||
|
* @param output reference to the image to be fed to the classifier, the preprocessor will
|
||||||
|
* resize the image to the apropriate size and convert it to the apropriate depth\
|
||||||
|
*
|
||||||
|
* The method preprocess should never be used externally, it is up to classify and classifyBatch
|
||||||
|
* methods to employ it.
|
||||||
|
*/
|
||||||
|
virtual void preprocess(const Mat& input,Mat& output);
|
||||||
|
public:
|
||||||
|
virtual ~DeepCNNTextDetector() {};
|
||||||
|
|
||||||
|
/** @brief Constructs a DeepCNNTextDetector object from a caffe pretrained model
|
||||||
|
*
|
||||||
|
* @param archFilename is the path to the prototxt file containing the deployment model architecture description.
|
||||||
|
*
|
||||||
|
* @param weightsFilename is the path to the pretrained weights of the model in binary fdorm.
|
||||||
|
*
|
||||||
|
* @param preprocessor is a pointer to the instance of a ImagePreprocessor implementing the preprocess_ protecteed method;
|
||||||
|
*
|
||||||
|
* @param minibatchSz the maximum number of samples that can processed in parallel. In practice this parameter
|
||||||
|
* has an effect only when computing in the GPU and should be set with respect to the memory available in the GPU.
|
||||||
|
*
|
||||||
|
* @param backEnd integer parameter selecting the coputation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
|
||||||
|
* the only option
|
||||||
|
*/
|
||||||
|
CV_WRAP static Ptr<DeepCNNTextDetector> create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz=100,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE);
|
||||||
|
|
||||||
|
/** @brief Constructs a DeepCNNTextDetector intended to be used for text area detection.
|
||||||
|
*
|
||||||
|
* This method loads a pretrained classifier and couples with a preprocessor that preprocess the image with mean subtraction of ()
|
||||||
|
* The architecture and models weights can be downloaded from:
|
||||||
|
* https://github.com/sghoshcvc/TextBox-Models.git (size is around 100 MB)
|
||||||
|
|
||||||
|
* @param archFilename is the path to the prototxt file containing the deployment model architecture description.
|
||||||
|
* When employing OCR_HOLISTIC_BACKEND_CAFFE this is the path to the deploy ".prototxt".
|
||||||
|
*
|
||||||
|
* @param weightsFilename is the path to the pretrained weights of the model. When employing
|
||||||
|
* OCR_HOLISTIC_BACKEND_CAFFE this is the path to the ".caffemodel" file.
|
||||||
|
*
|
||||||
|
* @param backEnd integer parameter selecting the coputation framework. For now OCR_HOLISTIC_BACKEND_CAFFE is
|
||||||
|
* the only option
|
||||||
|
*/
|
||||||
|
CV_WRAP static Ptr<DeepCNNTextDetector> createTextBoxNet(String archFilename,String weightsFilename,int backEnd=OCR_HOLISTIC_BACKEND_CAFFE);
|
||||||
|
friend class ImagePreprocessor;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
/** @brief textDetector class provides the functionallity of text bounding box detection.
|
||||||
|
* A TextRegionDetector is employed to find bounding boxes of text
|
||||||
|
* words given an input image.
|
||||||
|
*
|
||||||
|
* This class implements the logic of providing text bounding boxes in a vector of rects given an TextRegionDetector
|
||||||
|
* The TextRegionDetector can be any text detector
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
class CV_EXPORTS_W textDetector : public BaseDetector
|
class CV_EXPORTS_W textDetector : public BaseDetector
|
||||||
{
|
{
|
||||||
@@ -125,9 +237,9 @@ public:
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/** @brief simple getter for the preprocessing functor
|
/** @brief simple getter for the preprocessing functor
|
||||||
*/
|
*/
|
||||||
CV_WRAP virtual Ptr<TextImageClassifier> getClassifier()=0;
|
CV_WRAP virtual Ptr<TextRegionDetector> getClassifier()=0;
|
||||||
|
|
||||||
/** @brief Creates an instance of the textDetector class.
|
/** @brief Creates an instance of the textDetector class.
|
||||||
|
|
||||||
@@ -135,7 +247,7 @@ public:
|
|||||||
|
|
||||||
|
|
||||||
*/
|
*/
|
||||||
CV_WRAP static Ptr<textDetector> create(Ptr<TextImageClassifier> classifierPtr);
|
CV_WRAP static Ptr<textDetector> create(Ptr<TextRegionDetector> classifierPtr);
|
||||||
|
|
||||||
|
|
||||||
/** @brief Creates an instance of the textDetector class and implicitly also a DeepCNN classifier.
|
/** @brief Creates an instance of the textDetector class and implicitly also a DeepCNN classifier.
|
||||||
|
@@ -459,53 +459,53 @@ protected:
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void process_(Mat inputImage, Mat &outputMat)
|
// void process_(Mat inputImage, Mat &outputMat)
|
||||||
{
|
// {
|
||||||
// do forward pass and stores the output in outputMat
|
// // do forward pass and stores the output in outputMat
|
||||||
//Process one image
|
// //Process one image
|
||||||
CV_Assert(this->minibatchSz_==1);
|
// CV_Assert(this->minibatchSz_==1);
|
||||||
//CV_Assert(outputMat.isContinuous());
|
// //CV_Assert(outputMat.isContinuous());
|
||||||
|
|
||||||
#ifdef HAVE_CAFFE
|
//#ifdef HAVE_CAFFE
|
||||||
net_->input_blobs()[0]->Reshape(1, this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
|
// net_->input_blobs()[0]->Reshape(1, this->channelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
|
||||||
net_->Reshape();
|
// net_->Reshape();
|
||||||
float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
|
// float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
|
||||||
float* inputData=inputBuffer;
|
// float* inputData=inputBuffer;
|
||||||
|
|
||||||
std::vector<Mat> input_channels;
|
// std::vector<Mat> input_channels;
|
||||||
Mat preprocessed;
|
// Mat preprocessed;
|
||||||
// if the image have multiple color channels the input layer should be populated accordingly
|
// // if the image have multiple color channels the input layer should be populated accordingly
|
||||||
for (int channel=0;channel < this->channelCount_;channel++){
|
// for (int channel=0;channel < this->channelCount_;channel++){
|
||||||
|
|
||||||
cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
|
// cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
|
||||||
input_channels.push_back(netInputWraped);
|
// input_channels.push_back(netInputWraped);
|
||||||
//input_data += width * height;
|
// //input_data += width * height;
|
||||||
inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
|
// inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
|
||||||
}
|
// }
|
||||||
this->preprocess(inputImage,preprocessed);
|
// this->preprocess(inputImage,preprocessed);
|
||||||
split(preprocessed, input_channels);
|
// split(preprocessed, input_channels);
|
||||||
|
|
||||||
//preprocessed.copyTo(netInputWraped);
|
// //preprocessed.copyTo(netInputWraped);
|
||||||
|
|
||||||
|
|
||||||
this->net_->Forward();
|
// this->net_->Forward();
|
||||||
const float* outputNetData=net_->output_blobs()[0]->cpu_data();
|
// const float* outputNetData=net_->output_blobs()[0]->cpu_data();
|
||||||
// const float* outputNetData1=net_->output_blobs()[1]->cpu_data();
|
// // const float* outputNetData1=net_->output_blobs()[1]->cpu_data();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
|
// this->outputGeometry_ = Size(net_->output_blobs()[0]->width(),net_->output_blobs()[0]->height());
|
||||||
int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width;
|
// int outputSz = this->outputSize_ * this->outputGeometry_.height * this->outputGeometry_.width;
|
||||||
outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
|
// outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
|
||||||
float*outputMatData=(float*)(outputMat.data);
|
// float*outputMatData=(float*)(outputMat.data);
|
||||||
|
|
||||||
memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
|
// memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
//#endif
|
||||||
}
|
// }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -587,15 +587,15 @@ public:
|
|||||||
inputImageList.push_back(image.getMat());
|
inputImageList.push_back(image.getMat());
|
||||||
classifyBatch(inputImageList,classProbabilities);
|
classifyBatch(inputImageList,classProbabilities);
|
||||||
}
|
}
|
||||||
void detect(InputArray image, OutputArray Bbox_prob)
|
// void detect(InputArray image, OutputArray Bbox_prob)
|
||||||
{
|
// {
|
||||||
|
|
||||||
Bbox_prob.create(this->outputGeometry_,CV_32F); // dummy initialization is it needed
|
// Bbox_prob.create(this->outputGeometry_,CV_32F); // dummy initialization is it needed
|
||||||
Mat outputMat = Bbox_prob.getMat();
|
// Mat outputMat = Bbox_prob.getMat();
|
||||||
process_(image.getMat(),outputMat);
|
// process_(image.getMat(),outputMat);
|
||||||
//copy back to outputArray
|
// //copy back to outputArray
|
||||||
outputMat.copyTo(Bbox_prob);
|
// outputMat.copyTo(Bbox_prob);
|
||||||
}
|
// }
|
||||||
|
|
||||||
void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities)
|
void classifyBatch(InputArrayOfArrays inputImageList, OutputArray classProbabilities)
|
||||||
{
|
{
|
||||||
|
@@ -23,6 +23,8 @@
|
|||||||
namespace cv { namespace text {
|
namespace cv { namespace text {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class textDetectImpl: public textDetector{
|
class textDetectImpl: public textDetector{
|
||||||
private:
|
private:
|
||||||
struct NetOutput{
|
struct NetOutput{
|
||||||
@@ -60,9 +62,9 @@ private:
|
|||||||
};
|
};
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
Ptr<TextImageClassifier> classifier_;
|
Ptr<TextRegionDetector> classifier_;
|
||||||
public:
|
public:
|
||||||
textDetectImpl(Ptr<TextImageClassifier> classifierPtr):classifier_(classifierPtr)
|
textDetectImpl(Ptr<TextRegionDetector> classifierPtr):classifier_(classifierPtr)
|
||||||
{
|
{
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -131,13 +133,13 @@ public:
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
Ptr<TextImageClassifier> getClassifier()
|
Ptr<TextRegionDetector> getClassifier()
|
||||||
{
|
{
|
||||||
return this->classifier_;
|
return this->classifier_;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
Ptr<textDetector> textDetector::create(Ptr<TextImageClassifier> classifierPtr)
|
Ptr<textDetector> textDetector::create(Ptr<TextRegionDetector> classifierPtr)
|
||||||
{
|
{
|
||||||
return Ptr<textDetector>(new textDetectImpl(classifierPtr));
|
return Ptr<textDetector>(new textDetectImpl(classifierPtr));
|
||||||
}
|
}
|
||||||
@@ -155,7 +157,7 @@ Ptr<textDetector> textDetector::create(String modelArchFilename, String modelWei
|
|||||||
textbox_mean.at<uchar>(0,2)=123;
|
textbox_mean.at<uchar>(0,2)=123;
|
||||||
preprocessor->set_mean(textbox_mean);
|
preprocessor->set_mean(textbox_mean);
|
||||||
// create a pointer to text box detector(textDetector)
|
// create a pointer to text box detector(textDetector)
|
||||||
Ptr<TextImageClassifier> classifierPtr(DeepCNN::create(modelArchFilename,modelWeightsFilename,preprocessor,1));
|
Ptr<TextRegionDetector> classifierPtr(DeepCNNTextDetector::create(modelArchFilename,modelWeightsFilename,preprocessor,1));
|
||||||
return Ptr<textDetector>(new textDetectImpl(classifierPtr));
|
return Ptr<textDetector>(new textDetectImpl(classifierPtr));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
343
modules/text/src/text_detectorCNN.cpp
Normal file
343
modules/text/src/text_detectorCNN.cpp
Normal file
@@ -0,0 +1,343 @@
|
|||||||
|
#include "precomp.hpp"
|
||||||
|
#include "opencv2/imgproc.hpp"
|
||||||
|
#include "opencv2/core.hpp"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <fstream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <queue>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <iosfwd>
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_CAFFE
|
||||||
|
#include "caffe/caffe.hpp"
|
||||||
|
#endif
|
||||||
|
namespace cv { namespace text {
|
||||||
|
|
||||||
|
/** @brief Tells whether a file can be opened for reading.
 *
 * @param filename path of the file to probe.
 * @return true when an input stream opened on the path is in a good state.
 */
inline bool fileExists (String filename) {
    return std::ifstream(filename.c_str()).good();
}
|
||||||
|
|
||||||
|
//************************************************************************************
|
||||||
|
//****************** TextImageClassifier *****************************************
|
||||||
|
//************************************************************************************
|
||||||
|
|
||||||
|
//void TextImageClassifier::preprocess(const Mat& input,Mat& output)
|
||||||
|
//{
|
||||||
|
// this->preprocessor_->preprocess_(input,output,this->inputGeometry_,this->channelCount_);
|
||||||
|
//}
|
||||||
|
|
||||||
|
//void TextImageClassifier::setPreprocessor(Ptr<ImagePreprocessor> ptr)
|
||||||
|
//{
|
||||||
|
// CV_Assert(!ptr.empty());
|
||||||
|
// preprocessor_=ptr;
|
||||||
|
//}
|
||||||
|
|
||||||
|
//Ptr<ImagePreprocessor> TextImageClassifier::getPreprocessor()
|
||||||
|
//{
|
||||||
|
// return preprocessor_;
|
||||||
|
//}
|
||||||
|
|
||||||
|
|
||||||
|
class DeepCNNTextDetectorCaffeImpl: public DeepCNNTextDetector{
|
||||||
|
protected:
|
||||||
|
|
||||||
|
|
||||||
|
void process_(Mat inputImage, Mat &outputMat)
|
||||||
|
{
|
||||||
|
// do forward pass and stores the output in outputMat
|
||||||
|
//Process one image
|
||||||
|
// CV_Assert(this->outputGeometry_.batchSize==1);
|
||||||
|
//CV_Assert(outputMat.isContinuous());
|
||||||
|
|
||||||
|
#ifdef HAVE_CAFFE
|
||||||
|
net_->input_blobs()[0]->Reshape(1, this->inputChannelCount_,this->inputGeometry_.height,this->inputGeometry_.width);
|
||||||
|
net_->Reshape();
|
||||||
|
float* inputBuffer=net_->input_blobs()[0]->mutable_cpu_data();
|
||||||
|
float* inputData=inputBuffer;
|
||||||
|
|
||||||
|
std::vector<Mat> input_channels;
|
||||||
|
Mat preprocessed;
|
||||||
|
// if the image have multiple color channels the input layer should be populated accordingly
|
||||||
|
for (int channel=0;channel < this->inputChannelCount_;channel++){
|
||||||
|
|
||||||
|
cv::Mat netInputWraped(this->inputGeometry_.height, this->inputGeometry_.width, CV_32FC1, inputData);
|
||||||
|
input_channels.push_back(netInputWraped);
|
||||||
|
//input_data += width * height;
|
||||||
|
inputData+=(this->inputGeometry_.height*this->inputGeometry_.width);
|
||||||
|
}
|
||||||
|
this->preprocess(inputImage,preprocessed);
|
||||||
|
split(preprocessed, input_channels);
|
||||||
|
|
||||||
|
//preprocessed.copyTo(netInputWraped);
|
||||||
|
|
||||||
|
|
||||||
|
this->net_->Forward();
|
||||||
|
const float* outputNetData=net_->output_blobs()[0]->cpu_data();
|
||||||
|
// const float* outputNetData1=net_->output_blobs()[1]->cpu_data();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
this->outputGeometry_.height = net_->output_blobs()[0]->height();
|
||||||
|
this->outputGeometry_.width = net_->output_blobs()[0]->width();
|
||||||
|
this->outputChannelCount_ = net_->output_blobs()[0]->channels();
|
||||||
|
int outputSz = this->outputChannelCount_ * this->outputGeometry_.height * this->outputGeometry_.width;
|
||||||
|
outputMat.create(this->outputGeometry_.height , this->outputGeometry_.width,CV_32FC1);
|
||||||
|
float*outputMatData=(float*)(outputMat.data);
|
||||||
|
|
||||||
|
memcpy(outputMatData,outputNetData,sizeof(float)*outputSz);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_CAFFE
|
||||||
|
Ptr<caffe::Net<float> > net_;
|
||||||
|
#endif
|
||||||
|
//Size inputGeometry_;
|
||||||
|
int minibatchSz_;//The existence of the assignment operator mandates this to be nonconst
|
||||||
|
//int outputSize_;
|
||||||
|
public:
|
||||||
|
DeepCNNTextDetectorCaffeImpl(const DeepCNNTextDetectorCaffeImpl& dn):
|
||||||
|
minibatchSz_(dn.minibatchSz_){
|
||||||
|
outputGeometry_=dn.outputGeometry_;
|
||||||
|
inputGeometry_=dn.inputGeometry_;
|
||||||
|
//Implemented to supress Visual Studio warning "assignment operator could not be generated"
|
||||||
|
#ifdef HAVE_CAFFE
|
||||||
|
this->net_=dn.net_;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
DeepCNNTextDetectorCaffeImpl& operator=(const DeepCNNTextDetectorCaffeImpl &dn)
|
||||||
|
{
|
||||||
|
#ifdef HAVE_CAFFE
|
||||||
|
this->net_=dn.net_;
|
||||||
|
#endif
|
||||||
|
this->setPreprocessor(dn.preprocessor_);
|
||||||
|
this->inputGeometry_=dn.inputGeometry_;
|
||||||
|
this->inputChannelCount_=dn.inputChannelCount_;
|
||||||
|
this->outputChannelCount_ = dn.outputChannelCount_;
|
||||||
|
// this->minibatchSz_=dn.minibatchSz_;
|
||||||
|
//this->outputGeometry_=dn.outputSize_;
|
||||||
|
this->preprocessor_=dn.preprocessor_;
|
||||||
|
this->outputGeometry_=dn.outputGeometry_;
|
||||||
|
return *this;
|
||||||
|
//Implemented to supress Visual Studio warning "assignment operator could not be generated"
|
||||||
|
}
|
||||||
|
|
||||||
|
DeepCNNTextDetectorCaffeImpl(String modelArchFilename, String modelWeightsFilename,Ptr<ImagePreprocessor> preprocessor, int maxMinibatchSz)
|
||||||
|
:minibatchSz_(maxMinibatchSz)
|
||||||
|
{
|
||||||
|
|
||||||
|
CV_Assert(this->minibatchSz_>0);
|
||||||
|
CV_Assert(fileExists(modelArchFilename));
|
||||||
|
CV_Assert(fileExists(modelWeightsFilename));
|
||||||
|
CV_Assert(!preprocessor.empty());
|
||||||
|
this->setPreprocessor(preprocessor);
|
||||||
|
#ifdef HAVE_CAFFE
|
||||||
|
this->net_.reset(new caffe::Net<float>(modelArchFilename, caffe::TEST));
|
||||||
|
CV_Assert(net_->num_inputs()==1);
|
||||||
|
CV_Assert(net_->num_outputs()==1);
|
||||||
|
CV_Assert(this->net_->input_blobs()[0]->channels()==1
|
||||||
|
||this->net_->input_blobs()[0]->channels()==3);
|
||||||
|
// this->channelCount_=this->net_->input_blobs()[0]->channels();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
this->net_->CopyTrainedLayersFrom(modelWeightsFilename);
|
||||||
|
|
||||||
|
caffe::Blob<float>* inputLayer = this->net_->input_blobs()[0];
|
||||||
|
|
||||||
|
this->inputGeometry_.height = inputLayer->height();
|
||||||
|
this->inputGeometry_.width = inputLayer->width();
|
||||||
|
this->inputChannelCount_ = inputLayer->channels();
|
||||||
|
//this->inputGeometry_.batchSize =1;
|
||||||
|
|
||||||
|
inputLayer->Reshape(this->minibatchSz_,this->inputChannelCount_,this->inputGeometry_.height, this->inputGeometry_.width);
|
||||||
|
net_->Reshape();
|
||||||
|
this->outputChannelCount_ = net_->output_blobs()[0]->channels();
|
||||||
|
//this->outputGeometry_.batchSize =1;
|
||||||
|
this->outputGeometry_.height =net_->output_blobs()[0]->height();
|
||||||
|
this->outputGeometry_.width = net_->output_blobs()[0]->width();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#else
|
||||||
|
CV_Error(Error::StsError,"Caffe not available during compilation!");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void detect(InputArray image, OutputArray Bbox_prob)
|
||||||
|
{
|
||||||
|
Size outSize = Size(this->outputGeometry_.height,outputGeometry_.width);
|
||||||
|
Bbox_prob.create(outSize,CV_32F); // dummy initialization is it needed
|
||||||
|
Mat outputMat = Bbox_prob.getMat();
|
||||||
|
process_(image.getMat(),outputMat);
|
||||||
|
//copy back to outputArray
|
||||||
|
outputMat.copyTo(Bbox_prob);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//int getOutputSize()
|
||||||
|
//{
|
||||||
|
// return this->outputSize_;
|
||||||
|
//}
|
||||||
|
Size getOutputGeometry()
|
||||||
|
{
|
||||||
|
return this->outputGeometry_;
|
||||||
|
}
|
||||||
|
Size getinputGeometry()
|
||||||
|
{
|
||||||
|
return this->inputGeometry_;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getMinibatchSize()
|
||||||
|
{
|
||||||
|
return this->minibatchSz_;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getBackend()
|
||||||
|
{
|
||||||
|
return OCR_HOLISTIC_BACKEND_CAFFE;
|
||||||
|
}
|
||||||
|
void setPreprocessor(Ptr<ImagePreprocessor> ptr)
|
||||||
|
{
|
||||||
|
CV_Assert(!ptr.empty());
|
||||||
|
preprocessor_=ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ptr<ImagePreprocessor> getPreprocessor()
|
||||||
|
{
|
||||||
|
return preprocessor_;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/** @brief Builds a DeepCNNTextDetector for the requested backend.
 *
 * @param archFilename path to the model architecture description.
 * @param weightsFilename path to the trained weights.
 * @param preprocessor preprocessor to use; when empty, a custom preprocessor
 * with the mean (104, 117, 123) is created instead.
 * @param minibatchSz batch size forwarded to the implementation.
 * @param backEnd backend selector; only OCR_HOLISTIC_BACKEND_CAFFE is implemented,
 * any other value raises an error.
 */
Ptr<DeepCNNTextDetector> DeepCNNTextDetector::create(String archFilename,String weightsFilename,Ptr<ImagePreprocessor> preprocessor,int minibatchSz,int backEnd)
{
    if(preprocessor.empty())
    {
        // Assign to the parameter itself: declaring a new local here would
        // shadow it and leave the pointer handed to the implementation empty.
        // create a custom preprocessor with rawval
        preprocessor=ImagePreprocessor::createImageCustomPreprocessor(255);
        // set the mean for the preprocessor
        Mat textbox_mean(1,3,CV_8U);
        textbox_mean.at<uchar>(0,0)=104;
        textbox_mean.at<uchar>(0,1)=117;
        textbox_mean.at<uchar>(0,2)=123;
        preprocessor->set_mean(textbox_mean);
    }
    switch(backEnd){
    case OCR_HOLISTIC_BACKEND_CAFFE:
        return Ptr<DeepCNNTextDetector>(new DeepCNNTextDetectorCaffeImpl(archFilename, weightsFilename,preprocessor, minibatchSz));
    case OCR_HOLISTIC_BACKEND_NONE:
    default:
        CV_Error(Error::StsError,"DeepCNN::create backend not implemented");
    }
    // Not reached when CV_Error throws; keeps every path returning a value.
    return Ptr<DeepCNNTextDetector>();
}
|
||||||
|
|
||||||
|
|
||||||
|
/** @brief Builds a detector with the TextBoxes preprocessing settings.
 *
 * Creates a custom preprocessor with the mean (104, 117, 123) and hands it,
 * together with a batch size of 100, to DeepCNNTextDetector::create.
 *
 * @param archFilename path to the model architecture description.
 * @param weightsFilename path to the trained weights.
 * @param backEnd backend selector; only OCR_HOLISTIC_BACKEND_CAFFE is implemented.
 */
Ptr<DeepCNNTextDetector> DeepCNNTextDetector::createTextBoxNet(String archFilename,String weightsFilename,int backEnd)
{
    // custom preprocessor with rawval
    Ptr<ImagePreprocessor> textBoxPreprocessor=ImagePreprocessor::createImageCustomPreprocessor(255);

    // per-channel mean to subtract
    const uchar meanValues[3]={104,117,123};
    Mat textbox_mean(1,3,CV_8U);
    for(int col=0;col<3;col++)
    {
        textbox_mean.at<uchar>(0,col)=meanValues[col];
    }
    textBoxPreprocessor->set_mean(textbox_mean);

    // The preprocessor is never empty here, so create() goes straight to its
    // backend dispatch, which is the same one this method needs.
    return DeepCNNTextDetector::create(archFilename, weightsFilename, textBoxPreprocessor, 100, backEnd);
}
|
||||||
|
|
||||||
|
void DeepCNNTextDetector::preprocess(const Mat& input,Mat& output)
|
||||||
|
{
|
||||||
|
Size inputHtWd = Size(this->inputGeometry_.height,this->inputGeometry_.width);
|
||||||
|
this->preprocessor_->preprocess(input,output,inputHtWd,this->inputChannelCount_);
|
||||||
|
}
|
||||||
|
|
||||||
|
//namespace cnn_config{
|
||||||
|
//namespace caffe_backend{
|
||||||
|
|
||||||
|
//#ifdef HAVE_CAFFE
|
||||||
|
|
||||||
|
//bool getCaffeGpuMode()
|
||||||
|
//{
|
||||||
|
// return caffe::Caffe::mode()==caffe::Caffe::GPU;
|
||||||
|
//}
|
||||||
|
|
||||||
|
//void setCaffeGpuMode(bool useGpu)
|
||||||
|
//{
|
||||||
|
// if(useGpu)
|
||||||
|
// {
|
||||||
|
// caffe::Caffe::set_mode(caffe::Caffe::GPU);
|
||||||
|
// }else
|
||||||
|
// {
|
||||||
|
// caffe::Caffe::set_mode(caffe::Caffe::CPU);
|
||||||
|
// }
|
||||||
|
//}
|
||||||
|
|
||||||
|
//bool getCaffeAvailable()
|
||||||
|
//{
|
||||||
|
// return true;
|
||||||
|
//}
|
||||||
|
|
||||||
|
//#else
|
||||||
|
|
||||||
|
//bool getCaffeGpuMode()
|
||||||
|
//{
|
||||||
|
// CV_Error(Error::StsError,"Caffe not available during compilation!");
|
||||||
|
// return 0;
|
||||||
|
//}
|
||||||
|
|
||||||
|
//void setCaffeGpuMode(bool useGpu)
|
||||||
|
//{
|
||||||
|
// CV_Error(Error::StsError,"Caffe not available during compilation!");
|
||||||
|
// CV_Assert(useGpu==1);//Compilation directives force
|
||||||
|
//}
|
||||||
|
|
||||||
|
//bool getCaffeAvailable(){
|
||||||
|
// return 0;
|
||||||
|
//}
|
||||||
|
|
||||||
|
//#endif
|
||||||
|
|
||||||
|
//}//namespace caffe
|
||||||
|
//}//namespace cnn_config
|
||||||
|
|
||||||
|
} } //namespace text namespace cv
|
||||||
|
|
Reference in New Issue
Block a user