mirror of
https://github.com/opencv/opencv_contrib.git
synced 2025-10-18 08:44:11 +08:00
text: improve DL-based samples
This commit is contained in:
@@ -54,9 +54,15 @@ public:
|
||||
|
||||
@param modelArchFilename the relative or absolute path to the prototxt file describing the classifier's architecture.
|
||||
@param modelWeightsFilename the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form.
|
||||
@param detectMultiscale if true, multiple scales of the input image will be used as network input
|
||||
@param detectionSizes a list of sizes for multiscale detection. The values `[(300,300),(700,500),(700,300),(700,700),(1600,1600)]` are
|
||||
recommended in @cite LiaoSBWL17 to achieve the best quality.
|
||||
*/
|
||||
CV_WRAP static Ptr<TextDetectorCNN> create(const String& modelArchFilename, const String& modelWeightsFilename, bool detectMultiscale = false);
|
||||
static Ptr<TextDetectorCNN> create(const String& modelArchFilename, const String& modelWeightsFilename,
|
||||
std::vector<Size> detectionSizes);
|
||||
/**
|
||||
@overload
|
||||
*/
|
||||
CV_WRAP static Ptr<TextDetectorCNN> create(const String& modelArchFilename, const String& modelWeightsFilename);
|
||||
};
|
||||
|
||||
//! @}
|
||||
|
@@ -1,6 +1,7 @@
|
||||
#include <opencv2/text.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/dnn.hpp>
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
@@ -29,22 +30,27 @@ bool fileExists (const string& filename)
|
||||
return f.good();
|
||||
}
|
||||
|
||||
void textbox_draw(Mat src, vector<Rect>& groups, vector<float>& probs, float thres)
|
||||
void textbox_draw(Mat src, std::vector<Rect>& groups, std::vector<float>& probs, std::vector<int>& indexes)
|
||||
{
|
||||
for (size_t i = 0; i < groups.size(); i++)
|
||||
for (size_t i = 0; i < indexes.size(); i++)
|
||||
{
|
||||
if(probs[i] > thres)
|
||||
if (src.type() == CV_8UC3)
|
||||
{
|
||||
if (src.type() == CV_8UC3)
|
||||
{
|
||||
rectangle(src, groups[i], Scalar( 0, 255, 255 ), 2, LINE_AA);
|
||||
String label = format("%.2f", probs[i]);
|
||||
cout << "text box: " << groups[i] << " confidence: " << probs[i] << "\n";
|
||||
putText(src, label, groups.at(i).tl(), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,255 ), 1, LINE_AA);
|
||||
}
|
||||
else
|
||||
rectangle(src, groups[i], Scalar( 255 ), 3, 8 );
|
||||
Rect currrentBox = groups[indexes[i]];
|
||||
rectangle(src, currrentBox, Scalar( 0, 255, 255 ), 2, LINE_AA);
|
||||
String label = format("%.2f", probs[indexes[i]]);
|
||||
std::cout << "text box: " << currrentBox << " confidence: " << probs[indexes[i]] << "\n";
|
||||
|
||||
int baseLine = 0;
|
||||
Size labelSize = getTextSize(label, FONT_HERSHEY_PLAIN, 1, 1, &baseLine);
|
||||
int yLeftBottom = std::max(currrentBox.y, labelSize.height);
|
||||
rectangle(src, Point(currrentBox.x, yLeftBottom - labelSize.height),
|
||||
Point(currrentBox.x + labelSize.width, yLeftBottom + baseLine), Scalar( 255, 255, 255 ), FILLED);
|
||||
|
||||
putText(src, label, Point(currrentBox.x, yLeftBottom), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,0 ), 1, LINE_AA);
|
||||
}
|
||||
else
|
||||
rectangle(src, groups[i], Scalar( 255 ), 3, 8 );
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,33 +79,41 @@ int main(int argc, const char * argv[])
|
||||
|
||||
cout << "Starting Text Box Demo" << endl;
|
||||
Ptr<text::TextDetectorCNN> textSpotter =
|
||||
text::TextDetectorCNN::create(modelArch, moddelWeights, false);
|
||||
text::TextDetectorCNN::create(modelArch, moddelWeights);
|
||||
|
||||
vector<Rect> bbox;
|
||||
vector<float> outProbabillities;
|
||||
textSpotter->detect(image, bbox, outProbabillities);
|
||||
std::vector<int> indexes;
|
||||
cv::dnn::NMSBoxes(bbox, outProbabillities, 0.4f, 0.5f, indexes);
|
||||
|
||||
float prob_threshold = 0.6f;
|
||||
Mat image_copy = image.clone();
|
||||
textbox_draw(image_copy, bbox, outProbabillities, prob_threshold);
|
||||
textbox_draw(image_copy, bbox, outProbabillities, indexes);
|
||||
imshow("Text detection", image_copy);
|
||||
image_copy = image.clone();
|
||||
|
||||
Ptr<text::OCRHolisticWordRecognizer> wordSpotter =
|
||||
text::OCRHolisticWordRecognizer::create("dictnet_vgg_deploy.prototxt", "dictnet_vgg.caffemodel", "dictnet_vgg_labels.txt");
|
||||
|
||||
for(size_t i = 0; i < bbox.size(); i++)
|
||||
for(size_t i = 0; i < indexes.size(); i++)
|
||||
{
|
||||
if(outProbabillities[i] > prob_threshold)
|
||||
{
|
||||
Mat wordImg;
|
||||
cvtColor(image(bbox[i]), wordImg, COLOR_BGR2GRAY);
|
||||
string word;
|
||||
vector<float> confs;
|
||||
wordSpotter->run(wordImg, word, NULL, NULL, &confs);
|
||||
rectangle(image_copy, bbox[i], Scalar(0, 255, 255), 1, LINE_AA);
|
||||
putText(image_copy, word, bbox[i].tl(), FONT_HERSHEY_PLAIN, 1, Scalar(0, 0, 255), 1, LINE_AA);
|
||||
}
|
||||
Mat wordImg;
|
||||
cvtColor(image(bbox[indexes[i]]), wordImg, COLOR_BGR2GRAY);
|
||||
string word;
|
||||
vector<float> confs;
|
||||
wordSpotter->run(wordImg, word, NULL, NULL, &confs);
|
||||
|
||||
Rect currrentBox = bbox[indexes[i]];
|
||||
rectangle(image_copy, currrentBox, Scalar( 0, 255, 255 ), 2, LINE_AA);
|
||||
|
||||
int baseLine = 0;
|
||||
Size labelSize = getTextSize(word, FONT_HERSHEY_PLAIN, 1, 1, &baseLine);
|
||||
int yLeftBottom = std::max(currrentBox.y, labelSize.height);
|
||||
rectangle(image_copy, Point(currrentBox.x, yLeftBottom - labelSize.height),
|
||||
Point(currrentBox.x + labelSize.width, yLeftBottom + baseLine), Scalar( 255, 255, 255 ), FILLED);
|
||||
|
||||
putText(image_copy, word, Point(currrentBox.x, yLeftBottom), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,0 ), 1, LINE_AA);
|
||||
|
||||
}
|
||||
imshow("Text recognition", image_copy);
|
||||
cout << "Recognition finished. Press any key to exit.\n";
|
||||
|
@@ -1,6 +1,7 @@
|
||||
#include <opencv2/text.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <opencv2/imgproc.hpp>
|
||||
#include <opencv2/dnn.hpp>
|
||||
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
@@ -27,22 +28,27 @@ bool fileExists (const std::string& filename)
|
||||
return f.good();
|
||||
}
|
||||
|
||||
void textbox_draw(Mat src, std::vector<Rect>& groups, std::vector<float>& probs, float thres)
|
||||
void textbox_draw(Mat src, std::vector<Rect>& groups, std::vector<float>& probs, std::vector<int>& indexes)
|
||||
{
|
||||
for (size_t i = 0; i < groups.size(); i++)
|
||||
for (size_t i = 0; i < indexes.size(); i++)
|
||||
{
|
||||
if(probs[i] > thres)
|
||||
if (src.type() == CV_8UC3)
|
||||
{
|
||||
if (src.type() == CV_8UC3)
|
||||
{
|
||||
rectangle(src, groups[i], Scalar( 0, 255, 255 ), 2, LINE_AA);
|
||||
String label = format("%.2f", probs[i]);
|
||||
std::cout << "text box: " << groups[i] << " confidence: " << probs[i] << "\n";
|
||||
putText(src, label, groups.at(i).tl(), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,255 ), 1, LINE_AA);
|
||||
}
|
||||
else
|
||||
rectangle(src, groups[i], Scalar( 255 ), 3, 8 );
|
||||
Rect currrentBox = groups[indexes[i]];
|
||||
rectangle(src, currrentBox, Scalar( 0, 255, 255 ), 2, LINE_AA);
|
||||
String label = format("%.2f", probs[indexes[i]]);
|
||||
std::cout << "text box: " << currrentBox << " confidence: " << probs[indexes[i]] << "\n";
|
||||
|
||||
int baseLine = 0;
|
||||
Size labelSize = getTextSize(label, FONT_HERSHEY_PLAIN, 1, 1, &baseLine);
|
||||
int yLeftBottom = std::max(currrentBox.y, labelSize.height);
|
||||
rectangle(src, Point(currrentBox.x, yLeftBottom - labelSize.height),
|
||||
Point(currrentBox.x + labelSize.width, yLeftBottom + baseLine), Scalar( 255, 255, 255 ), FILLED);
|
||||
|
||||
putText(src, label, Point(currrentBox.x, yLeftBottom), FONT_HERSHEY_PLAIN, 1, Scalar( 0,0,0 ), 1, LINE_AA);
|
||||
}
|
||||
else
|
||||
rectangle(src, groups[i], Scalar( 255 ), 3, 8 );
|
||||
}
|
||||
}
|
||||
|
||||
@@ -62,7 +68,7 @@ int main(int argc, const char * argv[])
|
||||
|
||||
if (!fileExists(modelArch) || !fileExists(moddelWeights))
|
||||
{
|
||||
std::cout<<getHelpStr(argv[0]);
|
||||
std::cout << getHelpStr(argv[0]);
|
||||
std::cout << "Model files not found in the current directory. Aborting!" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
@@ -71,13 +77,16 @@ int main(int argc, const char * argv[])
|
||||
|
||||
std::cout << "Starting Text Box Demo" << std::endl;
|
||||
Ptr<text::TextDetectorCNN> textSpotter =
|
||||
text::TextDetectorCNN::create(modelArch, moddelWeights, false);
|
||||
text::TextDetectorCNN::create(modelArch, moddelWeights);
|
||||
|
||||
std::vector<Rect> bbox;
|
||||
std::vector<float> outProbabillities;
|
||||
textSpotter->detect(image, bbox, outProbabillities);
|
||||
|
||||
textbox_draw(image, bbox, outProbabillities, 0.5f);
|
||||
std::vector<int> indexes;
|
||||
cv::dnn::NMSBoxes(bbox, outProbabillities, 0.3f, 0.4f, indexes);
|
||||
|
||||
textbox_draw(image, bbox, outProbabillities, indexes);
|
||||
|
||||
imshow("TextBox Demo",image);
|
||||
std::cout << "Done!" << std::endl << std::endl;
|
||||
|
@@ -23,8 +23,6 @@ protected:
|
||||
Net net_;
|
||||
std::vector<Size> sizes_;
|
||||
int inputChannelCount_;
|
||||
bool detectMultiscale_;
|
||||
|
||||
|
||||
void getOutputs(const float* buffer,int nbrTextBoxes,int nCol,
|
||||
std::vector<Rect>& Bbox, std::vector<float>& confidence, Size inputShape)
|
||||
@@ -54,21 +52,12 @@ protected:
|
||||
}
|
||||
|
||||
public:
|
||||
TextDetectorCNNImpl(const String& modelArchFilename, const String& modelWeightsFilename, bool detectMultiscale) :
|
||||
detectMultiscale_(detectMultiscale)
|
||||
TextDetectorCNNImpl(const String& modelArchFilename, const String& modelWeightsFilename, std::vector<Size> detectionSizes) :
|
||||
sizes_(detectionSizes)
|
||||
{
|
||||
net_ = readNetFromCaffe(modelArchFilename, modelWeightsFilename);
|
||||
CV_Assert(!net_.empty());
|
||||
inputChannelCount_ = 3;
|
||||
sizes_.push_back(Size(700, 700));
|
||||
|
||||
if(detectMultiscale_)
|
||||
{
|
||||
sizes_.push_back(Size(300, 300));
|
||||
sizes_.push_back(Size(700,500));
|
||||
sizes_.push_back(Size(700,300));
|
||||
sizes_.push_back(Size(1600,1600));
|
||||
}
|
||||
}
|
||||
|
||||
void detect(InputArray inputImage_, std::vector<Rect>& Bbox, std::vector<float>& confidence)
|
||||
@@ -92,9 +81,14 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
Ptr<TextDetectorCNN> TextDetectorCNN::create(const String &modelArchFilename, const String &modelWeightsFilename, bool detectMultiscale)
|
||||
Ptr<TextDetectorCNN> TextDetectorCNN::create(const String &modelArchFilename, const String &modelWeightsFilename, std::vector<Size> detectionSizes)
|
||||
{
|
||||
return makePtr<TextDetectorCNNImpl>(modelArchFilename, modelWeightsFilename, detectMultiscale);
|
||||
return makePtr<TextDetectorCNNImpl>(modelArchFilename, modelWeightsFilename, detectionSizes);
|
||||
}
|
||||
|
||||
// Two-argument overload: forwards to the create() overload that takes a list of
// detection sizes, passing a one-element list containing Size(300, 300).
Ptr<TextDetectorCNN> TextDetectorCNN::create(const String &modelArchFilename, const String &modelWeightsFilename)
|
||||
{
|
||||
return create(modelArchFilename, modelWeightsFilename, std::vector<Size>(1, Size(300, 300)));
|
||||
}
|
||||
} //namespace text
|
||||
} //namespace cv
|
||||
|
Reference in New Issue
Block a user