Merge pull request #1253 from kvmanohar22:GSoC17_dnn_objdetect

GSoC'17 Learning compact models for object detection (#1253) * Final solver and model for SqueezeNet model * update README * update dependencies and CMakeLists * add global pooling * Add training scripts * fix typo * fix dependency of caffe * fix whitespace * Add squeezedet architecture * Pascal pre process script * Adding pre process scripts * Generate the graph of the model * more readable * fix some bugs in the graph * Post process class implementation * Complete minimal post processing and standalone running * Complete the base class * remove c++11 features and fix bugs * Complete example * fix bugs * Adding final scripts * Classification scripts * Update README.md * Add example code and results * Update README.md * Re-order and fix some bugs * fix build failure * Document classes and functions * Add instructions on how to use samples * update instructionos * fix docs failure * fix conversion types * fix type conversion warning * Change examples to sample directoryu * restructure directories * add more references * fix whitespace * retain aspect ratio * Add more examples * fix docs warnings * update with links to trained weights * threshold update * png -> jpg * fix tutorial * model files * precomp.hpp , fix readme links, module dependencies * copyrights - no copyright in samples - use new style OpenCV copyright header - precomp.hpp
2025-10-20 04:25:42 +08:00 · 2018-01-29 14:38:32 +05:30
parent c0b298c5f3
commit 41a5a5eaf5
33 changed files with 5376 additions and 0 deletions
--- a/modules/dnn_objdetect/samples/obj_detect.cpp
+++ b/modules/dnn_objdetect/samples/obj_detect.cpp
@@ -0,0 +1,169 @@
+#include <opencv2/dnn.hpp>
+#include <opencv2/imgproc.hpp>
+#include <opencv2/highgui.hpp>
+
+#include <fstream>
+#include <iostream>
+#include <cstdlib>
+
+#include <opencv2/core_detect.hpp>
+
+using namespace cv;
+using namespace std;
+using namespace cv::dnn;
+using namespace cv::dnn_objdetect;
+
+/**
+ * Sample: detects objects in one image with a Caffe model loaded through
+ * cv::dnn and post-processed by InferBbox.
+ *
+ * Arguments: <model-definition-file> <model-weights-file> <test-image> [threshold]
+ * The optional threshold must lie in [0, 1]; it defaults to 0.7.
+ *
+ * Returns 0 on success, -1 on bad arguments, -2 if the model cannot be loaded,
+ * -3 if the image cannot be loaded, -4 on an OpenCV error while running or
+ * displaying, and -5 if the network outputs are missing or smaller than the
+ * hard-coded layout used below.
+ */
+int main(int argc, char **argv)
+{
+    if (argc < 4)
+    {
+        std::cerr << "Usage " << argv[0] << ": "
+                  << "<model-definition-file> "
+                  << "<model-weights-file> "
+                  << "<test-image> "
+                  << "<threshold>(optional)\n";
+        return -1;
+    }
+
+    const std::string model_prototxt = argv[1];
+    const std::string model_binary = argv[2];
+    const std::string test_input_image = argv[3];
+    double threshold = 0.7;
+
+    if (argc >= 5)
+    {
+        // strtod reports where parsing stopped, so malformed input is rejected
+        // instead of silently becoming 0 as with atof; the negated range test
+        // also rejects NaN.
+        char *end = NULL;
+        threshold = std::strtod(argv[4], &end);
+        if (end == argv[4] || *end != '\0' || !(threshold >= 0.0 && threshold <= 1.0))
+        {
+            std::cerr << "Threshold should belong to [0, 1]\n";
+            return -1;
+        }
+    }
+
+    // Load the network. A cv::Exception raised by the loader and an empty net
+    // are both reported as a load failure.
+    std::cout << "Loading the network...\n";
+    Net net;
+    try
+    {
+        net = dnn::readNetFromCaffe(model_prototxt, model_binary);
+    }
+    catch (const cv::Exception& e)
+    {
+        std::cerr << e.what() << "\n";
+    }
+    if (net.empty())
+    {
+        std::cerr << "Couldn't load the model !\n";
+        return -2;
+    }
+    std::cout << "Done loading the network !\n\n";
+
+    try
+    {
+        // Load the test image
+        Mat img = cv::imread(test_input_image);
+        if (img.empty())
+        {
+            std::cerr << "Couldn't load image: " << test_input_image << "\n";
+            return -3;
+        }
+        // Full-resolution deep copy on which the detections are drawn
+        Mat original_img = img.clone();
+
+        cv::namedWindow("Initial Image", WINDOW_AUTOSIZE);
+        cv::imshow("Initial Image", img);
+
+        // Resize to the fixed network input size and build the input blob
+        const cv::Size net_input_size(416, 416);
+        cv::resize(img, img, net_input_size);
+        img.convertTo(img, CV_32FC3);
+        Mat input_blob = blobFromImage(img, 1.0, Size(), cv::Scalar(104, 117, 123), false);
+
+        // Names of the three layers whose outputs are read
+        std::cout << "Getting the output of all the three blobs...\n";
+        std::vector<Mat> outblobs(3);
+        std::vector<cv::String> out_layers;
+        out_layers.push_back("slice");
+        out_layers.push_back("softmax");
+        out_layers.push_back("sigmoid");
+
+        // Bbox delta blob: the third output of the first layer
+        std::vector<Mat> temp_blob;
+        cv::TickMeter t;
+        net.setInput(input_blob);
+        t.start();
+        net.forward(temp_blob, out_layers[0]);
+        t.stop();
+        if (temp_blob.size() < 3)
+        {
+            std::cerr << "Layer " << out_layers[0] << " gave " << temp_blob.size()
+                      << " outputs, expected at least 3\n";
+            return -5;
+        }
+        outblobs[0] = temp_blob[2];
+
+        // class_scores blob
+        net.setInput(input_blob);
+        t.start();
+        outblobs[1] = net.forward(out_layers[1]);
+        t.stop();
+
+        // conf_scores blob
+        net.setInput(input_blob);
+        t.start();
+        outblobs[2] = net.forward(out_layers[2]);
+        t.stop();
+
+        int delta_bbox_size[3] = {23, 23, 36};
+        int class_scores_size[2] = {4761, 20};
+        int conf_scores_size[3] = {23, 23, 9};
+
+        // The Mat headers below wrap raw blob memory with fixed shapes, so a
+        // blob holding fewer elements than its shape would be read out of
+        // bounds. Stop here instead of only warning.
+        const size_t min_total[3] = {23 * 23 * 36, 4761 * 20, 23 * 23 * 9};
+        for (size_t i = 0; i < outblobs.size(); ++i)
+        {
+            if (outblobs[i].empty())
+            {
+                std::cerr << "Blob: " << i << " is empty !\n";
+                return -5;
+            }
+            if (outblobs[i].total() < min_total[i])
+            {
+                std::cerr << "Blob: " << i << " has " << outblobs[i].total()
+                          << " elements, expected at least " << min_total[i] << "\n";
+                return -5;
+            }
+        }
+
+        Mat delta_bbox(3, delta_bbox_size, CV_32F, outblobs[0].ptr<float>());
+        Mat class_scores(2, class_scores_size, CV_32F, outblobs[1].ptr<float>());
+        Mat conf_scores(3, conf_scores_size, CV_32F, outblobs[2].ptr<float>());
+
+        InferBbox inf(delta_bbox, class_scores, conf_scores);
+        inf.filter(threshold);
+
+        // The reported time is the mean over the three forward passes timed above
+        double average_time = t.getTimeSec() / t.getCounter();
+        std::cout << "\nTotal objects detected: " << inf.detections.size()
+                  << " in " << average_time << " seconds\n";
+        std::cout << "------\n";
+
+        // Box coordinates are scaled from the resized image back to the original
+        const float x_ratio = static_cast<float>(original_img.cols) / net_input_size.width;
+        const float y_ratio = static_cast<float>(original_img.rows) / net_input_size.height;
+        for (size_t i = 0; i < inf.detections.size(); ++i)
+        {
+            const int xmin = inf.detections[i].xmin;
+            const int ymin = inf.detections[i].ymin;
+            const int xmax = inf.detections[i].xmax;
+            const int ymax = inf.detections[i].ymax;
+            const cv::String class_name = inf.detections[i].label_name;
+            std::cout << "Class: " << class_name << "\n"
+                      << "Probability: " << inf.detections[i].class_prob << "\n"
+                      << "Co-ordinates: " << inf.detections[i].xmin << " "
+                      << inf.detections[i].ymin << " "
+                      << inf.detections[i].xmax << " "
+                      << inf.detections[i].ymax << "\n";
+            std::cout << "------\n";
+
+            // Draw the corresponding bounding box and its label
+            const cv::Point top_left(static_cast<int>(xmin * x_ratio),
+                                     static_cast<int>(ymin * y_ratio));
+            const cv::Point bottom_right(static_cast<int>(xmax * x_ratio),
+                                         static_cast<int>(ymax * y_ratio));
+            cv::rectangle(original_img, top_left, bottom_right, cv::Scalar(255, 0, 0), 2);
+            cv::putText(original_img, class_name, top_left,
+                        cv::FONT_HERSHEY_SIMPLEX, 0.7, cv::Scalar(255, 0, 0), 1);
+        }
+
+        cv::namedWindow("Final Detections", WINDOW_AUTOSIZE);
+        cv::imshow("Final Detections", original_img);
+        if (!cv::imwrite("image.png", original_img))
+        {
+            std::cerr << "Couldn't write image.png\n";
+        }
+        cv::waitKey(0);
+    }
+    catch (const cv::Exception& e)
+    {
+        // OpenCV signals failures with cv::Exception; a const char* handler
+        // would never match and the program would terminate instead.
+        std::cerr << e.what() << "\n";
+        return -4;
+    }
+
+    return 0;
+}