mirror of
https://github.com/opencv/opencv_contrib.git
synced 2025-10-20 21:40:49 +08:00
258 lines
9.9 KiB
C++
Executable File
258 lines
9.9 KiB
C++
Executable File
#include "precomp.hpp"
|
|
using namespace caffe;
|
|
|
|
namespace cv
|
|
{
|
|
namespace cnn_3dobj
|
|
{
|
|
/* Construct an extractor and select the Caffe compute device.
 *
 * device_type  must be exactly "CPU" or "GPU" (case-sensitive strcmp match).
 * device_id    device index handed to caffe::Caffe::SetDevice when "GPU" is
 *              selected; it is recorded but otherwise unused for "CPU".
 *
 * On a recognised device_type, net_set becomes true, which loadNet() requires.
 * Otherwise an error is printed to std::cout, net_set is false and deviceType
 * is left unassigned by this constructor.
 * net_ready is reset to 0 here; loadNet() later sets it to 1 or 2. */
descriptorExtractor::descriptorExtractor(const String& device_type, int device_id)
{
    net_ready = 0;
    /* Record the requested id so getDeviceId() reports a defined value even
     * when setDeviceId() is never called afterwards. */
    deviceId = device_id;

    const bool use_cpu = (strcmp(device_type.c_str(), "CPU") == 0);
    const bool use_gpu = (strcmp(device_type.c_str(), "GPU") == 0);

    if (use_cpu)
    {
        caffe::Caffe::set_mode(caffe::Caffe::CPU);
        deviceType = "CPU";
        std::cout << "Using CPU" << std::endl;
        net_set = true;
    }
    else if (use_gpu)
    {
        caffe::Caffe::set_mode(caffe::Caffe::GPU);
        caffe::Caffe::SetDevice(device_id);
        deviceType = "GPU";
        std::cout << "Using GPU" << std::endl;
        std::cout << "Using Device_id=" << device_id << std::endl;
        net_set = true;
    }
    else
    {
        std::cout << "Error: Device name must be 'GPU' together with an device number or 'CPU'." << std::endl;
        net_set = false;
    }
}
|
|
|
|
/* Return a copy of the stored device type string (deviceType), which the
 * constructor and setDeviceType() assign as "CPU" or "GPU". */
String descriptorExtractor::getDeviceType()
{
    return deviceType;
}
|
|
|
|
int descriptorExtractor::getDeviceId()
|
|
{
|
|
int device_info_out;
|
|
device_info_out = deviceId;
|
|
return device_info_out;
|
|
};
|
|
|
|
void descriptorExtractor::setDeviceType(const String& device_type)
|
|
{
|
|
if (strcmp(device_type.c_str(), "CPU") == 0 || strcmp(device_type.c_str(), "GPU") == 0)
|
|
{
|
|
if (strcmp(device_type.c_str(), "CPU") == 0)
|
|
{
|
|
caffe::Caffe::set_mode(caffe::Caffe::CPU);
|
|
deviceType = "CPU";
|
|
std::cout << "Using CPU" << std::endl;
|
|
}
|
|
else
|
|
{
|
|
caffe::Caffe::set_mode(caffe::Caffe::GPU);
|
|
deviceType = "GPU";
|
|
std::cout << "Using GPU" << std::endl;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
std::cout << "Error: Device name must be 'GPU' or 'CPU'." << std::endl;
|
|
}
|
|
};
|
|
|
|
void descriptorExtractor::setDeviceId(const int& device_id)
|
|
{
|
|
if (strcmp(deviceType.c_str(), "GPU") == 0)
|
|
{
|
|
caffe::Caffe::SetDevice(device_id);
|
|
deviceId = device_id;
|
|
std::cout << "Using GPU with Device ID = " << device_id << std::endl;
|
|
}
|
|
else
|
|
{
|
|
std::cout << "Error: Device ID only need to be set when GPU is used." << std::endl;
|
|
}
|
|
};
|
|
|
|
void descriptorExtractor::loadNet(const String& model_file, const String& trained_file, const String& mean_file)
|
|
{
|
|
if (net_set)
|
|
{
|
|
/* Load the network. */
|
|
convnet = new Net<float>(model_file, TEST);
|
|
convnet->CopyTrainedLayersFrom(trained_file);
|
|
if (convnet->num_inputs() != 1)
|
|
std::cout << "Network should have exactly one input." << std::endl;
|
|
if (convnet->num_outputs() != 1)
|
|
std::cout << "Network should have exactly one output." << std::endl;
|
|
Blob<float>* input_layer = convnet->input_blobs()[0];
|
|
num_channels = input_layer->channels();
|
|
if (num_channels != 3 && num_channels != 1)
|
|
std::cout << "Input layer should have 1 or 3 channels." << std::endl;
|
|
input_geometry = cv::Size(input_layer->width(), input_layer->height());
|
|
/* Load the binaryproto mean file. */
|
|
if (!mean_file.empty())
|
|
{
|
|
setMean(mean_file);
|
|
net_ready = 2;
|
|
}
|
|
else
|
|
{
|
|
net_ready = 1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
std::cout << "Error: Net is not set properly in advance using construtor." << std::endl;
|
|
}
|
|
};
|
|
|
|
/* Load the mean file in binaryproto format. */
|
|
void descriptorExtractor::setMean(const String& mean_file)
|
|
{
|
|
BlobProto blob_proto;
|
|
ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);
|
|
/* Convert from BlobProto to Blob<float> */
|
|
Blob<float> mean_blob;
|
|
mean_blob.FromProto(blob_proto);
|
|
if (mean_blob.channels() != num_channels)
|
|
std::cout << "Number of channels of mean file doesn't match input layer." << std::endl;
|
|
/* The format of the mean file is planar 32-bit float BGR or grayscale. */
|
|
std::vector<cv::Mat> channels;
|
|
float* data = mean_blob.mutable_cpu_data();
|
|
for (int i = 0; i < num_channels; ++i)
|
|
{
|
|
/* Extract an individual channel. */
|
|
cv::Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data);
|
|
channels.push_back(channel);
|
|
data += mean_blob.height() * mean_blob.width();
|
|
}
|
|
/* Merge the separate channels into a single image. */
|
|
cv::Mat mean;
|
|
cv::merge(channels, mean);
|
|
/* Compute the global mean pixel value and create a mean image
|
|
* filled with this value. */
|
|
cv::Scalar channel_mean = cv::mean(mean);
|
|
mean_ = cv::Mat(input_geometry, mean.type(), channel_mean);
|
|
};
|
|
|
|
/* Run the network on one image or a list of images and return a blob's values.
 *
 * inputimg      a single cv::Mat, or anything getMatVector() accepts
 *               (e.g. std::vector<cv::Mat>).
 * feature       receives a CV_32F matrix with one row per input image; each
 *               row holds the first channels() values of the named blob.
 * feature_blob  name of the network blob to read after each forward pass.
 *
 * Requires a loaded network (net_ready != 0); otherwise a message is printed
 * and feature is left untouched. If the blob name is unknown to the network,
 * an error is printed and feature is likewise left untouched. An empty image
 * list produces an empty feature. */
void descriptorExtractor::extract(InputArrayOfArrays inputimg, OutputArray feature, String feature_blob)
{
    if (!net_ready)
    {
        std::cout << "Device must be set properly using constructor and the net must be set in advance using loadNet." << std::endl;
        return;
    }

    /* Process one image per forward pass. */
    Blob<float>* input_layer = convnet->input_blobs()[0];
    input_layer->Reshape(1, num_channels,
                         input_geometry.height, input_geometry.width);
    /* Forward dimension change to all layers. */
    convnet->Reshape();

    std::vector<cv::Mat> input_channels;
    wrapInput(&input_channels);

    /* Treat a single Mat as a one-element list so both cases share one path. */
    std::vector<cv::Mat> images;
    if (inputimg.kind() == _InputArray::MAT)
        images.push_back(inputimg.getMat());
    else
        inputimg.getMatVector(images);

    cv::Mat features;
    for (size_t i = 0; i < images.size(); ++i)
    {
        preprocess(images[i], &input_channels);
        convnet->ForwardPrefilled();

        /* blob_by_name yields a null pointer for an unknown name. */
        Blob<float>* output_layer = convnet->blob_by_name(feature_blob).get();
        if (!output_layer)
        {
            std::cout << "Error: Blob '" << feature_blob << "' does not exist in the network." << std::endl;
            return;
        }

        const int dim_feature = output_layer->channels();
        if (i == 0)
            features.create(static_cast<int>(images.size()), dim_feature, CV_32FC1);

        /* Wrap the blob data read-only and copy it into row i of the result. */
        const cv::Mat blob_row(1, dim_feature, CV_32FC1,
                               const_cast<float*>(output_layer->cpu_data()));
        cv::Mat dst_row = features.row(static_cast<int>(i));
        blob_row.copyTo(dst_row);
    }
    features.copyTo(feature);
}
|
|
|
|
/* Wrap the input layer of the network in separate cv::Mat objects
|
|
* (one per channel). This way we save one memcpy operation and we
|
|
* don't need to rely on cudaMemcpy2D. The last preprocessing
|
|
* operation will write the separate channels directly to the input
|
|
* layer. */
|
|
void descriptorExtractor::wrapInput(std::vector<cv::Mat>* input_channels)
|
|
{
|
|
Blob<float>* input_layer = convnet->input_blobs()[0];
|
|
int width = input_layer->width();
|
|
int height = input_layer->height();
|
|
float* input_data = input_layer->mutable_cpu_data();
|
|
for (int i = 0; i < input_layer->channels(); ++i)
|
|
{
|
|
cv::Mat channel(height, width, CV_32FC1, input_data);
|
|
input_channels->push_back(channel);
|
|
input_data += width * height;
|
|
}
|
|
};
|
|
|
|
/* Convert one image to the network's input format and write it into the
 * input layer.
 *
 * img             source image; its channel count is adapted to num_channels
 *                 for the 3->1, 4->1, 4->3 and 1->3 cases, and passed through
 *                 unchanged otherwise (including an already-grayscale image
 *                 for a 1-channel network).
 * input_channels  per-channel cv::Mat headers produced by wrapInput().
 *
 * Steps: channel conversion, resize to input_geometry, conversion to 32-bit
 * float, mean subtraction (only when net_ready == 2, i.e. a mean file was
 * loaded), then a split into the wrapped channel planes. */
void descriptorExtractor::preprocess(const cv::Mat& img, std::vector<cv::Mat>* input_channels)
{
    /* Convert the input image to the input image format of the network. */
    const int img_channels = img.channels();
    cv::Mat sample;
    if (img_channels == 3 && num_channels == 1)
        cv::cvtColor(img, sample, COLOR_BGR2GRAY);
    else if (img_channels == 4 && num_channels == 1)
        cv::cvtColor(img, sample, COLOR_BGRA2GRAY);
    else if (img_channels == 4 && num_channels == 3)
        cv::cvtColor(img, sample, COLOR_BGRA2BGR);
    else if (img_channels == 1 && num_channels == 3)
        cv::cvtColor(img, sample, COLOR_GRAY2BGR);
    else
        sample = img;

    cv::Mat sample_resized;
    if (sample.size() != input_geometry)
        cv::resize(sample, sample_resized, input_geometry);
    else
        sample_resized = sample;

    cv::Mat sample_float;
    sample_resized.convertTo(sample_float, CV_32F);

    cv::Mat sample_normalized;
    if (net_ready == 2)
        cv::subtract(sample_float, mean_, sample_normalized);
    else
        sample_normalized = sample_float;

    /* This operation will write the separate BGR planes directly to the
     * input layer of the network because it is wrapped by the cv::Mat
     * objects in input_channels. */
    cv::split(sample_normalized, *input_channels);

    /* If split had to reallocate, the planes no longer alias the input blob. */
    if (reinterpret_cast<float*>(input_channels->at(0).data)
            != convnet->input_blobs()[0]->cpu_data())
        std::cout << "Input channels are not wrapping the input layer of the network." << std::endl;
}
|
|
} /* namespace cnn_3dobj */
|
|
} /* namespace cv */
|