mirror of https://github.com/opencv/opencv_contrib.git (synced 2025-10-19 19:44:14 +08:00)
fixed some compile warnings in dnn & protobuf; improved convolution layer performance when blas is not available by parallelizing gemmCPU() function in dnn
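Editorial note: the BLAS-free path is parallelized with OpenCV's standard ParallelLoopBody / parallel_for_ mechanism, which is the same pattern the new GEMMInvoker class in the diff below follows. For readers unfamiliar with it, here is a minimal, self-contained sketch of that pattern on a toy workload; the class and variable names are illustrative only and are not part of the commit.

#include <opencv2/core.hpp>
#include <vector>

// Toy ParallelLoopBody: scales a vector in place, one contiguous chunk per call.
class ScaleBody : public cv::ParallelLoopBody
{
public:
    ScaleBody(std::vector<float>& data, float scale) : data_(data), scale_(scale) {}

    // parallel_for_ invokes this with disjoint sub-ranges of the full index range.
    void operator()(const cv::Range& range) const
    {
        for (int i = range.start; i < range.end; i++)
            data_[i] *= scale_;
    }

private:
    std::vector<float>& data_;
    float scale_;
};

int main()
{
    std::vector<float> v(1000000, 1.f);
    ScaleBody body(v, 2.f);
    // The optional third argument is an "nstripes" hint that controls how finely
    // the range is split across worker threads; the commit derives its hint from
    // the matrix sizes so that each stripe carries a reasonable amount of work.
    cv::parallel_for_(cv::Range(0, (int)v.size()), body, 16.);
    return 0;
}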
@@ -223,8 +223,7 @@ struct PointerStringPairHash {
   }
 
   // Used only by MSVC and platforms where hash_map is not available.
-  static const size_t bucket_size = 4;
-  static const size_t min_buckets = 8;
+  enum { bucket_size = 4, min_buckets = 8 };
   inline bool operator()(const PointerStringPair& a,
                          const PointerStringPair& b) const {
     if (a.first < b.first) return true;
@@ -729,13 +729,6 @@ char *FastHex32ToBuffer(uint32 value, char* buffer) {
|
|||||||
return InternalFastHexToBuffer(value, buffer, 8);
|
return InternalFastHexToBuffer(value, buffer, 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline char* PlaceNum(char* p, int num, char prev_sep) {
|
|
||||||
*p-- = '0' + num % 10;
|
|
||||||
*p-- = '0' + num / 10;
|
|
||||||
*p-- = prev_sep;
|
|
||||||
return p;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------
|
// ----------------------------------------------------------------------
|
||||||
// FastInt32ToBufferLeft()
|
// FastInt32ToBufferLeft()
|
||||||
// FastUInt32ToBufferLeft()
|
// FastUInt32ToBufferLeft()
|
||||||
|
@@ -53,15 +53,6 @@ namespace google {
 namespace protobuf {
 namespace internal {
 
-namespace {
-
-// This function turns out to be convenient when using some macros later.
-inline int GetEnumNumber(const EnumValueDescriptor* descriptor) {
-  return descriptor->number();
-}
-
-}  // anonymous namespace
-
 // ===================================================================
 
 bool UnknownFieldSetFieldSkipper::SkipField(
@@ -140,7 +140,10 @@ int main(int argc, char **argv)
     //! [Set input blob]
 
     //! [Make forward pass]
+    double t = (double)cv::getTickCount();
     net.forward();                          //compute output
+    t = (double)cv::getTickCount() - t;
+    printf("processing time: %.1fms\n", t*1000./getTickFrequency());
     //! [Make forward pass]
 
     //! [Gather output]
@@ -44,7 +44,7 @@ Ptr<Layer> createLayerFromCaffe<DeconvolutionLayer>(LayerParams &params)
 template<>
 Ptr<Layer> createLayerFromCaffe<PoolingLayer>(LayerParams &params)
 {
-    int type;
+    int type = PoolingLayer::MAX;
     Size kernel, stride, pad;
     bool globalPooling;
 
@@ -60,10 +60,6 @@ Ptr<Layer> createLayerFromCaffe<PoolingLayer>(LayerParams &params)
         else
             CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");
     }
-    else
-    {
-        type = PoolingLayer::MAX;
-    }
 
     getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling, pad.height, pad.width, stride.height, stride.width);
     //getCaffeConvParams(params, kernel, pad, stride);
@@ -107,7 +103,7 @@ Ptr<Layer> createLayerFromCaffe<InnerProductLayer>(LayerParams &params)
 template<> //LRNLayer specialization
 Ptr<Layer> createLayerFromCaffe<LRNLayer>(LayerParams& params)
 {
-    int type;
+    int type = -1;
     String nrmType = params.get<String>("norm_region", "ACROSS_CHANNELS");
     if (nrmType == "ACROSS_CHANNELS")
         type = LRNLayer::CHANNEL_NRM;
@@ -55,7 +55,7 @@ namespace dnn
 
 ConvolutionLayerImpl::ConvolutionLayerImpl()
 {
-    tryUseOpenCL = true;
+    tryUseOpenCL = false; //true;
     numOutput = -1;
     group = -1;
 
@@ -28,8 +28,85 @@ inline void SwapRowCols(const Mat &A, int &rows, int &cols, bool isTrans)
     cols = (isTrans) ? A.rows : A.cols;
 }
 
+
+class GEMMInvoker : public ParallelLoopBody
+{
+public:
+    GEMMInvoker(const Mat* _a, const Mat* _b, double _alpha, Mat* _c, double _beta)
+    {
+        a = _a;
+        b = _b;
+        c = _c;
+        alpha = _alpha;
+        beta = _beta;
+    }
+
+    void operator()(const Range& range) const
+    {
+        int mmax = a->rows;
+        int nmax = range.end - range.start;
+        int kmax = a->cols;
+        int m, n, k;
+        AutoBuffer<float> buf(nmax);
+        float* ptr = buf;
+        if( mmax % 2 != 0 )
+            memset(ptr, 0, nmax*sizeof(ptr[0]));
+
+        for( m = 0; m < mmax; m += 2 )
+        {
+            float* dst0 = c->ptr<float>(m) + range.start;
+            float* dst1 = m+1 < mmax ? c->ptr<float>(m+1) + range.start : ptr;
+            const float* aptr0 = a->ptr<float>(m);
+            const float* aptr1 = m+1 < mmax ? a->ptr<float>(m+1) : aptr0;
+
+            if( beta != 1 )
+            {
+                if( beta == 0 )
+                    for( n = 0; n < nmax; n++ )
+                    {
+                        dst0[n] = 0.f;
+                        dst1[n] = 0.f;
+                    }
+                else
+                    for( n = 0; n < nmax; n++ )
+                    {
+                        dst0[n] *= (float)beta;
+                        dst1[n] *= (float)beta;
+                    }
+            }
+
+            for( k = 0; k < kmax; k++ )
+            {
+                float alpha0 = (float)(alpha*aptr0[k]);
+                float alpha1 = (float)(alpha*aptr1[k]);
+                const float* bptr = b->ptr<float>(k) + range.start;
+
+                for( n = 0; n < nmax; n++ )
+                {
+                    float d0 = dst0[n] + alpha0*bptr[n];
+                    float d1 = dst1[n] + alpha1*bptr[n];
+                    dst0[n] = d0;
+                    dst1[n] = d1;
+                }
+            }
+        }
+    }
+
+    const Mat *a, *b;
+    Mat* c;
+    double alpha, beta;
+};
+
 void gemmCPU(const Mat &A, const Mat &B, double alpha, Mat &C, double beta, int flags /*= 0*/)
 {
+    if( C.type() == CV_32F && flags == 0 )
+    {
+        GEMMInvoker invoker(&A, &B, alpha, &C, beta);
+        double granularity = 10000000./((double)A.rows*A.cols);
+        parallel_for_(Range(0, B.cols), invoker, granularity);
+    }
+    else
+    {
 #if HAVE_CBLAS
     bool transA = static_cast<bool>(flags & GEMM_1_T);
     bool transB = static_cast<bool>(flags & GEMM_2_T);
@@ -70,6 +147,7 @@ void gemmCPU(const Mat &A, const Mat &B, double alpha, Mat &C, double beta, int
 #else
     cv::gemm(A, B, alpha, C, beta, C, flags);
 #endif
+    }
 }
 
 int getBlasThreads()
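Design note: GEMMInvoker parallelizes C = alpha*A*B + beta*C over contiguous bands of output columns (the Range handed out by parallel_for_), processing two rows of C per iteration and redirecting the phantom second row into a scratch buffer when the row count is odd. The following standalone sketch is not part of the commit; it only mirrors the band-wise update for a single band covering all columns, and checks that this accumulation order matches OpenCV's reference cv::gemm.

#include <opencv2/core.hpp>
#include <cstdio>

int main()
{
    cv::RNG rng(12345);
    cv::Mat A(37, 53, CV_32F), B(53, 29, CV_32F), C(37, 29, CV_32F);
    rng.fill(A, cv::RNG::UNIFORM, cv::Scalar(-1), cv::Scalar(1));
    rng.fill(B, cv::RNG::UNIFORM, cv::Scalar(-1), cv::Scalar(1));
    rng.fill(C, cv::RNG::UNIFORM, cv::Scalar(-1), cv::Scalar(1));
    const double alpha = 0.5, beta = 0.25;

    cv::Mat ref;
    cv::gemm(A, B, alpha, C, beta, ref);        // reference: ref = alpha*A*B + beta*C

    // Band-wise update in the spirit of GEMMInvoker, for one band [c0, c1).
    cv::Mat out = C.clone();
    const int c0 = 0, c1 = B.cols;
    for (int m = 0; m < A.rows; m++)
    {
        float* dst = out.ptr<float>(m) + c0;
        const float* aptr = A.ptr<float>(m);
        for (int n = 0; n < c1 - c0; n++)        // scale the existing output by beta
            dst[n] *= (float)beta;
        for (int k = 0; k < A.cols; k++)         // accumulate alpha * A(m,k) * B(k, c0..c1)
        {
            float a = (float)(alpha * aptr[k]);
            const float* bptr = B.ptr<float>(k) + c0;
            for (int n = 0; n < c1 - c0; n++)
                dst[n] += a * bptr[n];
        }
    }
    std::printf("max abs diff vs cv::gemm: %g\n", cv::norm(ref, out, cv::NORM_INF));
    return 0;
}

The nstripes hint passed to parallel_for_ in the patch (10000000 divided by A.rows*A.cols) scales inversely with the cost of one output column, so the heavier each column is to compute, the fewer stripes the column range is split into.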