Add OpenCL implementation and public interface for the Softmax layer
@@ -260,6 +260,13 @@ namespace dnn
         static Ptr<PoolingLayer> create(int type = MAX, Size kernel = Size(2, 2), Size stride = Size(1, 1), Size pad = Size(0, 0));
     };
 
+    class CV_EXPORTS_W SoftmaxLayer : public Layer
+    {
+    public:
+
+        static Ptr<SoftmaxLayer> create(int axis = 1);
+    };
+
 //! @}
 //! @}
 
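This hunk publishes Softmax in the public layer list with a create() factory keyed on the softmax axis. A minimal driver for the new interface might look as follows; this is an illustrative sketch (the applySoftmax wrapper and its input blob are hypothetical, not part of the commit):

    #include <opencv2/dnn/all_layers.hpp>
    #include <vector>
    using namespace cv::dnn;

    // Hypothetical driver: run the new public Softmax layer on one blob.
    void applySoftmax(Blob &input)
    {
        Ptr<SoftmaxLayer> layer = SoftmaxLayer::create(1); // normalize along axis 1 (channels)

        std::vector<Blob*> inputs(1, &input);
        std::vector<Blob> outputs;

        layer->allocate(inputs, outputs); // shapes the output and the internal buffer
        layer->forward(inputs, outputs);  // dispatches to the CPU or OpenCL path
    }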
@@ -77,7 +77,7 @@ void initModule()
         return;
 
     REG_RUNTIME_LAYER_CLASS(Slice, SliceLayer)
-    REG_RUNTIME_LAYER_CLASS(Softmax, SoftMaxLayer)
+    REG_STATIC_LAYER_FUNC(Softmax, createSoftmaxLayerFromCaffe)
     REG_RUNTIME_LAYER_CLASS(Split, SplitLayer)
     REG_RUNTIME_LAYER_CLASS(Reshape, ReshapeLayer)
     REG_STATIC_LAYER_FUNC(Flatten, createFlattenLayer)
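With this switch, "Softmax" is produced by a static factory function that parses LayerParams itself, instead of a registered class constructor. Assuming the LayerFactory registry of this era, creation by name would look roughly like this (a sketch, not code from the commit):

    // The factory registered by REG_STATIC_LAYER_FUNC receives the params
    // and forwards the parsed axis to SoftmaxLayer::create.
    LayerParams params;
    params.set("axis", 1);
    Ptr<Layer> softmax = LayerFactory::createLayerInstance("Softmax", params);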
@@ -269,7 +269,7 @@ Ptr<PoolingLayer> PoolingLayer::create(int type, Size kernel, Size stride, Size
 Ptr<Layer> createPoolingLayerFromCaffe(LayerParams &params)
 {
     int type;
-    Size kernel, pad, stride;
+    Size kernel, stride, pad;
 
     if (params.has("pool"))
     {
@@ -42,6 +42,8 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "softmax_layer.hpp"
+#include <opencv2/core/ocl.hpp>
+#include "modules/dnn/opencl_kernels_dnn.hpp"
 #include <algorithm>
 #include <stdlib.h>
 using std::max;
@@ -50,38 +52,111 @@ namespace cv
 {
 namespace dnn
 {
-    //TODO: set default axis number to 1, and add custom shape length in FullyConnected
-    SoftMaxLayer::SoftMaxLayer(LayerParams &params) : Layer(params)
+    SoftMaxLayerImpl::SoftMaxLayerImpl(int axis)
     {
-        //hotfix!!!
-        axis_ = params.get<int>("axis", 1);
+        axisRaw = axis;
     }
 
-    void SoftMaxLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+    void SoftMaxLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
     {
         CV_Assert(inputs.size() == 1);
-        axis = inputs[0]->canonicalAxis(axis_);
+        axis = inputs[0]->canonicalAxis(axisRaw);
+
+        useOpenCL = ocl::useOpenCL();
 
         BlobShape shape = inputs[0]->shape();
-        outputs.resize(1);
-        outputs[0].create(shape);
+        outerSize = shape.total(0, axis);
+        channels = shape[axis];
+        innerSize = shape.total(axis + 1);
+
+        int allocFlag = (useOpenCL) ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT;
         shape[axis] = 1;
-        maxAggregator.create(shape);
+        buf.create(shape, inputs[0]->type(), allocFlag);
+
+        outputs.resize(1);
+        outputs[0].create(inputs[0]->shape(), inputs[0]->type(), allocFlag);
     }
 
-    void SoftMaxLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+    void SoftMaxLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
     {
         Blob &src = *inputs[0];
         Blob &dst = outputs[0];
 
+        if (!useOpenCL)
+            forward_cpu(src, dst);
+        else
+        {
+            CV_Assert(forward_ocl(src, dst));
+        }
+    }
+
+#ifdef HAVE_OPENCL
+    bool SoftMaxLayerImpl::forward_ocl(Blob &src, Blob &dst)
+    {
+        const UMat &srcMat = src.umatRefConst();
+        UMat &dstMat = dst.umatRef();
+        srcMat.copyTo(dstMat);
+        UMat &bufMat = buf.umatRef();
+        CV_Assert(dstMat.offset == 0);
+
+        String buildOpts = String("-DT=") + ocl::typeToStr(src.type());
+        ocl::Kernel kmax, ksub, ksum, kdiv;
+
+        if (!kmax.create("kernel_channel_max", ocl::dnn::softmax_oclsrc, buildOpts))
+            return false;
+
+        if (!ksub.create("kernel_channel_subtract", ocl::dnn::softmax_oclsrc, buildOpts))
+            return false;
+
+        if (!ksum.create("kernel_channel_sum", ocl::dnn::softmax_oclsrc, buildOpts))
+            return false;
+
+        if (!kdiv.create("kernel_channel_div", ocl::dnn::softmax_oclsrc, buildOpts))
+            return false;
+
+        size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
+        size_t bufSize = buf.total();
+        size_t totalSize = src.total();
+
+        kmax.args((int)outerSize, (int)channels, (int)innerSize,
+                  ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
+        if (!kmax.run(1, &bufSize, &wgSize, true))
+            return false;
+
+        ksub.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
+                  ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
+        if (!ksub.run(1, &totalSize, &wgSize, true))
+            return false;
+
+        cv::exp(dstMat, dstMat);
+
+        ksum.args((int)outerSize, (int)channels, (int)innerSize,
+                  ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
+        if (!ksum.run(1, &bufSize, &wgSize, true))
+            return false;
+
+        kdiv.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
+                  ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
+        if (!kdiv.run(1, &totalSize, &wgSize, true))
+            return false;
+
+        return true;
+    }
+#else
+    bool SoftMaxLayerImpl::forward_ocl(Blob&, Blob&)
+    {
+        return false;
+    }
+#endif
+
+    void SoftMaxLayerImpl::forward_cpu(Blob &src, Blob &dst)
+    {
+        CV_Assert(src.type() == CV_32F);
+
         float *srcPtr = src.ptrf();
         float *dstPtr = dst.ptrf();
-        float *bufPtr = maxAggregator.ptrf();
+        float *bufPtr = buf.ptrf();
 
-        size_t outerSize = src.total(0, axis);
-        size_t channels = src.size(axis);
-        size_t innerSize = src.total(axis + 1);
-
         size_t outerStep = src.total(axis);
         size_t cnStep = src.total(axis + 1);
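The OpenCL path implements the standard numerically stable softmax as four kernels with cv::exp in between: per-column max, subtract, exponentiate, per-column sum, divide. Subtracting the maximum first keeps every argument to exp at or below zero, so large activations cannot overflow to infinity. A scalar reference of the same computation for a single column, mirroring the strides used by forward_cpu (an illustrative sketch, not code from the commit):

    #include <algorithm>
    #include <cfloat>
    #include <cmath>

    // One softmax column: `channels` elements spaced `innerSize` apart.
    void softmaxColumn(const float *src, float *dst, int channels, int innerSize)
    {
        float maxval = -FLT_MAX;                        // kernel_channel_max
        for (int c = 0; c < channels; ++c)
            maxval = std::max(maxval, src[c * innerSize]);

        float sum = 0.f;                                // subtract + exp + kernel_channel_sum
        for (int c = 0; c < channels; ++c)
            sum += dst[c * innerSize] = std::exp(src[c * innerSize] - maxval);

        for (int c = 0; c < channels; ++c)              // kernel_channel_div
            dst[c * innerSize] /= sum;
    }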
@@ -140,5 +215,16 @@ namespace dnn
         }
     }
 
+    Ptr<SoftmaxLayer> SoftmaxLayer::create(int axis)
+    {
+        return Ptr<SoftmaxLayer>(new SoftMaxLayerImpl(axis));
+    }
+
+    Ptr<Layer> createSoftmaxLayerFromCaffe(LayerParams &params)
+    {
+        int axis = params.get<int>("axis", 1);
+        return Ptr<Layer>(SoftmaxLayer::create(axis));
+    }
+
 }
 }
@@ -42,21 +42,33 @@
 #ifndef __OPENCV_DNN_LAYERS_SOFTMAX_LAYER_HPP__
 #define __OPENCV_DNN_LAYERS_SOFTMAX_LAYER_HPP__
 #include "../precomp.hpp"
+#include <opencv2/dnn/all_layers.hpp>
+
 namespace cv
 {
 namespace dnn
 {
-    class SoftMaxLayer : public Layer
+    class SoftMaxLayerImpl : public SoftmaxLayer
     {
-        int axis_, axis;
-        Blob maxAggregator;
+        int axis, axisRaw;
+        Blob buf;
+        bool useOpenCL;
+        size_t outerSize, channels, innerSize;
+
+        bool forward_ocl(Blob &src, Blob &dst);
+        void forward_cpu(Blob &src, Blob &dst);
 
     public:
-        SoftMaxLayer(LayerParams &params);
+        SoftMaxLayerImpl(int axis = 1);
 
         void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
     };
+
+    Ptr<Layer> createSoftmaxLayerFromCaffe(LayerParams &params);
+
 }
 }
 #endif
modules/dnn/src/opencl/softmax.cl (new file, 75 lines)
@@ -0,0 +1,75 @@
+/*************************************************************************************
+ * Copyright (c) 2015, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation and/or
+ * other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+ * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************************/
+
+__kernel void kernel_channel_max(const int num, const int channels,
+                                 const int spatial_dim, __global const T* data, __global T* out) {
+  int index = get_global_id(0);
+  if(index < num * spatial_dim) {
+    int n = index / spatial_dim;
+    int s = index % spatial_dim;
+    T maxval = -FLT_MAX;
+    for (int c = 0; c < channels; ++c) {
+      maxval = max(data[(n * channels + c) * spatial_dim + s], maxval);
+    }
+    out[index] = maxval;
+  }
+}
+
+__kernel void kernel_channel_subtract(const int count,
+                                      const int num, const int channels,
+                                      const int spatial_dim, __global const T* channel_max, __global T* data) {
+  int index = get_global_id(0);
+  if(index < count) {
+    int n = index / channels / spatial_dim;
+    int s = index % spatial_dim;
+    data[index] -= channel_max[n * spatial_dim + s];
+  }
+}
+
+__kernel void kernel_channel_sum(const int num, const int channels,
+                                 const int spatial_dim, __global const T* data, __global T* channel_sum) {
+  int index = get_global_id(0);
+  if(index < num * spatial_dim) {
+    int n = index / spatial_dim;
+    int s = index % spatial_dim;
+    T sum = 0;
+    for (int c = 0; c < channels; ++c) {
+      sum += data[(n * channels + c) * spatial_dim + s];
+    }
+    channel_sum[index] = sum;
+  }
+}
+
+__kernel void kernel_channel_div(const int count,
+                                 const int num, const int channels,
+                                 const int spatial_dim, __global const T* channel_sum, __global T* data) {
+  int index = get_global_id(0);
+  if(index < count) {
+    int n = index / channels / spatial_dim;
+    int s = index % spatial_dim;
+    data[index] /= channel_sum[n * spatial_dim + s];
+  }
+}
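All four kernels treat the tensor as (num, channels, spatial_dim) flattened row-major: the reductions (max, sum) launch one work item per (n, s) column, while the elementwise kernels (subtract, div) launch one per element and recompute which column slot to read. A host-side mirror of that index arithmetic (illustrative only):

    #include <cassert>

    // Every element (n, c, s) must map to the reduction slot for its column (n, s).
    void checkIndexMath(int num, int channels, int spatial_dim)
    {
        for (int index = 0; index < num * channels * spatial_dim; ++index)
        {
            int n = index / channels / spatial_dim; // outer (batch) index
            int s = index % spatial_dim;            // spatial position
            int slot = n * spatial_dim + s;         // where channel_max/channel_sum live
            assert(slot < num * spatial_dim);
        }
    }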
@@ -87,7 +87,7 @@ void testLayerUsingCaffeModels(String basename, bool useCaffeModel = false, bool
 
 TEST(Layer_Test_Softmax, Accuracy)
 {
-    testLayerUsingCaffeModels("layer_softmax");
+    OCL_OFF(testLayerUsingCaffeModels("layer_softmax"));
 }
 OCL_TEST(Layer_Test_Softmax, Accuracy)
 {
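This follows the usual pairing in OpenCV's test framework: the plain TEST pins the CPU path with OCL_OFF, and the OCL_TEST variant (its body is truncated in this view) would run the same checker with OpenCL enabled. A sketch of the conventional body, assuming the standard OCL_ON macro:

    OCL_TEST(Layer_Test_Softmax, Accuracy)
    {
        OCL_ON(testLayerUsingCaffeModels("layer_softmax"));
    }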