mirror of https://github.com/opencv/opencv_contrib.git

Commit: Add an OpenCL (OCL) implementation and a public interface for the Softmax layer
@@ -260,6 +260,13 @@ namespace dnn
     static Ptr<PoolingLayer> create(int type = MAX, Size kernel = Size(2, 2), Size stride = Size(1, 1), Size pad = Size(0, 0));
 };
 
+class CV_EXPORTS_W SoftmaxLayer : public Layer
+{
+public:
+
+    static Ptr<SoftmaxLayer> create(int axis = 1);
+};
+
 //! @}
 //! @}
 
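Note: SoftmaxLayer::create() is now the public entry point; construction from
LayerParams happens behind the layer registry (see createSoftmaxLayerFromCaffe
below). A minimal usage sketch, assuming the module's Blob/Layer calling
convention and a hypothetical, already-filled CV_32F blob named inputBlob:

    Ptr<SoftmaxLayer> softmax = SoftmaxLayer::create(1);   // normalize along axis 1 (channels)

    std::vector<Blob*> inputs(1, &inputBlob);              // inputBlob is hypothetical
    std::vector<Blob> outputs;
    softmax->allocate(inputs, outputs);                    // sizes outputs and internal buffers
    softmax->forward(inputs, outputs);                     // outputs[0] now holds the softmax result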
@@ -77,7 +77,7 @@ void initModule()
         return;
 
     REG_RUNTIME_LAYER_CLASS(Slice, SliceLayer)
-    REG_RUNTIME_LAYER_CLASS(Softmax, SoftMaxLayer)
+    REG_STATIC_LAYER_FUNC(Softmax, createSoftmaxLayerFromCaffe)
     REG_RUNTIME_LAYER_CLASS(Split, SplitLayer)
     REG_RUNTIME_LAYER_CLASS(Reshape, ReshapeLayer)
     REG_STATIC_LAYER_FUNC(Flatten, createFlattenLayer)
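Note: Softmax switches from REG_RUNTIME_LAYER_CLASS to REG_STATIC_LAYER_FUNC, so
Caffe-imported networks now build the layer through the factory function
createSoftmaxLayerFromCaffe, which maps LayerParams onto the new public
SoftmaxLayer::create(axis) interface (defined further down).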
@@ -269,7 +269,7 @@ Ptr<PoolingLayer> PoolingLayer::create(int type, Size kernel, Size stride, Size
 Ptr<Layer> createPoolingLayerFromCaffe(LayerParams &params)
 {
     int type;
-    Size kernel, pad, stride;
+    Size kernel, stride, pad;
 
     if (params.has("pool"))
     {
@@ -42,6 +42,8 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "softmax_layer.hpp"
+#include <opencv2/core/ocl.hpp>
+#include "modules/dnn/opencl_kernels_dnn.hpp"
 #include <algorithm>
 #include <stdlib.h>
 using std::max;
@@ -50,38 +52,111 @@ namespace cv
 {
 namespace dnn
 {
-    //TODO: set default axis number to 1, and add custom shape length in FullyConnected
-    SoftMaxLayer::SoftMaxLayer(LayerParams &params) : Layer(params)
+    SoftMaxLayerImpl::SoftMaxLayerImpl(int axis)
     {
-        //hotfix!!!
-        axis_ = params.get<int>("axis", 1);
+        axisRaw = axis;
     }
 
-    void SoftMaxLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+    void SoftMaxLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
     {
         CV_Assert(inputs.size() == 1);
-        axis = inputs[0]->canonicalAxis(axis_);
+        axis = inputs[0]->canonicalAxis(axisRaw);
+
+        useOpenCL = ocl::useOpenCL();
 
         BlobShape shape = inputs[0]->shape();
-        outputs.resize(1);
-        outputs[0].create(shape);
+        outerSize = shape.total(0, axis);
+        channels = shape[axis];
+        innerSize = shape.total(axis + 1);
 
+        int allocFlag = (useOpenCL) ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT;
         shape[axis] = 1;
-        maxAggregator.create(shape);
+        buf.create(shape, inputs[0]->type(), allocFlag);
+
+        outputs.resize(1);
+        outputs[0].create(inputs[0]->shape(), inputs[0]->type(), allocFlag);
     }
 
-    void SoftMaxLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+    void SoftMaxLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
     {
         Blob &src = *inputs[0];
         Blob &dst = outputs[0];
 
+        if (!useOpenCL)
+            forward_cpu(src, dst);
+        else
+        {
+            CV_Assert(forward_ocl(src, dst));
+        }
+    }
+
+#ifdef HAVE_OPENCL
+    bool SoftMaxLayerImpl::forward_ocl(Blob &src, Blob &dst)
+    {
+        const UMat &srcMat = src.umatRefConst();
+        UMat &dstMat = dst.umatRef();
+        srcMat.copyTo(dstMat);
+        UMat &bufMat = buf.umatRef();
+        CV_Assert(dstMat.offset == 0);
+
+        String buildOpts = String("-DT=") + ocl::typeToStr(src.type());
+        ocl::Kernel kmax, ksub, ksum, kdiv;
+
+        if (!kmax.create("kernel_channel_max", ocl::dnn::softmax_oclsrc, buildOpts))
+            return false;
+
+        if (!ksub.create("kernel_channel_subtract", ocl::dnn::softmax_oclsrc, buildOpts))
+            return false;
+
+        if (!ksum.create("kernel_channel_sum", ocl::dnn::softmax_oclsrc, buildOpts))
+            return false;
+
+        if (!kdiv.create("kernel_channel_div", ocl::dnn::softmax_oclsrc, buildOpts))
+            return false;
+
+        size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
+        size_t bufSize = buf.total();
+        size_t totalSize = src.total();
+
+        kmax.args((int)outerSize, (int)channels, (int)innerSize,
+                  ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
+        if (!kmax.run(1, &bufSize, &wgSize, true))
+            return false;
+
+        ksub.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
+                  ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
+        if (!ksub.run(1, &totalSize, &wgSize, true))
+            return false;
+
+        cv::exp(dstMat, dstMat);
+
+        ksum.args((int)outerSize, (int)channels, (int)innerSize,
+                  ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
+        if (!ksum.run(1, &bufSize, &wgSize, true))
+            return false;
+
+        kdiv.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
+                  ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
+        if (!kdiv.run(1, &totalSize, &wgSize, true))
+            return false;
+
+        return true;
+    }
+#else
+    bool SoftMaxLayerImpl::forward_ocl(Blob&, Blob&)
+    {
+        return false;
+    }
+#endif
+
+    void SoftMaxLayerImpl::forward_cpu(Blob &src, Blob &dst)
+    {
         CV_Assert(src.type() == CV_32F);
 
         float *srcPtr = src.ptrf();
         float *dstPtr = dst.ptrf();
-        float *bufPtr = maxAggregator.ptrf();
-
-        size_t outerSize = src.total(0, axis);
-        size_t channels = src.size(axis);
-        size_t innerSize = src.total(axis + 1);
+        float *bufPtr = buf.ptrf();
 
         size_t outerStep = src.total(axis);
         size_t cnStep = src.total(axis + 1);
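The remainder of forward_cpu is truncated in this view. For reference, a
standalone sketch (not the committed code) of the same four-step computation
over the [outerSize x channels x innerSize] layout that both backends share
(channel max, subtract, exponentiate and sum, then divide):

    #include <algorithm>
    #include <cfloat>
    #include <cmath>
    #include <cstddef>

    // Reference softmax over the channel axis for data laid out as
    // [outerSize x channels x innerSize]; mirrors the four OpenCL kernels below.
    static void softmaxRef(const float *src, float *dst,
                           size_t outerSize, size_t channels, size_t innerSize)
    {
        for (size_t o = 0; o < outerSize; ++o)
        {
            const float *srcPlane = src + o * channels * innerSize;
            float *dstPlane = dst + o * channels * innerSize;
            for (size_t i = 0; i < innerSize; ++i)
            {
                float maxVal = -FLT_MAX;                       // 1) channel max (numeric stability)
                for (size_t c = 0; c < channels; ++c)
                    maxVal = std::max(maxVal, srcPlane[c * innerSize + i]);

                float sum = 0.f;                               // 2)+3) subtract max, exp, accumulate sum
                for (size_t c = 0; c < channels; ++c)
                {
                    float e = std::exp(srcPlane[c * innerSize + i] - maxVal);
                    dstPlane[c * innerSize + i] = e;
                    sum += e;
                }

                for (size_t c = 0; c < channels; ++c)          // 4) normalize so each slice sums to 1
                    dstPlane[c * innerSize + i] /= sum;
            }
        }
    }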
@@ -140,5 +215,16 @@ namespace dnn
         }
     }
 
+    Ptr<SoftmaxLayer> SoftmaxLayer::create(int axis)
+    {
+        return Ptr<SoftmaxLayer>(new SoftMaxLayerImpl(axis));
+    }
+
+    Ptr<Layer> createSoftmaxLayerFromCaffe(LayerParams &params)
+    {
+        int axis = params.get<int>("axis", 1);
+        return Ptr<Layer>(SoftmaxLayer::create(axis));
+    }
+
 }
 }
@@ -42,21 +42,33 @@
 #ifndef __OPENCV_DNN_LAYERS_SOFTMAX_LAYER_HPP__
 #define __OPENCV_DNN_LAYERS_SOFTMAX_LAYER_HPP__
 #include "../precomp.hpp"
+#include <opencv2/dnn/all_layers.hpp>
 
 namespace cv
 {
 namespace dnn
 {
-    class SoftMaxLayer : public Layer
+    class SoftMaxLayerImpl : public SoftmaxLayer
     {
-        int axis_, axis;
-        Blob maxAggregator;
+        int axis, axisRaw;
+        Blob buf;
+        bool useOpenCL;
+        size_t outerSize, channels, innerSize;
+
+        bool forward_ocl(Blob &src, Blob &dst);
+        void forward_cpu(Blob &src, Blob &dst);
 
     public:
-        SoftMaxLayer(LayerParams &params);
+        SoftMaxLayerImpl(int axis = 1);
 
         void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
         void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
     };
 
+    Ptr<Layer> createSoftmaxLayerFromCaffe(LayerParams &params);
+
 }
 }
 #endif
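Note: the header now declares only the private SoftMaxLayerImpl; the public
SoftmaxLayer interface lives in opencv2/dnn/all_layers.hpp (first hunk above).
User code depends only on create(axis), while the buffers, axis bookkeeping,
and the OpenCL path remain implementation details.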
modules/dnn/src/opencl/softmax.cl (new file, 75 lines)
@@ -0,0 +1,75 @@
+/*************************************************************************************
+ * Copyright (c) 2015, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation and/or
+ * other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+ * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************************/
+
+__kernel void kernel_channel_max(const int num, const int channels,
+    const int spatial_dim, __global const T* data, __global T* out) {
+  int index = get_global_id(0);
+  if(index < num * spatial_dim) {
+    int n = index / spatial_dim;
+    int s = index % spatial_dim;
+    T maxval = -FLT_MAX;
+    for (int c = 0; c < channels; ++c) {
+      maxval = max(data[(n * channels + c) * spatial_dim + s], maxval);
+    }
+    out[index] = maxval;
+  }
+}
+
+__kernel void kernel_channel_subtract(const int count,
+    const int num, const int channels,
+    const int spatial_dim, __global const T* channel_max, __global T* data) {
+  int index = get_global_id(0);
+  if(index < count) {
+    int n = index / channels / spatial_dim;
+    int s = index % spatial_dim;
+    data[index] -= channel_max[n * spatial_dim + s];
+  }
+}
+
+__kernel void kernel_channel_sum(const int num, const int channels,
+    const int spatial_dim, __global const T* data, __global T* channel_sum) {
+  int index = get_global_id(0);
+  if(index < num * spatial_dim) {
+    int n = index / spatial_dim;
+    int s = index % spatial_dim;
+    T sum = 0;
+    for (int c = 0; c < channels; ++c) {
+      sum += data[(n * channels + c) * spatial_dim + s];
+    }
+    channel_sum[index] = sum;
+  }
+}
+
+__kernel void kernel_channel_div(const int count,
+    const int num, const int channels,
+    const int spatial_dim, __global const T* channel_sum, __global T* data) {
+  int index = get_global_id(0);
+  if(index < count) {
+    int n = index / channels / spatial_dim;
+    int s = index % spatial_dim;
+    data[index] /= channel_sum[n * spatial_dim + s];
+  }
+}
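Note on launch geometry: kernel_channel_max and kernel_channel_sum reduce over
the channel axis, so forward_ocl runs them over bufSize = num * spatial_dim
work-items (one per (n, s) position), while kernel_channel_subtract and
kernel_channel_div are elementwise and run over totalSize = count =
num * channels * spatial_dim items. Each kernel guards with an index check
because the global size can be rounded up to a multiple of the work-group size.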
@@ -87,7 +87,7 @@ void testLayerUsingCaffeModels(String basename, bool useCaffeModel = false, bool
 
 TEST(Layer_Test_Softmax, Accuracy)
 {
-    testLayerUsingCaffeModels("layer_softmax");
+    OCL_OFF(testLayerUsingCaffeModels("layer_softmax"));
 }
+
+OCL_TEST(Layer_Test_Softmax, Accuracy)
+{