Add OpenCL implementation and public interface for the Softmax layer
@@ -260,6 +260,13 @@ namespace dnn
         static Ptr<PoolingLayer> create(int type = MAX, Size kernel = Size(2, 2), Size stride = Size(1, 1), Size pad = Size(0, 0));
     };
 
+    class CV_EXPORTS_W SoftmaxLayer : public Layer
+    {
+    public:
+
+        static Ptr<SoftmaxLayer> create(int axis = 1);
+    };
+
 //! @}
 //! @}
 
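This hunk publishes Softmax in the public layer list with a create() factory keyed on the softmax axis. A minimal driver for the new interface might look as follows; this is an illustrative sketch (the applySoftmax wrapper and its input blob are hypothetical, not part of the commit):

    #include <opencv2/dnn/all_layers.hpp>
    #include <vector>
    using namespace cv::dnn;

    // Hypothetical driver: run the new public Softmax layer on one blob.
    void applySoftmax(Blob &input)
    {
        Ptr<SoftmaxLayer> layer = SoftmaxLayer::create(1); // normalize along axis 1 (channels)

        std::vector<Blob*> inputs(1, &input);
        std::vector<Blob> outputs;

        layer->allocate(inputs, outputs); // shapes the output and the internal buffer
        layer->forward(inputs, outputs);  // dispatches to the CPU or OpenCL path
    }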
@@ -77,7 +77,7 @@ void initModule()
         return;
 
     REG_RUNTIME_LAYER_CLASS(Slice, SliceLayer)
-    REG_RUNTIME_LAYER_CLASS(Softmax, SoftMaxLayer)
+    REG_STATIC_LAYER_FUNC(Softmax, createSoftmaxLayerFromCaffe)
     REG_RUNTIME_LAYER_CLASS(Split, SplitLayer)
     REG_RUNTIME_LAYER_CLASS(Reshape, ReshapeLayer)
     REG_STATIC_LAYER_FUNC(Flatten, createFlattenLayer)
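With this switch, "Softmax" is produced by a static factory function that parses LayerParams itself, instead of a registered class constructor. Assuming the LayerFactory registry of this era, creation by name would look roughly like this (a sketch, not code from the commit):

    // The factory registered by REG_STATIC_LAYER_FUNC receives the params
    // and forwards the parsed axis to SoftmaxLayer::create.
    LayerParams params;
    params.set("axis", 1);
    Ptr<Layer> softmax = LayerFactory::createLayerInstance("Softmax", params);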
@@ -269,7 +269,7 @@ Ptr<PoolingLayer> PoolingLayer::create(int type, Size kernel, Size stride, Size
 Ptr<Layer> createPoolingLayerFromCaffe(LayerParams &params)
 {
     int type;
-    Size kernel, pad, stride;
+    Size kernel, stride, pad;
 
     if (params.has("pool"))
     {
@@ -42,6 +42,8 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "softmax_layer.hpp"
+#include <opencv2/core/ocl.hpp>
+#include "modules/dnn/opencl_kernels_dnn.hpp"
 #include <algorithm>
 #include <stdlib.h>
 using std::max;
@@ -50,38 +52,111 @@ namespace cv
 {
 namespace dnn
 {
-    //TODO: set default axis number to 1, and add custom shape length in FullyConnected
-    SoftMaxLayer::SoftMaxLayer(LayerParams &params) : Layer(params)
+    SoftMaxLayerImpl::SoftMaxLayerImpl(int axis)
     {
-        //hotfix!!!
-        axis_ = params.get<int>("axis", 1);
+        axisRaw = axis;
     }
 
-    void SoftMaxLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+    void SoftMaxLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
     {
         CV_Assert(inputs.size() == 1);
-        axis = inputs[0]->canonicalAxis(axis_);
+        axis = inputs[0]->canonicalAxis(axisRaw);
+
+        useOpenCL = ocl::useOpenCL();
 
         BlobShape shape = inputs[0]->shape();
-        outputs.resize(1);
-        outputs[0].create(shape);
+        outerSize = shape.total(0, axis);
+        channels = shape[axis];
+        innerSize = shape.total(axis + 1);
+
+        int allocFlag = (useOpenCL) ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT;
         shape[axis] = 1;
-        maxAggregator.create(shape);
+        buf.create(shape, inputs[0]->type(), allocFlag);
+
+        outputs.resize(1);
+        outputs[0].create(inputs[0]->shape(), inputs[0]->type(), allocFlag);
     }
 
-    void SoftMaxLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+    void SoftMaxLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
     {
         Blob &src = *inputs[0];
         Blob &dst = outputs[0];
 
+        if (!useOpenCL)
+            forward_cpu(src, dst);
+        else
+        {
+            CV_Assert(forward_ocl(src, dst));
+        }
+    }
+
+#ifdef HAVE_OPENCL
+    bool SoftMaxLayerImpl::forward_ocl(Blob &src, Blob &dst)
+    {
+        const UMat &srcMat = src.umatRefConst();
+        UMat &dstMat = dst.umatRef();
+        srcMat.copyTo(dstMat);
+        UMat &bufMat = buf.umatRef();
+        CV_Assert(dstMat.offset == 0);
+
+        String buildOpts = String("-DT=") + ocl::typeToStr(src.type());
+        ocl::Kernel kmax, ksub, ksum, kdiv;
+
+        if (!kmax.create("kernel_channel_max", ocl::dnn::softmax_oclsrc, buildOpts))
+            return false;
+
+        if (!ksub.create("kernel_channel_subtract", ocl::dnn::softmax_oclsrc, buildOpts))
+            return false;
+
+        if (!ksum.create("kernel_channel_sum", ocl::dnn::softmax_oclsrc, buildOpts))
+            return false;
+
+        if (!kdiv.create("kernel_channel_div", ocl::dnn::softmax_oclsrc, buildOpts))
+            return false;
+
+        size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
+        size_t bufSize = buf.total();
+        size_t totalSize = src.total();
+
+        kmax.args((int)outerSize, (int)channels, (int)innerSize,
+                  ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
+        if (!kmax.run(1, &bufSize, &wgSize, true))
+            return false;
+
+        ksub.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
+                  ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
+        if (!ksub.run(1, &totalSize, &wgSize, true))
+            return false;
+
+        cv::exp(dstMat, dstMat);
+
+        ksum.args((int)outerSize, (int)channels, (int)innerSize,
+                  ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
+        if (!ksum.run(1, &bufSize, &wgSize, true))
+            return false;
+
+        kdiv.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
+                  ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
+        if (!kdiv.run(1, &totalSize, &wgSize, true))
+            return false;
+
+        return true;
+    }
+#else
+    bool SoftMaxLayerImpl::forward_ocl(Blob&, Blob&)
+    {
+        return false;
+    }
+#endif
+
+    void SoftMaxLayerImpl::forward_cpu(Blob &src, Blob &dst)
+    {
+        CV_Assert(src.type() == CV_32F);
+
         float *srcPtr = src.ptrf();
         float *dstPtr = dst.ptrf();
-        float *bufPtr = maxAggregator.ptrf();
+        float *bufPtr = buf.ptrf();
 
-        size_t outerSize = src.total(0, axis);
-        size_t channels = src.size(axis);
-        size_t innerSize = src.total(axis + 1);
-
         size_t outerStep = src.total(axis);
         size_t cnStep = src.total(axis + 1);
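The OpenCL path implements the standard numerically stable softmax as four kernels with cv::exp in between: per-column max, subtract, exponentiate, per-column sum, divide. Subtracting the maximum first keeps every argument to exp at or below zero, so large activations cannot overflow to infinity. A scalar reference of the same computation for a single column, mirroring the strides used by forward_cpu (an illustrative sketch, not code from the commit):

    #include <algorithm>
    #include <cfloat>
    #include <cmath>

    // One softmax column: `channels` elements spaced `innerSize` apart.
    void softmaxColumn(const float *src, float *dst, int channels, int innerSize)
    {
        float maxval = -FLT_MAX;                        // kernel_channel_max
        for (int c = 0; c < channels; ++c)
            maxval = std::max(maxval, src[c * innerSize]);

        float sum = 0.f;                                // subtract + exp + kernel_channel_sum
        for (int c = 0; c < channels; ++c)
            sum += dst[c * innerSize] = std::exp(src[c * innerSize] - maxval);

        for (int c = 0; c < channels; ++c)              // kernel_channel_div
            dst[c * innerSize] /= sum;
    }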
@@ -140,5 +215,16 @@ namespace dnn
         }
     }
 
+    Ptr<SoftmaxLayer> SoftmaxLayer::create(int axis)
+    {
+        return Ptr<SoftmaxLayer>(new SoftMaxLayerImpl(axis));
+    }
+
+    Ptr<Layer> createSoftmaxLayerFromCaffe(LayerParams &params)
+    {
+        int axis = params.get<int>("axis", 1);
+        return Ptr<Layer>(SoftmaxLayer::create(axis));
+    }
+
 }
 }
@@ -42,21 +42,33 @@
 #ifndef __OPENCV_DNN_LAYERS_SOFTMAX_LAYER_HPP__
 #define __OPENCV_DNN_LAYERS_SOFTMAX_LAYER_HPP__
 #include "../precomp.hpp"
+#include <opencv2/dnn/all_layers.hpp>
+
 namespace cv
 {
 namespace dnn
 {
-    class SoftMaxLayer : public Layer
+    class SoftMaxLayerImpl : public SoftmaxLayer
     {
-        int axis_, axis;
-        Blob maxAggregator;
+        int axis, axisRaw;
+        Blob buf;
+        bool useOpenCL;
+        size_t outerSize, channels, innerSize;
+
+        bool forward_ocl(Blob &src, Blob &dst);
+        void forward_cpu(Blob &src, Blob &dst);
 
     public:
-        SoftMaxLayer(LayerParams &params);
+        SoftMaxLayerImpl(int axis = 1);
 
         void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
        void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
     };
+
+    Ptr<Layer> createSoftmaxLayerFromCaffe(LayerParams &params);
+
 }
 }
 #endif
modules/dnn/src/opencl/softmax.cl (new file, 75 lines)
@@ -0,0 +1,75 @@
+/*************************************************************************************
+ * Copyright (c) 2015, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation and/or
+ * other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+ * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************************/
+
+__kernel void kernel_channel_max(const int num, const int channels,
+                                 const int spatial_dim, __global const T* data, __global T* out) {
+  int index = get_global_id(0);
+  if(index < num * spatial_dim) {
+    int n = index / spatial_dim;
+    int s = index % spatial_dim;
+    T maxval = -FLT_MAX;
+    for (int c = 0; c < channels; ++c) {
+      maxval = max(data[(n * channels + c) * spatial_dim + s], maxval);
+    }
+    out[index] = maxval;
+  }
+}
+
+__kernel void kernel_channel_subtract(const int count,
+                                      const int num, const int channels,
+                                      const int spatial_dim, __global const T* channel_max, __global T* data) {
+  int index = get_global_id(0);
+  if(index < count) {
+    int n = index / channels / spatial_dim;
+    int s = index % spatial_dim;
+    data[index] -= channel_max[n * spatial_dim + s];
+  }
+}
+
+__kernel void kernel_channel_sum(const int num, const int channels,
+                                 const int spatial_dim, __global const T* data, __global T* channel_sum) {
+  int index = get_global_id(0);
+  if(index < num * spatial_dim) {
+    int n = index / spatial_dim;
+    int s = index % spatial_dim;
+    T sum = 0;
+    for (int c = 0; c < channels; ++c) {
+      sum += data[(n * channels + c) * spatial_dim + s];
+    }
+    channel_sum[index] = sum;
+  }
+}
+
+__kernel void kernel_channel_div(const int count,
+                                 const int num, const int channels,
+                                 const int spatial_dim, __global const T* channel_sum, __global T* data) {
+  int index = get_global_id(0);
+  if(index < count) {
+    int n = index / channels / spatial_dim;
+    int s = index % spatial_dim;
+    data[index] /= channel_sum[n * spatial_dim + s];
+  }
+}
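All four kernels treat the tensor as (num, channels, spatial_dim) flattened row-major: the reductions (max, sum) launch one work item per (n, s) column, while the elementwise kernels (subtract, div) launch one per element and recompute which column slot to read. A host-side mirror of that index arithmetic (illustrative only):

    #include <cassert>

    // Every element (n, c, s) must map to the reduction slot for its column (n, s).
    void checkIndexMath(int num, int channels, int spatial_dim)
    {
        for (int index = 0; index < num * channels * spatial_dim; ++index)
        {
            int n = index / channels / spatial_dim; // outer (batch) index
            int s = index % spatial_dim;            // spatial position
            int slot = n * spatial_dim + s;         // where channel_max/channel_sum live
            assert(slot < num * spatial_dim);
        }
    }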
@@ -87,7 +87,7 @@ void testLayerUsingCaffeModels(String basename, bool useCaffeModel = false, bool
 
 TEST(Layer_Test_Softmax, Accuracy)
 {
-    testLayerUsingCaffeModels("layer_softmax");
+    OCL_OFF(testLayerUsingCaffeModels("layer_softmax"));
 }
 OCL_TEST(Layer_Test_Softmax, Accuracy)
 {
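This follows the usual pairing in OpenCV's test framework: the plain TEST pins the CPU path with OCL_OFF, and the OCL_TEST variant (its body is truncated in this view) would run the same checker with OpenCL enabled. A sketch of the conventional body, assuming the standard OCL_ON macro:

    OCL_TEST(Layer_Test_Softmax, Accuracy)
    {
        OCL_ON(testLayerUsingCaffeModels("layer_softmax"));
    }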