mirror of https://github.com/opencv/opencv_contrib.git

Commit: Add an OpenCL (OCL) implementation and a public interface for the Softmax layer
@@ -260,6 +260,13 @@ namespace dnn
     static Ptr<PoolingLayer> create(int type = MAX, Size kernel = Size(2, 2), Size stride = Size(1, 1), Size pad = Size(0, 0));
 };
 
+class CV_EXPORTS_W SoftmaxLayer : public Layer
+{
+public:
+
+    static Ptr<SoftmaxLayer> create(int axis = 1);
+};
+
 //! @}
 //! @}
 
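Note: SoftmaxLayer::create() is now the public entry point; construction from
LayerParams happens behind the layer registry (see createSoftmaxLayerFromCaffe
below). A minimal usage sketch, assuming the module's Blob/Layer calling
convention and a hypothetical, already-filled CV_32F blob named inputBlob:

    Ptr<SoftmaxLayer> softmax = SoftmaxLayer::create(1);   // normalize along axis 1 (channels)

    std::vector<Blob*> inputs(1, &inputBlob);              // inputBlob is hypothetical
    std::vector<Blob> outputs;
    softmax->allocate(inputs, outputs);                    // sizes outputs and internal buffers
    softmax->forward(inputs, outputs);                     // outputs[0] now holds the softmax result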
@@ -77,7 +77,7 @@ void initModule()
         return;
 
     REG_RUNTIME_LAYER_CLASS(Slice, SliceLayer)
-    REG_RUNTIME_LAYER_CLASS(Softmax, SoftMaxLayer)
+    REG_STATIC_LAYER_FUNC(Softmax, createSoftmaxLayerFromCaffe)
     REG_RUNTIME_LAYER_CLASS(Split, SplitLayer)
     REG_RUNTIME_LAYER_CLASS(Reshape, ReshapeLayer)
     REG_STATIC_LAYER_FUNC(Flatten, createFlattenLayer)
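Note: Softmax switches from REG_RUNTIME_LAYER_CLASS to REG_STATIC_LAYER_FUNC, so
Caffe-imported networks now build the layer through the factory function
createSoftmaxLayerFromCaffe, which maps LayerParams onto the new public
SoftmaxLayer::create(axis) interface (defined further down).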
@@ -269,7 +269,7 @@ Ptr<PoolingLayer> PoolingLayer::create(int type, Size kernel, Size stride, Size
 Ptr<Layer> createPoolingLayerFromCaffe(LayerParams &params)
 {
     int type;
-    Size kernel, pad, stride;
+    Size kernel, stride, pad;
 
     if (params.has("pool"))
     {
@@ -42,6 +42,8 @@
 #include "../precomp.hpp"
 #include "layers_common.hpp"
 #include "softmax_layer.hpp"
+#include <opencv2/core/ocl.hpp>
+#include "modules/dnn/opencl_kernels_dnn.hpp"
 #include <algorithm>
 #include <stdlib.h>
 using std::max;
@@ -50,38 +52,111 @@ namespace cv
 {
 namespace dnn
 {
-    //TODO: set default axis number to 1, and add custom shape length in FullyConnected
-    SoftMaxLayer::SoftMaxLayer(LayerParams &params) : Layer(params)
+    SoftMaxLayerImpl::SoftMaxLayerImpl(int axis)
     {
-        //hotfix!!!
-        axis_ = params.get<int>("axis", 1);
+        axisRaw = axis;
     }
 
-    void SoftMaxLayer::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+    void SoftMaxLayerImpl::allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
     {
         CV_Assert(inputs.size() == 1);
-        axis = inputs[0]->canonicalAxis(axis_);
+        axis = inputs[0]->canonicalAxis(axisRaw);
+
+        useOpenCL = ocl::useOpenCL();
 
         BlobShape shape = inputs[0]->shape();
-        outputs.resize(1);
-        outputs[0].create(shape);
+        outerSize = shape.total(0, axis);
+        channels = shape[axis];
+        innerSize = shape.total(axis + 1);
 
+        int allocFlag = (useOpenCL) ? Blob::ALLOC_UMAT : Blob::ALLOC_MAT;
         shape[axis] = 1;
-        maxAggregator.create(shape);
+        buf.create(shape, inputs[0]->type(), allocFlag);
+
+        outputs.resize(1);
+        outputs[0].create(inputs[0]->shape(), inputs[0]->type(), allocFlag);
     }
 
-    void SoftMaxLayer::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
+    void SoftMaxLayerImpl::forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs)
     {
         Blob &src = *inputs[0];
         Blob &dst = outputs[0];
 
+        if (!useOpenCL)
+            forward_cpu(src, dst);
+        else
+        {
+            CV_Assert(forward_ocl(src, dst));
+        }
+    }
+
+#ifdef HAVE_OPENCL
+    bool SoftMaxLayerImpl::forward_ocl(Blob &src, Blob &dst)
+    {
+        const UMat &srcMat = src.umatRefConst();
+        UMat &dstMat = dst.umatRef();
+        srcMat.copyTo(dstMat);
+        UMat &bufMat = buf.umatRef();
+        CV_Assert(dstMat.offset == 0);
+
+        String buildOpts = String("-DT=") + ocl::typeToStr(src.type());
+        ocl::Kernel kmax, ksub, ksum, kdiv;
+
+        if (!kmax.create("kernel_channel_max", ocl::dnn::softmax_oclsrc, buildOpts))
+            return false;
+
+        if (!ksub.create("kernel_channel_subtract", ocl::dnn::softmax_oclsrc, buildOpts))
+            return false;
+
+        if (!ksum.create("kernel_channel_sum", ocl::dnn::softmax_oclsrc, buildOpts))
+            return false;
+
+        if (!kdiv.create("kernel_channel_div", ocl::dnn::softmax_oclsrc, buildOpts))
+            return false;
+
+        size_t wgSize = ocl::Device::getDefault().maxWorkGroupSize();
+        size_t bufSize = buf.total();
+        size_t totalSize = src.total();
+
+        kmax.args((int)outerSize, (int)channels, (int)innerSize,
+                  ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
+        if (!kmax.run(1, &bufSize, &wgSize, true))
+            return false;
+
+        ksub.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
+                  ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
+        if (!ksub.run(1, &totalSize, &wgSize, true))
+            return false;
+
+        cv::exp(dstMat, dstMat);
+
+        ksum.args((int)outerSize, (int)channels, (int)innerSize,
+                  ocl::KernelArg::PtrReadOnly(dstMat), ocl::KernelArg::PtrReadWrite(bufMat));
+        if (!ksum.run(1, &bufSize, &wgSize, true))
+            return false;
+
+        kdiv.args((int)totalSize, (int)outerSize, (int)channels, (int)innerSize,
+                  ocl::KernelArg::PtrReadOnly(bufMat), ocl::KernelArg::PtrReadWrite(dstMat));
+        if (!kdiv.run(1, &totalSize, &wgSize, true))
+            return false;
+
+        return true;
+    }
+#else
+    bool SoftMaxLayerImpl::forward_ocl(Blob&, Blob&)
+    {
+        return false;
+    }
+#endif
+
+    void SoftMaxLayerImpl::forward_cpu(Blob &src, Blob &dst)
+    {
         CV_Assert(src.type() == CV_32F);
 
         float *srcPtr = src.ptrf();
         float *dstPtr = dst.ptrf();
-        float *bufPtr = maxAggregator.ptrf();
-
-        size_t outerSize = src.total(0, axis);
-        size_t channels = src.size(axis);
-        size_t innerSize = src.total(axis + 1);
+        float *bufPtr = buf.ptrf();
 
         size_t outerStep = src.total(axis);
         size_t cnStep = src.total(axis + 1);
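The remainder of forward_cpu is truncated in this view. For reference, a
standalone sketch (not the committed code) of the same four-step computation
over the [outerSize x channels x innerSize] layout that both backends share
(channel max, subtract, exponentiate and sum, then divide):

    #include <algorithm>
    #include <cfloat>
    #include <cmath>
    #include <cstddef>

    // Reference softmax over the channel axis for data laid out as
    // [outerSize x channels x innerSize]; mirrors the four OpenCL kernels below.
    static void softmaxRef(const float *src, float *dst,
                           size_t outerSize, size_t channels, size_t innerSize)
    {
        for (size_t o = 0; o < outerSize; ++o)
        {
            const float *srcPlane = src + o * channels * innerSize;
            float *dstPlane = dst + o * channels * innerSize;
            for (size_t i = 0; i < innerSize; ++i)
            {
                float maxVal = -FLT_MAX;                       // 1) channel max (numeric stability)
                for (size_t c = 0; c < channels; ++c)
                    maxVal = std::max(maxVal, srcPlane[c * innerSize + i]);

                float sum = 0.f;                               // 2)+3) subtract max, exp, accumulate sum
                for (size_t c = 0; c < channels; ++c)
                {
                    float e = std::exp(srcPlane[c * innerSize + i] - maxVal);
                    dstPlane[c * innerSize + i] = e;
                    sum += e;
                }

                for (size_t c = 0; c < channels; ++c)          // 4) normalize so each slice sums to 1
                    dstPlane[c * innerSize + i] /= sum;
            }
        }
    }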
@@ -140,5 +215,16 @@ namespace dnn
         }
     }
 
+    Ptr<SoftmaxLayer> SoftmaxLayer::create(int axis)
+    {
+        return Ptr<SoftmaxLayer>(new SoftMaxLayerImpl(axis));
+    }
+
+    Ptr<Layer> createSoftmaxLayerFromCaffe(LayerParams &params)
+    {
+        int axis = params.get<int>("axis", 1);
+        return Ptr<Layer>(SoftmaxLayer::create(axis));
+    }
+
 }
 }
@@ -42,21 +42,33 @@
 #ifndef __OPENCV_DNN_LAYERS_SOFTMAX_LAYER_HPP__
 #define __OPENCV_DNN_LAYERS_SOFTMAX_LAYER_HPP__
 #include "../precomp.hpp"
+#include <opencv2/dnn/all_layers.hpp>
 
 namespace cv
 {
 namespace dnn
 {
-    class SoftMaxLayer : public Layer
+    class SoftMaxLayerImpl : public SoftmaxLayer
     {
-        int axis_, axis;
-        Blob maxAggregator;
+        int axis, axisRaw;
+        Blob buf;
+        bool useOpenCL;
+        size_t outerSize, channels, innerSize;
+
+        bool forward_ocl(Blob &src, Blob &dst);
+        void forward_cpu(Blob &src, Blob &dst);
 
     public:
-        SoftMaxLayer(LayerParams &params);
+        SoftMaxLayerImpl(int axis = 1);
 
         void allocate(const std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
         void forward(std::vector<Blob*> &inputs, std::vector<Blob> &outputs);
     };
 
+    Ptr<Layer> createSoftmaxLayerFromCaffe(LayerParams &params);
+
 }
 }
 #endif
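Note: the header now declares only the private SoftMaxLayerImpl; the public
SoftmaxLayer interface lives in opencv2/dnn/all_layers.hpp (first hunk above).
User code depends only on create(axis), while the buffers, axis bookkeeping,
and the OpenCL path remain implementation details.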
modules/dnn/src/opencl/softmax.cl (new file, 75 lines)
@@ -0,0 +1,75 @@
+/*************************************************************************************
+ * Copyright (c) 2015, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation and/or
+ * other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+ * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ **************************************************************************************/
+
+__kernel void kernel_channel_max(const int num, const int channels,
+    const int spatial_dim, __global const T* data, __global T* out) {
+  int index = get_global_id(0);
+  if(index < num * spatial_dim) {
+    int n = index / spatial_dim;
+    int s = index % spatial_dim;
+    T maxval = -FLT_MAX;
+    for (int c = 0; c < channels; ++c) {
+      maxval = max(data[(n * channels + c) * spatial_dim + s], maxval);
+    }
+    out[index] = maxval;
+  }
+}
+
+__kernel void kernel_channel_subtract(const int count,
+    const int num, const int channels,
+    const int spatial_dim, __global const T* channel_max, __global T* data) {
+  int index = get_global_id(0);
+  if(index < count) {
+    int n = index / channels / spatial_dim;
+    int s = index % spatial_dim;
+    data[index] -= channel_max[n * spatial_dim + s];
+  }
+}
+
+__kernel void kernel_channel_sum(const int num, const int channels,
+    const int spatial_dim, __global const T* data, __global T* channel_sum) {
+  int index = get_global_id(0);
+  if(index < num * spatial_dim) {
+    int n = index / spatial_dim;
+    int s = index % spatial_dim;
+    T sum = 0;
+    for (int c = 0; c < channels; ++c) {
+      sum += data[(n * channels + c) * spatial_dim + s];
+    }
+    channel_sum[index] = sum;
+  }
+}
+
+__kernel void kernel_channel_div(const int count,
+    const int num, const int channels,
+    const int spatial_dim, __global const T* channel_sum, __global T* data) {
+  int index = get_global_id(0);
+  if(index < count) {
+    int n = index / channels / spatial_dim;
+    int s = index % spatial_dim;
+    data[index] /= channel_sum[n * spatial_dim + s];
+  }
+}
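Note on launch geometry: kernel_channel_max and kernel_channel_sum reduce over
the channel axis, so forward_ocl runs them over bufSize = num * spatial_dim
work-items (one per (n, s) position), while kernel_channel_subtract and
kernel_channel_div are elementwise and run over totalSize = count =
num * channels * spatial_dim items. Each kernel guards with an index check
because the global size can be rounded up to a multiple of the work-group size.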
@@ -87,7 +87,7 @@ void testLayerUsingCaffeModels(String basename, bool useCaffeModel = false, bool
 
 TEST(Layer_Test_Softmax, Accuracy)
 {
-    testLayerUsingCaffeModels("layer_softmax");
+    OCL_OFF(testLayerUsingCaffeModels("layer_softmax"));
 }
+
+OCL_TEST(Layer_Test_Softmax, Accuracy)
+{