mirror of
https://github.com/mit-han-lab/tinyengine.git
synced 2025-05-10 09:28:47 +08:00
872 lines
27 KiB
Python
872 lines
27 KiB
Python
class depthwiseInplace_bitmask(object):
    """Emit C source for an in-place depthwise convolution that also records a clamp bitmask.

    ``genFile`` writes one ``.c`` file holding a driver function and a
    single-channel kernel.  The generated driver copies one channel at a time
    from the input into a padded scratch buffer (``runtime_buf``); the kernel
    convolves that channel, requantizes with a per-channel float scale, clamps
    to the activation range and stores the result back through the ``input``
    pointer.  For every output the kernel sets one bit in ``output_mask`` when
    the value was not clamped and clears it when it was.

    The attributes are plain values interpolated into the C text:
    ``kernel_h``/``kernel_w``, ``pad_h``/``pad_w``, ``stride``, ``arch`` (only
    printed in the header comment) and ``dataflow`` (``"CHW"`` or ``"CWH"``).
    ``fp_requantize`` is stored but not read by this class.
    """

    def __init__(self, kernel_h, kernel_w, pad_h, pad_w, stride, dataflow="CHW", fp_requantize=False):
        """Record the kernel geometry and the scratch-buffer layout to generate for."""
        self.pad_h = pad_h
        self.pad_w = pad_w
        self.kernel_h = kernel_h
        self.kernel_w = kernel_w
        self.stride = stride
        self.arch = "ARMv7E-M"  # by default
        self.dataflow = dataflow
        self.fp_requantize = fp_requantize

    def setArch(self, arch):
        """Override the target ISA name printed in the generated file header."""
        self.arch = arch

    def __genCode(self):
        """Return the complete C translation unit as one string.

        Raises:
            ValueError: if ``self.dataflow`` is neither ``"CHW"`` nor ``"CWH"``.
                Previously such a value silently produced a function with no
                buffer setup and no kernel.
        """
        if self.dataflow == "CHW":
            gen_buffer_init = self.__genBufferInitialization
            gen_channel_loop = self.__genInplaceKernel
            gen_kernel = self.__genKernel
        elif self.dataflow == "CWH":
            gen_buffer_init = self.__genBufferInitializationCWH
            gen_channel_loop = self.__genInplaceKernelCWH
            gen_kernel = self.__genKernelCWH
        else:
            raise ValueError("unsupported dataflow " + repr(self.dataflow) + '; expected "CHW" or "CWH"')

        return "".join(
            [
                self.__genHeader(),
                self.__genKernelDefine() + ";\n",  # forward declaration of the per-channel kernel
                self.genFuncDefine() + "\n{\n",
                gen_buffer_init(),
                gen_channel_loop(),
                self.__genEndStr(),
                "\n" + gen_kernel(),
            ]
        )

    def __getFunctionName(self):
        """Return the name of the generated driver function (also the output file stem)."""
        return (
            "depthwise_kernel"
            + str(self.kernel_h)
            + "x"
            + str(self.kernel_w)
            + "_stride"
            + str(self.stride)
            + "_inplace_"
            + self.dataflow
            + "_fpreq_bitmask"
        )

    def __getKernelName(self):
        """Return the name of the generated single-channel kernel function."""
        return (
            "depthwise_kernel"
            + str(self.kernel_h)
            + "x"
            + str(self.kernel_w)
            + "_stride"
            + str(self.stride)
            + "_inplace_kernel_"
            + self.dataflow
            + "_fpreq_bitmask"
        )

    def genFuncDefine(self):
        """Return the C signature of the driver function, without a trailing ``;`` or body."""
        return (
            "tinyengine_status "
            + self.__getFunctionName()
            + """(q7_t *input, const uint16_t input_x, const uint16_t input_y,
                const uint16_t input_ch, const q7_t *kernel, const int32_t *bias, const int32_t *biasR,
                const float *scales,
                const int32_t output_offset, const int32_t input_offset,
                const int32_t output_activation_min,
                const int32_t output_activation_max, q7_t *output, q7_t *output_mask,
                const uint16_t output_x, const uint16_t output_y,
                const uint16_t output_ch, q15_t *runtime_buf, q7_t pad_value)"""
        )

    def __genKernelDefine(self):
        """Return the C signature of the single-channel kernel, without a trailing ``;`` or body."""
        return (
            "void "
            + self.__getKernelName()
            + """(
        const uint16_t output_y, const uint16_t output_x,
        const int32_t *bias, const int32_t *biasR, const q7_t *ksrc, const float *scales,
        q7_t *output, q7_t *output_mask, const int mask_idx, const int32_t output_offset,
        const int32_t activation_min, const int32_t activation_max,
        q7_t *cols_8b_iterptr, const uint16_t column_x, int channel_offset)"""
        )

    def __genHeader(self):
        """Return the banner comment and ``#include`` lines that open the generated file."""
        return (
            """/* This file is automatically generated */
/* ----------------------------------------------------------------------
 * Project: TinyEngine
 * Description: for sparse in-place """
            + str(self.kernel_h)
            + "x"
            + str(self.kernel_w)
            + """ depth-wise convolution (HWC->CHW->HWC)
 * Target ISA:  """
            + self.arch
            + """
 * Author: wmchen@mit.edu
 * -------------------------------------------------------------------- */
#include "arm_nnsupportfunctions.h" //TODO: remove this in the future for self-contained
#include "tinyengine_function.h"\n"""
        )

    def __genPaddingSetup(self, line_len, line_pad, line_pad_name, line_cnt, edge_pad, edge_pad_name, banner):
        """Return the C prologue that declares the scratch pointers and writes the padding border.

        The scratch buffer is treated as ``line_cnt + 2 * edge_pad`` lines of
        ``line_len + 2 * line_pad`` bytes.  The emitted loops fill the leading
        and trailing ``edge_pad`` lines and both ends of every middle line with
        ``pad_value``; the interior is skipped and filled later per channel.

        Args:
            line_len: C expression for the unpadded line length.
            line_pad: padding bytes at each end of a line.
            line_pad_name: name printed for ``line_pad`` in the C comments.
            line_cnt: C expression for the number of unpadded lines.
            edge_pad: number of all-padding lines before and after the data.
            edge_pad_name: name printed for ``edge_pad`` in the C comments.
            banner: extra comment lines inserted after the declarations.
        """
        pad_store = "        *cols_8b++ = PAD8;"

        def edge_region(where):
            # One loop iteration writes `edge_pad` bytes, so the loop as a whole
            # covers `edge_pad` padded lines.
            return (
                [
                    "    //"
                    + where
                    + " region: 8bit x ("
                    + line_len
                    + " + "
                    + line_pad_name
                    + " * 2) x "
                    + edge_pad_name
                    + ": unroll by pad value",
                    "    for(i = 0; i < " + line_len + " + " + str(line_pad * 2) + "; i++){",
                ]
                + [pad_store] * edge_pad
                + ["    }", ""]
            )

        lines = [
            "",
            "    uint16_t c,i,j;",
            "    q7_t *cols_8b_start = (q7_t *)runtime_buf;",
            "    q7_t* cols_8b = (q7_t* )cols_8b_start;",
            "",
        ]
        lines += list(banner)
        lines += [
            "    //Set padding value",
            "    q7_t PAD8 = pad_value;",
            "    /* setup the padding regions for Im2col buffers */",
        ]
        lines += edge_region("top")
        lines += ["    //middle regions: left and right regions", "    for(i = 0; i < " + line_cnt + "; i++){"]
        lines += [pad_store + "//left"] * line_pad
        lines += ["        cols_8b += " + line_len + "; //skip middle"]
        lines += [pad_store + "//right"] * line_pad
        lines += ["    }", ""]
        lines += edge_region("bottom")
        lines += ["    const q7_t *src;", "    const q7_t *ksrc = kernel;", ""]
        return "\n".join(lines)

    def __genBufferInitialization(self):
        """Return the padding prologue for the CHW layout (lines run along x)."""
        return self.__genPaddingSetup("input_x", self.pad_w, "pad_w", "input_y", self.pad_h, "pad_h", [])

    def __genBufferInitializationCWH(self):
        """Return the padding prologue for the CWH layout (lines run along y)."""
        # HWC to CWH data flow
        return self.__genPaddingSetup(
            "input_y", self.pad_h, "pad_h", "input_x", self.pad_w, "pad_w", ["    //HWC to CWH data flow"]
        )

    def __genInplaceKernelCWH(self):
        """Return the per-channel C loop for the CWH layout: load, then run the kernel."""
        code = "\n    for (c = 0; c < input_ch; c++){"
        code += self.__genFixedLoadHWC2CWH()
        code += self.__assignInplaceOut(2, "input")
        code += self.__genHandle1CH_CWH(2, "input_ch")
        code += "\n    }\n"
        return code

    def __genFixedLoadHWC2CWH(self):
        """Return C code copying channel ``c`` of the input into the interior of the CWH buffer."""
        lines = [
            "",
            "        cols_8b = (q7_t*)(cols_8b_start + "
            + str(self.pad_w)
            + " * (input_y) + "
            + str(self.pad_h * self.pad_w * 2)
            + "); //skip "
            + str(self.pad_w)
            + " rows",
            "        src = input;",
            "        for(i = 0; i < input_x; i++){",
            "            cols_8b += " + str(self.pad_h) + ";//skip front",
            "            for(j = 0; j < input_y; j++){",
            "                *cols_8b++ = src[input_x * j * input_ch];// + input_offset;",
            "            }",
            "            src += input_ch;",
            "            cols_8b += " + str(self.pad_h) + ";//skip end",
            "        }",
            "",
        ]
        return "\n".join(lines)

    def __genInplaceKernel(self):
        """Return the per-channel C loop for the CHW layout: load, then run the kernel."""
        code = "\n    for (c = 0; c < input_ch; c++){"
        code += self.__genFixedLoadHWC2CHW()
        code += self.__assignInplaceOut(2, "input")
        code += self.__genHandle1CH(2, "input_ch")
        code += "\n    }\n"
        return code

    def __genFixedLoadHWC2CHW(self):
        """Return C code copying channel ``c`` of the input into the interior of the CHW buffer."""
        lines = [
            "",
            "        cols_8b = (q7_t*)(cols_8b_start + "
            + str(self.pad_h)
            + " * (input_x) + "
            + str(self.pad_h * self.pad_w * 2)
            + "); //skip "
            + str(self.pad_h)
            + " rows",
            "        src = input;",
            "        for(i = 0; i < input_y; i++){",
            "            cols_8b += " + str(self.pad_w) + ";//skip front",
            "            for(j = 0; j < input_x; j++){",
            "                *cols_8b++ = *src;// + input_offset;",
            "                src += input_ch;",
            "            }",
            "            cols_8b += " + str(self.pad_w) + ";//skip end",
            "        }",
            "",
        ]
        return "\n".join(lines)

    def __genEndStr(self):
        """Return the text closing the driver function body."""
        return "\n}"

    def __assignInplaceOut(self, pre_indent, out_str):
        """Return the C declaration that aliases ``inplace_out`` to ``out_str``."""
        return " " * pre_indent + "q7_t *inplace_out = " + out_str + ";\n"

    def __genKernelCall(self, pre_indent, out_offset_str, column_len):
        """Return the C statements that process the current channel and step to the next.

        ``column_len`` is the C expression passed as the kernel's ``column_x``
        argument, i.e. the unpadded length of one scratch-buffer line.
        """
        indent = " " * pre_indent
        # The mask pointer moves on one byte after every 8 channels; the bit
        # within that byte is passed to the kernel as `c % 8`.
        code = indent + "if (c % 8 == 0 && c > 1) output_mask++;\n"
        code += (
            indent
            + self.__getKernelName()
            + "(output_y, output_x, bias++, biasR++, ksrc, scales++, inplace_out, "
            + "output_mask, c % 8, output_offset,output_activation_min, output_activation_max,cols_8b_start, "
            + column_len
            + ", "
            + out_offset_str
            + ");\n"
        )
        code += indent + "ksrc += " + str(self.kernel_h * self.kernel_w) + ";\n"
        code += indent + "input++;\n"
        return code

    def __genHandle1CH(self, pre_indent, out_offset_str):
        """Return the kernel call for one channel in the CHW layout (lines are ``input_x`` long)."""
        return self.__genKernelCall(pre_indent, out_offset_str, "input_x")

    def __genHandle1CH_CWH(self, pre_indent, out_offset_str):
        """Return the kernel call for one channel in the CWH layout (lines are ``input_y`` long).

        This method was referenced by ``__genInplaceKernelCWH`` but missing, so
        CWH generation raised ``AttributeError``.
        """
        return self.__genKernelCall(pre_indent, out_offset_str, "input_y")

    def genFile(self, path):
        """Write the generated C source to ``<path>/<function name>.c``.

        The text is generated before the file is opened so that a generation
        error does not leave an empty file behind.
        """
        import os

        outpath = os.path.join(path, self.__getFunctionName() + ".c")
        code = self.__genCode()
        with open(outpath, "w") as outf:
            outf.write(code)

    # ********************* KERNEL ********************* #
    def __genMACStr(self, input_cnt, kernel_cnt, stride):
        """Return the two multiply-accumulate statements for one weight.

        ``sum0`` reads ``cols_8b[input_cnt]`` and ``sum1`` reads the element
        ``stride`` further on, i.e. the neighbouring output along the line.
        """
        weight = "ksrc[" + str(kernel_cnt) + "]"
        return (
            "            sum0 += cols_8b["
            + str(input_cnt)
            + "]*"
            + weight
            + ";\n"
            + "            sum1 += cols_8b["
            + str(input_cnt + stride)
            + "]*"
            + weight
            + ";\n"
        )

    def __genLeftMACStr(self, input_cnt, kernel_cnt, stride):
        """Return the single multiply-accumulate statement used for the odd leftover output.

        ``stride`` is accepted for signature parity with ``__genMACStr`` and is not used.
        """
        return "            sum += cols_8b[" + str(input_cnt) + "]*ksrc[" + str(kernel_cnt) + "];\n"

    def __genConvRows(self, line_cnt, tap_cnt, line_pad, gen_mac):
        """Return the unrolled MAC statements for a ``line_cnt`` x ``tap_cnt`` window.

        Weights are consumed in order (``ksrc[0]``, ``ksrc[1]``, ...).  Between
        window lines ``cols_8b`` is advanced by one padded buffer line.
        """
        code = ""
        weight_idx = 0
        for line in range(line_cnt):
            if line > 0:
                code += "            cols_8b += column_x + " + str(line_pad * 2) + ";\n"
            for tap in range(tap_cnt):
                code += gen_mac(tap, weight_idx, self.stride)
                weight_idx += 1
        return code

    def __genConvString(self):
        """Return the paired-output MAC statements for the CHW layout."""
        return self.__genConvRows(self.kernel_h, self.kernel_w, self.pad_w, self.__genMACStr)

    def __genConvStringCWH(self):
        """Return the paired-output MAC statements for the CWH layout."""
        return self.__genConvRows(self.kernel_w, self.kernel_h, self.pad_h, self.__genMACStr)

    def __genConvLeftStringCWH(self):
        """Return the leftover-output MAC statements for the CWH layout."""
        return self.__genConvRows(self.kernel_w, self.kernel_h, self.pad_h, self.__genLeftMACStr)

    def __genConvLeftString(self):
        """Return the leftover-output MAC statements for the CHW layout."""
        return self.__genConvRows(self.kernel_h, self.kernel_w, self.pad_w, self.__genLeftMACStr)

    def __genRequantStore(self, acc, out_idx, indent):
        """Return C code that requantizes ``acc``, clamps it, stores it and updates the mask bit.

        Args:
            acc: name of the C accumulator variable.
            out_idx: parenthesised C expression for the output position; it is
                multiplied by ``channel_offset`` for the value and additionally
                divided by 8 to select the mask byte.
            indent: number of spaces to prefix each emitted line with.
        """
        body = [
            acc + " = (float) " + acc + " * *scales;",
            acc + " += output_offset;",
            "mask_value = 1;",
            "if (" + acc + " < activation_min){",
            "    " + acc + " = activation_min;",
            "    mask_value = 0;",
            "}",
            "if (" + acc + " > activation_max){",
            "    " + acc + " = activation_max;",
            "    mask_value = 0;",
            "}",
            "output[" + out_idx + " * channel_offset] = " + acc + ";",
            "if (mask_value == 1)",
            "    BIT_SET(output_mask[" + out_idx + " * channel_offset / 8], mask_idx);",
            "else",
            "    BIT_CLEAR(output_mask[" + out_idx + " * channel_offset / 8], mask_idx);",
        ]
        prefix = " " * indent
        return "".join(prefix + line + "\n" for line in body)

    def __genKernelLoops(
        self, outer, outer_dim, inner, inner_dim, line_pad, gen_pair, gen_single, idx0, idx1, idx_last
    ):
        """Return the full definition of the single-channel kernel.

        The emitted code walks ``outer`` over ``outer_dim`` lines and, within a
        line, computes outputs two at a time (``inner`` over ``inner_dim / 2``)
        followed by a single leftover output when ``inner_dim`` is odd.

        Args:
            outer, inner: C loop variable names (``"i"`` or ``"j"``).
            outer_dim, inner_dim: C expressions for the loop bounds.
            line_pad: padding bytes at each end of a scratch-buffer line.
            gen_pair, gen_single: callables returning the unrolled MACs for the
                paired and the leftover case.
            idx0, idx1, idx_last: output position expressions for the first and
                second output of a pair and for the leftover output.
        """
        parts = [
            self.__genKernelDefine() + "\n{\n",
            "    #define STRIDE " + str(self.stride) + "\n",
            "    int i, j;\n",
            "    q7_t mask_value;\n",
            # initialize accumulators as bias
            "    /* MACs for each output */\n",
            "    for (" + outer + " = 0; " + outer + " < " + outer_dim + "; " + outer + "++) {\n",
            "        for (" + inner + " = 0; " + inner + " < " + inner_dim + " / 2; " + inner + "++) {\n",
            "            q7_t *cols_8b = cols_8b_iterptr;\n",
            "\n",
            "            q31_t sum0 = bias[0] + biasR[0];\n",
            "            q31_t sum1 = bias[0] + biasR[0];\n",
            "\n",
            "            /* computation */\n",
            gen_pair(),
            "\n",
            "            /* requantize */\n",
            self.__genRequantStore("sum0", idx0, 12),
            "\n",
            self.__genRequantStore("sum1", idx1, 12),
            "\n",
            "            cols_8b_iterptr += STRIDE * 2;\n",
            "        }\n",
            # left over for odd dimension
            "        if (" + inner_dim + " & 1) {\n",
            "            q7_t * cols_8b = cols_8b_iterptr;\n",
            "            q31_t sum = bias[0] + biasR[0];\n",
            gen_single(),
            "\n",
            self.__genRequantStore("sum", idx_last, 12),
            "\n",
            "            cols_8b_iterptr += STRIDE;\n",
            "        }\n",
        ]
        # Move the read pointer to the start of the next line to process.
        if self.stride != 1:
            parts.append("        cols_8b_iterptr += " + str(line_pad) + " * 2 - (column_x & 1);\n")
            parts.append("        cols_8b_iterptr += (STRIDE - 1) * (column_x + " + str(line_pad) + " * 2);\n")
        else:
            parts.append("        cols_8b_iterptr += " + str(line_pad) + " * 2;\n")
        # end of function
        parts.append("    }\n}\n")
        return "".join(parts)

    def __genKernel(self):
        """Return the single-channel kernel for the CHW layout (output pairs are adjacent in x)."""
        return self.__genKernelLoops(
            "i",
            "output_y",
            "j",
            "output_x",
            self.pad_w,
            self.__genConvString,
            self.__genConvLeftString,
            "(i * output_x + j * 2)",
            "(i * output_x + (j * 2 + 1))",
            "(i * output_x + output_x - 1)",
        )

    def __genKernelCWH(self):
        """Return the single-channel kernel for the CWH layout (output pairs are adjacent in y).

        Differences from the previous text of this kernel, which could never be
        generated because the CWH path failed earlier:
        ``mask_value`` is now declared, the leftover output updates a mask bit
        like the paired outputs instead of storing a whole byte, and the output
        positions follow the loop structure (row ``2 * i`` / ``2 * i + 1``,
        column ``j``) rather than reusing the CHW expressions.
        NOTE(review): weights are consumed kernel_w-major here; confirm the
        caller supplies them in that order for non-square kernels.
        """
        return self.__genKernelLoops(
            "j",
            "output_x",
            "i",
            "output_y",
            self.pad_h,
            self.__genConvStringCWH,
            self.__genConvLeftStringCWH,
            "(i * 2 * output_x + j)",
            "((i * 2 + 1) * output_x + j)",
            "((output_y - 1) * output_x + j)",
        )

    # ********************* unit test ********************* #
    def genTestUint(self, dirpath, height, width, channel):
        """Write ``<dirpath>/test_<function name>.h``, an on-device comparison and timing harness.

        The harness text declares ramp-valued inputs, all-ones kernels, calls
        ``arm_depthwise_conv_s8_opt`` and the generated function, compares the
        results and prints timings over UART.

        NOTE(review): the emitted calls to the generated function pass
        ``bias, shift, multiplier, ...`` and no ``output_mask``, which does not
        match the parameter list produced by ``genFuncDefine``; that text is
        reproduced unchanged here.
        """
        # code to test this layer
        import os

        func_name = str(self.__getFunctionName())
        # os.path.join instead of a hard-coded backslash so this also works off Windows.
        outpath = os.path.join(dirpath, "test_" + func_name + ".h")

        elem_cnt = str(width * height * channel)
        weight_cnt = str(self.kernel_h * self.kernel_w * channel)
        sbuf_len = str(
            max((width + self.pad_w * 2) * (height + self.pad_h * 2), self.kernel_h * self.kernel_w * channel * 2)
        )
        ramp = "".join(str(i % 128) + "," for i in range(width * height * channel))
        ones = "1," * (channel * self.kernel_h * self.kernel_w)
        bias_vals = "".join(str(i * 100) + "," for i in range(channel))
        # Two entries are written per step, so an odd `channel` leaves the last entry unset.
        mult_vals = "1706270591,1380135680," * int(channel / 2)
        shift_vals = "-6,-5," * int(channel / 2)
        conv_args = ",".join(
            str(v) for v in (self.kernel_w, self.kernel_h, self.pad_w, self.pad_h, self.stride, self.stride)
        )

        string = (
            """#include "stdio.h"
#include "scratch_buffer.h"
extern "C"
{
#include "kernel_buffer.h"
#include "tinyengine_function.h"
}
UART_HandleTypeDef UART;
void printLog(const char* s) {
    static bool is_initialized = false;
    if (!is_initialized) {
        UART.Instance = USART1;
        UART.Init.BaudRate = 115200;
        UART.Init.WordLength = UART_WORDLENGTH_8B;
        UART.Init.StopBits = UART_STOPBITS_1;
        UART.Init.Parity = UART_PARITY_NONE;
        UART.Init.Mode = UART_MODE_TX_RX;
        UART.Init.HwFlowCtl = UART_HWCONTROL_NONE;
        UART.Init.OverSampling = UART_OVERSAMPLING_16;
        UART.Init.OneBitSampling = UART_ONE_BIT_SAMPLE_DISABLE;
        UART.AdvancedInit.AdvFeatureInit = UART_ADVFEATURE_NO_INIT;
        if (HAL_UART_Init(&UART) != HAL_OK)
        {
            //Error handling
        }
        is_initialized = true;
    }
    HAL_UART_Transmit(&UART, (uint8_t *)s, strlen(s), 10);
}
q7_t input["""
            + elem_cnt
            + "] = {"
            + ramp
            + """};
q7_t input1["""
            + elem_cnt
            + "] = {"
            + ramp
            + """};
q7_t output1["""
            + elem_cnt
            + """];
q7_t output2["""
            + elem_cnt
            + """];
void test_"""
            + func_name
            + """()\n{
    printLog("test layer """
            + func_name
            + """'s depthwise convolution, input_y:"""
            + str(height)
            + """,input_x:"""
            + str(width)
            + """,input_ch:"""
            + str(channel)
            + """\\r\\n");
    int input_x = """
            + str(width)
            + ", input_y = "
            + str(height)
            + ", input_ch = "
            + str(channel)
            + ", output_ch = "
            + str(channel)
            + ";\n"
            + """
    int output_x = """
            + str(width // self.stride)
            + ", output_y = "
            + str(height // self.stride)
            + ";\n"
            + """
    int32_t kbuf[1];
    set_kernel_buffer((int32_t*)kbuf, 1);
    int16_t sbuf["""
            + sbuf_len
            + """];
    set_scratch_buffer((int16_t*)sbuf, """
            + sbuf_len
            + """);
    q7_t kernelchw["""
            + weight_cnt
            + "] = {"
            + ones
            + """};
    q7_t kernelhwc["""
            + weight_cnt
            + "] = {"
            + ones
            + """};
    int32_t bias["""
            + str(channel)
            + "] = {"
            + bias_vals
            + """};
    int32_t multiplier["""
            + str(channel)
            + "] = {"
            + mult_vals
            + """};
    int32_t shift["""
            + str(channel)
            + "] = {"
            + shift_vals
            + """};

    arm_depthwise_conv_s8_opt(input, input_x, input_y, input_ch, kernelhwc, output_ch, """
            + conv_args
            + """, bias, output1, shift, multiplier, output_x, output_y, -128, 0, -128, 127, 1, 1, sbuf);
    """
            + func_name
            + """(input, input_x, input_y, input_ch, kernelchw, bias, shift, multiplier, -128, 0,-128, 127,"""
            + """ output2, output_x, output_y, output_ch, sbuf, 0);
    bool correct = true;
    int i;
    for (i = 0; i < output_x * output_y * output_ch; i++){
        if(output1[i] != input[i]){
            correct = false;
            break;
        }
    }

    char buf[100];
    if (correct)
        printLog("Results match\\r\\n");
    else{
        printLog("Error results\\r\\n");
        sprintf(buf,"First inconsistency: %d\\r\\n", i);
        printLog(buf);
    }

    printLog("Test speed now\\r\\n");
    # define BATCHNUM 100
    float fmac = output_x * output_y * input_ch * """
            + str(self.kernel_h * self.kernel_w)
            + """;
    int opstart = HAL_GetTick();
    for(i = 0; i < BATCHNUM; i++){
        arm_depthwise_conv_s8_opt(input, input_x, input_y, input_ch, kernelhwc, output_ch, """
            + conv_args
            + """, bias, output1, shift, multiplier, output_x, output_y, -128, 128, -128, 127, 1, 1, sbuf);
    }
    int opend = HAL_GetTick();

    float time = opend - opstart;
    time /= BATCHNUM;//ms
    float mac_pers = fmac / time / 1000;//*1000000/1000 > /1000
    sprintf(buf,"%.2f|%.3f|", time, mac_pers);
    printLog(buf);

    opstart = HAL_GetTick();
    for(int i = 0; i < BATCHNUM; i++){
        """
            + func_name
            + """(input, input_x, input_y, input_ch, kernelchw, bias, shift, multiplier, -128, 128,-128, 127,"""
            + """ output2, output_x, output_y, output_ch, sbuf, -128);
    }
    opend = HAL_GetTick();

    time = opend - opstart;
    time /= BATCHNUM;//ms
    mac_pers = fmac / time / 1000;//*1000000/1000 > /1000
    sprintf(buf,"%.2f|%.3f|\\r\\n", time, mac_pers);
    printLog(buf);
}\n
"""
        )
        with open(outpath, "w") as outf:
            outf.write(string)
|
|
|
|
|
|
class includeFile(object):
|
|
def __init__(self, path):
|
|
self.path = path
|
|
self.defstring = ""
|
|
|
|
def addDefine(self, defstr):
|
|
self.defstring += defstr + ";\n"
|
|
|
|
def writeFile(self):
|
|
import os
|
|
|
|
outpath = os.path.join(self.path, "genInclude.h")
|
|
outf = open(outpath, "w")
|
|
outf.write(self.defstring)
|