mirror of
https://github.com/mit-han-lab/tinyengine.git
synced 2025-05-10 01:18:47 +08:00
668 lines
24 KiB
Python
668 lines
24 KiB
Python
# ----------------------------------------------------------------------
|
|
# Project: TinyEngine
|
|
# Title: CodeGenerator.py
|
|
#
|
|
# Reference papers:
|
|
# - MCUNet: Tiny Deep Learning on IoT Device, NeurIPS 2020
|
|
# - MCUNetV2: Memory-Efficient Patch-based Inference for Tiny Deep Learning, NeurIPS 2021
|
|
# - MCUNetV3: On-Device Training Under 256KB Memory, arXiv:2206.15472
|
|
# Contact authors:
|
|
# - Wei-Ming Chen, wmchen@mit.edu
|
|
# - Wei-Chen Wang, wweichen@mit.edu
|
|
# - Ji Lin, jilin@mit.edu
|
|
# - Ligeng Zhu, ligeng@mit.edu
|
|
# - Song Han, songhan@mit.edu
|
|
#
|
|
# Target ISA: ARMv7E-M
|
|
# ----------------------------------------------------------------------
|
|
|
|
import os
|
|
|
|
from .OpGenerator import OpGenerator
|
|
|
|
# Output locations for the generated C sources and headers.
Codegen_root = "./codegen/"
include_path = Codegen_root + "Include/"
source_path = Codegen_root + "Source/"

# Code-generation switches.
# NOTE(review): "switsh" is a historical typo; the name is kept because it is
# referenced by the generator methods below. Presumably selects hard-swish
# activation variants in the emitted conv code — confirm in OpGenerator.
use_hard_switsh = False
gen_kernels = True  # also generate the operator kernel sources via OpGenerator
use_aggressive_unroll = True  # emit aggressively-unrolled conv inner loops
|
|
|
|
|
class CodeGenerator:
    """Provide utilities to generate C code for a given model and memory schedule."""

    # Number of trainable-parameter groups parsed so far; used as the numeric
    # suffix of generated C identifiers (weight0, bias1, scales2, ...).
    parse_count = 0
    # File handles for the generated header/source; opened in __init__.
    header_handle = None
    source_handle = None
|
|
|
|
def __init__(
|
|
self,
|
|
memsche,
|
|
inplace,
|
|
precision=8,
|
|
unsigned_input=False,
|
|
patch_params=None,
|
|
FP_output=False,
|
|
profile_mode=False,
|
|
fp_requantize=False,
|
|
tflite_op=False,
|
|
dummy_address=False,
|
|
outputTables=None,
|
|
detectionUtils=None,
|
|
):
|
|
self.MemSche = memsche
|
|
|
|
# Check if path exists, create it if not
|
|
if not os.path.exists(include_path):
|
|
os.makedirs(include_path)
|
|
if not os.path.exists(source_path):
|
|
os.makedirs(source_path)
|
|
|
|
self.header_handle = open(include_path + "genModel.h", "w")
|
|
self.source_handle = open(source_path + "genModel.c", "w")
|
|
self.inplace = inplace
|
|
self.BIT = precision
|
|
self.unsigned_input = unsigned_input
|
|
self.patch_params = patch_params
|
|
self.FP_output = FP_output
|
|
self.profile_mode = profile_mode
|
|
self.fp_requantize = fp_requantize
|
|
self.tflite_op = tflite_op
|
|
self.dummy_address = dummy_address
|
|
self.trainSRAMTable = []
|
|
self.outputTables = outputTables
|
|
self.detectionUtils = detectionUtils
|
|
|
|
def _readOnly(self, name):
|
|
if self.outputTables is None or name is None:
|
|
return True
|
|
else:
|
|
for o in self.outputTables:
|
|
if o.name in name:
|
|
return False
|
|
return True
|
|
|
|
    def codeGeneration(self):
        """Drive the full generation pipeline, then close the output files.

        The call order matters: buffers and trainable tables go into the
        header before the source file's includes and functions are emitted.
        """
        # SRAM activation buffer layout (written to genModel.h).
        self._genMemBuffer()

        # parse trainable parameters & assign the corresponding buffers for layers
        self._parseTrainable()

        # #include preamble and shared globals for genModel.c.
        self._includeHeaders()

        # Detection post-processing code, if a detection helper is configured.
        self._genDetprocessing()

        # Patch-based (MCUNetV2) end2endinference/invoke_1patch entry points.
        self._genPatchInference()

        # The main per-layer invoke() function.
        self._genInvoke()

        self._closefp()

        # Generate operator kernels as separate source files.
        if gen_kernels:
            op_gen = OpGenerator(include_path, source_path, self.MemSche.layer, self.fp_requantize)
            op_gen.genOpcode()
|
|
|
|
def _genDetprocessing(self):
|
|
if self.detectionUtils is not None:
|
|
fp = self.source_handle
|
|
fp.write(self.detectionUtils.genPostProcessing())
|
|
|
|
def _genOpstr(self, op, *args):
|
|
if self.profile_mode:
|
|
if len(args) > 0:
|
|
return op.generate_profiling_str(*args)
|
|
else:
|
|
return op.generate_profiling_str()
|
|
else:
|
|
if len(args) > 0:
|
|
return op.generate_inference_str(*args)
|
|
else:
|
|
return op.generate_inference_str()
|
|
|
|
    def _genPatchInference(self):
        """Emit patch-based inference code (MCUNetV2) when the first layer is
        marked `is_patch`; otherwise emit a trivial end2endinference() that
        just calls invoke().

        The patch path writes an end2endinference() that splits the input
        image into n_patch x n_patch overlapping patches, copies each patch
        into buffer0, runs invoke_1patch() per patch, concatenates per-patch
        outputs into buffer1, then runs the remaining network via invoke().
        The layers consumed here are removed from the schedule so the later
        invoke() generation only sees the non-patch stage.
        """
        schedule = self.MemSche
        layer_info = schedule.layer[0].get_layer_info()
        if "is_patch" in layer_info and layer_info["is_patch"]:
            fp = self.source_handle
            string = ""
            first_height = layer_info["input_h"]
            first_width = layer_info["input_w"]
            # Full image width reconstructed from per-patch width and padding.
            img_w = (first_width - self.patch_params["pad_l"] - self.patch_params["pad_r"]) * self.patch_params[
                "n_patch"
            ]
            # by default, we go three stride 2 conv in the patch-based inference
            patch_out_w = int((first_width - self.patch_params["pad_l"]) / 8)
            # by default, we go three stride 2 conv in the patch-based inference
            # NOTE(review): the vertical size also subtracts pad_l (not a top
            # padding) — presumably padding is symmetric; confirm for
            # non-square padding configurations.
            patch_out_h = int((first_height - self.patch_params["pad_l"]) / 8)
            out_w = self.patch_params["output_w"]
            # generate code for testing whole inference time
            string += (
                """void end2endinference(q7_t* img){
//stage 1
int i, j, h, w, c;
for (i = 0; i < """
                + str(self.patch_params["n_patch"])
                + """; i++){
uint16_t pad_t=0,pad_b=0;
if (i == 0){
pad_t = """
                + str(self.patch_params["pad_l"])
                + """;
}
else if (i == """
                + str(self.patch_params["n_patch"] - 1)
                + """){
pad_b = """
                + str(self.patch_params["pad_r"])
                + """;
}
for (j = 0; j < """
                + str(self.patch_params["n_patch"])
                + """; j++){
uint16_t pad_l=0,pad_r=0;
if (j == 0){
pad_l = """
                + str(self.patch_params["pad_l"])
                + """;
}
else if (j == """
                + str(self.patch_params["n_patch"] - 1)
                + """){
pad_r = """
                + str(self.patch_params["pad_r"])
                + """;
}
/* load partial input from the img */
q7_t* patch_input = &buffer0[0]; // for partial input
int start_x = MAX("""
                + str(first_width - self.patch_params["pad_l"])
                + """ * j - """
                + str(self.patch_params["pad_l"])
                + """,0);
int start_y = MAX("""
                + str(first_height - self.patch_params["pad_l"])
                + """ * i - """
                + str(self.patch_params["pad_l"])
                + """,0);
q7_t* img_ptr = &img[(start_x + start_y * """
                + str(img_w)
                + """) * 3];

//skip top
patch_input += pad_t * """
                + str(first_width)
                + """ * 3;
for (h = pad_t; h < """
                + str(first_height)
                + """ - pad_b; h++){
//skip left
patch_input += pad_l * 3;
//fill middle
int bytes = ("""
                + str(first_width)
                + """ - (pad_l + pad_r)) * 3;
memcpy (patch_input, img_ptr, bytes);
img_ptr += """
                + str(img_w)
                + """ * 3;
patch_input += bytes;
//skip right
patch_input += pad_r * 3;
}
invoke_1patch(pad_t,pad_b,pad_l,pad_r);
/* concat the output from buffer0 (this is set manually for now) */
q7_t* output_ptr = buffer1 + (i * """
                + str(patch_out_w)
                + """ * """
                + str(out_w)
                + """ + j * """
                + str(patch_out_w)
                + """) * """
                + str(self.patch_params["output_c"])
                + """ ;
for (h = 0; h < """
                + str(patch_out_h)
                + """; h++){
for (w = 0; w < """
                + str(patch_out_w)
                + """; w++){
for (c = 0; c < """
                + str(self.patch_params["output_c"])
                + """; c++){
output_ptr[(w + h * """
                + str(out_w)
                + """) * """
                + str(self.patch_params["output_c"])
                + """ + c] = buffer0[(w + h * """
                + str(patch_out_w)
                + """) * """
                + str(self.patch_params["output_c"])
                + """ + c];
}
}
}
}
}
//stage 2
invoke();
}"""
            )
            string += """

void invoke_1patch(uint16_t pad_t, uint16_t pad_b, uint16_t pad_l ,uint16_t pad_r){
"""
            fp.write(string)

            # gen patch-based inference code: one generated call per patch layer
            patch_layers = []
            layercnt = 0
            for i, op in enumerate(schedule.layer):
                layer_info = op.get_layer_info()
                if "is_patch" not in layer_info or not layer_info["is_patch"]:
                    break  # end of patch-based
                string = "/* layer " + str(layercnt) + ":" + layer_info["op"] + " */\n"
                layercnt += 1
                fp.write(string)
                if layer_info["op"] == "CONV_2D":
                    # hardcode this memory schedule for quick implementation
                    # TODO: adjust this according to model architecture and split index
                    next_layer_info = schedule.layer[i + 1].get_layer_info()
                    if "is_patch" not in next_layer_info or not next_layer_info["is_patch"]:
                        # Last patch layer: route its output to buffer0 front.
                        layer_info["output_buf_add"] = "front"
                        layer_info["output_buf_add_offset"] = 0
                    if self.unsigned_input:
                        raise Exception("unsigned input is not supported by patch-based yet")

                    string = self._genOpstr(
                        op,
                        False,
                        self.FP_output,
                        use_aggressive_unroll,
                        use_hard_switsh,
                        self.fp_requantize,
                    )
                    fp.write(string)

                elif layer_info["op"] == "DEPTHWISE_CONV_2D":
                    string = self._genOpstr(op, self.fp_requantize)
                    fp.write(string)

                elif layer_info["op"] == "ADD":
                    string = self._genOpstr(op)
                    fp.write(string)

                patch_layers.append(schedule.layer[i])

            # remove these layers for patching for the following code gen
            for layer in patch_layers:
                schedule.layer.remove(layer)

            string = "}\n\n"

            fp.write(string)
        else:  # not patch-based
            string = """void end2endinference(q7_t* img){
invoke(NULL);
}
"""
            fp = self.source_handle
            fp.write(string)
|
|
|
|
    def _genInvoke(self):
        """Emit the invoke() function: one generated op call per scheduled layer."""
        fp = self.source_handle
        string = "void invoke(float* labels){\n"
        fp.write(string)

        schedule = self.MemSche
        for i, op in enumerate(schedule.layer):
            layer_info = op.get_layer_info()
            string = "/* layer " + str(i) + ":" + layer_info["op"] + " */\n"
            fp.write(string)

            if layer_info["op"] == "CONV_2D":
                # Floating-point output only when requested AND both scales
                # are available for this layer.
                # NOTE(review): the membership test guards "effective_scale"
                # but "output_scale" is read unguarded — confirm every
                # CONV_2D layer_info carries "output_scale".
                if (
                    self.FP_output
                    and "effective_scale" in layer_info
                    and layer_info["output_scale"] is not None
                    and layer_info["effective_scale"] is not None
                ):
                    use_fp = True
                else:
                    use_fp = False
                string = self._genOpstr(
                    op,
                    self.unsigned_input,
                    use_fp,
                    use_aggressive_unroll,
                    use_hard_switsh,
                    self.fp_requantize,
                    self.tflite_op,
                    self.dummy_address,
                )
                fp.write(string)
            elif layer_info["op"] == "DEPTHWISE_CONV_2D":
                string = self._genOpstr(op, self.fp_requantize)
                fp.write(string)
            else:
                string = self._genOpstr(op)
                fp.write(string)

        string = "}\n"
        fp.write(string)
|
|
|
|
def _getBufferIndex(self, location):
|
|
if location == "front":
|
|
return 0
|
|
elif location == "end":
|
|
return 0
|
|
elif location == "residual":
|
|
return 1
|
|
return None
|
|
|
|
def _genMemBuffer(self):
|
|
schedule = self.MemSche
|
|
# define output tensor
|
|
string = "#define NNoutput &buffer0[" + str(_findtheinferenceOutput(schedule.layer)) + "];"
|
|
fp = self.header_handle
|
|
fp.write("\n" + string + "\n")
|
|
|
|
# activation buffers
|
|
string = "\n/* sram:" + str(schedule.peakmem) + ", flash:" + str(schedule.flash) + " */\n"
|
|
fp.write(string + "\n")
|
|
|
|
string = "static signed char buffer[" + str(schedule.peakmem) + "];\n"
|
|
fp.write(string)
|
|
accumulate_ptr = 0
|
|
string = "static signed char *buffer0 = &buffer[" + str(accumulate_ptr) + "];\n"
|
|
accumulate_ptr += int(schedule.buffers["input_output"])
|
|
fp.write(string)
|
|
string = "static signed char *buffer1 = &buffer[" + str(accumulate_ptr) + "];\n"
|
|
accumulate_ptr += int(schedule.buffers["residual"])
|
|
fp.write(string)
|
|
|
|
string = "static int16_t *sbuf = (int16_t *)&buffer[" + str(accumulate_ptr) + "];\n"
|
|
accumulate_ptr += int(schedule.buffers["im2col"])
|
|
fp.write(string)
|
|
string = "static int32_t *kbuf = (int32_t *)&buffer[" + str(accumulate_ptr) + "];\n"
|
|
accumulate_ptr += int(schedule.buffers["kernel"])
|
|
fp.write(string)
|
|
string = "const int SBuffer_size = " + str(int(schedule.buffers["im2col"])) + ";\n"
|
|
fp.write(string)
|
|
string = "const int KBuffer_size = " + str(int(schedule.buffers["kernel"])) + ";\n"
|
|
fp.write(string + "\n")
|
|
|
|
    def _includeHeaders(self):
        """Write the #include preamble and shared C globals into genModel.c."""
        include_string = """/* Automatically generated source file */
#include <float.h>
#include "arm_nnfunctions.h"

#include "genNN.h"
#include "genModel.h"

#include "tinyengine_function.h"
//#include "tinyengine_function_fp.h"

"""
        if self.profile_mode:
            include_string += '#include "profile.h"\n'

        # Globals shared by all generated ops, plus the input/output accessors
        # exposed to application code.
        include_string += """
/* Variables used by all ops */
ADD_params add_params;
//Conv_Params conv_params;
//Depthwise_Params dpconv_params;
int i;
int8_t *int8ptr;
float *fptr,*fptr2,*fptr3;

signed char* getInput() {
return &buffer0[""" + f"{self.MemSche.layer[0].params['input_buf_add_offset']}" + """];
}
signed char* getOutput() {
return NNoutput;
}\n"""
        fp = self.source_handle
        fp.write(include_string)
|
|
|
|
def _parseTrainable(self):
|
|
schedule = self.MemSche
|
|
for i, op in enumerate(schedule.layer):
|
|
layer_info = op.get_layer_info()
|
|
if layer_info["op"] == "CONV_2D":
|
|
self._parseWeight(
|
|
self.parse_count,
|
|
layer_info["weight_value"].flatten(),
|
|
layer_info["weight_name"],
|
|
self._readOnly(layer_info["weight_name"]),
|
|
)
|
|
|
|
if "bias_name" in layer_info:
|
|
self._parseBias(
|
|
self.parse_count,
|
|
layer_info["bias"].flatten(),
|
|
layer_info["bias_name"],
|
|
self._readOnly(layer_info["bias_name"]),
|
|
)
|
|
else:
|
|
self._parseBias(self.parse_count, layer_info["bias"].flatten())
|
|
self._parseEffectivescales(self.parse_count, layer_info["effective_scale"].flatten())
|
|
self._parseRequantize(
|
|
self.parse_count,
|
|
layer_info["shift"].flatten(),
|
|
layer_info["multiplier"].flatten(),
|
|
)
|
|
|
|
layer_info["parsed_trainable"] = self.parse_count
|
|
self.parse_count += 1
|
|
elif layer_info["op"] == "DEPTHWISE_CONV_2D":
|
|
if layer_info["kernel_h"] > layer_info["kernel_w"]:
|
|
self._parseCWHWeight(
|
|
self.parse_count,
|
|
layer_info["weight_value"].flatten(),
|
|
layer_info["kernel_h"],
|
|
layer_info["kernel_w"],
|
|
layer_info["input_c"],
|
|
)
|
|
else:
|
|
if "weight_name" in layer_info:
|
|
self._parseCHWWeight(
|
|
self.parse_count,
|
|
layer_info["weight_value"].flatten(),
|
|
layer_info["input_c"],
|
|
)
|
|
else:
|
|
self._parseCHWWeight(
|
|
self.parse_count,
|
|
layer_info["weight_value"].flatten(),
|
|
layer_info["input_c"],
|
|
)
|
|
if "bias_name" in layer_info:
|
|
self._parseoffsetBias(
|
|
self.parse_count,
|
|
layer_info["bias"].flatten(),
|
|
layer_info["input_zero_point"] * -1,
|
|
layer_info["weight_value"].flatten(),
|
|
layer_info["input_c"],
|
|
layer_info["bias_name"],
|
|
self._readOnly(layer_info["bias_name"]),
|
|
)
|
|
else:
|
|
self._parseoffsetBias(
|
|
self.parse_count,
|
|
layer_info["bias"].flatten(),
|
|
layer_info["input_zero_point"] * -1,
|
|
layer_info["weight_value"].flatten(),
|
|
layer_info["input_c"],
|
|
)
|
|
self._parseEffectivescales(self.parse_count, layer_info["effective_scale"].flatten())
|
|
self._parseRequantize(
|
|
self.parse_count,
|
|
layer_info["shift"].flatten(),
|
|
layer_info["multiplier"].flatten(),
|
|
)
|
|
|
|
layer_info["parsed_trainable"] = self.parse_count
|
|
self.parse_count += 1
|
|
|
|
elif layer_info["op"] == "FULLY_CONNECTED":
|
|
self._parseWeight(
|
|
self.parse_count,
|
|
layer_info["weight_value"].flatten(),
|
|
layer_info["weight_name"],
|
|
self._readOnly(layer_info["weight_name"]),
|
|
)
|
|
self._parseBias(self.parse_count, layer_info["bias"].flatten())
|
|
|
|
layer_info["parsed_trainable"] = self.parse_count
|
|
self.parse_count += 1
|
|
|
|
elif layer_info["op"] == "SOFTMAX":
|
|
pass
|
|
|
|
def _parseCWHWeight(self, Lindex, weight, height, width, channel):
|
|
fp = self.header_handle
|
|
# 8bit implementation
|
|
if self.BIT == 8:
|
|
string = "const unsigned char CWHweight" + str(Lindex) + "[" + str(len(weight)) + "] = {"
|
|
fp.write(string)
|
|
for j in range(channel):
|
|
for w in range(width):
|
|
for h in range(height):
|
|
value = weight[(h * width + w) * channel + j]
|
|
if value < 0:
|
|
value += 256
|
|
fp.write(str(format(value, "#04x")) + ", ")
|
|
else:
|
|
raise NotImplementedError
|
|
|
|
fp.write("};\n")
|
|
|
|
def _parseCHWWeight(self, Lindex, weight, channel):
|
|
fp = self.header_handle
|
|
kernelsize = int(len(weight) / channel)
|
|
# 8bit implementation
|
|
if self.BIT == 8:
|
|
string = "const unsigned char CHWweight" + str(Lindex) + "[" + str(len(weight)) + "] = {"
|
|
fp.write(string)
|
|
for j in range(channel):
|
|
for i in range(kernelsize):
|
|
value = int(weight[i * channel + j])
|
|
if value < 0:
|
|
value += 256
|
|
fp.write(str(format(value, "#04x")) + ", ")
|
|
else:
|
|
raise NotImplementedError
|
|
|
|
fp.write("};\n")
|
|
|
|
def _parseEffectivescales(self, Lindex, scales):
|
|
fp = self.header_handle
|
|
string = "const float scales" + str(Lindex) + "[" + str(len(scales)) + "] = {"
|
|
fp.write(string)
|
|
for _, value in enumerate(scales):
|
|
fp.write(str(value) + ", ")
|
|
fp.write("};\n")
|
|
|
|
def _parseWeight(self, Lindex, weight, weight_name=None, is_const=True):
|
|
fp = self.header_handle
|
|
const_str = "const " if is_const else ""
|
|
string = f"{const_str}unsigned char weight" + str(Lindex) + "[" + str(len(weight)) + "] = {"
|
|
fp.write(string)
|
|
for _, value in enumerate(weight):
|
|
value = int(value)
|
|
if value < 0:
|
|
value += 256
|
|
fp.write(str(format(value, "#04x")) + ", ")
|
|
fp.write("};\n")
|
|
|
|
if weight_name is not None:
|
|
for r in self.trainSRAMTable:
|
|
if r.name == weight_name:
|
|
return
|
|
self.trainSRAMTable.append(tensorRecorder(weight_name, len(weight), "unknown"))
|
|
|
|
if weight.dtype == "int8":
|
|
string = f"{const_str}unsigned char* {weight_name}=weight" + str(Lindex) + ";\n"
|
|
else:
|
|
raise NotImplementedError
|
|
fp.write(string)
|
|
|
|
def _parseoffsetBias(self, Lindex, bias, input_offset, weight, channel, bias_name=None, is_const=True):
|
|
fp = self.header_handle
|
|
const_str = "const " if is_const else ""
|
|
string = f"{const_str}int32_t offsetBias" + str(Lindex) + "[" + str(len(bias)) + "] = {"
|
|
fp.write(string)
|
|
kernelsize = int(len(weight) / channel)
|
|
# fuse the offset into bias
|
|
for i in range(channel):
|
|
tmpW = 0
|
|
for j in range(kernelsize):
|
|
tmpW += weight[j * channel + i]
|
|
fp.write(str(self.int32_clip(bias[i] + tmpW * input_offset)) + ", ")
|
|
fp.write("};\n")
|
|
string = f"{const_str}int32_t offsetRBias" + str(Lindex) + "[" + str(len(bias)) + "] = {"
|
|
fp.write(string)
|
|
kernelsize = int(len(weight) / channel)
|
|
for i in range(channel):
|
|
tmpW = 0
|
|
for j in range(kernelsize):
|
|
tmpW += weight[j * channel + i]
|
|
fp.write(str(bias[i] + tmpW * input_offset - self.int32_clip(bias[i] + tmpW * input_offset)) + ", ")
|
|
fp.write("};\n")
|
|
|
|
def _parseBias(self, Lindex, bias, bias_name=None, is_const=True):
|
|
fp = self.header_handle
|
|
const_str = "const " if is_const else ""
|
|
string = f"{const_str}int32_t bias" + str(Lindex) + "[" + str(len(bias)) + "] = {"
|
|
fp.write(string)
|
|
for _, value in enumerate(bias):
|
|
value = int(value)
|
|
fp.write(str(value) + ", ")
|
|
fp.write("};\n")
|
|
|
|
def _parseRequantize(self, Lindex, shift, multiplier):
|
|
fp = self.header_handle
|
|
string = "const int32_t shift" + str(Lindex) + "[" + str(len(shift)) + "] = {"
|
|
fp.write(string)
|
|
for _, value in enumerate(shift):
|
|
fp.write(str(value) + ", ")
|
|
fp.write("};\n")
|
|
|
|
string = "const int32_t multiplier" + str(Lindex) + "[" + str(len(multiplier)) + "] = {"
|
|
fp.write(string)
|
|
for _, value in enumerate(multiplier):
|
|
fp.write(str(value) + ", ")
|
|
fp.write("};\n")
|
|
|
|
def int32_clip(self, a):
|
|
if a < -(2**31):
|
|
return -(2**31)
|
|
elif a > 2**31 - 1:
|
|
return 2**31 - 1
|
|
return a.astype(int)
|
|
|
|
def _closefp(self):
|
|
self.header_handle.close()
|
|
self.source_handle.close()
|
|
|
|
|
|
def _findtheinferenceOutput(layers):
|
|
for cnt, op in enumerate(layers):
|
|
if op.params["output_dtype"] != "int8":
|
|
return layers[cnt - 1].params["output_buf_add_offset"]
|
|
return layers[-1].params["output_buf_add_offset"]
|
|
|
|
|
|
class tensorRecorder:
    """Record of a named trainable tensor already emitted to the header;
    used by _parseWeight to avoid emitting duplicate alias pointers."""

    # NOTE(review): the parameter name `len` shadows the builtin; kept as-is
    # to preserve the call signature.
    def __init__(self, name, len, dtype):
        self.name = name    # tensor name (C alias identifier)
        self.len = len      # flattened element count
        self.dtype = dtype  # dtype string ("unknown" when not tracked)
|