# tinyengine/code_generator/TTEParser.py
import json
import math
import pickle
import warnings
import numpy as np
from .constant import (
FUSE_INT8CAST_STR,
FUSE_SGD_UPDATE_STR,
FUSE_TILE_STR,
FUSE_WHERE_ZEROSSTR,
FUSHION_CONFIG,
INPLACE_MUL_STR,
INPLACE_WHERE_STR,
REORDER_STR,
USE_BIT_MASK,
op_name_translation,
)
from .FusionUtil import (
_accessTrainable,
_castisFusable,
_castisFusable_for_gconv,
_fileTileRepAsWeights,
_findBinMaskPattern,
_findBinMaskPatternint8,
_findConv2dwithScaleName,
_findKeyinTensors,
_findMultiplyAbsMaxDivide,
_findPartialConv,
_findTargetWeightforGconv,
_findTransposeMultiplyAbsMaxDivide,
_findWhereTensorFrom,
_removeLayers,
_updateIdx,
_updateIdxParameter,
_updateOutputDtype,
)
from .GraphReorder import reorderGroupConv_TransponseConv, reorderGroupConv_TransponseConv_int8
from .operators import (
add,
add1d,
avgpool2d,
bias_add,
bias_update,
cast,
collapse_sum_like,
conv2d,
dense,
depthwiseConv2d,
div,
exp,
greater,
group_conv2d,
less,
log_softmax,
mat_mul,
mul,
negative,
nll_loss,
ones_like,
permute_4D_3012,
permute_groupconv_out,
relu,
reshape,
reshape_like,
strided_slice,
sub,
sum,
tile,
transpose,
transpose_conv2d,
where,
zeros_like,
)
from .operators.basic_utils import isconstanttstr
from .QAS_util import get_effective_scalename_with_input_key, get_QAS
MAX_DAGOP_OUTPUTS = 5
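# view-like ops with no data movement: folded into the producing layer by
# aliasing tensor indices in loadModel instead of emitting a kernel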
fused_op = {"clip", "nn.batch_flatten", "squeeze", "reshape", "reshape_like"}
class outputInfo:
def __init__(self, name, idx, len, dtype):
self.name = name
self.idx = idx
self.len = len
self.dtype = dtype
class TTEParser(object):
def __init__(self, model, data, scale_params=None):
self.layer = []
self.gout = []
self.det_outputs = None
with open(model, "r") as f:
self.model = json.load(f)
with open(data, "rb") as f:
w_params = pickle.load(f)
self.data = {}
for k in w_params:
if k[0] != "v":
self.data[f"v{k}"] = w_params[k]
else:
self.data[k] = w_params[k]
self.scale_params = scale_params
self.layer = []
self.trainedWeights = [] # key, weight_ip
self.trainedBias = [] # key, weight_ip
self.fusedInputTable = {}
self.outputTables = []
self.regularFunctionTable = {
"cast": self._convert_cast,
"cast_like": self._convert_cast,
"exp": self._convert_exp,
"transpose": self._convert_transpose,
"where": self._convert_where,
"nn.conv2d_transpose": self._convert_transpose_conv2d,
"strided_slice": self._convert_strided_slice,
"nn.bias_add": self._convert_bias_add,
"nn.relu": self._convert_relu,
"zeros_like": self._convert_zeros_like,
"zeros": self._convert_zeros,
"ones_like": self._convert_ones_like,
"ones": self._convert_ones,
"collapse_sum_like": self._convert_collapse_sum_like,
"less": self._convert_less,
"less_equal": self._convert_less,
"nn.log_softmax": self._convert_log_softmax,
"nn.cross_entropy_with_logits": self._convert_cross_entropy_with_logits,
"divide": self._convert_div,
"tile": self._convert_tile,
"negative": self._convert_negative,
"greater": self._convert_greater,
"greater_equal": self._convert_greater,
"multiply": self._convert_mul,
"nn.matmul": self._convert_matmul,
"nn.dense": self._convert_dense,
"mcumean": self._convert_average_pool,
}
self.partialChannelList = {} # "idx": first_k_channel
def loadModel(self):
last_op = None
has_zero_x = False
zero_x = None
self.fusedInputTable[self.model[0]["inputs"][0]["name"]] = self.model[0]["inputs"][0]["name"]
# reorder the group conv and transpose conv to calculate weight gradients first
if FUSHION_CONFIG[REORDER_STR]:
self.model = reorderGroupConv_TransponseConv(self.model)
self.model = reorderGroupConv_TransponseConv_int8(self.model)
for cnt, op in enumerate(self.model):
op_type = op["type"]
if op_type in {"nn.conv2d", "nn.mcuconv2d"}:
last_op = self._convert_convolution(op)
# Float bp fusion
# check if we need to have binary mask for this conv2d
# conv2d (int32) -> cast -> greater/less -> multiply -> where (which take the map)
# fusion | --------------------------------------|
if op["outputs"][0]["dtype"] == "int32":
pattern_found, op_dict = _findBinMaskPattern(self.model, op["outputs"][0]["name"])
if pattern_found:
# add second output in the output tensors
b_mask_info = op_dict["multiply"]["outputs"][0]
if USE_BIT_MASK:
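# a packed bit mask stores 8 boolean entries per byte, so the channel
# dimension of this second output shrinks to ceil(output_c / 8)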
last_op._add_output(
b_mask_info["name"],
"bool",
int(math.ceil(last_op.params["output_c"] / 8)),
last_op.params["output_w"],
last_op.params["output_h"],
)
else:
last_op._add_output(
b_mask_info["name"],
b_mask_info["dtype"],
last_op.params["output_c"],
last_op.params["output_w"],
last_op.params["output_h"],
)
# update params in conv2d
last_op.params["need_Bmask"] = True
last_op.params["output2_h"] = last_op.params["output_h"]
last_op.params["output2_w"] = last_op.params["output_w"]
last_op.params["output2_c"] = last_op.params["output_c"]
last_op.params["output2_dtype"] = b_mask_info["dtype"]
last_op.params["output2_idx"] = b_mask_info["name"]
# remove fused ops in the graph
_removeLayers(self.model, op_dict)
# int8 bp fusion
# check if we need to have binary mask for this conv2d
# conv2d (int32) -> greater/less -> multiply -> where (which take the map)
# fusion | ------------------------------|
if op["outputs"][0]["dtype"] == "int32":
pattern_found, op_dict = _findBinMaskPatternint8(self.model, op["outputs"][0]["name"])
if pattern_found:
# add second output in the output tensors
b_mask_info = op_dict["multiply"]["outputs"][0]
if USE_BIT_MASK:
last_op._add_output(
b_mask_info["name"],
"bool",
int(math.ceil(last_op.params["output_c"] / 8)),
last_op.params["output_w"],
last_op.params["output_h"],
)
else:
last_op._add_output(
b_mask_info["name"],
b_mask_info["dtype"],
last_op.params["output_c"],
last_op.params["output_w"],
last_op.params["output_h"],
)
# update params in conv2d
last_op.params["need_Bmask"] = True
last_op.params["output2_h"] = last_op.params["output_h"]
last_op.params["output2_w"] = last_op.params["output_w"]
last_op.params["output2_c"] = last_op.params["output_c"]
last_op.params["output2_dtype"] = b_mask_info["dtype"]
last_op.params["output2_idx"] = b_mask_info["name"]
# remove fused ops in the graph
_removeLayers(self.model, op_dict)
# we use hwc for computation, but in bp the 'c' may mean the output channel for the training weights.
# in this case, we need to insert an op to permute the weight tensor before running this conv2d op
# TODO: make sure this is no longer needed after we optimize tile + group_conv2d
# if len(self.model) > 0 and "weight" not in op["inputs"][1]["name"]:
# permute_params = {
# "input_idx": op["inputs"][1]["name"],
# "input_dim": 3,
# "input_h": op["inputs"][1]["shape"][-2],
# "input_w": op["inputs"][1]["shape"][-1],
# "input_c": op["inputs"][1]["shape"][-4], # IOHW
# }
# permute_op = permute_3D_120.permute_3D_120(permute_params)
# self.layer.append(permute_op)
if has_zero_x:
last_op.set_input_zero_point(zero_x)
has_zero_x = False
zero_x = None
self.layer.append(last_op)
elif op_type == "nn.mcuadd":
# fp32
pattern_found, op_dict = _findBinMaskPattern(self.model, op["outputs"][0]["name"])
# try int8
if not pattern_found:
pattern_found, op_dict = _findBinMaskPatternint8(self.model, op["outputs"][0]["name"])
last_op = self._convert_qadd(op)
if pattern_found:
# add second output in the output tensors
b_mask_info = op_dict["multiply"]["outputs"][0]
last_op._add_output(
b_mask_info["name"],
b_mask_info["dtype"],
last_op.params["output_c"],
last_op.params["output_w"],
last_op.params["output_h"],
)
# update params in the add op
last_op.params["need_Bmask"] = True
last_op.params["output2_h"] = last_op.params["output_h"]
last_op.params["output2_w"] = last_op.params["output_w"]
last_op.params["output2_c"] = last_op.params["output_c"]
last_op.params["output2_dtype"] = b_mask_info["dtype"]
last_op.params["output2_idx"] = b_mask_info["name"]
# remove fused ops in the graph
_removeLayers(self.model, op_dict)
self.layer.append(last_op)
elif (
op_type == "cast" and op["inputs"][0]["dtype"] == "int8" and op["outputs"][0]["dtype"] == "int32"
): # int8 gradient for bias
# skip this one
_updateIdx(self.model, self.layer, op["inputs"][0]["name"], op["outputs"][0]["name"])
elif op_type == "cast" and _castisFusable(self.model, op)[0] and FUSHION_CONFIG[FUSE_INT8CAST_STR]:
_, transpose_conv_json = _castisFusable(self.model, op)
transpose_conv_json["inputs"][1] = op["inputs"][0] # pass the int8 input to transpose conv2d
elif (
op_type == "cast" and _castisFusable_for_gconv(self.model, op)[0] and FUSHION_CONFIG[FUSE_INT8CAST_STR]
):
_, group_conv_json = _castisFusable_for_gconv(self.model, op)
group_conv_json["inputs"][0] = op["inputs"][0] # pass the int8 input to group conv2d
group_conv_json["inplace_int8_input"] = True
elif op_type == "tile" and FUSHION_CONFIG[FUSE_TILE_STR]:
# check if we need to fuse ops for tile
# ########## tile -> reshape -> conv2d (which takes it as weights)
# fusion | ------------------------|
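# the tiled tensor is only ever consumed as conv2d weights, so we avoid
# materializing it: drop the reshape and point the conv's weight input at the
# un-tiled source tensor instead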
pattern_found, op_dict = _fileTileRepAsWeights(self.model, op)
if pattern_found:
# remove reshape
_removeLayers(self.model, {"reshape": op_dict["reshape"]})
# redirect the input of tile to conv2d's weight
op_dict["conv2d"]["inputs"][1] = op_dict["tile"]["inputs"][0]
else:
raise NotImplementedError
elif op_type == "add":
if len(op["inputs"][0]["shape"]) == 4 and op["inputs"][0]["dtype"] == "int8":
if "zero_y" in op["inputs"][1]["name"]:
zero_y = int(self.data[op["inputs"][1]["name"]])
last_op.set_output_zero_point(zero_y)
continue
last_op = self._convert_add(op)
self.layer.append(last_op)
else:
last_op = self._convert_add1d(op)
self.layer.append(last_op)
elif op_type == "nn.bias_add" and op["inputs"][1]["dtype"] == "int8":
last_op.params["bias"] = self.data[op["inputs"][1]["name"]].astype(int)
# redirect the index
last_op.change_output_tensor_idx(op["outputs"][0]["name"])
# fixing HWC -> CHW alignment
elif (
op_type == "reshape"
and len(op["inputs"][0]["shape"]) == 4
and (op["inputs"][0]["shape"][2] != 1 and op["inputs"][0]["shape"][3] != 1)
and op["inputs"][0]["shape"][2] != op["outputs"][0]["shape"][2]
and op["inputs"][0]["shape"][3] != op["outputs"][0]["shape"][3]
):
last_op = self._convert_reshape(op)
self.layer.append(last_op)
# the input might be a parameter; we handle the consuming ops here since we only support scales in `multiply`
elif op_type == "reshape" and op["inputs"][0]["var_type"] == "parameter":
# find out ops taking the output
for other_op in self.model:
for input_tensor in other_op["inputs"]:
if input_tensor["name"] == op["outputs"][0]["name"]:
if other_op["type"] in {"multiply", "divide"}:
_updateIdxParameter(self.model, op["inputs"][0]["name"], op["outputs"][0]["name"])
else:
raise NotImplementedError
# fixing CHW -> HWC alignment
elif (
op_type == "reshape_like"
and len(op["inputs"][1]["shape"]) == 4
and (op["inputs"][1]["shape"][2] != 1 and op["inputs"][1]["shape"][3] != 1)
and op["inputs"][0]["shape"][2] != op["outputs"][0]["shape"][2]
and op["inputs"][0]["shape"][3] != op["outputs"][0]["shape"][3]
):
last_op = self._convert_reshape_like(op)
self.layer.append(last_op)
# bypass this layer by fusing it into the last layer, TODO: revisit this for clipping fp results
elif op_type in fused_op:
# update tensors
_updateIdx(self.model, self.layer, op["inputs"][0]["name"], op["outputs"][0]["name"])
elif op_type in "nn.mcutruncate":
# update output dtype
_updateOutputDtype(self.layer, op["inputs"][0]["name"], "int8")
# update tensor idx
_updateIdx(self.model, self.layer, op["inputs"][0]["name"], op["outputs"][0]["name"])
elif op_type == "subtract":
is_fuse = False
for tensor in op["inputs"]:
if "zero_x" in tensor["name"]:
has_zero_x = True
zero_x = int(self.data[tensor["name"]])
is_fuse = True
if not is_fuse:
last_op = self._convert_sub(op)
self.layer.append(last_op)
elif op_type == "sum":
input_length = np.prod(op["inputs"][0]["shape"])
output_length = np.prod(op["outputs"][0]["shape"])
if input_length != output_length:
last_op = self._convert_sum(op)
self.layer.append(last_op)
if op["outputs"][0] and "output_info" in op["outputs"][0]["meta"]:
if op["outputs"][0]["meta"]["output_info"][0] == "v":
key = op["outputs"][0]["meta"]["output_info"]
else:
key = "v" + op["outputs"][0]["meta"]["output_info"]
if self.scale_params is not None:
e_s_name = get_effective_scalename_with_input_key(key, self.model)
QAS = get_QAS(key, self.scale_params, self.data[e_s_name])
else:
QAS = np.zeros(int(output_length)) + 0.000000001 # avoid zero division
bias_update_params = {
"input_idx": last_op.params["output_idx"],
"output_idx": key,
# tensor related
"input_size": int(output_length),
"input_buf_add": None,
"input_buf_add_offset": None,
"QAS": QAS,
"input_dtype": last_op.params["input_dtype"],
"output_dtype": "float32",
}
bias_update_op = bias_update.bias_update(bias_update_params)
self.layer.append(bias_update_op)
else: # skip this, no need to do anything on the data
input_idx = op["inputs"][0]["name"]
output_idx = op["outputs"][0]["name"]
# update the bias
if op["outputs"][0] and "output_info" in op["outputs"][0]["meta"]:
if op["outputs"][0]["meta"]["output_info"][0] == "v":
key = op["outputs"][0]["meta"]["output_info"]
else:
key = "v" + op["outputs"][0]["meta"]["output_info"]
if self.scale_params is not None:
e_s_name = get_effective_scalename_with_input_key(key, self.model)
QAS = get_QAS(key, self.scale_params, self.data[e_s_name])
else:
QAS = np.zeros(int(output_length)) + 0.000000001 # avoid zero division
bias_update_params = {
"input_idx": last_op.params["output_idx"],
"output_idx": key,
# tensor related
"input_size": int(output_length),
"input_buf_add": None,
"input_buf_add_offset": None,
"QAS": QAS,
"input_dtype": "float32",
"output_dtype": "float32",
}
bias_update_op = bias_update.bias_update(bias_update_params)
self.layer.append(bias_update_op)
# update tensors
_updateIdx(self.model, self.layer, input_idx, output_idx)
# assume weights are updated once we obtain their gradients
elif op_type == "transpose" and FUSHION_CONFIG[FUSE_SGD_UPDATE_STR]:
fuseable, op_dict = _findTransposeMultiplyAbsMaxDivide(self.model, op)
# old IR
if op["outputs"][0]["meta"]["children"] == 0:
# update tensors
_updateIdx(self.model, self.layer, op["inputs"][0]["name"], op["outputs"][0]["name"])
elif fuseable:
# fuse "transpose" -> [max -> divide -> divide (int8 bp)]
_updateIdx(self.model, self.layer, op["inputs"][0]["name"], op_dict["cast"]["outputs"][0]["name"])
# add the output to output table
name = op_dict["cast"]["outputs"][0]["meta"]["output_info"]
idx = op_dict["cast"]["outputs"][0]["name"]
length = np.prod(op_dict["cast"]["outputs"][0]["shape"])
dtype = op_dict["cast"]["outputs"][0]["dtype"]
self.outputTables.append(outputInfo(name, idx, int(length), dtype))
_removeLayers(self.model, op_dict)
else:
raise NotImplementedError
elif (
FUSHION_CONFIG[FUSE_WHERE_ZEROSSTR]
and op_type == "where"
and (op["inputs"][2]["dtype"] in ["int8", "int32", "float32"])
and _findWhereTensorFrom(self.layer, op["inputs"][2]["name"]) is not None
and _findWhereTensorFrom(self.layer, op["inputs"][2]["name"]).params["op"]
== "ZEROS" # third input is from zeros
):
zeros_op = _findWhereTensorFrom(self.layer, op["inputs"][2]["name"])
# remove the previous zeros layer
self.layer.remove(zeros_op)
# parse the where but remove the third input and set "input3_is_zeros" in params
last_op = self._convert_where(op)
last_op.params["input3_is_zeros"] = True
last_op.input_tensors.remove(last_op.input_tensors[2])
# check whether we can update input2 in place
# if input2 is not used by following ops:
# (1) make input2 inplace
# (2) update the following op's input idx (normally it is MUL)
can_be_inplace = None
# check if last_op.params["input2_idx"] == some_op["outputs"][0]
for from_op in self.model:
if from_op["outputs"][0]["name"] == last_op.params["input2_idx"]:
if from_op["outputs"][0]["meta"]["children"] != 1:
can_be_inplace = False
else:
can_be_inplace = True
assert can_be_inplace is not None
if can_be_inplace and FUSHION_CONFIG[INPLACE_WHERE_STR]:
# find where the output of where goes and link it to the second input of where
for following_op in self.model:
for inp in following_op["inputs"]:
if inp["name"] == op["outputs"][0]["name"]:
inp["name"] = op["inputs"][1]["name"]
# remove output tensor of where
last_op.output_tensors.remove(last_op.output_tensors[0])
# set where to inplace
last_op.params["inplace"] = True
# add the op
self.layer.append(last_op)
elif op_type == "multiply" and FUSHION_CONFIG[INPLACE_MUL_STR]:
last_op = self._convert_mul(op)
last_op_input = last_op.params["input_idx"]
last_op_output = last_op.params["output_idx"]
if last_op.params["input2_size"] > 1 and last_op.params["input_size"] > last_op.params["input2_size"]:
# good to be updated inplace
last_op.params["inplace"] = True
last_op.output_tensors.remove(last_op.output_tensors[0])
# redirect the following op's input as the inplace input
for following_op in self.model:
# if following_op["type"] in {"sum", "nn.conv2d_transpose", "nn.conv2d"}:
for inp in following_op["inputs"]:
if inp["name"] == last_op_output:
inp["name"] = last_op_input
# _updateIdx(self.model, self.layer, last_op_input, last_op_output)
# replace the following
self.layer.append(last_op)
elif op_type in self.regularFunctionTable:
last_op = self.regularFunctionTable[op_type](op)
self.layer.append(last_op)
elif op_type == "abs":
if FUSHION_CONFIG[FUSE_SGD_UPDATE_STR]:
clipping_pattern, op_dict = _findMultiplyAbsMaxDivide(self.model, abs_op=op)
if clipping_pattern:
# For transpose conv2d, this could be float32 -> int8 if it connects to abs
previous_op = _findWhereTensorFrom(self.layer, op["inputs"][0]["name"])
if (
previous_op.params["op"] == "TRANSPOSE_CONV_2D"
and previous_op.params["output_dtype"] == "float32"
):
previous_op.params["float_to_int8"] = True
previous_op.params["output_dtype"] = "int8"
previous_op.output_tensors[0].dtype = "int8"
previous_op.add_int32_buffer_tensor()
_updateIdx(
self.model, self.layer, op["inputs"][0]["name"], op_dict["cast"]["outputs"][0]["name"]
)
_removeLayers(self.model, op_dict)
else:
raise NotImplementedError
else:
clipping_pattern, op_dict = _findMultiplyAbsMaxDivide(self.model, abs_op=op)
if clipping_pattern:
_updateIdx(
self.model, self.layer, op["inputs"][0]["name"], op_dict["cast"]["outputs"][0]["name"]
)
_removeLayers(self.model, op_dict)
# Baseline for int8 fp without graph optimization
# Adding outputTable for accurate trainable measurement
if "output_info" in op_dict["cast"]["outputs"][0]["meta"]:
name = op_dict["cast"]["outputs"][0]["meta"]["output_info"]
idx = op_dict["cast"]["outputs"][0]["name"]
length = np.prod(op_dict["cast"]["outputs"][0]["shape"])
dtype = "int8"
self.outputTables.append(outputInfo(name, idx, int(length), dtype))
else:
raise NotImplementedError
else:
warnings.warn("%s op is not `supported" % op_type)
raise NotImplementedError
# GROUP CONV
if self.layer[-1].params["op"] == "GROUP_CONV":
# for group conv the output is actually h, w, IxO, we need to permute it to OHWI
if not FUSHION_CONFIG[FUSE_SGD_UPDATE_STR]:
params = {
# op related
"op": "PERMUTE_GROUPCONV_OUT",
"input_idx": last_op.params["output_idx"],
# tensor related
"input_dim": 3,
"input_h": last_op.params["output_h"],
"input_w": last_op.params["output_w"],
"input_c": last_op.params["output_c"],
"groups": last_op.params["groups"],
"input_dtype": "float32",
"output_dtype": "float32",
}
self.layer.append(permute_groupconv_out.permute_groupconv_out(params))
# we update the weights in place for the output-stationary group conv
# here we need to
# (1) update the graph: remove gconv -> reshape -> sum -> transpose (done in "transpose" op)
# -> [max -> divide -> divide (int8 bp)]
# (2) remove the output tensor in gconv
# (3) replace the output address with the int8 weight in SRAM
# TODO: we also need to back trace the int8 conv and make it use the weight in both SRAM and Flash
elif len(self.layer[-1].output_tensors) > 0:
# find the target weight
weight_idx = _findTargetWeightforGconv(self.model, self.layer[-1].output_tensors[0].graph_idx)
assert weight_idx is not None
self.layer[-1].params["inplace_weight_name"] = weight_idx
# back trace to the int8 conv
conv_partial_layer = _findPartialConv(self.layer, weight_idx)
conv_p = conv_partial_layer.params
gconv_output_len = np.prod(self.layer[-1].output_tensors[0].size)
conv_weight_size = conv_p["input_c"] * conv_p["output_c"] * conv_p["kernel_h"] * conv_p["kernel_w"]
if conv_weight_size != gconv_output_len:
# this is partial
# find the first k channels
first_k_channel = int(conv_p["input_c"] * gconv_output_len / conv_weight_size)
conv_partial_layer.params["first_k_channel"] = first_k_channel
self.partialChannelList[weight_idx] = first_k_channel
if self.scale_params is not None:
key = weight_idx
e_s_name = get_effective_scalename_with_input_key(key, self.model)
QAS = get_QAS(key, self.scale_params, self.data[e_s_name])
else:
QAS = np.zeros(int(gconv_output_len)) + 0.000000001 # avoid zero division
self.layer[-1].params["QAS"] = QAS
# remove for inplace
self.layer[-1].output_tensors.remove(self.layer[-1].output_tensors[0])
# add the gradient_output to table, we will use a custom layer to perform SGD
if (
"meta" in op["outputs"][0]
and op["outputs"][0]["meta"]["children"] == 0
and "output_info" in op["outputs"][0]["meta"]
):
name = op["outputs"][0]["meta"]["output_info"]
idx = op["outputs"][0]["name"]
length = np.prod(op["outputs"][0]["shape"])
dtype = op["outputs"][0]["dtype"]
self.outputTables.append(outputInfo(name, idx, int(length), dtype))
# loop over the graph and find transpose conv that use partial weights
for layer in self.layer:
if (
layer.params["op"] == op_name_translation["nn.conv2d_transpose"]
and layer.params["weight_name"] in self.partialChannelList
):
layer.params["first_k_channel"] = self.partialChannelList[layer.params["weight_name"]]
def _convert_cast(self, op):
# shape
input_info = op["inputs"][0]
input_dtype = get_dtype(op["inputs"][0])
input_shape = input_info["shape"]
output_c = input_c = input_h = input_w = 1
if len(input_shape) == 4:
output_c, input_c, input_h, input_w = input_shape # OIHW
elif len(input_shape) == 2:
input_h, input_w = input_shape
input_c = 1
elif len(input_shape) == 1:
input_h = input_w = 1
input_c = input_shape[0]
else:
raise NotImplementedError
output_info = op["outputs"][0]
output_dtype = get_dtype(op["outputs"][0])
# dtype
input_dtype = get_dtype(input_info)
output_dtype = get_dtype(output_info)
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input_info["name"],
"output_idx": output_info["name"],
"input_size": output_c * input_c * input_h * input_w,
"input_dim": 4,
"output_dim": 4,
"input_dtype": input_dtype,
"output_dtype": output_dtype,
"input_meta": op["inputs"][0]["meta"],
}
op = cast.cast(params)
return op
def _convert_relu(self, op):
# shape
input_info = op["inputs"][0]
input_dtype = get_dtype(op["inputs"][0])
input_shape = input_info["shape"]
if len(input_shape) == 4:
_, input_c, input_h, input_w = input_shape
elif len(input_shape) == 2:
input_h, input_w = input_shape
input_c = 1
else:
raise NotImplementedError
output_info = op["outputs"][0]
output_dtype = get_dtype(op["outputs"][0])
output_c = input_c
output_h = input_h
output_w = input_w
# dtype
input_dtype = get_dtype(input_info)
output_dtype = get_dtype(output_info)
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input_info["name"],
"output_idx": output_info["name"],
"input_h": input_h,
"input_w": input_w,
"input_c": input_c,
"input_dim": 3,
"output_dim": 3,
"output_h": output_h,
"output_w": output_w,
"output_c": output_c,
"input_dtype": input_dtype,
"output_dtype": output_dtype,
}
op = relu.relu(params)
return op
def _convert_bias_add(self, op):
# shape
input_info = op["inputs"][0]
input_dtype = get_dtype(op["inputs"][0])
input_shape = input_info["shape"]
if len(input_shape) == 2:
input_h = 1
input_w = input_shape[0]
input_c = input_shape[1]
else:
input_c, input_h, input_w = get_chw_shape(input_shape)
output_info = op["outputs"][0]
output_shape = output_info["shape"]
output_c, output_h, output_w = get_chw_shape(output_shape)
# dtype
input_dtype = get_dtype(input_info)
output_dtype = get_dtype(output_info)
bias_name = op["inputs"][1]["name"]
if bias_name not in self.data:
bias_value = bias_name
else:
bias_value = self.data[bias_name]
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input_info["name"],
"output_idx": output_info["name"],
"input_h": input_h,
"input_w": input_w,
"input_c": input_c,
"input_dim": 3,
"output_dim": 3,
"output_h": input_h,
"output_w": input_w,
"output_c": input_c,
"input_dtype": input_dtype,
"output_dtype": output_dtype,
"bias": bias_value,
"bias_name": bias_name,
}
op = bias_add.biasAdd(params)
return op
def _convert_reshape(self, op):
# shape
input_info = op["inputs"][0]
input_dtype = get_dtype(op["inputs"][0])
input_shape = input_info["shape"]
input_c, input_h, input_w = get_chw_shape(input_shape)
output_info = op["outputs"][0]
output_shape = output_info["shape"]
output_c, output_h, output_w = get_chw_shape(output_shape)
# dtype
input_dtype = get_dtype(input_info)
output_dtype = get_dtype(output_info)
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input_info["name"],
"output_idx": output_info["name"],
"input_h": input_h,
"input_w": input_w,
"input_c": input_c,
"output_h": output_h,
"output_w": output_w,
"output_c": output_c,
"input_dtype": input_dtype,
"output_dtype": output_dtype,
}
op = reshape.reshape(params)
return op
def _convert_reshape_like(self, op):
# shape
input_info = op["inputs"][0]
input_dtype = get_dtype(op["inputs"][0])
input_shape = input_info["shape"]
input_c, input_h, input_w = get_chw_shape(input_shape)
input2_info = op["inputs"][1]
input2_dtype = get_dtype(op["inputs"][1])
input2_shape = input2_info["shape"]
input2_c, input2_h, input2_w = get_chw_shape(input2_shape)
output_info = op["outputs"][0]
output_shape = output_info["shape"]
output_c, output_h, output_w = get_chw_shape(output_shape)
# dtype
input_dtype = get_dtype(input_info)
output_dtype = get_dtype(output_info)
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input_info["name"],
"output_idx": output_info["name"],
"input_h": input_h,
"input_w": input_w,
"input_c": input_c,
"input2_h": input2_h,
"input2_w": input2_w,
"input2_c": input2_c,
"output_h": output_h,
"output_w": output_w,
"output_c": output_c,
"input_dtype": input_dtype,
"input2_dtype": input2_dtype,
"output_dtype": output_dtype,
}
op = reshape_like.reshape_like(params)
return op
def _convert_exp(self, op):
# shape
input_info = op["inputs"][0]
input_dtype = get_dtype(op["inputs"][0])
input_shape = input_info["shape"]
input_c, input_h, input_w = get_chw_shape(input_shape)
output_info = op["outputs"][0]
output_shape = output_info["shape"]
output_c, output_h, output_w = get_chw_shape(output_shape)
# dtype
input_dtype = get_dtype(input_info)
output_dtype = get_dtype(output_info)
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input_info["name"],
"output_idx": output_info["name"],
"input_size": input_h * input_w * input_c,
"input_dtype": input_dtype,
"output_dtype": output_dtype,
}
op = exp.exp(params)
return op
def _convert_transpose(self, op):
# shape
input_info = op["inputs"][0]
input_dtype = get_dtype(op["inputs"][0])
input_shape = input_info["shape"]
input_c, input_h, input_w = get_chw_shape(input_shape)
output_info = op["outputs"][0]
output_shape = output_info["shape"]
output_c, output_h, output_w = get_chw_shape(output_shape)
# dtype
input_dtype = get_dtype(input_info)
output_dtype = get_dtype(output_info)
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input_info["name"],
"output_idx": output_info["name"],
"input_h": input_h,
"input_w": input_w,
"input_c": input_c,
"input_dim": 3,
"output_dim": 3,
"output_h": output_h,
"output_w": output_w,
"output_c": output_c,
"input_dtype": input_dtype,
"input_vartype": input_info["var_type"],
"output_dtype": output_dtype,
}
if "axes" in op["attrs"] and op["attrs"]["axes"] is not None:
if op["attrs"]["axes"] == [1, 0, 2, 3]:
# torch: OIHW -> IOHW -> permute 1023
# tinyengine: OHWI -> IOHW -> permute 3012
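# i.e. destination order (3, 0, 1, 2) applied to an OHWI tensor pulls I to the
# front and yields IOHW, matching torch's (1, 0, 2, 3) on OIHW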
params["d1"], params["d2"], params["d3"], params["d4"] = input_shape
params["op"] = "permute_4D_3012"
op = permute_4D_3012.permute_4D_3012(params)
else:
raise NotImplementedError
else:
op = transpose.transpose(params)
return op
def _convert_strided_slice(self, op):
# shape
input_info = op["inputs"][0]
input_dtype = get_dtype(op["inputs"][0])
input_shape = input_info["shape"]
d1, d2, d3, d4 = input_shape # OHWI
output_info = op["outputs"][0]
output_shape = output_info["shape"]
o_d1, o_d2, o_d3, o_d4 = output_shape # OHWI
# dtype
input_dtype = get_dtype(input_info)
output_dtype = get_dtype(output_info)
b_list = op["attrs"]["begin"]
e_list = op["attrs"]["end"]
begin = b_list # [b_list[0], b_list[2], b_list[3], b_list[1]]
end = e_list # [e_list[0], e_list[2], e_list[3], e_list[1]]
strides = op["attrs"]["strides"]
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input_info["name"],
"output_idx": output_info["name"],
"d1": d1,
"d2": d2,
"d3": d3,
"d4": d4,
"begin": begin,
"end": end,
"strides": strides,
"input_dim": 4,
"output_dim": 4,
"o_d1": o_d1,
"o_d2": o_d2,
"o_d3": o_d3,
"o_d4": o_d4,
"input_dtype": input_dtype,
"output_dtype": output_dtype,
}
op = strided_slice.stridedSlice(params)
return op
def _convert_average_pool(self, op):
# shape
input_info = op["inputs"][0]
input_dtype = get_dtype(op["inputs"][0])
input_shape = input_info["shape"]
input_c, input_h, input_w = get_chw_shape(input_shape)
output_info = op["outputs"][0]
output_dtype = get_dtype(op["outputs"][0])
output_shape = output_info["shape"]
output_c, output_h, output_w = get_chw_shape(output_shape)
# dtype
input_dtype = get_dtype(input_info)
output_dtype = get_dtype(output_info)
params = {
# operator
"op": "AVERAGE_POOL_2D",
# pool parameters
"filter_h": input_h,
"filter_w": input_w,
"stride_h": 1,
"stride_w": 1,
"pad_h": 0,
"pad_w": 0,
# tensor
"input_idx": input_info["name"],
"output_idx": output_info["name"],
"input_h": input_h,
"input_w": input_w,
"input_c": input_c,
"input_dim": 3,
"output_dim": 3,
"output_h": output_h,
"output_w": output_w,
"output_c": output_c,
"input_dtype": input_dtype,
"output_dtype": output_dtype,
}
op = avgpool2d.AvgPool2d(params)
return op
def _convert_zeros(self, op):
# shape
output_info = op["outputs"][0]
output_dtype = get_dtype(op["outputs"][0])
output_shape = output_info["shape"]
output_c, output_h, output_w = get_chw_shape(output_shape)
# dtype
output_dtype = get_dtype(output_info)
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"output_idx": output_info["name"],
"input_dim": 3,
"output_dim": 3,
"output_h": output_h,
"output_w": output_w,
"output_c": output_c,
"output_dtype": output_dtype,
}
op = zeros_like.zeros_like(params)
return op
def _convert_ones(self, op):
# shape
output_info = op["outputs"][0]
output_dtype = get_dtype(op["outputs"][0])
output_shape = output_info["shape"]
output_c, output_h, output_w = get_chw_shape(output_shape)
# dtype
output_dtype = get_dtype(output_info)
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"output_idx": output_info["name"],
"input_dim": 3,
"output_dim": 3,
"output_h": output_h,
"output_w": output_w,
"output_c": output_c,
"output_dtype": output_dtype,
}
op = ones_like.ones_like(params)
return op
def _convert_zeros_like(self, op):
# shape
input_info = op["inputs"][0]
input_dtype = get_dtype(op["inputs"][0])
input_shape = input_info["shape"]
input_c, input_h, input_w = get_chw_shape(input_shape)
output_info = op["outputs"][0]
output_dtype = get_dtype(op["outputs"][0])
output_c = input_c
output_h = input_h
output_w = input_w
# dtype
input_dtype = get_dtype(input_info)
output_dtype = get_dtype(output_info)
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input_info["name"],
"output_idx": output_info["name"],
"input_h": input_h,
"input_w": input_w,
"input_c": input_c,
"input_dim": 3,
"output_dim": 3,
"output_h": output_h,
"output_w": output_w,
"output_c": output_c,
"input_dtype": input_dtype,
"output_dtype": output_dtype,
}
op = zeros_like.zeros_like(params)
return op
def _convert_ones_like(self, op):
# shape
input_info = op["inputs"][0]
input_dtype = get_dtype(op["inputs"][0])
input_shape = input_info["shape"]
input_c, input_h, input_w = get_chw_shape(input_shape)
output_info = op["outputs"][0]
output_dtype = get_dtype(op["outputs"][0])
output_shape = output_info["shape"]
output_c, output_h, output_w = get_chw_shape(output_shape)
# dtype
input_dtype = get_dtype(input_info)
output_dtype = get_dtype(output_info)
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input_info["name"],
"output_idx": output_info["name"],
"input_h": input_h,
"input_w": input_w,
"input_c": input_c,
"input_dim": 3,
"output_dim": 3,
"output_h": output_h,
"output_w": output_w,
"output_c": output_c,
"input_dtype": input_dtype,
"output_dtype": output_dtype,
}
op = ones_like.ones_like(params)
return op
def _convert_collapse_sum_like(self, op):
# shape
input_info = op["inputs"][0]
input_dtype = get_dtype(op["inputs"][0])
input_shape = input_info["shape"]
input_c, input_h, input_w = get_chw_shape(input_shape)
output_info = op["outputs"][0]
output_dtype = get_dtype(op["outputs"][0])
output_shape = output_info["shape"]
output_c, output_h, output_w = get_chw_shape(output_shape)
# dtype
input_dtype = get_dtype(input_info)
output_dtype = get_dtype(output_info)
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input_info["name"],
"output_idx": output_info["name"],
"input_h": input_h,
"input_w": input_w,
"input_c": input_c,
"input_dim": 3,
"output_dim": 3,
"output_h": output_h,
"output_w": output_w,
"output_c": output_c,
"input_dtype": input_dtype,
"output_dtype": output_dtype,
}
op = collapse_sum_like.collapseSumLike(params)
return op
def _convert_log_softmax(self, op):
# shape
input_info = op["inputs"][0]
input_dtype = get_dtype(op["inputs"][0])
input_shape = input_info["shape"]
input_h, input_w, input_c = get_chw_shape(input_shape)
output_info = op["outputs"][0]
output_dtype = get_dtype(op["outputs"][0])
output_shape = output_info["shape"]
output_h, output_w, output_c = get_chw_shape(output_shape)
# dtype
input_dtype = get_dtype(input_info)
output_dtype = get_dtype(output_info)
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input_info["name"],
"output_idx": output_info["name"],
"input_h": input_h,
"input_w": input_w,
"input_c": input_c,
"input_dim": 3,
"output_dim": 3,
"output_h": output_h,
"output_w": output_w,
"output_c": output_c,
"input_dtype": input_dtype,
"output_dtype": output_dtype,
}
op = log_softmax.logSoftMax(params)
return op
def _convert_cross_entropy_with_logits(self, op):
# shape
input_info = op["inputs"][0]
input_dtype = get_dtype(op["inputs"][0])
input_shape = input_info["shape"]
input_c, input_h, input_w = get_chw_shape(input_shape)
input2_info = op["inputs"][1]
output_info = op["outputs"][0]
output_dtype = get_dtype(op["outputs"][0])
output_shape = output_info["shape"]
output_c, output_h, output_w = get_chw_shape(output_shape)
# dtype
input2_dtype = get_dtype(input2_info)
output_dtype = get_dtype(output_info)
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input_info["name"],
"input2_idx": input2_info["name"],
"output_idx": output_info["name"],
"input_h": input_h,
"input_w": input_w,
"input_c": input_c,
"input_dim": 3,
"output_dim": 3,
"output_h": output_h,
"output_w": output_w,
"output_c": output_c,
"input_dtype": input_dtype,
"input2_dtype": input2_dtype,
"output_dtype": output_dtype,
}
op = nll_loss.nllLoss(params)
return op
def _convert_qadd(self, op):
def _getSigShift(s):
sig, shi = math.frexp(s)
sig = int(round(sig * 2**31))
if sig == 2**31:
sig //= 2
shi += 1
if shi < -31:
shi = 0
sig = 0
return sig, shi
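# worked example (values computed by hand): s = 0.75 gives
# math.frexp(0.75) == (0.75, 0), so sig = round(0.75 * 2**31) = 1610612736 and
# shi = 0; reconstructing sig / 2**31 * 2**shi recovers 0.75 exactly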
def _getADDMultiplierShift(input_scale, input2_scale, output_scale):
left_shift = 20
twice_max_input_scale = 2 * np.double(max(input_scale, input2_scale))
real_input1_multiplier = np.double(input_scale / twice_max_input_scale)
real_input2_multiplier = np.double(input2_scale / twice_max_input_scale)
real_output_multiplier = np.double(twice_max_input_scale / ((1 << left_shift) * output_scale))
input_multiplier, input_shift = _getSigShift(real_input1_multiplier)
input2_multiplier, input2_shift = _getSigShift(real_input2_multiplier)
output_multiplier, output_shift = _getSigShift(real_output_multiplier)
return (
left_shift,
input_multiplier,
input_shift,
input2_multiplier,
input2_shift,
output_multiplier,
output_shift,
)
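# a rough sketch of the add requantization (not lifted from the kernels):
# both inputs are rescaled to the shared scale 2 * max(s1, s2) with left_shift
# bits of headroom, summed, and the sum is rescaled to the output scale; each
# rescale uses one of the fixed-point multiplier/shift pairs computed above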
# shape
input0_info = op["inputs"][0]
input0_shape = input0_info["shape"]
input0_c, input0_h, input0_w = get_chw_shape(input0_shape)
input2_info = op["inputs"][1]
output_info = op["outputs"][0]
# dtype
input_dtype = get_dtype(input0_info)
input2_dtype = get_dtype(input2_info)
output_dtype = get_dtype(output_info)
assert len(op["inputs"]) == 8, "Unexpected number of inputs"
input_zero_point = self.data[op["inputs"][2]["name"]][0]
output_zero_point = self.data[op["inputs"][6]["name"]][0]
input2_zero_point = self.data[op["inputs"][3]["name"]][0]
input_scale = self.data[op["inputs"][4]["name"]][0]
input2_scale = self.data[op["inputs"][5]["name"]][0]
output_scale = self.data[op["inputs"][7]["name"]][0]
(
left_shift,
input_multiplier,
input_shift,
input2_multiplier,
input2_shift,
output_multiplier,
output_shift,
) = _getADDMultiplierShift(input_scale, input2_scale, output_scale)
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input0_info["name"],
"input2_idx": input2_info["name"],
"output_idx": output_info["name"],
"input_h": input0_h,
"input_w": input0_w,
"input_c": input0_c,
"input2_h": input0_h,
"input2_w": input0_w,
"input2_c": input0_c,
"input_dim": 3,
"input2_dim": 3,
"output_dim": 3,
"output_h": input0_h,
"output_w": input0_w,
"output_c": input0_c,
"input_dtype": input_dtype,
"input2_dtype": input2_dtype,
"output_dtype": output_dtype,
# trainable parameters
"input_zero_point": input_zero_point,
"input2_zero_point": input2_zero_point,
"output_zero_point": output_zero_point,
"input_scale": input_scale,
"input2_scale": input2_scale,
"output_scale": output_scale,
# quantized inference
"left_shift": left_shift,
"input_multiplier": input_multiplier,
"input2_multiplier": input2_multiplier,
"input_shift": input_shift,
"input2_shift": input2_shift,
"output_multiplier": output_multiplier,
"output_shift": output_shift,
}
op = add.Add(params)
return op
def _convert_add1d(self, op):
# shape
input0_info = op["inputs"][0]
input0_shape = input0_info["shape"]
input0_c, input0_h, input0_w = get_chw_shape(input0_shape)
input2_info = op["inputs"][1]
output_info = op["outputs"][0]
# dtype
input_dtype = get_dtype(input0_info)
input2_dtype = get_dtype(input2_info)
output_dtype = get_dtype(output_info)
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input0_info["name"],
"input2_idx": input2_info["name"],
"output_idx": output_info["name"],
"input_size": input0_h * input0_w * input0_c,
"input_dtype": input_dtype,
"input2_dtype": input2_dtype,
"output_dtype": output_dtype,
}
op = add1d.add1d(params)
return op
def _convert_sub(self, op):
# shape
input0_info = op["inputs"][0]
input0_shape = input0_info["shape"]
input0_c, input0_h, input0_w = get_chw_shape(input0_shape)
input2_info = op["inputs"][1]
output_info = op["outputs"][0]
# dtype
input_dtype = get_dtype(input0_info)
input2_dtype = get_dtype(input2_info)
output_dtype = get_dtype(output_info)
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input0_info["name"],
"input2_idx": input2_info["name"],
"output_idx": output_info["name"],
"input_size": input0_h * input0_w * input0_c,
"input_dtype": input_dtype,
"input2_dtype": input2_dtype,
"output_dtype": output_dtype,
}
op = sub.sub(params)
return op
def _convert_div(self, op):
# shape
input0_info = op["inputs"][0]
input0_shape = input0_info["shape"]
input0_c, input0_h, input0_w = get_chw_shape(input0_shape)
input2_info = op["inputs"][1]
output_info = op["outputs"][0]
# dtype
input_dtype = get_dtype(input0_info)
input2_dtype = get_dtype(input2_info)
output_dtype = get_dtype(output_info)
if "scale" in input2_info["name"]:
scale_from_add = self.data[input2_info["name"]][0]
else:
scale_from_add = None
input2 = input2_info["meta"]["data"] if isconstanttstr(input2_info["name"]) else None
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input0_info["name"],
"input2_idx": input2_info["name"],
"output_idx": output_info["name"],
"input_size": input0_h * input0_w * input0_c,
"input_dtype": input_dtype,
"input2": input2,
"input2_dtype": input2_dtype,
"output_dtype": output_dtype,
# input of scale from some conv2d
"scale_from_add": scale_from_add,
}
op = div.div(params)
return op
def _convert_mul(self, op):
# shape
input0_info = op["inputs"][0]
input0_shape = input0_info["shape"]
input0_c, input0_h, input0_w = get_chw_shape(input0_shape)
input2_info = op["inputs"][1]
input2_shape = input2_info["shape"]
input2_c, input2_h, input2_w = get_chw_shape(input2_shape)
output_info = op["outputs"][0]
output_c, output_h, output_w = get_chw_shape(output_info["shape"])
# dtype
input_dtype = get_dtype(input0_info)
input2_dtype = get_dtype(input2_info)
output_dtype = get_dtype(output_info)
scale_conv_2d_op = None
scale_from_add = None
if "constant" in input2_info["name"]:
constant = input2_info["meta"]["data"]
else:
constant = None
if "scale" in input2_info["name"]:
# should be a scalar then
if "qadd" in input2_info["name"]:
scale_from_add = self.data[input2_info["name"]][0]
constant = self.data[input2_info["name"]][0]
else:
scale_conv_2d_op = _findConv2dwithScaleName(self.model, self.layer, input2_info["name"])
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input0_info["name"],
"output_size": output_c * output_h * output_w,
"input2_idx": input2_info["name"],
"output_idx": output_info["name"],
"input_size": input0_h * input0_w * input0_c,
"input2_size": input2_h * input2_w * input2_c,
"input_dtype": input_dtype,
"input2_dtype": input2_dtype,
"output_dtype": output_dtype,
# input of scale from some conv2d
"scale_conv_2d_op": scale_conv_2d_op,
"scale_from_add": scale_from_add,
"constant": constant,
}
op = mul.mul(params)
return op
def _convert_less(self, op):
# shape
input0_info = op["inputs"][0]
input0_shape = input0_info["shape"]
input0_c, input0_h, input0_w = get_chw_shape(input0_shape)
input2_info = op["inputs"][1]
output_info = op["outputs"][0]
output_dtype = get_dtype(op["outputs"][0])
output_shape = output_info["shape"]
output_c, output_h, output_w = get_chw_shape(output_shape)
# dtype
input_dtype = get_dtype(input0_info)
input2_dtype = get_dtype(input2_info)
output_dtype = get_dtype(output_info)
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input0_info["name"],
"input2_idx": input2_info["name"],
"output_idx": output_info["name"],
"input_size": input0_c * input0_h * input0_w,
"input_dtype": input_dtype,
"input2_dtype": input2_dtype,
"output_dtype": output_dtype,
}
op = less.less(params)
return op
def _convert_greater(self, op):
# shape
input0_info = op["inputs"][0]
input0_shape = input0_info["shape"]
input0_c, input0_h, input0_w = get_chw_shape(input0_shape)
input2_info = op["inputs"][1]
output_info = op["outputs"][0]
output_dtype = get_dtype(op["outputs"][0])
output_shape = output_info["shape"]
output_c, output_h, output_w = get_chw_shape(output_shape)
# dtype
input_dtype = get_dtype(input0_info)
input2_dtype = get_dtype(input2_info)
output_dtype = get_dtype(output_info)
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input0_info["name"],
"input2_idx": input2_info["name"],
"output_idx": output_info["name"],
"input_size": input0_c * input0_h * input0_w,
"input_dtype": input_dtype,
"input2_dtype": input2_dtype,
"output_dtype": output_dtype,
}
op = greater.greater(params)
return op
def _convert_where(self, op):
# shape
input0_info = op["inputs"][0]
input0_shape = input0_info["shape"]
input0_c, input0_h, input0_w = get_chw_shape(input0_shape)
input2_info = op["inputs"][1]
input3_info = op["inputs"][2]
output_info = op["outputs"][0]
output_dtype = get_dtype(op["outputs"][0])
output_shape = output_info["shape"]
output_c, output_h, output_w = get_chw_shape(output_shape)
# dtype
input_dtype = get_dtype(input0_info)
input2_dtype = get_dtype(input2_info)
input3_dtype = get_dtype(input3_info)
output_dtype = get_dtype(output_info)
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input0_info["name"],
"input2_idx": input2_info["name"],
"input3_idx": input3_info["name"],
"output_idx": output_info["name"],
"input_size": input0_c * input0_h * input0_w,
"output_size": output_c * output_h * output_w,
"input_dtype": input_dtype,
"input2_dtype": input2_dtype,
"input3_dtype": input3_dtype,
"output_dtype": output_dtype,
}
op = where.where(params)
return op
def _convert_negative(self, op):
# shape
input0_info = op["inputs"][0]
input0_shape = input0_info["shape"]
input_c, input_h, input_w = get_chw_shape(input0_shape)
output_info = op["outputs"][0]
# dtype
input_dtype = get_dtype(input0_info)
output_dtype = get_dtype(output_info)
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input0_info["name"],
"output_idx": output_info["name"],
"input_size": input_c * input_h * input_w,
"input_dtype": input_dtype,
"output_dtype": output_dtype,
}
op = negative.negative(params)
return op
def _convert_sum(self, op):
# shape
input_info = op["inputs"][0]
input_shape = input_info["shape"]
if len(input_shape) == 4:
d1, d2, d3, d4 = input_shape
else:
d2, d3, d4 = get_chw_shape(input_shape)
d1 = 1
output_info = op["outputs"][0]
output_shape = output_info["shape"]
if len(output_shape) == 4:
od1, od2, od3, od4 = output_shape
else:
od2, od3, od4 = get_chw_shape(output_shape)
od1 = 1
# dtype
input_dtype = get_dtype(input_info)
output_dtype = get_dtype(output_info)
# TODO: update here after bugs in the TTE IR are fixed (null)
if op["attrs"]["axis"] is None:
op["attrs"]["exclude"] = 1
axis = 0
elif op["attrs"]["axis"][0] < 0:
if op["attrs"]["axis"][0] == -1:
axis = 3
elif op["attrs"]["axis"][0] == -2:
axis = 2
elif op["attrs"]["axis"][0] == -3:
axis = 1
elif op["attrs"]["axis"][0] == -4:
axis = 0
else:
axis = op["attrs"]["axis"][0]
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input_info["name"],
"output_idx": output_info["name"],
"axis": axis,
"d1": d1,
"d2": d2,
"d3": d3,
"d4": d4,
"input_dim": 4,
"output_dim": 3,
"od1": od1,
"od2": od2,
"od3": od3,
"od4": od4,
"input_dtype": input_dtype,
"output_dtype": output_dtype,
"exclude": bool(op["attrs"]["exclude"]),
}
op = sum.sum(params)
return op
def _convert_tile(self, op):
# shape
input_info = op["inputs"][0]
input_shape = input_info["shape"]
input_c, input_h, input_w = get_chw_shape(input_shape)
output_info = op["outputs"][0]
output_shape = output_info["shape"]
output_c, output_h, output_w = get_chw_shape(output_shape)
# dtype
input_dtype = get_dtype(input_info)
output_dtype = get_dtype(output_info)
reps_size = len(op["attrs"]["reps"])
reps = op["attrs"]["reps"]
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input_info["name"],
"output_idx": output_info["name"],
"reps_size": reps_size,
"reps": reps,
"input_h": input_h,
"input_w": input_w,
"input_c": input_c,
"input_dim": 3,
"output_dim": 3,
"output_h": output_h,
"output_w": output_w,
"output_c": output_c,
"input_dtype": input_dtype,
"output_dtype": output_dtype,
}
op = tile.tile(params)
return op
def _convert_convolution(self, op):
weight_info = op["inputs"][1]
if op["attrs"]["kernel_size"] is None:
kh = op["inputs"][1]["shape"][-1]
kw = op["inputs"][1]["shape"][-2]
else:
kh, kw = op["attrs"]["kernel_size"]
padding = op["attrs"]["padding"][0]
groups = op["attrs"]["groups"]
sw, sh = op["attrs"]["strides"]
# shape
input_info = op["inputs"][0]
input_shape = input_info["shape"]
_, input_c, input_h, input_w = input_shape
output_info = op["outputs"][0]
output_shape = output_info["shape"]
_, output_c, output_h, output_w = output_shape
# dtype
input_dtype = get_dtype(input_info)
output_dtype = get_dtype(output_info)
weight_name = weight_info["name"]
if weight_name not in self.data:
weight_value = weight_name
else:
weight_value = self.data[weight_name].transpose(0, 2, 3, 1) # OIHW -> OHWI
# For depthwise conv we assume the weights to be in HWC in code generation,
# so we revert the tensor to match that assumption
if groups == input_c == output_c:
weight_value = weight_value.transpose(3, 1, 2, 0)  # OHWI -> IHWO
has_bias = False
for inp in op["inputs"]:
if "weight" in inp["name"]:
_accessTrainable(self.trainedWeights, inp["name"])
if "bias" in inp["name"]:
bias_name = inp["name"]
bias = self.data[bias_name]
has_bias = True
_accessTrainable(self.trainedBias, inp["name"])
# for floating point implementation, we allow no bias data since the data might be in bias_add ops
if op["type"] not in {"nn.mcuconv2d"} and not has_bias:
bias = np.zeros(output_shape[1])
bias_name = None
has_bias = True
assert has_bias, "no bias data found!"
# normal conv2d, depthwise, group conv
if groups == 1:
op_name_idx = 0
elif groups == input_c == output_c and input_dtype == "int8" and output_dtype == "int32":
op_name_idx = 1
else:
op_name_idx = 2
if input_dtype == "int8" and output_dtype == "int32" and op_name_idx != 2 and op["type"] != "nn.conv2d":
input_zero_point = self.data[_findKeyinTensors(op["inputs"], "zero_x")["name"]][0]
output_zero_point = self.data[_findKeyinTensors(op["inputs"], "zero_y")["name"]][0]
# TODO: if the IR separates these
input_scale = np.ones(output_shape[1])
output_scale = np.ones(output_shape[1])
weight_scale = np.ones(output_shape[1])
# effective scales
effective_scale = self.data[_findKeyinTensors(op["inputs"], "scale")["name"]]
else:
input_zero_point = 0
output_zero_point = 0
input_scale = np.ones(output_shape[1])
output_scale = np.ones(output_shape[1])
weight_scale = np.ones(output_shape[1])
effective_scale = np.ones(output_shape[1])
multiplier, shift = _getMultiplierShift(effective_scale)
params = {
"op": op_name_translation[op["type"]][op_name_idx],
# SE blocks
"is_SEBlock": "se" in op and op["se"],
"kernel_h": kh,
"kernel_w": kw,
"padding": padding,
"padding_h": op["attrs"]["padding"][0],
"padding_w": op["attrs"]["padding"][1],
"dilation_h": op["attrs"]["dilation"][0],
"dilation_w": op["attrs"]["dilation"][1],
"groups": groups,
"stride_h": sh,
"stride_w": sw,
"input_idx": input_info["name"],
"output_idx": output_info["name"],
"input_dim": 3,
"input_c": input_c,
"input_h": input_h,
"input_w": input_w,
"output_dim": 3,
"output_c": output_c,
"output_h": output_h,
"output_w": output_w,
"input_dtype": input_dtype,
"output_dtype": output_dtype,
"weight_value": weight_value,
"weight_name": weight_name,
"bias": bias.astype(int),
"bias_name": bias_name,
"effective_scale": effective_scale,
"input_zero_point": input_zero_point,
"output_zero_point": output_zero_point,
"multiplier": multiplier.astype(int),
"shift": shift.astype(int),
"input_scale": input_scale,
"output_scale": output_scale,
"weight_scale": weight_scale,
}
if groups == 1:
op = conv2d.Conv2d(params)
elif groups == input_c == output_c and input_dtype == "int8" and output_dtype == "int32":
op = depthwiseConv2d.DepthwiseConv2d(params)
else:
if "inplace_int8_input" in op and op["inplace_int8_input"]:
params["inplace_int8_input"] = True
params["float32_input2"] = True if op["inputs"][1]["dtype"] == "float32" else False
op = group_conv2d.groupConv2d(params)
if isinstance(weight_value, str):
w_c, w_w, w_h = (
weight_info["shape"][1] * weight_info["shape"][2],
weight_info["shape"][2],
weight_info["shape"][3],
)
op._add_input(weight_name, input_dtype, w_h, w_w, w_c)
# for int8 group conv
if op_name_idx == 2 and "float32_input2" in params and not params["float32_input2"]:
# we will use int8 for input and output, dynamic quantization will be inside ops
for input_tensor in op.input_tensors:
input_tensor.dtype = "int8"
op.output_tensors[0].dtype = "int8"
op.params["intput_dtype"] = "int8"
op.params["intput2_dtype"] = "int8"
op.params["output_dtype"] = "int8"
# we need int32 intermediate buffer for normalization
op.add_int32_buffer_tensor()
return op
def _convert_transpose_conv2d(self, op):
assert op["attrs"]["kernel_size"] is not None
kh, kw = op["attrs"]["kernel_size"]
padding = op["attrs"]["padding"][0]
groups = op["attrs"]["groups"]
sw, sh = op["attrs"]["strides"]
# shape
input_info = op["inputs"][0]
input2_info = op["inputs"][1]
input_shape = input_info["shape"]
_, input_c, input_h, input_w = input_shape
output_info = op["outputs"][0]
output_shape = output_info["shape"]
_, output_c, output_h, output_w = output_shape
# dtype
input_dtype = get_dtype(input_info)
input2_dtype = get_dtype(input2_info)
output_dtype = get_dtype(output_info)
weight_info = op["inputs"][-1]
weight_name = weight_info["name"]
for inp in op["inputs"]:
if "weight" in inp["name"]:
_accessTrainable(self.trainedWeights, inp["name"])
if "bias" in inp["name"]:
_accessTrainable(self.trainedBias, inp["name"])
if weight_name not in self.data:
weight_value = weight_name
else:
weight_value = self.data[weight_name]
# we currently ignore the bias
bias = np.zeros(input_shape[1])
params = {
"op": op_name_translation[op["type"]],
# SE blocks
"is_SEBlock": "se" in op and op["se"],
"kernel_h": kh,
"kernel_w": kw,
"padding": padding,
"padding_h": op["attrs"]["padding"][0],
"padding_w": op["attrs"]["padding"][1],
"kernel_layout": op["attrs"]["kernel_layout"],
"group": groups,
"stride_h": sh,
"stride_w": sw,
"input_idx": input_info["name"],
"output_idx": output_info["name"],
"input_dim": 3,
"input_c": input_shape[1],
"input_h": input_shape[2],
"input_w": input_shape[3],
"output_dim": 3,
"output_c": output_shape[1],
"output_h": output_shape[2],
"output_w": output_shape[3],
"input_dtype": input_dtype,
"input2_dtype": input2_dtype,
"output_dtype": output_dtype,
"weight_value": weight_value,
"weight_name": weight_name,
"bias": bias.astype(int),
}
op_instance = transpose_conv2d.transposeConv2d(params)
if isinstance(weight_value, str):
w_c, w_w, w_h = (
weight_info["shape"][0] * weight_info["shape"][1],
weight_info["shape"][2],
weight_info["shape"][3],
)
op_instance._add_input(weight_name, input_dtype, w_h, w_w, w_c)
# for full int8 transpose conv
# 1. cast dtypes since we requantize inside the op
# 2. we need an int32 intermediate buffer for normalization, and
# 3. update the output type to int8 since we fuse normalization inside a single op
if params["input2_dtype"] == params["input_dtype"] == "int8" and params["output_dtype"] == "int32":
# we will use int8 for input and output, dynamic quantization will be inside ops
for input_tensor in op_instance.input_tensors:
input_tensor.dtype = "int8"
op_instance.output_tensors[0].dtype = "int8"
op_instance.params["intput_dtype"] = "int8"
op_instance.params["intput2_dtype"] = "int8"
op_instance.params["output_dtype"] = "int8"
op_instance.add_int32_buffer_tensor()
return op_instance
def _convert_matmul(self, op):
# shape
input0_info = op["inputs"][0]
input0_shape = input0_info["shape"]
input0_c, input0_h, input0_w = get_chw_shape(input0_shape)
input2_info = op["inputs"][1]
input2_shape = input2_info["shape"]
output_info = op["outputs"][0]
input2_c, input2_h, input2_w = get_chw_shape(input2_shape)
# dtype
input_dtype = get_dtype(input0_info)
input2_dtype = get_dtype(input2_info)
output_dtype = get_dtype(output_info)
weight_info = op["inputs"][-1]
weight_name = weight_info["name"]
for inp in op["inputs"]:
if "weight" in inp["name"]:
_accessTrainable(self.trainedWeights, inp["name"])
if "bias" in inp["name"]:
_accessTrainable(self.trainedBias, inp["name"])
if weight_name not in self.data:
weight_value = weight_name
else:
weight_value = self.data[weight_name]
assert input0_w == input2_h
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input0_info["name"],
"input2_idx": input2_info["name"],
"output_idx": output_info["name"],
"matA_row": input0_h,
"matA_col": input0_w,
"matB_row": input2_h,
"matB_col": input2_w,
"input_dtype": input_dtype,
"input2_dtype": input2_dtype,
"output_dtype": output_dtype,
"weight_value": weight_value,
"weight_name": weight_name,
}
op = mat_mul.mat_mul(params)
if isinstance(weight_value, str):
w_c, w_w, w_h = get_chw_shape(weight_info["shape"])
op._add_input(weight_name, input_dtype, w_h, w_w, w_c)
return op
def _convert_dense(self, op):
# shape
input0_info = op["inputs"][0]
input0_shape = input0_info["shape"]
input0_c, input0_h, input0_w = get_chw_shape(input0_shape)
input2_info = op["inputs"][1]
input2_shape = input2_info["shape"]
output_info = op["outputs"][0]
input2_c, input2_h, input2_w = get_chw_shape(input2_shape)
# dtype
input_dtype = get_dtype(input0_info)
input2_dtype = get_dtype(input2_info)
output_dtype = get_dtype(output_info)
weight_info = op["inputs"][-1]
weight_name = weight_info["name"]
for inp in op["inputs"]:
if "weight" in inp["name"]:
_accessTrainable(self.trainedWeights, inp["name"])
if "bias" in inp["name"]:
_accessTrainable(self.trainedBias, inp["name"])
if weight_name not in self.data:
weight_value = weight_name
else:
weight_value = self.data[weight_name]
assert input0_w == input2_w
params = {
# operator
"op": op_name_translation[op["type"]],
# tensor
"input_idx": input0_info["name"],
"input2_idx": input2_info["name"],
"output_idx": output_info["name"],
"matA_row": input0_h,
"matA_col": input0_w,
"matB_row": input2_h,
"matB_col": input2_w,
"input_dtype": input_dtype,
"input2_dtype": input2_dtype,
"output_dtype": output_dtype,
"weight_value": weight_value,
"weight_name": weight_name,
}
op = dense.dense(params)
if isinstance(weight_value, str):
w_c, w_w, w_h = get_chw_shape(weight_info["shape"])
op._add_input(weight_name, input_dtype, w_h, w_w, w_c)
return op
def get_dtype(tensor_info):
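# tensors without an explicit dtype default to int8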
if "dtype" in tensor_info:
return tensor_info["dtype"]
else:
return "int8"
def get_chw_shape(shape):
input_c = input_h = input_w = 0
batch = 1
if len(shape) == 5:
_, batch, input_c, input_h, input_w = shape
elif len(shape) == 4:
batch, input_c, input_h, input_w = shape
elif len(shape) == 2:
input_h, input_w = shape
input_c = 1
elif len(shape) == 1:
input_h = input_w = 1
input_c = shape[0]
else:
raise NotImplementedError
if batch != 1:
# raise ValueError("batch size should be 1")
warnings.warn("batch size should be 1")
return input_c, input_h, input_w
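# e.g. get_chw_shape([1, 16, 8, 8]) -> (16, 8, 8) and get_chw_shape([10]) -> (10, 1, 1)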
def _getMultiplierShift(effective_scale):
significand = np.zeros(len(effective_scale), dtype="int32")
shift = np.zeros(len(effective_scale), dtype="int32")
for i, s in enumerate(effective_scale):
if s == 0:
significand[i] = 0
shift[i] = 0
else:
sig, shi = math.frexp(s)
sig = int(round(sig * 2**31))
if sig == 2**31:
sig //= 2
shi += 1
if shi < -31:
shi = 0
sig = 0
significand[i] = sig
shift[i] = shi
return significand, shift
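# worked example (computed by hand): effective_scale = [0.25] gives
# math.frexp(0.25) == (0.5, -1), so significand[0] = round(0.5 * 2**31) = 1 << 30
# and shift[0] = -1; significand * 2.0**shift / 2**31 reconstructs 0.25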