import json
import math
import pickle
import warnings

import numpy as np

from .constant import (
    FUSE_INT8CAST_STR,
    FUSE_SGD_UPDATE_STR,
    FUSE_TILE_STR,
    FUSE_WHERE_ZEROSSTR,
    FUSHION_CONFIG,
    INPLACE_MUL_STR,
    INPLACE_WHERE_STR,
    REORDER_STR,
    USE_BIT_MASK,
    op_name_translation,
)
from .FusionUtil import (
    _accessTrainable,
    _castisFusable,
    _castisFusable_for_gconv,
    _fileTileRepAsWeights,
    _findBinMaskPattern,
    _findBinMaskPatternint8,
    _findConv2dwithScaleName,
    _findKeyinTensors,
    _findMultiplyAbsMaxDivide,
    _findPartialConv,
    _findTargetWeightforGconv,
    _findTransposeMultiplyAbsMaxDivide,
    _findWhereTensorFrom,
    _removeLayers,
    _updateIdx,
    _updateIdxParameter,
    _updateOutputDtype,
)
from .GraphReorder import reorderGroupConv_TransponseConv, reorderGroupConv_TransponseConv_int8
from .operators import (
    add,
    add1d,
    avgpool2d,
    bias_add,
    bias_update,
    cast,
    collapse_sum_like,
    conv2d,
    dense,
    depthwiseConv2d,
    div,
    exp,
    greater,
    group_conv2d,
    less,
    log_softmax,
    mat_mul,
    mul,
    negative,
    nll_loss,
    ones_like,
    permute_4D_3012,
    permute_groupconv_out,
    relu,
    reshape,
    reshape_like,
    strided_slice,
    sub,
    sum,
    tile,
    transpose,
    transpose_conv2d,
    where,
    zeros_like,
)
from .operators.basic_utils import isconstanttstr
from .QAS_util import get_effective_scalename_with_input_key, get_QAS

MAX_DAGOP_OUTPUTS = 5

fused_op = {"clip", "nn.batch_flatten", "squeeze", "reshape", "reshape_like"}
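# NOTE: ops listed in `fused_op` never emit a TinyEngine layer of their own; loadModel()
# simply redirects their output tensor index to their input via _updateIdx(), so the
# following ops read the producer's buffer directly.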


class outputInfo:
    def __init__(self, name, idx, len, dtype):
        self.name = name
        self.idx = idx
        self.len = len
        self.dtype = dtype


class TTEParser(object):
    def __init__(self, model, data, scale_params=None):
        self.layer = []
        self.gout = []
        self.det_outputs = None
        with open(model, "r") as f:
            self.model = json.load(f)
        with open(data, "rb") as f:
            w_params = pickle.load(f)
        self.data = {}
        for k in w_params:
            if k[0] != "v":
                self.data[f"v{k}"] = w_params[k]
            else:
                self.data[k] = w_params[k]
        self.scale_params = scale_params

        self.layer = []
        self.trainedWeights = []  # key, weight_ip
        self.trainedBias = []  # key, weight_ip
        self.fusedInputTable = {}
        self.outputTables = []
        self.regularFunctionTable = {
            "cast": self._convert_cast,
            "cast_like": self._convert_cast,
            "exp": self._convert_exp,
            "transpose": self._convert_transpose,
            "where": self._convert_where,
            "nn.conv2d_transpose": self._convert_transpose_conv2d,
            "strided_slice": self._convert_strided_slice,
            "nn.bias_add": self._convert_bias_add,
            "nn.relu": self._convert_relu,
            "zeros_like": self._convert_zeros_like,
            "zeros": self._convert_zeros,
            "ones_like": self._convert_ones_like,
            "ones": self._convert_ones,
            "collapse_sum_like": self._convert_collapse_sum_like,
            "less": self._convert_less,
            "less_equal": self._convert_less,
            "nn.log_softmax": self._convert_log_softmax,
            "nn.cross_entropy_with_logits": self._convert_cross_entropy_with_logits,
            "divide": self._convert_div,
            "tile": self._convert_tile,
            "negative": self._convert_negative,
            "greater": self._convert_greater,
            "greater_equal": self._convert_greater,
            "multiply": self._convert_mul,
            "nn.matmul": self._convert_matmul,
            "nn.dense": self._convert_dense,
            "mcumean": self._convert_average_pool,
        }
        self.partialChannelList = {}  # "idx": first_k_channel

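    # Illustrative usage (a minimal sketch; the file names below are hypothetical, and the
    # formats are those expected by __init__: `model` is a JSON list of ops and `data` is a
    # pickled dict of named parameters):
    #
    #   parser = TTEParser("graph.json", "params.pkl", scale_params=None)
    #   parser.loadModel()
    #   for layer in parser.layer:           # parsed/fused TinyEngine operators in execution order
    #       print(layer.params["op"])
    #   for out in parser.outputTables:      # gradient outputs registered for SGD updates
    #       print(out.name, out.idx, out.len, out.dtype)
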
def loadModel(self):
|
|
last_op = None
|
|
has_zero_x = False
|
|
zero_x = None
|
|
self.fusedInputTable[self.model[0]["inputs"][0]["name"]] = self.model[0]["inputs"][0]["name"]
|
|
# reorder the group conv and transpose conv to calculate weight gradients first
|
|
if FUSHION_CONFIG[REORDER_STR]:
|
|
self.model = reorderGroupConv_TransponseConv(self.model)
|
|
self.model = reorderGroupConv_TransponseConv_int8(self.model)
|
|
for cnt, op in enumerate(self.model):
|
|
op_type = op["type"]
|
|
if op_type in {"nn.conv2d", "nn.mcuconv2d"}:
|
|
last_op = self._convert_convolution(op)
|
|
# Float bp fusion
|
|
# check if we need to have binary mask for this conv2d
|
|
# conv2d (int32) -> cast -> greater/less -> multiply -> where (which take the map)
|
|
# fusion | --------------------------------------|
|
|
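                # When the pattern is found, the conv keeps the activation-derived binary mask as a
                # second output (with the channel dimension packed down to ceil(C / 8) when
                # USE_BIT_MASK is set) and the matched cast/greater/multiply/where ops are removed
                # from the graph via _removeLayers below.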
if op["outputs"][0]["dtype"] == "int32":
|
|
pattern_found, op_dict = _findBinMaskPattern(self.model, op["outputs"][0]["name"])
|
|
if pattern_found:
|
|
# add second output in the output tensors
|
|
b_mask_info = op_dict["multiply"]["outputs"][0]
|
|
if USE_BIT_MASK:
|
|
last_op._add_output(
|
|
b_mask_info["name"],
|
|
"bool",
|
|
int(math.ceil(last_op.params["output_c"] / 8)),
|
|
last_op.params["output_w"],
|
|
last_op.params["output_h"],
|
|
)
|
|
else:
|
|
last_op._add_output(
|
|
b_mask_info["name"],
|
|
b_mask_info["dtype"],
|
|
last_op.params["output_c"],
|
|
last_op.params["output_w"],
|
|
last_op.params["output_h"],
|
|
)
|
|
# update params in conv2d
|
|
last_op.params["need_Bmask"] = True
|
|
last_op.params["output2_h"] = last_op.params["output_h"]
|
|
last_op.params["output2_w"] = last_op.params["output_w"]
|
|
last_op.params["output2_c"] = last_op.params["output_c"]
|
|
last_op.params["output2_dtype"] = b_mask_info["dtype"]
|
|
last_op.params["output2_idx"] = b_mask_info["name"]
|
|
# remove fused ops in the graph
|
|
_removeLayers(self.model, op_dict)
|
|
# int8 bp fusion
|
|
# check if we need to have binary mask for this conv2d
|
|
# conv2d (int32) -> greater/less -> multiply -> where (which take the map)
|
|
# fusion | ------------------------------|
|
|
if op["outputs"][0]["dtype"] == "int32":
|
|
pattern_found, op_dict = _findBinMaskPatternint8(self.model, op["outputs"][0]["name"])
|
|
if pattern_found:
|
|
# add second output in the output tensors
|
|
b_mask_info = op_dict["multiply"]["outputs"][0]
|
|
if USE_BIT_MASK:
|
|
last_op._add_output(
|
|
b_mask_info["name"],
|
|
"bool",
|
|
int(math.ceil(last_op.params["output_c"] / 8)),
|
|
last_op.params["output_w"],
|
|
last_op.params["output_h"],
|
|
)
|
|
else:
|
|
last_op._add_output(
|
|
b_mask_info["name"],
|
|
b_mask_info["dtype"],
|
|
last_op.params["output_c"],
|
|
last_op.params["output_w"],
|
|
last_op.params["output_h"],
|
|
)
|
|
# update params in conv2d
|
|
last_op.params["need_Bmask"] = True
|
|
last_op.params["output2_h"] = last_op.params["output_h"]
|
|
last_op.params["output2_w"] = last_op.params["output_w"]
|
|
last_op.params["output2_c"] = last_op.params["output_c"]
|
|
last_op.params["output2_dtype"] = b_mask_info["dtype"]
|
|
last_op.params["output2_idx"] = b_mask_info["name"]
|
|
# remove fused ops in the graph
|
|
_removeLayers(self.model, op_dict)
|
|
                # we use HWC for computation, but in backprop the 'c' may mean the output channel of the training weights.
                # in that case, we need to insert an op to permute the weight tensor before running this conv2d op
                # TODO: make sure this is no longer needed after we optimize tile + group_conv2d
|
|
# if len(self.model) > 0 and "weight" not in op["inputs"][1]["name"]:
|
|
# permute_params = {
|
|
# "input_idx": op["inputs"][1]["name"],
|
|
# "input_dim": 3,
|
|
# "input_h": op["inputs"][1]["shape"][-2],
|
|
# "input_w": op["inputs"][1]["shape"][-1],
|
|
# "input_c": op["inputs"][1]["shape"][-4], # IOHW
|
|
# }
|
|
# permute_op = permute_3D_120.permute_3D_120(permute_params)
|
|
# self.layer.append(permute_op)
|
|
if has_zero_x:
|
|
last_op.set_input_zero_point(zero_x)
|
|
has_zero_x = False
|
|
zero_x = None
|
|
self.layer.append(last_op)
|
|
elif op_type == "nn.mcuadd":
|
|
# fp32
|
|
pattern_found, op_dict = _findBinMaskPattern(self.model, op["outputs"][0]["name"])
|
|
# try int8
|
|
if not pattern_found:
|
|
pattern_found, op_dict = _findBinMaskPatternint8(self.model, op["outputs"][0]["name"])
|
|
last_op = self._convert_qadd(op)
|
|
if pattern_found:
|
|
# add second output in the output tensors
|
|
b_mask_info = op_dict["multiply"]["outputs"][0]
|
|
last_op._add_output(
|
|
b_mask_info["name"],
|
|
b_mask_info["dtype"],
|
|
last_op.params["output_c"],
|
|
last_op.params["output_w"],
|
|
last_op.params["output_h"],
|
|
)
|
|
# update params in conv2d
|
|
last_op.params["need_Bmask"] = True
|
|
last_op.params["output2_h"] = last_op.params["output_h"]
|
|
last_op.params["output2_w"] = last_op.params["output_w"]
|
|
last_op.params["output2_c"] = last_op.params["output_c"]
|
|
last_op.params["output2_dtype"] = b_mask_info["dtype"]
|
|
last_op.params["output2_idx"] = b_mask_info["name"]
|
|
# remove fused ops in the graph
|
|
_removeLayers(self.model, op_dict)
|
|
self.layer.append(last_op)
|
|
elif (
|
|
op_type == "cast" and op["inputs"][0]["dtype"] == "int8" and op["outputs"][0]["dtype"] == "int32"
|
|
): # int8 gradient for bias
|
|
# skip this one
|
|
_updateIdx(self.model, self.layer, op["inputs"][0]["name"], op["outputs"][0]["name"])
|
|
elif op_type == "cast" and _castisFusable(self.model, op)[0] and FUSHION_CONFIG[FUSE_INT8CAST_STR]:
|
|
_, transpose_conv_json = _castisFusable(self.model, op)
|
|
transpose_conv_json["inputs"][1] = op["inputs"][0] # pass the int8 input to transpose conv2d
|
|
elif (
|
|
op_type == "cast" and _castisFusable_for_gconv(self.model, op)[0] and FUSHION_CONFIG[FUSE_INT8CAST_STR]
|
|
):
|
|
_, group_conv_json = _castisFusable_for_gconv(self.model, op)
|
|
group_conv_json["inputs"][0] = op["inputs"][0] # pass the int8 input to group conv2d
|
|
group_conv_json["inplace_int8_input"] = True
|
|
elif op_type == "tile" and FUSHION_CONFIG[FUSE_TILE_STR]:
|
|
# check if we need to fuse ops for tile
|
|
# ########## tile -> reshape -> conv2d (which takes it as weights)
|
|
# fusion | ------------------------|
|
|
pattern_found, op_dict = _fileTileRepAsWeights(self.model, op)
|
|
if pattern_found:
|
|
# remove reshape
|
|
_removeLayers(self.model, {"reshape": op_dict["reshape"]})
|
|
# redirect the input of tile to conv2d's weight
|
|
op_dict["conv2d"]["inputs"][1] = op_dict["tile"]["inputs"][0]
|
|
else:
|
|
raise NotImplementedError
|
|
elif op_type == "add":
|
|
if len(op["inputs"][0]["shape"]) == 4 and op["inputs"][0]["dtype"] == "int8":
|
|
if "zero_y" in op["inputs"][1]["name"]:
|
|
zero_y = int(self.data[op["inputs"][1]["name"]])
|
|
last_op.set_output_zero_point(zero_y)
|
|
continue
|
|
last_op = self._convert_add(op)
|
|
self.layer.append(last_op)
|
|
else:
|
|
last_op = self._convert_add1d(op)
|
|
self.layer.append(last_op)
|
|
elif op_type == "nn.bias_add" and op["inputs"][1]["dtype"] == "int8":
|
|
last_op.params["bias"] = self.data[op["inputs"][1]["name"]].astype(int)
|
|
# redirect the index
|
|
last_op.change_output_tensor_idx(op["outputs"][0]["name"])
|
|
            # fixing HWC -> CHW alignment
|
|
elif (
|
|
op_type == "reshape"
|
|
and len(op["inputs"][0]["shape"]) == 4
|
|
and (op["inputs"][0]["shape"][2] != 1 and op["inputs"][0]["shape"][3] != 1)
|
|
and op["inputs"][0]["shape"][2] != op["outputs"][0]["shape"][2]
|
|
and op["inputs"][0]["shape"][3] != op["outputs"][0]["shape"][3]
|
|
):
|
|
last_op = self._convert_reshape(op)
|
|
self.layer.append(last_op)
|
|
            # the input might be a parameter; we handle the consuming ops here since we only support scales in `multiply`
|
|
elif op_type == "reshape" and op["inputs"][0]["var_type"] == "parameter":
|
|
# find out ops taking the output
|
|
for other_op in self.model:
|
|
for input_tensor in other_op["inputs"]:
|
|
if input_tensor["name"] == op["outputs"][0]["name"]:
|
|
if other_op["type"] in {"multiply", "divide"}:
|
|
_updateIdxParameter(self.model, op["inputs"][0]["name"], op["outputs"][0]["name"])
|
|
else:
|
|
raise NotImplementedError
|
|
            # fixing CHW -> HWC alignment
|
|
elif (
|
|
op_type == "reshape_like"
|
|
and len(op["inputs"][1]["shape"]) == 4
|
|
and (op["inputs"][1]["shape"][2] != 1 and op["inputs"][1]["shape"][3] != 1)
|
|
and op["inputs"][0]["shape"][2] != op["outputs"][0]["shape"][2]
|
|
and op["inputs"][0]["shape"][3] != op["outputs"][0]["shape"][3]
|
|
):
|
|
last_op = self._convert_reshape_like(op)
|
|
self.layer.append(last_op)
|
|
# bypass this layer by fusing it into the last layer, TODO: revisit this for clipping fp results
|
|
elif op_type in fused_op and op:
|
|
# update tensors
|
|
_updateIdx(self.model, self.layer, op["inputs"][0]["name"], op["outputs"][0]["name"])
|
|
            elif op_type == "nn.mcutruncate":
|
|
# update output dtype
|
|
_updateOutputDtype(self.layer, op["inputs"][0]["name"], "int8")
|
|
# update tensor idx
|
|
_updateIdx(self.model, self.layer, op["inputs"][0]["name"], op["outputs"][0]["name"])
|
|
elif op_type == "subtract":
|
|
is_fuse = False
|
|
for tensor in op["inputs"]:
|
|
if "zero_x" in tensor["name"]:
|
|
has_zero_x = True
|
|
zero_x = int(self.data[tensor["name"]])
|
|
is_fuse = True
|
|
if not is_fuse:
|
|
last_op = self._convert_sub(op)
|
|
self.layer.append(last_op)
|
|
elif op_type == "sum":
|
|
input_length = np.prod(op["inputs"][0]["shape"])
|
|
output_length = np.prod(op["outputs"][0]["shape"])
|
|
if input_length != output_length:
|
|
last_op = self._convert_sum(op)
|
|
self.layer.append(last_op)
|
|
if op["outputs"][0] and "output_info" in op["outputs"][0]["meta"]:
|
|
if op["outputs"][0]["meta"]["output_info"][0] == "v":
|
|
key = op["outputs"][0]["meta"]["output_info"]
|
|
else:
|
|
key = "v" + op["outputs"][0]["meta"]["output_info"]
|
|
if self.scale_params is not None:
|
|
e_s_name = get_effective_scalename_with_input_key(key, self.model)
|
|
QAS = get_QAS(key, self.scale_params, self.data[e_s_name])
|
|
else:
|
|
QAS = np.zeros(int(output_length)) + 0.000000001 # avoid zero division
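                            # Fallback when no scale_params are given: QAS (presumably the
                            # quantization-aware scaling factor that maps the accumulated gradient
                            # back to the real-valued bias update) is replaced by a tiny epsilon
                            # vector purely so that downstream use of QAS cannot divide by zero.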
|
|
bias_update_params = {
|
|
"input_idx": last_op.params["output_idx"],
|
|
"output_idx": key,
|
|
# tensor related
|
|
"input_size": int(output_length),
|
|
"input_buf_add": None,
|
|
"input_buf_add_offset": None,
|
|
"QAS": QAS,
|
|
"input_dtype": last_op.params["input_dtype"],
|
|
"output_dtype": "float32",
|
|
}
|
|
bias_update_op = bias_update.bias_update(bias_update_params)
|
|
self.layer.append(bias_update_op)
|
|
else: # skip this, no need to do anything on the data
|
|
input_idx = op["inputs"][0]["name"]
|
|
output_idx = op["outputs"][0]["name"]
|
|
# update the bias
|
|
if op["outputs"][0] and "output_info" in op["outputs"][0]["meta"]:
|
|
if op["outputs"][0]["meta"]["output_info"][0] == "v":
|
|
key = op["outputs"][0]["meta"]["output_info"]
|
|
else:
|
|
key = "v" + op["outputs"][0]["meta"]["output_info"]
|
|
if self.scale_params is not None:
|
|
e_s_name = get_effective_scalename_with_input_key(key, self.model)
|
|
QAS = get_QAS(key, self.scale_params, self.data[e_s_name])
|
|
else:
|
|
QAS = np.zeros(int(output_length)) + 0.000000001 # avoid zero division
|
|
bias_update_params = {
|
|
"input_idx": last_op.params["output_idx"],
|
|
"output_idx": key,
|
|
# tensor related
|
|
"input_size": int(output_length),
|
|
"input_buf_add": None,
|
|
"input_buf_add_offset": None,
|
|
"QAS": QAS,
|
|
"input_dtype": "float32",
|
|
"output_dtype": "float32",
|
|
}
|
|
bias_update_op = bias_update.bias_update(bias_update_params)
|
|
self.layer.append(bias_update_op)
|
|
# # update tensors
|
|
_updateIdx(self.model, self.layer, input_idx, output_idx)
|
|
|
|
# assume weights are updated once we obtain its gradient
|
|
elif op_type == "transpose" and FUSHION_CONFIG[FUSE_SGD_UPDATE_STR]:
|
|
fuseable, op_dict = _findTransposeMultiplyAbsMaxDivide(self.model, op)
|
|
# old IR
|
|
if op["outputs"][0]["meta"]["children"] == 0:
|
|
# update tensors
|
|
_updateIdx(self.model, self.layer, op["inputs"][0]["name"], op["outputs"][0]["name"])
|
|
elif fuseable:
|
|
# fuse "transpose" -> [max -> divide -> divide (int8 bp)]
|
|
_updateIdx(self.model, self.layer, op["inputs"][0]["name"], op_dict["cast"]["outputs"][0]["name"])
|
|
# add the output to output table
|
|
name = op_dict["cast"]["outputs"][0]["meta"]["output_info"]
|
|
idx = op_dict["cast"]["outputs"][0]["name"]
|
|
length = np.prod(op_dict["cast"]["outputs"][0]["shape"])
|
|
dtype = op_dict["cast"]["outputs"][0]["dtype"]
|
|
self.outputTables.append(outputInfo(name, idx, int(length), dtype))
|
|
_removeLayers(self.model, op_dict)
|
|
else:
|
|
raise NotImplementedError
|
|
elif (
|
|
FUSHION_CONFIG[FUSE_WHERE_ZEROSSTR]
|
|
and op_type == "where"
|
|
and (op["inputs"][2]["dtype"] in ["int8", "int32", "float32"])
|
|
and _findWhereTensorFrom(self.layer, op["inputs"][2]["name"]) is not None
|
|
and _findWhereTensorFrom(self.layer, op["inputs"][2]["name"]).params["op"]
|
|
== "ZEROS" # third input is from zeros
|
|
):
|
|
zeros_op = _findWhereTensorFrom(self.layer, op["inputs"][2]["name"])
|
|
                # remove the previous zeros layer
|
|
self.layer.remove(zeros_op)
|
|
# parse the where but remove the third input and set "input3_is_zeros" in params
|
|
last_op = self._convert_where(op)
|
|
last_op.params["input3_is_zeros"] = True
|
|
last_op.input_tensors.remove(last_op.input_tensors[2])
|
|
                # check whether we can update input2 in place:
                # if input2 is not used by any other following ops,
                # (1) make the where op update input2 in place
                # (2) update the following op's input idx (normally it is MUL)
                can_be_inplace = None
                # check if last_op.params["input2_idx"] == some_op["outputs"][0]
|
|
for from_op in self.model:
|
|
if from_op["outputs"][0]["name"] == last_op.params["input2_idx"]:
|
|
if from_op["outputs"][0]["meta"]["children"] != 1:
|
|
can_be_inplace = False
|
|
else:
|
|
can_be_inplace = True
|
|
assert can_be_inplace is not None
|
|
if can_be_inplace and FUSHION_CONFIG[INPLACE_WHERE_STR]:
|
|
                    # find where the output of `where` goes and link it to the second input of `where`
|
|
for following_op in self.model:
|
|
for inp in following_op["inputs"]:
|
|
if inp["name"] == op["outputs"][0]["name"]:
|
|
inp["name"] = op["inputs"][1]["name"]
|
|
# remove output tensor of where
|
|
last_op.output_tensors.remove(last_op.output_tensors[0])
|
|
# set where to inplace
|
|
last_op.params["inplace"] = True
|
|
# add the op
|
|
self.layer.append(last_op)
|
|
elif op_type == "multiply" and FUSHION_CONFIG[INPLACE_MUL_STR]:
|
|
last_op = self._convert_mul(op)
|
|
last_op_input = last_op.params["input_idx"]
|
|
last_op_output = last_op.params["output_idx"]
|
|
if last_op.params["input2_size"] > 1 and last_op.params["input_size"] > last_op.params["input2_size"]:
|
|
# good to be updated inplace
|
|
last_op.params["inplace"] = True
|
|
last_op.output_tensors.remove(last_op.output_tensors[0])
|
|
# redirect the following op's input as the inplace input
|
|
for following_op in self.model:
|
|
# if following_op["type"] in {"sum", "nn.conv2d_transpose", "nn.conv2d"}:
|
|
for inp in following_op["inputs"]:
|
|
if inp["name"] == last_op_output:
|
|
inp["name"] = last_op_input
|
|
# _updateIdx(self.model, self.layer, last_op_input, last_op_output)
|
|
# replace the following
|
|
self.layer.append(last_op)
|
|
elif op_type in self.regularFunctionTable:
|
|
last_op = self.regularFunctionTable[op_type](op)
|
|
self.layer.append(last_op)
|
|
elif op_type == "abs":
|
|
if FUSHION_CONFIG[FUSE_SGD_UPDATE_STR]:
|
|
cliping_pattern, op_dict = _findMultiplyAbsMaxDivide(self.model, abs_op=op)
|
|
if cliping_pattern:
|
|
# For transpose conv2d, this could be float32 -> int8 if it connects to abs
|
|
previous_op = _findWhereTensorFrom(self.layer, op["inputs"][0]["name"])
|
|
if (
|
|
previous_op.params["op"] == "TRANSPOSE_CONV_2D"
|
|
and previous_op.params["output_dtype"] == "float32"
|
|
):
|
|
previous_op.params["float_to_int8"] = True
|
|
previous_op.params["output_dtype"] = "int8"
|
|
previous_op.output_tensors[0].dtype = "int8"
|
|
previous_op.add_int32_buffer_tensor()
|
|
|
|
_updateIdx(
|
|
self.model, self.layer, op["inputs"][0]["name"], op_dict["cast"]["outputs"][0]["name"]
|
|
)
|
|
_removeLayers(self.model, op_dict)
|
|
else:
|
|
raise NotImplementedError
|
|
else:
|
|
cliping_pattern, op_dict = _findMultiplyAbsMaxDivide(self.model, abs_op=op)
|
|
if cliping_pattern:
|
|
_updateIdx(
|
|
self.model, self.layer, op["inputs"][0]["name"], op_dict["cast"]["outputs"][0]["name"]
|
|
)
|
|
_removeLayers(self.model, op_dict)
|
|
# Baseline for int8 fp without graph optimization
|
|
                        # Adding an outputTable entry for accurate trainable measurement
|
|
if "output_info" in op_dict["cast"]["outputs"][0]["meta"]:
|
|
name = op_dict["cast"]["outputs"][0]["meta"]["output_info"]
|
|
idx = op_dict["cast"]["outputs"][0]["name"]
|
|
length = np.prod(op_dict["cast"]["outputs"][0]["shape"])
|
|
dtype = "int8"
|
|
self.outputTables.append(outputInfo(name, idx, int(length), dtype))
|
|
else:
|
|
raise NotImplementedError
|
|
|
|
else:
|
|
                warnings.warn("%s op is not supported" % op_type)
|
|
raise NotImplementedError
|
|
|
|
# GROUP CONV
|
|
if self.layer[-1].params["op"] == "GROUP_CONV":
|
|
# for group conv the output is actually h, w, IxO, we need to permute it to OHWI
|
|
if not FUSHION_CONFIG[FUSE_SGD_UPDATE_STR]:
|
|
params = {
|
|
# op related
|
|
"op": "PERMUTE_GROUPCONV_OUT",
|
|
"input_idx": last_op.params["output_idx"],
|
|
# tensor related
|
|
"input_dim": 3,
|
|
"input_h": last_op.params["output_h"],
|
|
"input_w": last_op.params["output_w"],
|
|
"input_c": last_op.params["output_c"],
|
|
"groups": last_op.params["groups"],
|
|
"input_dtype": "float32",
|
|
"output_dtype": "float32",
|
|
}
|
|
self.layer.append(permute_groupconv_out.permute_groupconv_out(params))
|
|
            # we update the weights in place for the output-stationary group conv
            # here we need to
            # (1) update the graph: remove gconv -> reshape -> sum -> transpose (done in the "transpose" op)
            #     -> [max -> divide -> divide (int8 bp)]
            # (2) remove the output tensor in gconv
            # (3) replace the output address with the int8 weight in SRAM
            # TODO: we also need to back trace the int8 conv and make it use the weight in both SRAM and Flash
|
|
elif len(self.layer[-1].output_tensors) > 0:
|
|
                # find the target weight
|
|
weight_idx = _findTargetWeightforGconv(self.model, self.layer[-1].output_tensors[0].graph_idx)
|
|
assert weight_idx is not None
|
|
self.layer[-1].params["inplace_weight_name"] = weight_idx
|
|
# back trace to the int8 conv
|
|
conv_partial_layer = _findPartialConv(self.layer, weight_idx)
|
|
conv_p = conv_partial_layer.params
|
|
gconv_output_len = np.prod(self.layer[-1].output_tensors[0].size)
|
|
conv_weight_size = conv_p["input_c"] * conv_p["output_c"] * conv_p["kernel_h"] * conv_p["kernel_w"]
|
|
if conv_weight_size != gconv_output_len:
|
|
# this is partial
|
|
# find the first k channel
|
|
                    first_k_channel = int(conv_p["input_c"] * gconv_output_len / conv_weight_size)
                    conv_partial_layer.params["first_k_channel"] = first_k_channel
                    self.partialChannelList[weight_idx] = first_k_channel
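                    # i.e. only the first `first_k_channel` input channels of that conv weight are
                    # updated in place; the same limit is propagated to any transpose conv that
                    # shares this weight in the loop at the end of loadModel().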
|
|
#
|
|
if self.scale_params is not None:
|
|
key = weight_idx
|
|
e_s_name = get_effective_scalename_with_input_key(key, self.model)
|
|
QAS = get_QAS(key, self.scale_params, self.data[e_s_name])
|
|
else:
|
|
QAS = np.zeros(int(output_length)) + 0.000000001 # avoid zero division
|
|
self.layer[-1].params["QAS"] = QAS
|
|
# remove for inplace
|
|
self.layer[-1].output_tensors.remove(self.layer[-1].output_tensors[0])
|
|
|
|
# add the gradient_output to table, we will use a custom layer to perform SGD
|
|
if (
|
|
"meta" in op["outputs"][0]
|
|
and op["outputs"][0]["meta"]["children"] == 0
|
|
and "output_info" in op["outputs"][0]["meta"]
|
|
):
|
|
name = op["outputs"][0]["meta"]["output_info"]
|
|
idx = op["outputs"][0]["name"]
|
|
length = np.prod(op["outputs"][0]["shape"])
|
|
dtype = op["outputs"][0]["dtype"]
|
|
self.outputTables.append(outputInfo(name, idx, int(length), dtype))
|
|
|
|
# loop over the graph and find transpose conv that use partial weights
|
|
for layer in self.layer:
|
|
if (
|
|
layer.params["op"] == op_name_translation["nn.conv2d_transpose"]
|
|
and layer.params["weight_name"] in self.partialChannelList
|
|
):
|
|
layer.params["first_k_channel"] = self.partialChannelList[layer.params["weight_name"]]
|
|
|
|
def _convert_cast(self, op):
|
|
# shape
|
|
input_info = op["inputs"][0]
|
|
input_dtype = get_dtype(op["inputs"][0])
|
|
input_shape = input_info["shape"]
|
|
output_c = input_c = input_h = input_w = 1
|
|
if len(input_shape) == 4:
|
|
output_c, input_c, input_h, input_w = input_shape # OIHW
|
|
elif len(input_shape) == 2:
|
|
input_h, input_w = input_shape
|
|
input_c = 1
|
|
elif len(input_shape) == 1:
|
|
input_h = input_w = 1
|
|
input_c = input_shape[0]
|
|
else:
|
|
raise NotImplementedError
|
|
|
|
output_info = op["outputs"][0]
|
|
output_dtype = get_dtype(op["outputs"][0])
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"input_size": output_c * input_c * input_h * input_w,
|
|
"input_dim": 4,
|
|
"output_dim": 4,
|
|
"input_dtype": input_dtype,
|
|
"output_dtype": output_dtype,
|
|
"input_meta": op["inputs"][0]["meta"],
|
|
}
|
|
|
|
op = cast.cast(params)
|
|
|
|
return op
|
|
|
|
def _convert_relu(self, op):
|
|
# shape
|
|
input_info = op["inputs"][0]
|
|
input_dtype = get_dtype(op["inputs"][0])
|
|
input_shape = input_info["shape"]
|
|
if len(input_shape) == 4:
|
|
_, input_c, input_h, input_w = input_shape
|
|
elif len(input_shape) == 2:
|
|
input_h, input_w = input_shape
|
|
input_c = 1
|
|
else:
|
|
raise NotImplementedError
|
|
|
|
output_info = op["outputs"][0]
|
|
output_dtype = get_dtype(op["outputs"][0])
|
|
output_c = input_c
|
|
output_h = input_h
|
|
output_w = input_w
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"input_h": input_h,
|
|
"input_w": input_w,
|
|
"input_c": input_c,
|
|
"input_dim": 3,
|
|
"output_dim": 3,
|
|
"output_h": output_h,
|
|
"output_w": output_w,
|
|
"output_c": output_c,
|
|
"input_dtype": input_dtype,
|
|
"output_dtype": output_dtype,
|
|
}
|
|
|
|
op = relu.relu(params)
|
|
|
|
return op
|
|
|
|
def _convert_bias_add(self, op):
|
|
# shape
|
|
input_info = op["inputs"][0]
|
|
input_dtype = get_dtype(op["inputs"][0])
|
|
input_shape = input_info["shape"]
|
|
if len(input_shape) == 2:
|
|
input_h = 1
|
|
input_w = input_shape[0]
|
|
input_c = input_shape[1]
|
|
else:
|
|
input_c, input_h, input_w = get_chw_shape(input_shape)
|
|
|
|
output_info = op["outputs"][0]
|
|
output_shape = output_info["shape"]
|
|
output_c, output_h, output_w = get_chw_shape(output_shape)
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
bias_name = op["inputs"][1]["name"]
|
|
if bias_name not in self.data:
|
|
bias_value = bias_name
|
|
else:
|
|
bias_value = self.data[bias_name]
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"input_h": input_h,
|
|
"input_w": input_w,
|
|
"input_c": input_c,
|
|
"input_dim": 3,
|
|
"output_dim": 3,
|
|
"output_h": input_h,
|
|
"output_w": input_w,
|
|
"output_c": input_c,
|
|
"input_dtype": input_dtype,
|
|
"output_dtype": output_dtype,
|
|
"bias": bias_value,
|
|
"bias_name": bias_name,
|
|
}
|
|
|
|
op = bias_add.biasAdd(params)
|
|
|
|
return op
|
|
|
|
def _convert_reshape(self, op):
|
|
# shape
|
|
input_info = op["inputs"][0]
|
|
input_dtype = get_dtype(op["inputs"][0])
|
|
input_shape = input_info["shape"]
|
|
input_c, input_h, input_w = get_chw_shape(input_shape)
|
|
|
|
output_info = op["outputs"][0]
|
|
output_shape = output_info["shape"]
|
|
output_c, output_h, output_w = get_chw_shape(output_shape)
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"input_h": input_h,
|
|
"input_w": input_w,
|
|
"input_c": input_c,
|
|
"output_h": output_h,
|
|
"output_w": output_w,
|
|
"output_c": output_c,
|
|
"input_dtype": input_dtype,
|
|
"output_dtype": output_dtype,
|
|
}
|
|
|
|
op = reshape.reshape(params)
|
|
|
|
return op
|
|
|
|
def _convert_reshape_like(self, op):
|
|
# shape
|
|
input_info = op["inputs"][0]
|
|
input_dtype = get_dtype(op["inputs"][0])
|
|
input_shape = input_info["shape"]
|
|
input_c, input_h, input_w = get_chw_shape(input_shape)
|
|
|
|
input2_info = op["inputs"][1]
|
|
input2_dtype = get_dtype(op["inputs"][1])
|
|
input2_shape = input2_info["shape"]
|
|
input2_c, input2_h, input2_w = get_chw_shape(input2_shape)
|
|
|
|
output_info = op["outputs"][0]
|
|
output_shape = output_info["shape"]
|
|
output_c, output_h, output_w = get_chw_shape(output_shape)
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"input_h": input_h,
|
|
"input_w": input_w,
|
|
"input_c": input_c,
|
|
"input2_h": input2_h,
|
|
"input2_w": input2_w,
|
|
"input2_c": input2_c,
|
|
"output_h": output_h,
|
|
"output_w": output_w,
|
|
"output_c": output_c,
|
|
"input_dtype": input_dtype,
|
|
"input2_dtype": input2_dtype,
|
|
"output_dtype": output_dtype,
|
|
}
|
|
|
|
op = reshape_like.reshape_like(params)
|
|
|
|
return op
|
|
|
|
def _convert_exp(self, op):
|
|
# shape
|
|
input_info = op["inputs"][0]
|
|
input_dtype = get_dtype(op["inputs"][0])
|
|
input_shape = input_info["shape"]
|
|
input_c, input_h, input_w = get_chw_shape(input_shape)
|
|
|
|
output_info = op["outputs"][0]
|
|
output_shape = output_info["shape"]
|
|
output_c, output_h, output_w = get_chw_shape(output_shape)
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"input_size": input_h * input_w * input_c,
|
|
"input_dtype": input_dtype,
|
|
"output_dtype": output_dtype,
|
|
}
|
|
|
|
op = exp.exp(params)
|
|
|
|
return op
|
|
|
|
def _convert_transpose(self, op):
|
|
# shape
|
|
input_info = op["inputs"][0]
|
|
input_dtype = get_dtype(op["inputs"][0])
|
|
input_shape = input_info["shape"]
|
|
input_c, input_h, input_w = get_chw_shape(input_shape)
|
|
|
|
output_info = op["outputs"][0]
|
|
output_shape = output_info["shape"]
|
|
output_c, output_h, output_w = get_chw_shape(output_shape)
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"input_h": input_h,
|
|
"input_w": input_w,
|
|
"input_c": input_c,
|
|
"input_dim": 3,
|
|
"output_dim": 3,
|
|
"output_h": output_h,
|
|
"output_w": output_w,
|
|
"output_c": output_c,
|
|
"input_dtype": input_dtype,
|
|
"input_vartype": input_info["var_type"],
|
|
"output_dtype": output_dtype,
|
|
}
|
|
|
|
if "axes" in op["attrs"] and op["attrs"]["axes"] is not None:
|
|
if op["attrs"]["axes"] == [1, 0, 2, 3]:
|
|
# torch: OIHW -> IOHW -> permute 1023
|
|
# tinyengine: OHWI -> IOHW -> permute 3012
|
|
params["d1"], params["d2"], params["d3"], params["d4"] = input_shape
|
|
params["op"] = "permute_4D_3012"
|
|
op = permute_4D_3012.permute_4D_3012(params)
|
|
else:
|
|
raise NotImplementedError
|
|
else:
|
|
op = transpose.transpose(params)
|
|
return op
|
|
|
|
def _convert_strided_slice(self, op):
|
|
# shape
|
|
input_info = op["inputs"][0]
|
|
input_dtype = get_dtype(op["inputs"][0])
|
|
input_shape = input_info["shape"]
|
|
|
|
d1, d2, d3, d4 = input_shape # OHWI
|
|
|
|
output_info = op["outputs"][0]
|
|
output_shape = output_info["shape"]
|
|
o_d1, o_d2, o_d3, o_d4 = output_shape # OHWI
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
b_list = op["attrs"]["begin"]
|
|
e_list = op["attrs"]["end"]
|
|
begin = b_list # [b_list[0], b_list[2], b_list[3], b_list[1]]
|
|
end = e_list # [e_list[0], e_list[2], e_list[3], e_list[1]]
|
|
strides = op["attrs"]["strides"]
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"d1": d1,
|
|
"d2": d2,
|
|
"d3": d3,
|
|
"d4": d4,
|
|
"begin": begin,
|
|
"end": end,
|
|
"strides": strides,
|
|
"input_dim": 4,
|
|
"output_dim": 4,
|
|
"o_d1": o_d1,
|
|
"o_d2": o_d2,
|
|
"o_d3": o_d3,
|
|
"o_d4": o_d4,
|
|
"input_dtype": input_dtype,
|
|
"output_dtype": output_dtype,
|
|
}
|
|
|
|
op = strided_slice.stridedSlice(params)
|
|
|
|
return op
|
|
|
|
def _convert_average_pool(self, op):
|
|
# shape
|
|
input_info = op["inputs"][0]
|
|
input_dtype = get_dtype(op["inputs"][0])
|
|
input_shape = input_info["shape"]
|
|
input_c, input_h, input_w = get_chw_shape(input_shape)
|
|
|
|
output_info = op["outputs"][0]
|
|
output_dtype = get_dtype(op["outputs"][0])
|
|
output_shape = output_info["shape"]
|
|
output_c, output_h, output_w = get_chw_shape(output_shape)
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
params = {
|
|
# operator
|
|
"op": "AVERAGE_POOL_2D",
|
|
# pool parameters
|
|
"filter_h": input_h,
|
|
"filter_w": input_w,
|
|
"stride_h": 1,
|
|
"stride_w": 1,
|
|
"pad_h": 0,
|
|
"pad_w": 0,
|
|
# tensor
|
|
"input_idx": input_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"input_h": input_h,
|
|
"input_w": input_w,
|
|
"input_c": input_c,
|
|
"input_dim": 3,
|
|
"output_dim": 3,
|
|
"output_h": output_h,
|
|
"output_w": output_w,
|
|
"output_c": output_c,
|
|
"input_dtype": input_dtype,
|
|
"output_dtype": output_dtype,
|
|
}
|
|
|
|
op = avgpool2d.AvgPool2d(params)
|
|
|
|
return op
|
|
|
|
def _convert_zeros(self, op):
|
|
# shape
|
|
output_info = op["outputs"][0]
|
|
output_dtype = get_dtype(op["outputs"][0])
|
|
output_shape = output_info["shape"]
|
|
output_c, output_h, output_w = get_chw_shape(output_shape)
|
|
|
|
# dtype
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"output_idx": output_info["name"],
|
|
"input_dim": 3,
|
|
"output_dim": 3,
|
|
"output_h": output_h,
|
|
"output_w": output_w,
|
|
"output_c": output_c,
|
|
"output_dtype": output_dtype,
|
|
}
|
|
|
|
op = zeros_like.zeros_like(params)
|
|
|
|
return op
|
|
|
|
def _convert_ones(self, op):
|
|
# shape
|
|
output_info = op["outputs"][0]
|
|
output_dtype = get_dtype(op["outputs"][0])
|
|
output_shape = output_info["shape"]
|
|
output_c, output_h, output_w = get_chw_shape(output_shape)
|
|
|
|
# dtype
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"output_idx": output_info["name"],
|
|
"input_dim": 3,
|
|
"output_dim": 3,
|
|
"output_h": output_h,
|
|
"output_w": output_w,
|
|
"output_c": output_c,
|
|
"output_dtype": output_dtype,
|
|
}
|
|
|
|
op = ones_like.ones_like(params)
|
|
|
|
return op
|
|
|
|
def _convert_zeros_like(self, op):
|
|
# shape
|
|
input_info = op["inputs"][0]
|
|
input_dtype = get_dtype(op["inputs"][0])
|
|
input_shape = input_info["shape"]
|
|
input_c, input_h, input_w = get_chw_shape(input_shape)
|
|
|
|
output_info = op["outputs"][0]
|
|
output_dtype = get_dtype(op["outputs"][0])
|
|
output_c = input_c
|
|
output_h = input_h
|
|
output_w = input_w
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"input_h": input_h,
|
|
"input_w": input_w,
|
|
"input_c": input_c,
|
|
"input_dim": 3,
|
|
"output_dim": 3,
|
|
"output_h": output_h,
|
|
"output_w": output_w,
|
|
"output_c": output_c,
|
|
"input_dtype": input_dtype,
|
|
"output_dtype": output_dtype,
|
|
}
|
|
|
|
op = zeros_like.zeros_like(params)
|
|
|
|
return op
|
|
|
|
def _convert_ones_like(self, op):
|
|
# shape
|
|
input_info = op["inputs"][0]
|
|
input_dtype = get_dtype(op["inputs"][0])
|
|
input_shape = input_info["shape"]
|
|
input_c, input_h, input_w = get_chw_shape(input_shape)
|
|
|
|
output_info = op["outputs"][0]
|
|
output_dtype = get_dtype(op["outputs"][0])
|
|
output_shape = output_info["shape"]
|
|
output_c, output_h, output_w = get_chw_shape(output_shape)
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"input_h": input_h,
|
|
"input_w": input_w,
|
|
"input_c": input_c,
|
|
"input_dim": 3,
|
|
"output_dim": 3,
|
|
"output_h": output_h,
|
|
"output_w": output_w,
|
|
"output_c": output_c,
|
|
"input_dtype": input_dtype,
|
|
"output_dtype": output_dtype,
|
|
}
|
|
|
|
op = ones_like.ones_like(params)
|
|
|
|
return op
|
|
|
|
def _convert_collapse_sum_like(self, op):
|
|
# shape
|
|
input_info = op["inputs"][0]
|
|
input_dtype = get_dtype(op["inputs"][0])
|
|
input_shape = input_info["shape"]
|
|
input_c, input_h, input_w = get_chw_shape(input_shape)
|
|
|
|
output_info = op["outputs"][0]
|
|
output_dtype = get_dtype(op["outputs"][0])
|
|
output_shape = output_info["shape"]
|
|
output_c, output_h, output_w = get_chw_shape(output_shape)
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"input_h": input_h,
|
|
"input_w": input_w,
|
|
"input_c": input_c,
|
|
"input_dim": 3,
|
|
"output_dim": 3,
|
|
"output_h": output_h,
|
|
"output_w": output_w,
|
|
"output_c": output_c,
|
|
"input_dtype": input_dtype,
|
|
"output_dtype": output_dtype,
|
|
}
|
|
|
|
op = collapse_sum_like.collapseSumLike(params)
|
|
|
|
return op
|
|
|
|
def _convert_log_softmax(self, op):
|
|
# shape
|
|
input_info = op["inputs"][0]
|
|
input_dtype = get_dtype(op["inputs"][0])
|
|
input_shape = input_info["shape"]
|
|
input_h, input_w, input_c = get_chw_shape(input_shape)
|
|
|
|
output_info = op["outputs"][0]
|
|
output_dtype = get_dtype(op["outputs"][0])
|
|
output_shape = output_info["shape"]
|
|
output_h, output_w, output_c = get_chw_shape(output_shape)
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"input_h": input_h,
|
|
"input_w": input_w,
|
|
"input_c": input_c,
|
|
"input_dim": 3,
|
|
"output_dim": 3,
|
|
"output_h": output_h,
|
|
"output_w": output_w,
|
|
"output_c": output_c,
|
|
"input_dtype": input_dtype,
|
|
"output_dtype": output_dtype,
|
|
}
|
|
|
|
op = log_softmax.logSoftMax(params)
|
|
|
|
return op
|
|
|
|
def _convert_cross_entropy_with_logits(self, op):
|
|
# shape
|
|
input_info = op["inputs"][0]
|
|
input_dtype = get_dtype(op["inputs"][0])
|
|
input_shape = input_info["shape"]
|
|
input_c, input_h, input_w = get_chw_shape(input_shape)
|
|
|
|
input2_info = op["inputs"][1]
|
|
|
|
output_info = op["outputs"][0]
|
|
output_dtype = get_dtype(op["outputs"][0])
|
|
output_shape = output_info["shape"]
|
|
output_c, output_h, output_w = get_chw_shape(output_shape)
|
|
|
|
# dtype
|
|
input2_dtype = get_dtype(input_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input_info["name"],
|
|
"input2_idx": input2_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"input_h": input_h,
|
|
"input_w": input_w,
|
|
"input_c": input_c,
|
|
"input_dim": 3,
|
|
"output_dim": 3,
|
|
"output_h": output_h,
|
|
"output_w": output_w,
|
|
"output_c": output_c,
|
|
"input_dtype": input_dtype,
|
|
"input2_dtype": input2_dtype,
|
|
"output_dtype": output_dtype,
|
|
}
|
|
|
|
op = nll_loss.nllLoss(params)
|
|
|
|
return op
|
|
|
|
def _convert_qadd(self, op):
|
|
def _getSigShift(s):
|
|
sig, shi = math.frexp(s)
|
|
sig = int(round(sig * 2**31))
|
|
if sig == 2**31:
|
|
sig /= 2
|
|
shi += 1
|
|
if shi < -31:
|
|
shi = 0
|
|
sig = 0
|
|
|
|
return sig, shi
|
|
|
|
def _getADDMultiplierShift(input_scale, input2_scale, output_scale):
|
|
left_shift = 20
|
|
|
|
twice_max_input_scale = 2 * np.double(max(input_scale, input2_scale))
|
|
real_input1_multiplier = np.double(input_scale / twice_max_input_scale)
|
|
real_input2_multiplier = np.double(input2_scale / twice_max_input_scale)
|
|
real_output_multiplier = np.double(twice_max_input_scale / ((1 << left_shift) * output_scale))
|
|
|
|
input_multiplier, input_shift = _getSigShift(real_input1_multiplier)
|
|
input2_multiplier, input2_shift = _getSigShift(real_input2_multiplier)
|
|
output_multiplier, output_shift = _getSigShift(real_output_multiplier)
|
|
|
|
return (
|
|
left_shift,
|
|
input_multiplier,
|
|
input_shift,
|
|
input2_multiplier,
|
|
input2_shift,
|
|
output_multiplier,
|
|
output_shift,
|
|
)
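        # Quick sanity check of the fixed-point decomposition above (illustrative only):
        # math.frexp(1.5) == (0.75, 1), so _getSigShift(1.5) returns
        # sig = round(0.75 * 2**31) = 1610612736 and shi = 1, i.e. 1.5 == (sig / 2**31) * 2**shi.
        # _getADDMultiplierShift expresses both input scales relative to
        # 2 * max(input_scale, input2_scale), pre-shifted left by `left_shift` bits for precision,
        # and the output multiplier folds that pre-shift back out, mirroring the usual
        # TFLite/CMSIS-NN quantized-add recipe.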
|
|
|
|
# shape
|
|
input0_info = op["inputs"][0]
|
|
input0_shape = input0_info["shape"]
|
|
input0_c, input0_h, input0_w = get_chw_shape(input0_shape)
|
|
|
|
input2_info = op["inputs"][1]
|
|
output_info = op["outputs"][0]
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input0_info)
|
|
input2_dtype = get_dtype(input2_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
assert len(op["inputs"]) == 8, "Unexpected number of inputs"
|
|
input_zero_point = self.data[op["inputs"][2]["name"]][0]
|
|
output_zero_point = self.data[op["inputs"][6]["name"]][0]
|
|
input2_zero_point = self.data[op["inputs"][3]["name"]][0]
|
|
input_scale = self.data[op["inputs"][4]["name"]][0]
|
|
input2_scale = self.data[op["inputs"][5]["name"]][0]
|
|
output_scale = self.data[op["inputs"][7]["name"]][0]
|
|
(
|
|
left_shift,
|
|
input_multiplier,
|
|
input_shift,
|
|
input2_multiplier,
|
|
input2_shift,
|
|
output_multiplier,
|
|
output_shift,
|
|
) = _getADDMultiplierShift(input_scale, input2_scale, output_scale)
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input0_info["name"],
|
|
"input2_idx": input2_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"input_h": input0_h,
|
|
"input_w": input0_w,
|
|
"input_c": input0_c,
|
|
"input2_h": input0_h,
|
|
"input2_w": input0_w,
|
|
"input2_c": input0_c,
|
|
"input_dim": 3,
|
|
"input2_dim": 3,
|
|
"output_dim": 3,
|
|
"output_h": input0_h,
|
|
"output_w": input0_w,
|
|
"output_c": input0_c,
|
|
"input_dtype": input_dtype,
|
|
"input2_dtype": input2_dtype,
|
|
"output_dtype": output_dtype,
|
|
# trainable parameters
|
|
"input_zero_point": input_zero_point,
|
|
"input2_zero_point": input2_zero_point,
|
|
"output_zero_point": output_zero_point,
|
|
"input_scale": input_scale,
|
|
"input2_scale": input2_scale,
|
|
"output_scale": output_scale,
|
|
            # quantized inference
|
|
"left_shift": left_shift,
|
|
"input_multiplier": input_multiplier,
|
|
"input2_multiplier": input2_multiplier,
|
|
"input_shift": input_shift,
|
|
"input2_shift": input2_shift,
|
|
"output_multiplier": output_multiplier,
|
|
"output_shift": output_shift,
|
|
}
|
|
op = add.Add(params)
|
|
|
|
return op
|
|
|
|
def _convert_add1d(self, op):
|
|
# shape
|
|
input0_info = op["inputs"][0]
|
|
input0_shape = input0_info["shape"]
|
|
input0_c, input0_h, input0_w = get_chw_shape(input0_shape)
|
|
|
|
input2_info = op["inputs"][1]
|
|
output_info = op["outputs"][0]
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input0_info)
|
|
input2_dtype = get_dtype(input2_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input0_info["name"],
|
|
"input2_idx": input2_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"input_size": input0_h * input0_w * input0_c,
|
|
"input_dtype": input_dtype,
|
|
"input2_dtype": input2_dtype,
|
|
"output_dtype": output_dtype,
|
|
}
|
|
op = add1d.add1d(params)
|
|
|
|
return op
|
|
|
|
def _convert_sub(self, op):
|
|
# shape
|
|
input0_info = op["inputs"][0]
|
|
input0_shape = input0_info["shape"]
|
|
input0_c, input0_h, input0_w = get_chw_shape(input0_shape)
|
|
|
|
input2_info = op["inputs"][1]
|
|
output_info = op["outputs"][0]
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input0_info)
|
|
input2_dtype = get_dtype(input2_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input0_info["name"],
|
|
"input2_idx": input2_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"input_size": input0_h * input0_w * input0_c,
|
|
"input_dtype": input_dtype,
|
|
"input2_dtype": input2_dtype,
|
|
"output_dtype": output_dtype,
|
|
}
|
|
op = sub.sub(params)
|
|
|
|
return op
|
|
|
|
def _convert_div(self, op):
|
|
# shape
|
|
input0_info = op["inputs"][0]
|
|
input0_shape = input0_info["shape"]
|
|
input0_c, input0_h, input0_w = get_chw_shape(input0_shape)
|
|
|
|
input2_info = op["inputs"][1]
|
|
output_info = op["outputs"][0]
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input0_info)
|
|
input2_dtype = get_dtype(input2_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
if "scale" in input2_info["name"]:
|
|
scale_from_add = self.data[input2_info["name"]][0]
|
|
else:
|
|
scale_from_add = None
|
|
|
|
input2 = input2_info["meta"]["data"] if isconstanttstr(input2_info["name"]) else None
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input0_info["name"],
|
|
"input2_idx": input2_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"input_size": input0_h * input0_w * input0_c,
|
|
"input_dtype": input_dtype,
|
|
"input2": input2,
|
|
"input2_dtype": input2_dtype,
|
|
"output_dtype": output_dtype,
|
|
# input of scale from some conv2d
|
|
"scale_from_add": scale_from_add,
|
|
}
|
|
op = div.div(params)
|
|
|
|
return op
|
|
|
|
def _convert_mul(self, op):
|
|
# shape
|
|
input0_info = op["inputs"][0]
|
|
input0_shape = input0_info["shape"]
|
|
input0_c, input0_h, input0_w = get_chw_shape(input0_shape)
|
|
|
|
input2_info = op["inputs"][1]
|
|
input2_shape = input2_info["shape"]
|
|
input2_c, input2_h, input2_w = get_chw_shape(input2_shape)
|
|
output_info = op["outputs"][0]
|
|
output_c, output_h, output_w = get_chw_shape(output_info["shape"])
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input0_info)
|
|
input2_dtype = get_dtype(input2_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
scale_conv_2d_op = None
|
|
scale_from_add = None
|
|
|
|
if "constant" in input2_info["name"]:
|
|
constant = input2_info["meta"]["data"]
|
|
else:
|
|
constant = None
|
|
|
|
if "scale" in input2_info["name"]:
|
|
            # should be a scalar then
|
|
if "qadd" in input2_info["name"]:
|
|
scale_from_add = self.data[input2_info["name"]][0]
|
|
constant = self.data[input2_info["name"]][0]
|
|
else:
|
|
scale_conv_2d_op = _findConv2dwithScaleName(self.model, self.layer, input2_info["name"])
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input0_info["name"],
|
|
"output_size": output_c * output_h * output_w,
|
|
"input2_idx": input2_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"input_size": input0_h * input0_w * input0_c,
|
|
"input2_size": input2_h * input2_w * input2_c,
|
|
"input_dtype": input_dtype,
|
|
"input2_dtype": input2_dtype,
|
|
"output_dtype": output_dtype,
|
|
# input of scale from some conv2d
|
|
"scale_conv_2d_op": scale_conv_2d_op,
|
|
"scale_from_add": scale_from_add,
|
|
"constant": constant,
|
|
}
|
|
op = mul.mul(params)
|
|
|
|
return op
|
|
|
|
def _convert_less(self, op):
|
|
# shape
|
|
input0_info = op["inputs"][0]
|
|
input0_shape = input0_info["shape"]
|
|
input0_c, input0_h, input0_w = get_chw_shape(input0_shape)
|
|
|
|
input2_info = op["inputs"][1]
|
|
|
|
output_info = op["outputs"][0]
|
|
output_dtype = get_dtype(op["outputs"][0])
|
|
output_shape = output_info["shape"]
|
|
output_c, output_h, output_w = get_chw_shape(output_shape)
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input0_info)
|
|
input2_dtype = get_dtype(input2_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input0_info["name"],
|
|
"input2_idx": input2_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"input_size": input0_c * input0_h * input0_w,
|
|
"input_dtype": input_dtype,
|
|
"input2_dtype": input2_dtype,
|
|
"output_dtype": output_dtype,
|
|
}
|
|
op = less.less(params)
|
|
|
|
return op
|
|
|
|
def _convert_greater(self, op):
|
|
# shape
|
|
input0_info = op["inputs"][0]
|
|
input0_shape = input0_info["shape"]
|
|
input0_c, input0_h, input0_w = get_chw_shape(input0_shape)
|
|
|
|
input2_info = op["inputs"][1]
|
|
|
|
output_info = op["outputs"][0]
|
|
output_dtype = get_dtype(op["outputs"][0])
|
|
output_shape = output_info["shape"]
|
|
output_c, output_h, output_w = get_chw_shape(output_shape)
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input0_info)
|
|
input2_dtype = get_dtype(input2_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input0_info["name"],
|
|
"input2_idx": input2_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"input_size": input0_c * input0_h * input0_w,
|
|
"input_dtype": input_dtype,
|
|
"input2_dtype": input2_dtype,
|
|
"output_dtype": output_dtype,
|
|
}
|
|
op = greater.greater(params)
|
|
|
|
return op
|
|
|
|
def _convert_where(self, op):
|
|
# shape
|
|
input0_info = op["inputs"][0]
|
|
input0_shape = input0_info["shape"]
|
|
input0_c, input0_h, input0_w = get_chw_shape(input0_shape)
|
|
|
|
input2_info = op["inputs"][1]
|
|
input3_info = op["inputs"][2]
|
|
|
|
output_info = op["outputs"][0]
|
|
output_dtype = get_dtype(op["outputs"][0])
|
|
output_shape = output_info["shape"]
|
|
output_c, output_h, output_w = get_chw_shape(output_shape)
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input0_info)
|
|
input2_dtype = get_dtype(input2_info)
|
|
input3_dtype = get_dtype(input3_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input0_info["name"],
|
|
"input2_idx": input2_info["name"],
|
|
"input3_idx": input3_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"input_size": input0_c * input0_h * input0_w,
|
|
"output_size": output_c * output_h * output_w,
|
|
"input_dtype": input_dtype,
|
|
"input2_dtype": input2_dtype,
|
|
"input3_dtype": input3_dtype,
|
|
"output_dtype": output_dtype,
|
|
}
|
|
op = where.where(params)
|
|
|
|
return op
|
|
|
|
def _convert_negative(self, op):
|
|
# shape
|
|
input0_info = op["inputs"][0]
|
|
input0_shape = input0_info["shape"]
|
|
input_c, input_h, input_w = get_chw_shape(input0_shape)
|
|
|
|
output_info = op["outputs"][0]
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input0_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input0_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"input_size": input_c * input_h * input_w,
|
|
"input_dtype": input_dtype,
|
|
"output_dtype": output_dtype,
|
|
}
|
|
op = negative.negative(params)
|
|
|
|
return op
|
|
|
|
def _convert_sum(self, op):
|
|
# shape
|
|
input_info = op["inputs"][0]
|
|
input_shape = input_info["shape"]
|
|
if len(input_shape) == 4:
|
|
d1, d2, d3, d4 = input_shape
|
|
else:
|
|
d2, d3, d4 = get_chw_shape(input_shape)
|
|
d1 = 1
|
|
|
|
output_info = op["outputs"][0]
|
|
output_shape = output_info["shape"]
|
|
if len(output_shape) == 4:
|
|
od1, od2, od3, od4 = output_shape
|
|
else:
|
|
od2, od3, od4 = get_chw_shape(output_shape)
|
|
od1 = 1
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
        # TODO: update here after the bugs in the TTE IR are fixed (axis can be null)
|
|
if op["attrs"]["axis"] is None:
|
|
op["attrs"]["exclude"] = 1
|
|
axis = 0
|
|
elif op["attrs"]["axis"][0] < 0:
|
|
if op["attrs"]["axis"][0] == -1:
|
|
axis = 3
|
|
elif op["attrs"]["axis"][0] == -2:
|
|
axis = 2
|
|
elif op["attrs"]["axis"][0] == -3:
|
|
axis = 1
|
|
elif op["attrs"]["axis"][0] == -4:
|
|
axis = 0
|
|
else:
|
|
axis = op["attrs"]["axis"][0]
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"axis": axis,
|
|
"d1": d1,
|
|
"d2": d2,
|
|
"d3": d3,
|
|
"d4": d4,
|
|
"input_dim": 4,
|
|
"output_dim": 3,
|
|
"od1": od1,
|
|
"od2": od2,
|
|
"od3": od3,
|
|
"od4": od4,
|
|
"input_dtype": input_dtype,
|
|
"output_dtype": output_dtype,
|
|
"exclude": bool(op["attrs"]["exclude"]),
|
|
}
|
|
op = sum.sum(params)
|
|
|
|
return op
|
|
|
|
def _convert_tile(self, op):
|
|
# shape
|
|
input_info = op["inputs"][0]
|
|
input_shape = input_info["shape"]
|
|
input_c, input_h, input_w = get_chw_shape(input_shape)
|
|
|
|
output_info = op["outputs"][0]
|
|
output_shape = output_info["shape"]
|
|
output_c, output_h, output_w = get_chw_shape(output_shape)
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
reps_size = len(op["attrs"]["reps"])
|
|
reps = op["attrs"]["reps"]
|
|
|
|
params = {
|
|
# operator
|
|
"op": op_name_translation[op["type"]],
|
|
# tensor
|
|
"input_idx": input_info["name"],
|
|
"output_idx": output_info["name"],
|
|
"reps_size": reps_size,
|
|
"reps": reps,
|
|
"input_h": input_h,
|
|
"input_w": input_w,
|
|
"input_c": input_c,
|
|
"input_dim": 3,
|
|
"output_dim": 3,
|
|
"output_h": output_h,
|
|
"output_w": output_w,
|
|
"output_c": output_c,
|
|
"input_dtype": input_dtype,
|
|
"output_dtype": output_dtype,
|
|
}
|
|
op = tile.tile(params)
|
|
|
|
return op
|
|
|
|
def _convert_convolution(self, op):
|
|
weight_info = op["inputs"][1]
|
|
if op["attrs"]["kernel_size"] is None:
|
|
kh = op["inputs"][1]["shape"][-1]
|
|
kw = op["inputs"][1]["shape"][-2]
|
|
else:
|
|
kh, kw = op["attrs"]["kernel_size"]
|
|
padding = op["attrs"]["padding"][0]
|
|
groups = op["attrs"]["groups"]
|
|
sw, sh = op["attrs"]["strides"]
|
|
|
|
# shape
|
|
input_info = op["inputs"][0]
|
|
input_shape = input_info["shape"]
|
|
_, input_c, input_h, input_w = input_shape
|
|
|
|
output_info = op["outputs"][0]
|
|
output_shape = output_info["shape"]
|
|
_, output_c, output_h, output_w = output_shape
|
|
|
|
# dtype
|
|
input_dtype = get_dtype(input_info)
|
|
output_dtype = get_dtype(output_info)
|
|
|
|
weight_name = weight_info["name"]
|
|
if weight_name not in self.data:
|
|
weight_value = weight_name
|
|
else:
|
|
weight_value = self.data[weight_name].transpose(0, 2, 3, 1) # OIHW -> OHWI
|
|
if (
|
|
groups == input_c == output_c
|
|
): # For depthwise conv we assume the weights to be in HWC in code generation,
|
|
# we revert the tensor to match the assumption
|
|
weight_value = weight_value.transpose(3, 1, 2, 0) # OHWI -> IHWO
|
|
|
|
has_bias = False
|
|
for inp in op["inputs"]:
|
|
if "weight" in inp["name"]:
|
|
_accessTrainable(self.trainedWeights, inp["name"])
|
|
if "bias" in inp["name"]:
|
|
bias_name = inp["name"]
|
|
bias = self.data[bias_name]
|
|
has_bias = True
|
|
_accessTrainable(self.trainedBias, inp["name"])
|
|
# for floating point implementation, we allow no bias data since the data might be in bias_add ops
|
|
if op["type"] not in {"nn.mcuconv2d"} and not has_bias:
|
|
bias = np.zeros(output_shape[1])
|
|
bias_name = None
|
|
has_bias = True
|
|
|
|
assert has_bias, "no bias data found!"
|
|
|
|
        # normal conv2d, depthwise conv, or group conv
        if groups == 1:
            op_name_idx = 0
        elif groups == input_c == output_c and input_dtype == "int8" and output_dtype == "int32":
            op_name_idx = 1
        else:
            op_name_idx = 2

        if input_dtype == "int8" and output_dtype == "int32" and op_name_idx != 2 and op["type"] != "nn.conv2d":
            input_zero_point = self.data[_findKeyinTensors(op["inputs"], "zero_x")["name"]][0]
            output_zero_point = self.data[_findKeyinTensors(op["inputs"], "zero_y")["name"]][0]
            # TODO: handle the case where the IR separates these scales
            input_scale = np.ones(output_shape[1])
            output_scale = np.ones(output_shape[1])
            weight_scale = np.ones(output_shape[1])
            # effective scales
            effective_scale = self.data[_findKeyinTensors(op["inputs"], "scale")["name"]]
        else:
            input_zero_point = 0
            output_zero_point = 0
            input_scale = np.ones(output_shape[1])
            output_scale = np.ones(output_shape[1])
            weight_scale = np.ones(output_shape[1])
            effective_scale = np.ones(output_shape[1])

        multiplier, shift = _getMultiplierShift(effective_scale)
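        # The (multiplier, shift) pair computed above is the TFLite/CMSIS-NN-style Q31
        # encoding of each effective scale: scale ~= multiplier / 2**31 * 2**shift.
        # Worked example: effective_scale = 2**-10 -> math.frexp gives (0.5, -9), so
        # multiplier = round(0.5 * 2**31) = 1073741824 and shift = -9, and indeed
        # 1073741824 / 2**31 * 2**-9 == 2**-10.
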
        params = {
            "op": op_name_translation[op["type"]][op_name_idx],
            # SE blocks
            "is_SEBlock": "se" in op and op["se"],
            "kernel_h": kh,
            "kernel_w": kw,
            "padding": padding,
            "padding_h": op["attrs"]["padding"][0],
            "padding_w": op["attrs"]["padding"][1],
            "dilation_h": op["attrs"]["dilation"][0],
            "dilation_w": op["attrs"]["dilation"][1],
            "groups": groups,
            "stride_h": sh,
            "stride_w": sw,
            "input_idx": input_info["name"],
            "output_idx": output_info["name"],
            "input_dim": 3,
            "input_c": input_c,
            "input_h": input_h,
            "input_w": input_w,
            "output_dim": 3,
            "output_c": output_c,
            "output_h": output_h,
            "output_w": output_w,
            "input_dtype": input_dtype,
            "output_dtype": output_dtype,
            "weight_value": weight_value,
            "weight_name": weight_name,
            "bias": bias.astype(int),
            "bias_name": bias_name,
            "effective_scale": effective_scale,
            "input_zero_point": input_zero_point,
            "output_zero_point": output_zero_point,
            "multiplier": multiplier.astype(int),
            "shift": shift.astype(int),
            "input_scale": input_scale,
            "output_scale": output_scale,
            "weight_scale": weight_scale,
        }

        # assert weight_value != "out_nn.conv2d_transpose@330"
        if groups == 1:
            op = conv2d.Conv2d(params)
        elif groups == input_c == output_c and input_dtype == "int8" and output_dtype == "int32":
            op = depthwiseConv2d.DepthwiseConv2d(params)
        else:
            if "inplace_int8_input" in op and op["inplace_int8_input"]:
                params["inplace_int8_input"] = True
            params["float32_input2"] = op["inputs"][1]["dtype"] == "float32"
            op = group_conv2d.groupConv2d(params)

        if isinstance(weight_value, str):
            w_c, w_w, w_h = (
                weight_info["shape"][1] * weight_info["shape"][2],
                weight_info["shape"][2],
                weight_info["shape"][3],
            )
            op._add_input(weight_name, input_dtype, w_h, w_w, w_c)

        # for int8 group conv
        if op_name_idx == 2 and "float32_input2" in params and not params["float32_input2"]:
            # use int8 for input and output; dynamic quantization happens inside the op
            for input_tensor in op.input_tensors:
                input_tensor.dtype = "int8"
            op.output_tensors[0].dtype = "int8"
            op.params["input_dtype"] = "int8"
            op.params["input2_dtype"] = "int8"
            op.params["output_dtype"] = "int8"
            # an int32 intermediate buffer is needed for normalization
            op.add_int32_buffer_tensor()
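            # A minimal sketch of the per-tensor dynamic quantization the generated op
            # is expected to perform internally (an assumption about the emitted kernel;
            # this parser only records the dtypes and buffers):
            #   scale = max(abs(x)) / 127
            #   q     = clamp(round(x / scale), -128, 127)   # stored as int8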

        return op

    def _convert_transpose_conv2d(self, op):
        assert op["attrs"]["kernel_size"] is not None
        kh, kw = op["attrs"]["kernel_size"]
        padding = op["attrs"]["padding"][0]
        groups = op["attrs"]["groups"]
        sw, sh = op["attrs"]["strides"]

        # shape
        input_info = op["inputs"][0]
        input2_info = op["inputs"][1]
        input_shape = input_info["shape"]
        _, input_c, input_h, input_w = input_shape

        output_info = op["outputs"][0]
        output_shape = output_info["shape"]
        _, output_c, output_h, output_w = output_shape

        # dtype
        input_dtype = get_dtype(input_info)
        input2_dtype = get_dtype(input2_info)
        output_dtype = get_dtype(output_info)

        weight_info = op["inputs"][-1]
        weight_name = weight_info["name"]
        for inp in op["inputs"]:
            if "weight" in inp["name"]:
                _accessTrainable(self.trainedWeights, inp["name"])
            if "bias" in inp["name"]:
                _accessTrainable(self.trainedBias, inp["name"])
        if weight_name not in self.data:
            weight_value = weight_name
        else:
            weight_value = self.data[weight_name]
        # the bias is currently ignored
        bias = np.zeros(input_shape[1])

        params = {
            "op": op_name_translation[op["type"]],
            # SE blocks
            "is_SEBlock": "se" in op and op["se"],
            "kernel_h": kh,
            "kernel_w": kw,
            "padding": padding,
            "padding_h": op["attrs"]["padding"][0],
            "padding_w": op["attrs"]["padding"][1],
            "kernel_layout": op["attrs"]["kernel_layout"],
            "group": groups,
            "stride_h": sh,
            "stride_w": sw,
            "input_idx": input_info["name"],
            "output_idx": output_info["name"],
            "input_dim": 3,
            "input_c": input_shape[1],
            "input_h": input_shape[2],
            "input_w": input_shape[3],
            "output_dim": 3,
            "output_c": output_shape[1],
            "output_h": output_shape[2],
            "output_w": output_shape[3],
            "input_dtype": input_dtype,
            "input2_dtype": input2_dtype,
            "output_dtype": output_dtype,
            "weight_value": weight_value,
            "weight_name": weight_name,
            "bias": bias.astype(int),
        }

        op_instance = transpose_conv2d.transposeConv2d(params)

        if isinstance(weight_value, str):
            w_c, w_w, w_h = (
                weight_info["shape"][0] * weight_info["shape"][1],
                weight_info["shape"][2],
                weight_info["shape"][3],
            )
            op_instance._add_input(weight_name, input_dtype, w_h, w_w, w_c)

        # for fully int8 transpose conv:
        # 1. cast the dtypes, since requantization happens inside the op
        # 2. an int32 intermediate buffer is needed for normalization, and
        # 3. the output type is updated to int8, since normalization is fused into a single op
        if params["input2_dtype"] == params["input_dtype"] == "int8" and params["output_dtype"] == "int32":
            # use int8 for input and output; dynamic quantization happens inside the op
            for input_tensor in op_instance.input_tensors:
                input_tensor.dtype = "int8"
            op_instance.output_tensors[0].dtype = "int8"
            op_instance.params["input_dtype"] = "int8"
            op_instance.params["input2_dtype"] = "int8"
            op_instance.params["output_dtype"] = "int8"
            op_instance.add_int32_buffer_tensor()

        return op_instance

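    # Note on _convert_transpose_conv2d above: with the usual conv-transpose
    # arithmetic (and assuming no output_padding, which this converter does not read),
    #   output_h = (input_h - 1) * stride_h - 2 * padding_h + kernel_h
    # e.g. input_h = 14, stride_h = 2, padding_h = 1, kernel_h = 4 gives output_h = 28.
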
    def _convert_matmul(self, op):
        # shape
        input0_info = op["inputs"][0]
        input0_shape = input0_info["shape"]
        input0_c, input0_h, input0_w = get_chw_shape(input0_shape)

        input2_info = op["inputs"][1]
        input2_shape = input2_info["shape"]
        output_info = op["outputs"][0]
        input2_c, input2_h, input2_w = get_chw_shape(input2_shape)

        # dtype
        input_dtype = get_dtype(input0_info)
        input2_dtype = get_dtype(input2_info)
        output_dtype = get_dtype(output_info)

        weight_info = op["inputs"][-1]
        weight_name = weight_info["name"]
        for inp in op["inputs"]:
            if "weight" in inp["name"]:
                _accessTrainable(self.trainedWeights, inp["name"])
            if "bias" in inp["name"]:
                _accessTrainable(self.trainedBias, inp["name"])
        if weight_name not in self.data:
            weight_value = weight_name
        else:
            weight_value = self.data[weight_name]
        assert input0_w == input2_h
        params = {
            # operator
            "op": op_name_translation[op["type"]],
            # tensor
            "input_idx": input0_info["name"],
            "input2_idx": input2_info["name"],
            "output_idx": output_info["name"],
            "matA_row": input0_h,
            "matA_col": input0_w,
            "matB_row": input2_h,
            "matB_col": input2_w,
            "input_dtype": input_dtype,
            "input2_dtype": input2_dtype,
            "output_dtype": output_dtype,
            "weight_value": weight_value,
            "weight_name": weight_name,
        }
        op = mat_mul.mat_mul(params)

        if isinstance(weight_value, str):
            w_c, w_w, w_h = get_chw_shape(weight_info["shape"])
            op._add_input(weight_name, input_dtype, w_h, w_w, w_c)

        return op

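    # _convert_matmul above checks input0_w == input2_h, i.e. a plain A @ B product
    # where A is (matA_row, matA_col) and B is (matB_row, matB_col).
    # _convert_dense below instead checks input0_w == input2_w, which matches the
    # usual Relay nn.dense convention (an assumption here) of weights stored as
    # (units, in_dim) and computed as data @ weight.T.
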
    def _convert_dense(self, op):
        # shape
        input0_info = op["inputs"][0]
        input0_shape = input0_info["shape"]
        input0_c, input0_h, input0_w = get_chw_shape(input0_shape)

        input2_info = op["inputs"][1]
        input2_shape = input2_info["shape"]
        output_info = op["outputs"][0]
        input2_c, input2_h, input2_w = get_chw_shape(input2_shape)

        # dtype
        input_dtype = get_dtype(input0_info)
        input2_dtype = get_dtype(input2_info)
        output_dtype = get_dtype(output_info)

        weight_info = op["inputs"][-1]
        weight_name = weight_info["name"]
        for inp in op["inputs"]:
            if "weight" in inp["name"]:
                _accessTrainable(self.trainedWeights, inp["name"])
            if "bias" in inp["name"]:
                _accessTrainable(self.trainedBias, inp["name"])
        if weight_name not in self.data:
            weight_value = weight_name
        else:
            weight_value = self.data[weight_name]
        assert input0_w == input2_w
        params = {
            # operator
            "op": op_name_translation[op["type"]],
            # tensor
            "input_idx": input0_info["name"],
            "input2_idx": input2_info["name"],
            "output_idx": output_info["name"],
            "matA_row": input0_h,
            "matA_col": input0_w,
            "matB_row": input2_h,
            "matB_col": input2_w,
            "input_dtype": input_dtype,
            "input2_dtype": input2_dtype,
            "output_dtype": output_dtype,
            "weight_value": weight_value,
            "weight_name": weight_name,
        }
        op = dense.dense(params)

        if isinstance(weight_value, str):
            w_c, w_w, w_h = get_chw_shape(weight_info["shape"])
            op._add_input(weight_name, input_dtype, w_h, w_w, w_c)

        return op


def get_dtype(tensor_info):
    if "dtype" in tensor_info:
        return tensor_info["dtype"]
    else:
        return "int8"


def get_chw_shape(shape):
    input_c = input_h = input_w = 0
    batch = 1
    if len(shape) == 5:
        _, batch, input_c, input_h, input_w = shape
    elif len(shape) == 4:
        batch, input_c, input_h, input_w = shape
    elif len(shape) == 2:
        input_h, input_w = shape
        input_c = 1
    elif len(shape) == 1:
        input_h = input_w = 1
        input_c = shape[0]
    else:
        raise NotImplementedError
    if batch != 1:
        # raise ValueError("batch size should be 1")
        warnings.warn(f"batch size should be 1, got {batch}")
    return input_c, input_h, input_w


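# get_chw_shape above normalizes assorted tensor ranks to a (C, H, W) triple, e.g.
#   (1, 3, 32, 32) -> (3, 32, 32)   # NCHW with the batch dimension dropped
#   (10, 64)       -> (1, 10, 64)   # 2-D shapes are treated as (H, W) with C = 1
#   (128,)         -> (128, 1, 1)   # 1-D shapes are treated as channels
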
def _getMultiplierShift(effective_scale):
    significand = np.zeros(len(effective_scale), dtype="int32")
    shift = np.zeros(len(effective_scale), dtype="int32")

    for i, s in enumerate(effective_scale):
        if s == 0:
            significand[i] = 0
            shift[i] = 0
        else:
            sig, shi = math.frexp(s)
            sig = int(round(sig * 2**31))

            if sig == 2**31:
                sig //= 2
                shi += 1
            if shi < -31:
                shi = 0
                sig = 0

            significand[i] = sig
            shift[i] = shi

    return significand, shift
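

# _getMultiplierShift encodes each effective scale s as (significand, shift) with
# s ~= significand / 2**31 * 2**shift. Two edge cases are handled above: if rounding
# lands exactly on 2**31, the pair is renormalized to (2**30, shift + 1); and scales
# so small that shift < -31 are flushed to (0, 0), as is a scale of exactly zero.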