import json import math import pickle import warnings import numpy as np from .constant import ( FUSE_INT8CAST_STR, FUSE_SGD_UPDATE_STR, FUSE_TILE_STR, FUSE_WHERE_ZEROSSTR, FUSHION_CONFIG, INPLACE_MUL_STR, INPLACE_WHERE_STR, REORDER_STR, USE_BIT_MASK, op_name_translation, ) from .FusionUtil import ( _accessTrainable, _castisFusable, _castisFusable_for_gconv, _fileTileRepAsWeights, _findBinMaskPattern, _findBinMaskPatternint8, _findConv2dwithScaleName, _findKeyinTensors, _findMultiplyAbsMaxDivide, _findPartialConv, _findTargetWeightforGconv, _findTransposeMultiplyAbsMaxDivide, _findWhereTensorFrom, _removeLayers, _updateIdx, _updateIdxParameter, _updateOutputDtype, ) from .GraphReorder import reorderGroupConv_TransponseConv, reorderGroupConv_TransponseConv_int8 from .operators import ( add, add1d, avgpool2d, bias_add, bias_update, cast, collapse_sum_like, conv2d, dense, depthwiseConv2d, div, exp, greater, group_conv2d, less, log_softmax, mat_mul, mul, negative, nll_loss, ones_like, permute_4D_3012, permute_groupconv_out, relu, reshape, reshape_like, strided_slice, sub, sum, tile, transpose, transpose_conv2d, where, zeros_like, ) from .operators.basic_utils import isconstanttstr from .QAS_util import get_effective_scalename_with_input_key, get_QAS MAX_DAGOP_OUTPUTS = 5 fused_op = {"clip", "nn.batch_flatten", "squeeze", "reshape", "reshape_like"} class outputInfo: def __init__(self, name, idx, len, dtype): self.name = name self.idx = idx self.len = len self.dtype = dtype class TTEParser(object): def __init__(self, model, data, scale_params=None): self.layer = [] self.gout = [] self.det_outputs = None with open(model, "r") as f: self.model = json.load(f) with open(data, "rb") as f: w_params = pickle.load(f) self.data = {} for k in w_params: if k[0] != "v": self.data[f"v{k}"] = w_params[k] else: self.data[k] = w_params[k] self.scale_params = scale_params self.layer = [] self.trainedWeights = [] # key, weight_ip self.trainedBias = [] # key, weight_ip self.fusedInputTable = {} self.outputTables = [] self.regularFunctionTable = { "cast": self._convert_cast, "cast_like": self._convert_cast, "exp": self._convert_exp, "transpose": self._convert_transpose, "where": self._convert_where, "nn.conv2d_transpose": self._convert_transpose_conv2d, "strided_slice": self._convert_strided_slice, "nn.bias_add": self._convert_bias_add, "nn.relu": self._convert_relu, "zeros_like": self._convert_zeros_like, "zeros": self._convert_zeros, "ones_like": self._convert_ones_like, "ones": self._convert_ones, "collapse_sum_like": self._convert_collapse_sum_like, "less": self._convert_less, "less_equal": self._convert_less, "nn.log_softmax": self._convert_log_softmax, "nn.cross_entropy_with_logits": self._convert_cross_entropy_with_logits, "divide": self._convert_div, "tile": self._convert_tile, "negative": self._convert_negative, "greater": self._convert_greater, "greater_equal": self._convert_greater, "multiply": self._convert_mul, "nn.matmul": self._convert_matmul, "nn.dense": self._convert_dense, "mcumean": self._convert_average_pool, } self.partialChannelList = {} # "idx": first_k_channel def loadModel(self): last_op = None has_zero_x = False zero_x = None self.fusedInputTable[self.model[0]["inputs"][0]["name"]] = self.model[0]["inputs"][0]["name"] # reorder the group conv and transpose conv to calculate weight gradients first if FUSHION_CONFIG[REORDER_STR]: self.model = reorderGroupConv_TransponseConv(self.model) self.model = reorderGroupConv_TransponseConv_int8(self.model) for cnt, op in enumerate(self.model): op_type = op["type"] if op_type in {"nn.conv2d", "nn.mcuconv2d"}: last_op = self._convert_convolution(op) # Float bp fusion # check if we need to have binary mask for this conv2d # conv2d (int32) -> cast -> greater/less -> multiply -> where (which take the map) # fusion | --------------------------------------| if op["outputs"][0]["dtype"] == "int32": pattern_found, op_dict = _findBinMaskPattern(self.model, op["outputs"][0]["name"]) if pattern_found: # add second output in the output tensors b_mask_info = op_dict["multiply"]["outputs"][0] if USE_BIT_MASK: last_op._add_output( b_mask_info["name"], "bool", int(math.ceil(last_op.params["output_c"] / 8)), last_op.params["output_w"], last_op.params["output_h"], ) else: last_op._add_output( b_mask_info["name"], b_mask_info["dtype"], last_op.params["output_c"], last_op.params["output_w"], last_op.params["output_h"], ) # update params in conv2d last_op.params["need_Bmask"] = True last_op.params["output2_h"] = last_op.params["output_h"] last_op.params["output2_w"] = last_op.params["output_w"] last_op.params["output2_c"] = last_op.params["output_c"] last_op.params["output2_dtype"] = b_mask_info["dtype"] last_op.params["output2_idx"] = b_mask_info["name"] # remove fused ops in the graph _removeLayers(self.model, op_dict) # int8 bp fusion # check if we need to have binary mask for this conv2d # conv2d (int32) -> greater/less -> multiply -> where (which take the map) # fusion | ------------------------------| if op["outputs"][0]["dtype"] == "int32": pattern_found, op_dict = _findBinMaskPatternint8(self.model, op["outputs"][0]["name"]) if pattern_found: # add second output in the output tensors b_mask_info = op_dict["multiply"]["outputs"][0] if USE_BIT_MASK: last_op._add_output( b_mask_info["name"], "bool", int(math.ceil(last_op.params["output_c"] / 8)), last_op.params["output_w"], last_op.params["output_h"], ) else: last_op._add_output( b_mask_info["name"], b_mask_info["dtype"], last_op.params["output_c"], last_op.params["output_w"], last_op.params["output_h"], ) # update params in conv2d last_op.params["need_Bmask"] = True last_op.params["output2_h"] = last_op.params["output_h"] last_op.params["output2_w"] = last_op.params["output_w"] last_op.params["output2_c"] = last_op.params["output_c"] last_op.params["output2_dtype"] = b_mask_info["dtype"] last_op.params["output2_idx"] = b_mask_info["name"] # remove fused ops in the graph _removeLayers(self.model, op_dict) # we use hwc for computation, but in bp the 'c' may mean output channel for the training weights. # in this case, we need to insert an op to permute the weight tensor before running this conv2d op # TODO: make sure this is not longer needed after we optimize tile + group_conv2d # if len(self.model) > 0 and "weight" not in op["inputs"][1]["name"]: # permute_params = { # "input_idx": op["inputs"][1]["name"], # "input_dim": 3, # "input_h": op["inputs"][1]["shape"][-2], # "input_w": op["inputs"][1]["shape"][-1], # "input_c": op["inputs"][1]["shape"][-4], # IOHW # } # permute_op = permute_3D_120.permute_3D_120(permute_params) # self.layer.append(permute_op) if has_zero_x: last_op.set_input_zero_point(zero_x) has_zero_x = False zero_x = None self.layer.append(last_op) elif op_type == "nn.mcuadd": # fp32 pattern_found, op_dict = _findBinMaskPattern(self.model, op["outputs"][0]["name"]) # try int8 if not pattern_found: pattern_found, op_dict = _findBinMaskPatternint8(self.model, op["outputs"][0]["name"]) last_op = self._convert_qadd(op) if pattern_found: # add second output in the output tensors b_mask_info = op_dict["multiply"]["outputs"][0] last_op._add_output( b_mask_info["name"], b_mask_info["dtype"], last_op.params["output_c"], last_op.params["output_w"], last_op.params["output_h"], ) # update params in conv2d last_op.params["need_Bmask"] = True last_op.params["output2_h"] = last_op.params["output_h"] last_op.params["output2_w"] = last_op.params["output_w"] last_op.params["output2_c"] = last_op.params["output_c"] last_op.params["output2_dtype"] = b_mask_info["dtype"] last_op.params["output2_idx"] = b_mask_info["name"] # remove fused ops in the graph _removeLayers(self.model, op_dict) self.layer.append(last_op) elif ( op_type == "cast" and op["inputs"][0]["dtype"] == "int8" and op["outputs"][0]["dtype"] == "int32" ): # int8 gradient for bias # skip this one _updateIdx(self.model, self.layer, op["inputs"][0]["name"], op["outputs"][0]["name"]) elif op_type == "cast" and _castisFusable(self.model, op)[0] and FUSHION_CONFIG[FUSE_INT8CAST_STR]: _, transpose_conv_json = _castisFusable(self.model, op) transpose_conv_json["inputs"][1] = op["inputs"][0] # pass the int8 input to transpose conv2d elif ( op_type == "cast" and _castisFusable_for_gconv(self.model, op)[0] and FUSHION_CONFIG[FUSE_INT8CAST_STR] ): _, group_conv_json = _castisFusable_for_gconv(self.model, op) group_conv_json["inputs"][0] = op["inputs"][0] # pass the int8 input to group conv2d group_conv_json["inplace_int8_input"] = True elif op_type == "tile" and FUSHION_CONFIG[FUSE_TILE_STR]: # check if we need to fuse ops for tile # ########## tile -> reshape -> conv2d (which takes it as weights) # fusion | ------------------------| pattern_found, op_dict = _fileTileRepAsWeights(self.model, op) if pattern_found: # remove reshape _removeLayers(self.model, {"reshape": op_dict["reshape"]}) # redirect the input of tile to conv2d's weight op_dict["conv2d"]["inputs"][1] = op_dict["tile"]["inputs"][0] else: raise NotImplementedError elif op_type == "add": if len(op["inputs"][0]["shape"]) == 4 and op["inputs"][0]["dtype"] == "int8": if "zero_y" in op["inputs"][1]["name"]: zero_y = int(self.data[op["inputs"][1]["name"]]) last_op.set_output_zero_point(zero_y) continue last_op = self._convert_add(op) self.layer.append(last_op) else: last_op = self._convert_add1d(op) self.layer.append(last_op) elif op_type == "nn.bias_add" and op["inputs"][1]["dtype"] == "int8": last_op.params["bias"] = self.data[op["inputs"][1]["name"]].astype(int) # redirect the index last_op.change_output_tensor_idx(op["outputs"][0]["name"]) # fixing HWC -> CHW alginment elif ( op_type == "reshape" and len(op["inputs"][0]["shape"]) == 4 and (op["inputs"][0]["shape"][2] != 1 and op["inputs"][0]["shape"][3] != 1) and op["inputs"][0]["shape"][2] != op["outputs"][0]["shape"][2] and op["inputs"][0]["shape"][3] != op["outputs"][0]["shape"][3] ): last_op = self._convert_reshape(op) self.layer.append(last_op) # input might be parameters, we handle the inside ops since we only support for scales in `multiply` elif op_type == "reshape" and op["inputs"][0]["var_type"] == "parameter": # find out ops taking the output for other_op in self.model: for input_tensor in other_op["inputs"]: if input_tensor["name"] == op["outputs"][0]["name"]: if other_op["type"] in {"multiply", "divide"}: _updateIdxParameter(self.model, op["inputs"][0]["name"], op["outputs"][0]["name"]) else: raise NotImplementedError # fixing CHW -> HWC alginment elif ( op_type == "reshape_like" and len(op["inputs"][1]["shape"]) == 4 and (op["inputs"][1]["shape"][2] != 1 and op["inputs"][1]["shape"][3] != 1) and op["inputs"][0]["shape"][2] != op["outputs"][0]["shape"][2] and op["inputs"][0]["shape"][3] != op["outputs"][0]["shape"][3] ): last_op = self._convert_reshape_like(op) self.layer.append(last_op) # bypass this layer by fusing it into the last layer, TODO: revisit this for clipping fp results elif op_type in fused_op and op: # update tensors _updateIdx(self.model, self.layer, op["inputs"][0]["name"], op["outputs"][0]["name"]) elif op_type in "nn.mcutruncate": # update output dtype _updateOutputDtype(self.layer, op["inputs"][0]["name"], "int8") # update tensor idx _updateIdx(self.model, self.layer, op["inputs"][0]["name"], op["outputs"][0]["name"]) elif op_type == "subtract": is_fuse = False for tensor in op["inputs"]: if "zero_x" in tensor["name"]: has_zero_x = True zero_x = int(self.data[tensor["name"]]) is_fuse = True if not is_fuse: last_op = self._convert_sub(op) self.layer.append(last_op) elif op_type == "sum": input_length = np.prod(op["inputs"][0]["shape"]) output_length = np.prod(op["outputs"][0]["shape"]) if input_length != output_length: last_op = self._convert_sum(op) self.layer.append(last_op) if op["outputs"][0] and "output_info" in op["outputs"][0]["meta"]: if op["outputs"][0]["meta"]["output_info"][0] == "v": key = op["outputs"][0]["meta"]["output_info"] else: key = "v" + op["outputs"][0]["meta"]["output_info"] if self.scale_params is not None: e_s_name = get_effective_scalename_with_input_key(key, self.model) QAS = get_QAS(key, self.scale_params, self.data[e_s_name]) else: QAS = np.zeros(int(output_length)) + 0.000000001 # avoid zero division bias_update_params = { "input_idx": last_op.params["output_idx"], "output_idx": key, # tensor related "input_size": int(output_length), "input_buf_add": None, "input_buf_add_offset": None, "QAS": QAS, "input_dtype": last_op.params["input_dtype"], "output_dtype": "float32", } bias_update_op = bias_update.bias_update(bias_update_params) self.layer.append(bias_update_op) else: # skip this, no need to do anything on the data input_idx = op["inputs"][0]["name"] output_idx = op["outputs"][0]["name"] # update the bias if op["outputs"][0] and "output_info" in op["outputs"][0]["meta"]: if op["outputs"][0]["meta"]["output_info"][0] == "v": key = op["outputs"][0]["meta"]["output_info"] else: key = "v" + op["outputs"][0]["meta"]["output_info"] if self.scale_params is not None: e_s_name = get_effective_scalename_with_input_key(key, self.model) QAS = get_QAS(key, self.scale_params, self.data[e_s_name]) else: QAS = np.zeros(int(output_length)) + 0.000000001 # avoid zero division bias_update_params = { "input_idx": last_op.params["output_idx"], "output_idx": key, # tensor related "input_size": int(output_length), "input_buf_add": None, "input_buf_add_offset": None, "QAS": QAS, "input_dtype": "float32", "output_dtype": "float32", } bias_update_op = bias_update.bias_update(bias_update_params) self.layer.append(bias_update_op) # # update tensors _updateIdx(self.model, self.layer, input_idx, output_idx) # assume weights are updated once we obtain its gradient elif op_type == "transpose" and FUSHION_CONFIG[FUSE_SGD_UPDATE_STR]: fuseable, op_dict = _findTransposeMultiplyAbsMaxDivide(self.model, op) # old IR if op["outputs"][0]["meta"]["children"] == 0: # update tensors _updateIdx(self.model, self.layer, op["inputs"][0]["name"], op["outputs"][0]["name"]) elif fuseable: # fuse "transpose" -> [max -> divide -> divide (int8 bp)] _updateIdx(self.model, self.layer, op["inputs"][0]["name"], op_dict["cast"]["outputs"][0]["name"]) # add the output to output table name = op_dict["cast"]["outputs"][0]["meta"]["output_info"] idx = op_dict["cast"]["outputs"][0]["name"] length = np.prod(op_dict["cast"]["outputs"][0]["shape"]) dtype = op_dict["cast"]["outputs"][0]["dtype"] self.outputTables.append(outputInfo(name, idx, int(length), dtype)) _removeLayers(self.model, op_dict) else: raise NotImplementedError elif ( FUSHION_CONFIG[FUSE_WHERE_ZEROSSTR] and op_type == "where" and (op["inputs"][2]["dtype"] in ["int8", "int32", "float32"]) and _findWhereTensorFrom(self.layer, op["inputs"][2]["name"]) is not None and _findWhereTensorFrom(self.layer, op["inputs"][2]["name"]).params["op"] == "ZEROS" # third input is from zeros ): zeros_op = _findWhereTensorFrom(self.layer, op["inputs"][2]["name"]) # remove previous the zeros layer self.layer.remove(zeros_op) # parse the where but remove the third input and set "input3_is_zeros" in params last_op = self._convert_where(op) last_op.params["input3_is_zeros"] = True last_op.input_tensors.remove(last_op.input_tensors[2]) # check where we can update input2 inplace # if input2 is not used by following ops # (1) make input2_inplace # (2) update the following op's input idx (normally it is MUL) can_be_inplace = None # check if the last_op["input2_idx"] == some_op["ouptuts"][0] for from_op in self.model: if from_op["outputs"][0]["name"] == last_op.params["input2_idx"]: if from_op["outputs"][0]["meta"]["children"] != 1: can_be_inplace = False else: can_be_inplace = True assert can_be_inplace is not None if can_be_inplace and FUSHION_CONFIG[INPLACE_WHERE_STR]: # find the where the output of where goes and link it to the second input of where for following_op in self.model: for inp in following_op["inputs"]: if inp["name"] == op["outputs"][0]["name"]: inp["name"] = op["inputs"][1]["name"] # remove output tensor of where last_op.output_tensors.remove(last_op.output_tensors[0]) # set where to inplace last_op.params["inplace"] = True # add the op self.layer.append(last_op) elif op_type == "multiply" and FUSHION_CONFIG[INPLACE_MUL_STR]: last_op = self._convert_mul(op) last_op_input = last_op.params["input_idx"] last_op_output = last_op.params["output_idx"] if last_op.params["input2_size"] > 1 and last_op.params["input_size"] > last_op.params["input2_size"]: # good to be updated inplace last_op.params["inplace"] = True last_op.output_tensors.remove(last_op.output_tensors[0]) # redirect the following op's input as the inplace input for following_op in self.model: # if following_op["type"] in {"sum", "nn.conv2d_transpose", "nn.conv2d"}: for inp in following_op["inputs"]: if inp["name"] == last_op_output: inp["name"] = last_op_input # _updateIdx(self.model, self.layer, last_op_input, last_op_output) # replace the following self.layer.append(last_op) elif op_type in self.regularFunctionTable: last_op = self.regularFunctionTable[op_type](op) self.layer.append(last_op) elif op_type == "abs": if FUSHION_CONFIG[FUSE_SGD_UPDATE_STR]: cliping_pattern, op_dict = _findMultiplyAbsMaxDivide(self.model, abs_op=op) if cliping_pattern: # For transpose conv2d, this could be float32 -> int8 if it connects to abs previous_op = _findWhereTensorFrom(self.layer, op["inputs"][0]["name"]) if ( previous_op.params["op"] == "TRANSPOSE_CONV_2D" and previous_op.params["output_dtype"] == "float32" ): previous_op.params["float_to_int8"] = True previous_op.params["output_dtype"] = "int8" previous_op.output_tensors[0].dtype = "int8" previous_op.add_int32_buffer_tensor() _updateIdx( self.model, self.layer, op["inputs"][0]["name"], op_dict["cast"]["outputs"][0]["name"] ) _removeLayers(self.model, op_dict) else: raise NotImplementedError else: cliping_pattern, op_dict = _findMultiplyAbsMaxDivide(self.model, abs_op=op) if cliping_pattern: _updateIdx( self.model, self.layer, op["inputs"][0]["name"], op_dict["cast"]["outputs"][0]["name"] ) _removeLayers(self.model, op_dict) # Baseline for int8 fp without graph optimization # Adding outputTable for accurate trainable measuremnet if "output_info" in op_dict["cast"]["outputs"][0]["meta"]: name = op_dict["cast"]["outputs"][0]["meta"]["output_info"] idx = op_dict["cast"]["outputs"][0]["name"] length = np.prod(op_dict["cast"]["outputs"][0]["shape"]) dtype = "int8" self.outputTables.append(outputInfo(name, idx, int(length), dtype)) else: raise NotImplementedError else: warnings.warn("%s op is not `supported" % op_type) raise NotImplementedError # GROUP CONV if self.layer[-1].params["op"] == "GROUP_CONV": # for group conv the output is actually h, w, IxO, we need to permute it to OHWI if not FUSHION_CONFIG[FUSE_SGD_UPDATE_STR]: params = { # op related "op": "PERMUTE_GROUPCONV_OUT", "input_idx": last_op.params["output_idx"], # tensor related "input_dim": 3, "input_h": last_op.params["output_h"], "input_w": last_op.params["output_w"], "input_c": last_op.params["output_c"], "groups": last_op.params["groups"], "input_dtype": "float32", "output_dtype": "float32", } self.layer.append(permute_groupconv_out.permute_groupconv_out(params)) # we inplace update the weights, for output stantionary group conv # here we need to # (1) update the graph: remove gconv -> reshape -> sum -> transpose (done in "transpose" op) # -> [max -> divide -> divide (int8 bp)] # (2) remove the output tensor in gconv # (3) replace the output address with int8 weight in SRAM # TODO: we also need to back trace the int8 conv and make it use wegiht in both SRAM and Flash elif len(self.layer[-1].output_tensors) > 0: # find the target weigth weight_idx = _findTargetWeightforGconv(self.model, self.layer[-1].output_tensors[0].graph_idx) assert weight_idx is not None self.layer[-1].params["inplace_weight_name"] = weight_idx # back trace to the int8 conv conv_partial_layer = _findPartialConv(self.layer, weight_idx) conv_p = conv_partial_layer.params gconv_output_len = np.prod(self.layer[-1].output_tensors[0].size) conv_weight_size = conv_p["input_c"] * conv_p["output_c"] * conv_p["kernel_h"] * conv_p["kernel_w"] if conv_weight_size != gconv_output_len: # this is partial # find the first k channel fisrt_k_channel = int(conv_p["input_c"] * gconv_output_len / conv_weight_size) conv_partial_layer.params["first_k_channel"] = fisrt_k_channel self.partialChannelList[weight_idx] = fisrt_k_channel # if self.scale_params is not None: key = weight_idx e_s_name = get_effective_scalename_with_input_key(key, self.model) QAS = get_QAS(key, self.scale_params, self.data[e_s_name]) else: QAS = np.zeros(int(output_length)) + 0.000000001 # avoid zero division self.layer[-1].params["QAS"] = QAS # remove for inplace self.layer[-1].output_tensors.remove(self.layer[-1].output_tensors[0]) # add the gradient_output to table, we will use a custom layer to perform SGD if ( "meta" in op["outputs"][0] and op["outputs"][0]["meta"]["children"] == 0 and "output_info" in op["outputs"][0]["meta"] ): name = op["outputs"][0]["meta"]["output_info"] idx = op["outputs"][0]["name"] length = np.prod(op["outputs"][0]["shape"]) dtype = op["outputs"][0]["dtype"] self.outputTables.append(outputInfo(name, idx, int(length), dtype)) # loop over the graph and find transpose conv that use partial weights for layer in self.layer: if ( layer.params["op"] == op_name_translation["nn.conv2d_transpose"] and layer.params["weight_name"] in self.partialChannelList ): layer.params["first_k_channel"] = self.partialChannelList[layer.params["weight_name"]] def _convert_cast(self, op): # shape input_info = op["inputs"][0] input_dtype = get_dtype(op["inputs"][0]) input_shape = input_info["shape"] output_c = input_c = input_h = input_w = 1 if len(input_shape) == 4: output_c, input_c, input_h, input_w = input_shape # OIHW elif len(input_shape) == 2: input_h, input_w = input_shape input_c = 1 elif len(input_shape) == 1: input_h = input_w = 1 input_c = input_shape[0] else: raise NotImplementedError output_info = op["outputs"][0] output_dtype = get_dtype(op["outputs"][0]) # dtype input_dtype = get_dtype(input_info) output_dtype = get_dtype(output_info) params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input_info["name"], "output_idx": output_info["name"], "input_size": output_c * input_c * input_h * input_w, "input_dim": 4, "output_dim": 4, "input_dtype": input_dtype, "output_dtype": output_dtype, "input_meta": op["inputs"][0]["meta"], } op = cast.cast(params) return op def _convert_relu(self, op): # shape input_info = op["inputs"][0] input_dtype = get_dtype(op["inputs"][0]) input_shape = input_info["shape"] if len(input_shape) == 4: _, input_c, input_h, input_w = input_shape elif len(input_shape) == 2: input_h, input_w = input_shape input_c = 1 else: raise NotImplementedError output_info = op["outputs"][0] output_dtype = get_dtype(op["outputs"][0]) output_c = input_c output_h = input_h output_w = input_w # dtype input_dtype = get_dtype(input_info) output_dtype = get_dtype(output_info) params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input_info["name"], "output_idx": output_info["name"], "input_h": input_h, "input_w": input_w, "input_c": input_c, "input_dim": 3, "output_dim": 3, "output_h": output_h, "output_w": output_w, "output_c": output_c, "input_dtype": input_dtype, "output_dtype": output_dtype, } op = relu.relu(params) return op def _convert_bias_add(self, op): # shape input_info = op["inputs"][0] input_dtype = get_dtype(op["inputs"][0]) input_shape = input_info["shape"] if len(input_shape) == 2: input_h = 1 input_w = input_shape[0] input_c = input_shape[1] else: input_c, input_h, input_w = get_chw_shape(input_shape) output_info = op["outputs"][0] output_shape = output_info["shape"] output_c, output_h, output_w = get_chw_shape(output_shape) # dtype input_dtype = get_dtype(input_info) output_dtype = get_dtype(output_info) bias_name = op["inputs"][1]["name"] if bias_name not in self.data: bias_value = bias_name else: bias_value = self.data[bias_name] params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input_info["name"], "output_idx": output_info["name"], "input_h": input_h, "input_w": input_w, "input_c": input_c, "input_dim": 3, "output_dim": 3, "output_h": input_h, "output_w": input_w, "output_c": input_c, "input_dtype": input_dtype, "output_dtype": output_dtype, "bias": bias_value, "bias_name": bias_name, } op = bias_add.biasAdd(params) return op def _convert_reshape(self, op): # shape input_info = op["inputs"][0] input_dtype = get_dtype(op["inputs"][0]) input_shape = input_info["shape"] input_c, input_h, input_w = get_chw_shape(input_shape) output_info = op["outputs"][0] output_shape = output_info["shape"] output_c, output_h, output_w = get_chw_shape(output_shape) # dtype input_dtype = get_dtype(input_info) output_dtype = get_dtype(output_info) params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input_info["name"], "output_idx": output_info["name"], "input_h": input_h, "input_w": input_w, "input_c": input_c, "output_h": output_h, "output_w": output_w, "output_c": output_c, "input_dtype": input_dtype, "output_dtype": output_dtype, } op = reshape.reshape(params) return op def _convert_reshape_like(self, op): # shape input_info = op["inputs"][0] input_dtype = get_dtype(op["inputs"][0]) input_shape = input_info["shape"] input_c, input_h, input_w = get_chw_shape(input_shape) input2_info = op["inputs"][1] input2_dtype = get_dtype(op["inputs"][1]) input2_shape = input2_info["shape"] input2_c, input2_h, input2_w = get_chw_shape(input2_shape) output_info = op["outputs"][0] output_shape = output_info["shape"] output_c, output_h, output_w = get_chw_shape(output_shape) # dtype input_dtype = get_dtype(input_info) output_dtype = get_dtype(output_info) params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input_info["name"], "output_idx": output_info["name"], "input_h": input_h, "input_w": input_w, "input_c": input_c, "input2_h": input2_h, "input2_w": input2_w, "input2_c": input2_c, "output_h": output_h, "output_w": output_w, "output_c": output_c, "input_dtype": input_dtype, "input2_dtype": input2_dtype, "output_dtype": output_dtype, } op = reshape_like.reshape_like(params) return op def _convert_exp(self, op): # shape input_info = op["inputs"][0] input_dtype = get_dtype(op["inputs"][0]) input_shape = input_info["shape"] input_c, input_h, input_w = get_chw_shape(input_shape) output_info = op["outputs"][0] output_shape = output_info["shape"] output_c, output_h, output_w = get_chw_shape(output_shape) # dtype input_dtype = get_dtype(input_info) output_dtype = get_dtype(output_info) params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input_info["name"], "output_idx": output_info["name"], "input_size": input_h * input_w * input_c, "input_dtype": input_dtype, "output_dtype": output_dtype, } op = exp.exp(params) return op def _convert_transpose(self, op): # shape input_info = op["inputs"][0] input_dtype = get_dtype(op["inputs"][0]) input_shape = input_info["shape"] input_c, input_h, input_w = get_chw_shape(input_shape) output_info = op["outputs"][0] output_shape = output_info["shape"] output_c, output_h, output_w = get_chw_shape(output_shape) # dtype input_dtype = get_dtype(input_info) output_dtype = get_dtype(output_info) params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input_info["name"], "output_idx": output_info["name"], "input_h": input_h, "input_w": input_w, "input_c": input_c, "input_dim": 3, "output_dim": 3, "output_h": output_h, "output_w": output_w, "output_c": output_c, "input_dtype": input_dtype, "input_vartype": input_info["var_type"], "output_dtype": output_dtype, } if "axes" in op["attrs"] and op["attrs"]["axes"] is not None: if op["attrs"]["axes"] == [1, 0, 2, 3]: # torch: OIHW -> IOHW -> permute 1023 # tinyengine: OHWI -> IOHW -> permute 3012 params["d1"], params["d2"], params["d3"], params["d4"] = input_shape params["op"] = "permute_4D_3012" op = permute_4D_3012.permute_4D_3012(params) else: raise NotImplementedError else: op = transpose.transpose(params) return op def _convert_strided_slice(self, op): # shape input_info = op["inputs"][0] input_dtype = get_dtype(op["inputs"][0]) input_shape = input_info["shape"] d1, d2, d3, d4 = input_shape # OHWI output_info = op["outputs"][0] output_shape = output_info["shape"] o_d1, o_d2, o_d3, o_d4 = output_shape # OHWI # dtype input_dtype = get_dtype(input_info) output_dtype = get_dtype(output_info) b_list = op["attrs"]["begin"] e_list = op["attrs"]["end"] begin = b_list # [b_list[0], b_list[2], b_list[3], b_list[1]] end = e_list # [e_list[0], e_list[2], e_list[3], e_list[1]] strides = op["attrs"]["strides"] params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input_info["name"], "output_idx": output_info["name"], "d1": d1, "d2": d2, "d3": d3, "d4": d4, "begin": begin, "end": end, "strides": strides, "input_dim": 4, "output_dim": 4, "o_d1": o_d1, "o_d2": o_d2, "o_d3": o_d3, "o_d4": o_d4, "input_dtype": input_dtype, "output_dtype": output_dtype, } op = strided_slice.stridedSlice(params) return op def _convert_average_pool(self, op): # shape input_info = op["inputs"][0] input_dtype = get_dtype(op["inputs"][0]) input_shape = input_info["shape"] input_c, input_h, input_w = get_chw_shape(input_shape) output_info = op["outputs"][0] output_dtype = get_dtype(op["outputs"][0]) output_shape = output_info["shape"] output_c, output_h, output_w = get_chw_shape(output_shape) # dtype input_dtype = get_dtype(input_info) output_dtype = get_dtype(output_info) params = { # operator "op": "AVERAGE_POOL_2D", # pool parameters "filter_h": input_h, "filter_w": input_w, "stride_h": 1, "stride_w": 1, "pad_h": 0, "pad_w": 0, # tensor "input_idx": input_info["name"], "output_idx": output_info["name"], "input_h": input_h, "input_w": input_w, "input_c": input_c, "input_dim": 3, "output_dim": 3, "output_h": output_h, "output_w": output_w, "output_c": output_c, "input_dtype": input_dtype, "output_dtype": output_dtype, } op = avgpool2d.AvgPool2d(params) return op def _convert_zeros(self, op): # shape output_info = op["outputs"][0] output_dtype = get_dtype(op["outputs"][0]) output_shape = output_info["shape"] output_c, output_h, output_w = get_chw_shape(output_shape) # dtype output_dtype = get_dtype(output_info) params = { # operator "op": op_name_translation[op["type"]], # tensor "output_idx": output_info["name"], "input_dim": 3, "output_dim": 3, "output_h": output_h, "output_w": output_w, "output_c": output_c, "output_dtype": output_dtype, } op = zeros_like.zeros_like(params) return op def _convert_ones(self, op): # shape output_info = op["outputs"][0] output_dtype = get_dtype(op["outputs"][0]) output_shape = output_info["shape"] output_c, output_h, output_w = get_chw_shape(output_shape) # dtype output_dtype = get_dtype(output_info) params = { # operator "op": op_name_translation[op["type"]], # tensor "output_idx": output_info["name"], "input_dim": 3, "output_dim": 3, "output_h": output_h, "output_w": output_w, "output_c": output_c, "output_dtype": output_dtype, } op = ones_like.ones_like(params) return op def _convert_zeros_like(self, op): # shape input_info = op["inputs"][0] input_dtype = get_dtype(op["inputs"][0]) input_shape = input_info["shape"] input_c, input_h, input_w = get_chw_shape(input_shape) output_info = op["outputs"][0] output_dtype = get_dtype(op["outputs"][0]) output_c = input_c output_h = input_h output_w = input_w # dtype input_dtype = get_dtype(input_info) output_dtype = get_dtype(output_info) params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input_info["name"], "output_idx": output_info["name"], "input_h": input_h, "input_w": input_w, "input_c": input_c, "input_dim": 3, "output_dim": 3, "output_h": output_h, "output_w": output_w, "output_c": output_c, "input_dtype": input_dtype, "output_dtype": output_dtype, } op = zeros_like.zeros_like(params) return op def _convert_ones_like(self, op): # shape input_info = op["inputs"][0] input_dtype = get_dtype(op["inputs"][0]) input_shape = input_info["shape"] input_c, input_h, input_w = get_chw_shape(input_shape) output_info = op["outputs"][0] output_dtype = get_dtype(op["outputs"][0]) output_shape = output_info["shape"] output_c, output_h, output_w = get_chw_shape(output_shape) # dtype input_dtype = get_dtype(input_info) output_dtype = get_dtype(output_info) params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input_info["name"], "output_idx": output_info["name"], "input_h": input_h, "input_w": input_w, "input_c": input_c, "input_dim": 3, "output_dim": 3, "output_h": output_h, "output_w": output_w, "output_c": output_c, "input_dtype": input_dtype, "output_dtype": output_dtype, } op = ones_like.ones_like(params) return op def _convert_collapse_sum_like(self, op): # shape input_info = op["inputs"][0] input_dtype = get_dtype(op["inputs"][0]) input_shape = input_info["shape"] input_c, input_h, input_w = get_chw_shape(input_shape) output_info = op["outputs"][0] output_dtype = get_dtype(op["outputs"][0]) output_shape = output_info["shape"] output_c, output_h, output_w = get_chw_shape(output_shape) # dtype input_dtype = get_dtype(input_info) output_dtype = get_dtype(output_info) params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input_info["name"], "output_idx": output_info["name"], "input_h": input_h, "input_w": input_w, "input_c": input_c, "input_dim": 3, "output_dim": 3, "output_h": output_h, "output_w": output_w, "output_c": output_c, "input_dtype": input_dtype, "output_dtype": output_dtype, } op = collapse_sum_like.collapseSumLike(params) return op def _convert_log_softmax(self, op): # shape input_info = op["inputs"][0] input_dtype = get_dtype(op["inputs"][0]) input_shape = input_info["shape"] input_h, input_w, input_c = get_chw_shape(input_shape) output_info = op["outputs"][0] output_dtype = get_dtype(op["outputs"][0]) output_shape = output_info["shape"] output_h, output_w, output_c = get_chw_shape(output_shape) # dtype input_dtype = get_dtype(input_info) output_dtype = get_dtype(output_info) params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input_info["name"], "output_idx": output_info["name"], "input_h": input_h, "input_w": input_w, "input_c": input_c, "input_dim": 3, "output_dim": 3, "output_h": output_h, "output_w": output_w, "output_c": output_c, "input_dtype": input_dtype, "output_dtype": output_dtype, } op = log_softmax.logSoftMax(params) return op def _convert_cross_entropy_with_logits(self, op): # shape input_info = op["inputs"][0] input_dtype = get_dtype(op["inputs"][0]) input_shape = input_info["shape"] input_c, input_h, input_w = get_chw_shape(input_shape) input2_info = op["inputs"][1] output_info = op["outputs"][0] output_dtype = get_dtype(op["outputs"][0]) output_shape = output_info["shape"] output_c, output_h, output_w = get_chw_shape(output_shape) # dtype input2_dtype = get_dtype(input_info) output_dtype = get_dtype(output_info) params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input_info["name"], "input2_idx": input2_info["name"], "output_idx": output_info["name"], "input_h": input_h, "input_w": input_w, "input_c": input_c, "input_dim": 3, "output_dim": 3, "output_h": output_h, "output_w": output_w, "output_c": output_c, "input_dtype": input_dtype, "input2_dtype": input2_dtype, "output_dtype": output_dtype, } op = nll_loss.nllLoss(params) return op def _convert_qadd(self, op): def _getSigShift(s): sig, shi = math.frexp(s) sig = int(round(sig * 2**31)) if sig == 2**31: sig /= 2 shi += 1 if shi < -31: shi = 0 sig = 0 return sig, shi def _getADDMultiplierShift(input_scale, input2_scale, output_scale): left_shift = 20 twice_max_input_scale = 2 * np.double(max(input_scale, input2_scale)) real_input1_multiplier = np.double(input_scale / twice_max_input_scale) real_input2_multiplier = np.double(input2_scale / twice_max_input_scale) real_output_multiplier = np.double(twice_max_input_scale / ((1 << left_shift) * output_scale)) input_multiplier, input_shift = _getSigShift(real_input1_multiplier) input2_multiplier, input2_shift = _getSigShift(real_input2_multiplier) output_multiplier, output_shift = _getSigShift(real_output_multiplier) return ( left_shift, input_multiplier, input_shift, input2_multiplier, input2_shift, output_multiplier, output_shift, ) # shape input0_info = op["inputs"][0] input0_shape = input0_info["shape"] input0_c, input0_h, input0_w = get_chw_shape(input0_shape) input2_info = op["inputs"][1] output_info = op["outputs"][0] # dtype input_dtype = get_dtype(input0_info) input2_dtype = get_dtype(input2_info) output_dtype = get_dtype(output_info) assert len(op["inputs"]) == 8, "Unexpected number of inputs" input_zero_point = self.data[op["inputs"][2]["name"]][0] output_zero_point = self.data[op["inputs"][6]["name"]][0] input2_zero_point = self.data[op["inputs"][3]["name"]][0] input_scale = self.data[op["inputs"][4]["name"]][0] input2_scale = self.data[op["inputs"][5]["name"]][0] output_scale = self.data[op["inputs"][7]["name"]][0] ( left_shift, input_multiplier, input_shift, input2_multiplier, input2_shift, output_multiplier, output_shift, ) = _getADDMultiplierShift(input_scale, input2_scale, output_scale) params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input0_info["name"], "input2_idx": input2_info["name"], "output_idx": output_info["name"], "input_h": input0_h, "input_w": input0_w, "input_c": input0_c, "input2_h": input0_h, "input2_w": input0_w, "input2_c": input0_c, "input_dim": 3, "input2_dim": 3, "output_dim": 3, "output_h": input0_h, "output_w": input0_w, "output_c": input0_c, "input_dtype": input_dtype, "input2_dtype": input2_dtype, "output_dtype": output_dtype, # trainable parameters "input_zero_point": input_zero_point, "input2_zero_point": input2_zero_point, "output_zero_point": output_zero_point, "input_scale": input_scale, "input2_scale": input2_scale, "output_scale": output_scale, # quantized infernece "left_shift": left_shift, "input_multiplier": input_multiplier, "input2_multiplier": input2_multiplier, "input_shift": input_shift, "input2_shift": input2_shift, "output_multiplier": output_multiplier, "output_shift": output_shift, } op = add.Add(params) return op def _convert_add1d(self, op): # shape input0_info = op["inputs"][0] input0_shape = input0_info["shape"] input0_c, input0_h, input0_w = get_chw_shape(input0_shape) input2_info = op["inputs"][1] output_info = op["outputs"][0] # dtype input_dtype = get_dtype(input0_info) input2_dtype = get_dtype(input2_info) output_dtype = get_dtype(output_info) params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input0_info["name"], "input2_idx": input2_info["name"], "output_idx": output_info["name"], "input_size": input0_h * input0_w * input0_c, "input_dtype": input_dtype, "input2_dtype": input2_dtype, "output_dtype": output_dtype, } op = add1d.add1d(params) return op def _convert_sub(self, op): # shape input0_info = op["inputs"][0] input0_shape = input0_info["shape"] input0_c, input0_h, input0_w = get_chw_shape(input0_shape) input2_info = op["inputs"][1] output_info = op["outputs"][0] # dtype input_dtype = get_dtype(input0_info) input2_dtype = get_dtype(input2_info) output_dtype = get_dtype(output_info) params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input0_info["name"], "input2_idx": input2_info["name"], "output_idx": output_info["name"], "input_size": input0_h * input0_w * input0_c, "input_dtype": input_dtype, "input2_dtype": input2_dtype, "output_dtype": output_dtype, } op = sub.sub(params) return op def _convert_div(self, op): # shape input0_info = op["inputs"][0] input0_shape = input0_info["shape"] input0_c, input0_h, input0_w = get_chw_shape(input0_shape) input2_info = op["inputs"][1] output_info = op["outputs"][0] # dtype input_dtype = get_dtype(input0_info) input2_dtype = get_dtype(input2_info) output_dtype = get_dtype(output_info) if "scale" in input2_info["name"]: scale_from_add = self.data[input2_info["name"]][0] else: scale_from_add = None input2 = input2_info["meta"]["data"] if isconstanttstr(input2_info["name"]) else None params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input0_info["name"], "input2_idx": input2_info["name"], "output_idx": output_info["name"], "input_size": input0_h * input0_w * input0_c, "input_dtype": input_dtype, "input2": input2, "input2_dtype": input2_dtype, "output_dtype": output_dtype, # input of scale from some conv2d "scale_from_add": scale_from_add, } op = div.div(params) return op def _convert_mul(self, op): # shape input0_info = op["inputs"][0] input0_shape = input0_info["shape"] input0_c, input0_h, input0_w = get_chw_shape(input0_shape) input2_info = op["inputs"][1] input2_shape = input2_info["shape"] input2_c, input2_h, input2_w = get_chw_shape(input2_shape) output_info = op["outputs"][0] output_c, output_h, output_w = get_chw_shape(output_info["shape"]) # dtype input_dtype = get_dtype(input0_info) input2_dtype = get_dtype(input2_info) output_dtype = get_dtype(output_info) scale_conv_2d_op = None scale_from_add = None if "constant" in input2_info["name"]: constant = input2_info["meta"]["data"] else: constant = None if "scale" in input2_info["name"]: # should be a scaler then if "qadd" in input2_info["name"]: scale_from_add = self.data[input2_info["name"]][0] constant = self.data[input2_info["name"]][0] else: scale_conv_2d_op = _findConv2dwithScaleName(self.model, self.layer, input2_info["name"]) params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input0_info["name"], "output_size": output_c * output_h * output_w, "input2_idx": input2_info["name"], "output_idx": output_info["name"], "input_size": input0_h * input0_w * input0_c, "input2_size": input2_h * input2_w * input2_c, "input_dtype": input_dtype, "input2_dtype": input2_dtype, "output_dtype": output_dtype, # input of scale from some conv2d "scale_conv_2d_op": scale_conv_2d_op, "scale_from_add": scale_from_add, "constant": constant, } op = mul.mul(params) return op def _convert_less(self, op): # shape input0_info = op["inputs"][0] input0_shape = input0_info["shape"] input0_c, input0_h, input0_w = get_chw_shape(input0_shape) input2_info = op["inputs"][1] output_info = op["outputs"][0] output_dtype = get_dtype(op["outputs"][0]) output_shape = output_info["shape"] output_c, output_h, output_w = get_chw_shape(output_shape) # dtype input_dtype = get_dtype(input0_info) input2_dtype = get_dtype(input2_info) output_dtype = get_dtype(output_info) params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input0_info["name"], "input2_idx": input2_info["name"], "output_idx": output_info["name"], "input_size": input0_c * input0_h * input0_w, "input_dtype": input_dtype, "input2_dtype": input2_dtype, "output_dtype": output_dtype, } op = less.less(params) return op def _convert_greater(self, op): # shape input0_info = op["inputs"][0] input0_shape = input0_info["shape"] input0_c, input0_h, input0_w = get_chw_shape(input0_shape) input2_info = op["inputs"][1] output_info = op["outputs"][0] output_dtype = get_dtype(op["outputs"][0]) output_shape = output_info["shape"] output_c, output_h, output_w = get_chw_shape(output_shape) # dtype input_dtype = get_dtype(input0_info) input2_dtype = get_dtype(input2_info) output_dtype = get_dtype(output_info) params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input0_info["name"], "input2_idx": input2_info["name"], "output_idx": output_info["name"], "input_size": input0_c * input0_h * input0_w, "input_dtype": input_dtype, "input2_dtype": input2_dtype, "output_dtype": output_dtype, } op = greater.greater(params) return op def _convert_where(self, op): # shape input0_info = op["inputs"][0] input0_shape = input0_info["shape"] input0_c, input0_h, input0_w = get_chw_shape(input0_shape) input2_info = op["inputs"][1] input3_info = op["inputs"][2] output_info = op["outputs"][0] output_dtype = get_dtype(op["outputs"][0]) output_shape = output_info["shape"] output_c, output_h, output_w = get_chw_shape(output_shape) # dtype input_dtype = get_dtype(input0_info) input2_dtype = get_dtype(input2_info) input3_dtype = get_dtype(input3_info) output_dtype = get_dtype(output_info) params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input0_info["name"], "input2_idx": input2_info["name"], "input3_idx": input3_info["name"], "output_idx": output_info["name"], "input_size": input0_c * input0_h * input0_w, "output_size": output_c * output_h * output_w, "input_dtype": input_dtype, "input2_dtype": input2_dtype, "input3_dtype": input3_dtype, "output_dtype": output_dtype, } op = where.where(params) return op def _convert_negative(self, op): # shape input0_info = op["inputs"][0] input0_shape = input0_info["shape"] input_c, input_h, input_w = get_chw_shape(input0_shape) output_info = op["outputs"][0] # dtype input_dtype = get_dtype(input0_info) output_dtype = get_dtype(output_info) params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input0_info["name"], "output_idx": output_info["name"], "input_size": input_c * input_h * input_w, "input_dtype": input_dtype, "output_dtype": output_dtype, } op = negative.negative(params) return op def _convert_sum(self, op): # shape input_info = op["inputs"][0] input_shape = input_info["shape"] if len(input_shape) == 4: d1, d2, d3, d4 = input_shape else: d2, d3, d4 = get_chw_shape(input_shape) d1 = 1 output_info = op["outputs"][0] output_shape = output_info["shape"] if len(output_shape) == 4: od1, od2, od3, od4 = output_shape else: od2, od3, od4 = get_chw_shape(output_shape) od1 = 1 # dtype input_dtype = get_dtype(input_info) output_dtype = get_dtype(output_info) # TODO: update here after bugs in TTE IR is fixed (null) if op["attrs"]["axis"] is None: op["attrs"]["exclude"] = 1 axis = 0 elif op["attrs"]["axis"][0] < 0: if op["attrs"]["axis"][0] == -1: axis = 3 elif op["attrs"]["axis"][0] == -2: axis = 2 elif op["attrs"]["axis"][0] == -3: axis = 1 elif op["attrs"]["axis"][0] == -4: axis = 0 else: axis = op["attrs"]["axis"][0] params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input_info["name"], "output_idx": output_info["name"], "axis": axis, "d1": d1, "d2": d2, "d3": d3, "d4": d4, "input_dim": 4, "output_dim": 3, "od1": od1, "od2": od2, "od3": od3, "od4": od4, "input_dtype": input_dtype, "output_dtype": output_dtype, "exclude": bool(op["attrs"]["exclude"]), } op = sum.sum(params) return op def _convert_tile(self, op): # shape input_info = op["inputs"][0] input_shape = input_info["shape"] input_c, input_h, input_w = get_chw_shape(input_shape) output_info = op["outputs"][0] output_shape = output_info["shape"] output_c, output_h, output_w = get_chw_shape(output_shape) # dtype input_dtype = get_dtype(input_info) output_dtype = get_dtype(output_info) reps_size = len(op["attrs"]["reps"]) reps = op["attrs"]["reps"] params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input_info["name"], "output_idx": output_info["name"], "reps_size": reps_size, "reps": reps, "input_h": input_h, "input_w": input_w, "input_c": input_c, "input_dim": 3, "output_dim": 3, "output_h": output_h, "output_w": output_w, "output_c": output_c, "input_dtype": input_dtype, "output_dtype": output_dtype, } op = tile.tile(params) return op def _convert_convolution(self, op): weight_info = op["inputs"][1] if op["attrs"]["kernel_size"] is None: kh = op["inputs"][1]["shape"][-1] kw = op["inputs"][1]["shape"][-2] else: kh, kw = op["attrs"]["kernel_size"] padding = op["attrs"]["padding"][0] groups = op["attrs"]["groups"] sw, sh = op["attrs"]["strides"] # shape input_info = op["inputs"][0] input_shape = input_info["shape"] _, input_c, input_h, input_w = input_shape output_info = op["outputs"][0] output_shape = output_info["shape"] _, output_c, output_h, output_w = output_shape # dtype input_dtype = get_dtype(input_info) output_dtype = get_dtype(output_info) weight_name = weight_info["name"] if weight_name not in self.data: weight_value = weight_name else: weight_value = self.data[weight_name].transpose(0, 2, 3, 1) # OIHW -> OHWI if ( groups == input_c == output_c ): # For depthwise conv we assume the weights to be in HWC in code generation, # we revert the tensor to match the assumption weight_value = weight_value.transpose(3, 1, 2, 0) # OHWI -> IHWO has_bias = False for inp in op["inputs"]: if "weight" in inp["name"]: _accessTrainable(self.trainedWeights, inp["name"]) if "bias" in inp["name"]: bias_name = inp["name"] bias = self.data[bias_name] has_bias = True _accessTrainable(self.trainedBias, inp["name"]) # for floating point implementation, we allow no bias data since the data might be in bias_add ops if op["type"] not in {"nn.mcuconv2d"} and not has_bias: bias = np.zeros(output_shape[1]) bias_name = None has_bias = True assert has_bias, "no bias data found!" # normal conv2d, depthwise, group conv if groups == 1: op_name_idx = 0 elif groups == input_c == output_c and input_dtype == "int8" and output_dtype == "int32": op_name_idx = 1 else: op_name_idx = 2 if input_dtype == "int8" and output_dtype == "int32" and op_name_idx != 2 and op["type"] != "nn.conv2d": input_zero_point = self.data[_findKeyinTensors(op["inputs"], "zero_x")["name"]][0] output_zero_point = self.data[_findKeyinTensors(op["inputs"], "zero_y")["name"]][0] # TODO: if the IR saperate these input_scale = np.ones(output_shape[1]) output_scale = np.ones(output_shape[1]) weight_scale = np.ones(output_shape[1]) # effective scales effective_scale = self.data[_findKeyinTensors(op["inputs"], "scale")["name"]] else: input_zero_point = 0 output_zero_point = 0 input_scale = np.ones(output_shape[1]) output_scale = np.ones(output_shape[1]) weight_scale = np.ones(output_shape[1]) effective_scale = np.ones(output_shape[1]) multiplier, shift = _getMultiplierShift(effective_scale) params = { "op": op_name_translation[op["type"]][op_name_idx], # SE blocks "is_SEBlock": "se" in op and op["se"], "kernel_h": kh, "kernel_w": kw, "padding": padding, "padding_h": op["attrs"]["padding"][0], "padding_w": op["attrs"]["padding"][1], "dilation_h": op["attrs"]["dilation"][0], "dilation_w": op["attrs"]["dilation"][1], "groups": groups, "stride_h": sh, "stride_w": sw, "input_idx": input_info["name"], "output_idx": output_info["name"], "input_dim": 3, "input_c": input_c, "input_h": input_h, "input_w": input_w, "output_dim": 3, "output_c": output_c, "output_h": output_h, "output_w": output_w, "input_dtype": input_dtype, "output_dtype": output_dtype, "weight_value": weight_value, "weight_name": weight_name, "bias": bias.astype(int), "bias_name": bias_name, "effective_scale": effective_scale, "input_zero_point": input_zero_point, "output_zero_point": output_zero_point, "multiplier": multiplier.astype(int), "shift": shift.astype(int), "input_scale": input_scale, "output_scale": output_scale, "weight_scale": weight_scale, } # assert weight_value != "out_nn.conv2d_transpose@330" if groups == 1: op = conv2d.Conv2d(params) elif groups == input_c == output_c and input_dtype == "int8" and output_dtype == "int32": op = depthwiseConv2d.DepthwiseConv2d(params) else: if "inplace_int8_input" in op and op["inplace_int8_input"]: params["inplace_int8_input"] = True params["float32_input2"] = True if op["inputs"][1]["dtype"] == "float32" else False op = group_conv2d.groupConv2d(params) if isinstance(weight_value, str): w_c, w_w, w_h = ( weight_info["shape"][1] * weight_info["shape"][2], weight_info["shape"][2], weight_info["shape"][3], ) op._add_input(weight_name, input_dtype, w_h, w_w, w_c) # for int8 group conv if op_name_idx == 2 and "float32_input2" in params and not params["float32_input2"]: # we will use int8 for input and output, dynamic quantization will be inside ops for input_tensor in op.input_tensors: input_tensor.dtype = "int8" op.output_tensors[0].dtype = "int8" op.params["intput_dtype"] = "int8" op.params["intput2_dtype"] = "int8" op.params["output_dtype"] = "int8" # we need int32 intermediate buffer for normalization op.add_int32_buffer_tensor() return op def _convert_transpose_conv2d(self, op): assert op["attrs"]["kernel_size"] is not None kh, kw = op["attrs"]["kernel_size"] padding = op["attrs"]["padding"][0] groups = op["attrs"]["groups"] sw, sh = op["attrs"]["strides"] # shape input_info = op["inputs"][0] input2_info = op["inputs"][1] input_shape = input_info["shape"] _, input_c, input_h, input_w = input_shape output_info = op["outputs"][0] output_shape = output_info["shape"] _, output_c, output_h, output_w = output_shape # dtype input_dtype = get_dtype(input_info) input2_dtype = get_dtype(input2_info) output_dtype = get_dtype(output_info) weight_info = op["inputs"][-1] weight_name = weight_info["name"] for inp in op["inputs"]: if "weight" in inp["name"]: _accessTrainable(self.trainedWeights, inp["name"]) if "bias" in inp["name"]: _accessTrainable(self.trainedBias, inp["name"]) if weight_name not in self.data: weight_value = weight_name else: weight_value = self.data[weight_name] # currently ignore the bias bias = np.zeros(input_shape[1]) params = { "op": op_name_translation[op["type"]], # SE blocks "is_SEBlock": "se" in op and op["se"], "kernel_h": kh, "kernel_w": kw, "padding": padding, "padding_h": op["attrs"]["padding"][0], "padding_w": op["attrs"]["padding"][1], "kernel_layout": op["attrs"]["kernel_layout"], "group": groups, "stride_h": sh, "stride_w": sw, "input_idx": input_info["name"], "output_idx": output_info["name"], "input_dim": 3, "input_c": input_shape[1], "input_h": input_shape[2], "input_w": input_shape[3], "output_dim": 3, "output_c": output_shape[1], "output_h": output_shape[2], "output_w": output_shape[3], "input_dtype": input_dtype, "input2_dtype": input2_dtype, "output_dtype": output_dtype, "weight_value": weight_value, "weight_name": weight_name, "bias": bias.astype(int), } op_instance = transpose_conv2d.transposeConv2d(params) if isinstance(weight_value, str): w_c, w_w, w_h = ( weight_info["shape"][0] * weight_info["shape"][1], weight_info["shape"][2], weight_info["shape"][3], ) op_instance._add_input(weight_name, input_dtype, w_h, w_w, w_c) # for full int8 tranpose conv # 1. Cast dtype as we do requantize inside ops # 2. we need int32 intermediate buffer for normalization and # 3. update the output type as int8 as we fuze nomalization inside a single op if params["input2_dtype"] == params["input_dtype"] == "int8" and params["output_dtype"] == "int32": # we will use int8 for input and output, dynamic quantization will be inside ops for input_tensor in op_instance.input_tensors: input_tensor.dtype = "int8" op_instance.output_tensors[0].dtype = "int8" op_instance.params["intput_dtype"] = "int8" op_instance.params["intput2_dtype"] = "int8" op_instance.params["output_dtype"] = "int8" op_instance.add_int32_buffer_tensor() return op_instance def _convert_matmul(self, op): # shape input0_info = op["inputs"][0] input0_shape = input0_info["shape"] input0_c, input0_h, input0_w = get_chw_shape(input0_shape) input2_info = op["inputs"][1] input2_shape = input2_info["shape"] output_info = op["outputs"][0] input2_c, input2_h, input2_w = get_chw_shape(input2_shape) # dtype input_dtype = get_dtype(input0_info) input2_dtype = get_dtype(input2_info) output_dtype = get_dtype(output_info) weight_info = op["inputs"][-1] weight_name = weight_info["name"] for inp in op["inputs"]: if "weight" in inp["name"]: _accessTrainable(self.trainedWeights, inp["name"]) if "bias" in inp["name"]: _accessTrainable(self.trainedBias, inp["name"]) if weight_name not in self.data: weight_value = weight_name else: weight_value = self.data[weight_name] assert input0_w == input2_h params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input0_info["name"], "input2_idx": input2_info["name"], "output_idx": output_info["name"], "matA_row": input0_h, "matA_col": input0_w, "matB_row": input2_h, "matB_col": input2_w, "input_dtype": input_dtype, "input2_dtype": input2_dtype, "output_dtype": output_dtype, "weight_value": weight_value, "weight_name": weight_name, } op = mat_mul.mat_mul(params) if isinstance(weight_value, str): w_c, w_w, w_h = get_chw_shape(weight_info["shape"]) op._add_input(weight_name, input_dtype, w_h, w_w, w_c) return op def _convert_dense(self, op): # shape input0_info = op["inputs"][0] input0_shape = input0_info["shape"] input0_c, input0_h, input0_w = get_chw_shape(input0_shape) input2_info = op["inputs"][1] input2_shape = input2_info["shape"] output_info = op["outputs"][0] input2_c, input2_h, input2_w = get_chw_shape(input2_shape) # dtype input_dtype = get_dtype(input0_info) input2_dtype = get_dtype(input2_info) output_dtype = get_dtype(output_info) weight_info = op["inputs"][-1] weight_name = weight_info["name"] for inp in op["inputs"]: if "weight" in inp["name"]: _accessTrainable(self.trainedWeights, inp["name"]) if "bias" in inp["name"]: _accessTrainable(self.trainedBias, inp["name"]) if weight_name not in self.data: weight_value = weight_name else: weight_value = self.data[weight_name] assert input0_w == input2_w params = { # operator "op": op_name_translation[op["type"]], # tensor "input_idx": input0_info["name"], "input2_idx": input2_info["name"], "output_idx": output_info["name"], "matA_row": input0_h, "matA_col": input0_w, "matB_row": input2_h, "matB_col": input2_w, "input_dtype": input_dtype, "input2_dtype": input2_dtype, "output_dtype": output_dtype, "weight_value": weight_value, "weight_name": weight_name, } op = dense.dense(params) if isinstance(weight_value, str): w_c, w_w, w_h = get_chw_shape(weight_info["shape"]) op._add_input(weight_name, input_dtype, w_h, w_w, w_c) return op def get_dtype(tensor_info): if "dtype" in tensor_info: return tensor_info["dtype"] else: return "int8" def get_chw_shape(shape): input_c = input_h = input_w = 0 batch = 1 if len(shape) == 5: _, batch, input_c, input_h, input_w = shape elif len(shape) == 4: batch, input_c, input_h, input_w = shape elif len(shape) == 2: input_h, input_w = shape input_c = 1 elif len(shape) == 1: input_h = input_w = 1 input_c = shape[0] else: raise NotImplementedError if batch != 1: # raise ValueError("batch size should be 1") warnings.warn("batch size should be 1") return input_c, input_h, input_w def _getMultiplierShift(effective_scale): significand = np.zeros(len(effective_scale), dtype="int32") shift = np.zeros(len(effective_scale), dtype="int32") for i, s in enumerate(effective_scale): if s == 0: significand[i] = 0 shift[i] = 0 else: sig, shi = math.frexp(s) sig = int(round(sig * 2**31)) if sig == 2**31: sig /= 2 shi += 1 if shi < -31: shi = 0 sig = 0 significand[i] = sig shift[i] = shi return significand, shift