from .constant import op_name_translation
from .GraphReorder import find_following_link_op, find_previous_link_op

__all__ = [
    "_accessTrainable",
    "_findKeyinTensors",
    "_removeLayers",
    "_findTargetWeightforGconv",
    "_findBinMaskPattern",
    "_findBinMaskPatternint8",
    "_castisFusable",
    "_castisFusable_for_gconv",
    "_castSliceisFusable",
    "_fileTileRepAsWeights",
    "_updateOutputDtype",
    "_findConv2dwithScaleName",
    "_updateIdxParameter",
    "_updateIdx",
    "_findPartialConv",
    "_findWhereTensorFrom",
    "_findWhereTheTensorIdxFrom",
    "_findTransposeMaxDivideDivide",
    "_findNextOpTakeInputName",
    "_findAbsMaxDivideDivide",
    "_findTransposeMultiplyAbsMaxDivide",
    "_findMultiplyAbsMaxDivide",
]

def _findWhereTensorFrom(layers, tensor_idx):
    for op in layers:
        if op.params["output_idx"] == tensor_idx:
            return op
    return None

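# Illustrative sketch (not part of the original module): helpers that take "layers"
# expect generated layer objects exposing a ".params" dict. Only the keys actually
# read by these helpers ("output_idx", "input_dtype", "weight_name", ...) are implied
# by the code; the concrete values below are hypothetical.
#
#     producer = _findWhereTensorFrom(layers, tensor_idx="12")
#     if producer is not None:
#         print(producer.params["output_idx"])  # -> "12"
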
def _findPartialConv(layers, weight_name):
    for op in layers:
        if "weight_name" in op.params and op.params["input_dtype"] == "int8":
            if op.params["weight_name"] == weight_name:
                return op
    raise AssertionError

def _findKeyinTensors(inputs, key):
    for inp in inputs:
        if key in inp["name"]:
            return inp
    raise KeyError

def _accessTrainable(table, name):
    in_table = False
    for t in table:
        if t.name == name:  # compare by value, not identity
            in_table = True
            t.access_cnt += 1
            break

    if not in_table:
        table.append(trainableTensor(name, 0))

def _removeLayers(layers, target_dict):
    for k in target_dict:
        layers.remove(target_dict[k])
    return layers

# find gconv -> reshape -> sum -> transpose (weight_idx)
def _findTargetWeightforGconv(layers, output_idx):
    children = 0
    reshape_op = 0
    for op in layers:
        for input_tensor in op["inputs"]:
            if input_tensor["name"] == output_idx:
                children += 1
                reshape_op = op
    # see if we have a reshape
    if not (children == 1 and reshape_op["type"] == "reshape"):
        return None
    # reshape -> sum
    sum_op = None
    children = 0
    for op in layers:
        for input_tensor in op["inputs"]:
            if input_tensor["name"] == reshape_op["outputs"][0]["name"]:
                children += 1
                sum_op = op
    if not (children == 1 and sum_op["type"] == "sum"):
        return None
    # sum -> transpose
    transpose_op = None
    children = 0
    for op in layers:
        for input_tensor in op["inputs"]:
            if input_tensor["name"] == sum_op["outputs"][0]["name"]:
                children += 1
                transpose_op = op
    if not (children == 1 and transpose_op["type"] == "transpose"):
        return None
    if transpose_op["outputs"][0]["meta"]["children"] == 0:
        if transpose_op["outputs"][0]["meta"]["output_info"][0] == "v":
            return transpose_op["outputs"][0]["meta"]["output_info"]
        return "v" + transpose_op["outputs"][0]["meta"]["output_info"]
    else:
        # for int8 bp we may need abs -> max -> divide (127) -> divide (output activation) -> cast for clipping
        last_op = transpose_op
        op_start = _findNextOpTakeInputName(layers, last_op["outputs"][0]["name"])
        # if abs["type"] == "abs":
        #     op_start = _findNextOpTakeInputName(layers, abs["outputs"][0]["name"])
        #     if op0["type"] == "max":
        #         op1 = _findNextOpTakeInputName(layers, op0["outputs"][0]["name"])
        #         if op1["type"] == "divide":
        #             op2 = _findNextOpTakeInputName(layers, op1["outputs"][0]["name"])
        #             if op2["type"] == "divide":
        #                 cast = _findNextOpTakeInputName(layers, op2["outputs"][0]["name"])
        #                 if cast["type"] == "cast":
        #                     if cast["outputs"][0]["meta"]["output_info"][0] == "v":
        #                         return cast["outputs"][0]["meta"]["output_info"]
        #                     return "v" + cast["outputs"][0]["meta"]["output_info"]
        #                 else:
        #                     last_op = cast
        # for int8 bp we may have the following pattern
        #            -> (cast1) -> multiply
        # from op ->                          -> divide -> cast
        #            -> abs -> max -> (cast2)
        if op_start["type"] == "multiply":
            op0 = _findNextOpTakeInputName(layers, op_start["outputs"][0]["name"])
            if op0["type"] == "divide":
                cast = _findNextOpTakeInputName(layers, op0["outputs"][0]["name"])
                if cast["type"] == "cast":
                    if cast["outputs"][0]["meta"]["output_info"][0] == "v":
                        return cast["outputs"][0]["meta"]["output_info"]
                    return "v" + cast["outputs"][0]["meta"]["output_info"]
                else:
                    last_op = cast
        elif op_start["type"] == "cast":
            op_0 = _findNextOpTakeInputName(layers, op_start["outputs"][0]["name"])
            if op_0["type"] == "multiply":
                op1 = _findNextOpTakeInputName(layers, op_0["outputs"][0]["name"])
                if op1["type"] == "divide":
                    cast = _findNextOpTakeInputName(layers, op1["outputs"][0]["name"])
                    if cast["type"] == "cast":
                        if cast["outputs"][0]["meta"]["output_info"][0] == "v":
                            return cast["outputs"][0]["meta"]["output_info"]
                        return "v" + cast["outputs"][0]["meta"]["output_info"]
                    else:
                        last_op = cast
        elif op_start["type"] == "abs":
            op0 = _findNextOpTakeInputName(layers, op_start["outputs"][0]["name"])
            if op0["type"] == "max":
                op1 = _findNextOpTakeInputName(layers, op0["outputs"][0]["name"])
                # an optional cast may sit between max and divide
                if op1["type"] == "cast":
                    op1 = _findNextOpTakeInputName(layers, op1["outputs"][0]["name"])
                if op1["type"] == "divide":
                    cast = _findNextOpTakeInputName(layers, op1["outputs"][0]["name"])
                    if cast["type"] == "cast":
                        if cast["outputs"][0]["meta"]["output_info"][0] == "v":
                            return cast["outputs"][0]["meta"]["output_info"]
                        return "v" + cast["outputs"][0]["meta"]["output_info"]
                    else:
                        last_op = cast

        # we may have a slice op
        slice_op = None
        children = 0
        for op in layers:
            for input_tensor in op["inputs"]:
                if input_tensor["name"] == last_op["outputs"][0]["name"]:
                    children += 1
                    slice_op = op
        assert slice_op["outputs"][0]["meta"]["children"] == 0
        if slice_op["outputs"][0]["meta"]["output_info"][0] == "v":
            return slice_op["outputs"][0]["meta"]["output_info"]
        return "v" + slice_op["outputs"][0]["meta"]["output_info"]

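# Illustrative usage (a sketch; "model_ops" and "gconv_out" are hypothetical): given
# the output tensor name of a group conv, walk gconv -> reshape -> sum -> transpose
# (plus the optional int8-bp quantize tail) and return the output_info name of the
# final tensor, prefixed with "v" if it is not already; None means no match.
#
#     target = _findTargetWeightforGconv(model_ops, gconv_out)
#     if target is not None:
#         pass  # use the returned name as the weight gradient buffer
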
def _findNextOpTakeInputName(layers, input_name):
    for op in layers:
        for input_tensor in op["inputs"]:
            if input_tensor["name"] == input_name:
                return op
    return None

def _findNextOpListTakeInputName(layers, input_name):
    ops = []
    for op in layers:
        for input_tensor in op["inputs"]:
            if input_tensor["name"] == input_name:
                ops.append(op)
    return ops

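# Illustrative sketch (not part of the original module): the graph-pattern helpers
# below walk a list of op dicts. Only the keys actually read in this file are shown;
# the concrete values are hypothetical.
#
#     op = {
#         "type": "nn.conv2d",
#         "attrs": {"groups": 1},
#         "inputs": [{"name": "t0", "dtype": "int8"}, {"name": "w0", "dtype": "int8"}],
#         "outputs": [{"name": "t1", "dtype": "int32",
#                      "meta": {"children": 1, "output_info": "..."}}],
#     }
#     consumer = _findNextOpTakeInputName(model, "t1")       # first op that reads "t1"
#     consumers = _findNextOpListTakeInputName(model, "t1")  # all ops that read "t1"
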
# check if we need to have binary mask for this conv2d
# conv2d (int32) -> cast -> greater/less -> multiply -> binary mask
def _findBinMaskPattern(layers, output_idx):
    children = 0
    f_ops = []
    for op in layers:
        for input_tensor in op["inputs"]:
            if input_tensor["name"] == output_idx:
                children += 1
                f_ops.append(op)
    # see if we have cast op
    cast_op = greater_op = less_op = multiply_op = where_op = None
    if children >= 2:
        for op in f_ops:
            if op["type"] == "cast":
                cast_op = op
                break
    if None in [cast_op]:
        return False, {}
    # find greater/less op
    for op in layers:
        for input_tensor in op["inputs"]:
            if input_tensor["name"] == cast_op["outputs"][0]["name"]:
                if op["type"] in {"greater", "greater_equal"}:
                    greater_op = op
                elif op["type"] in {"less", "less_equal"}:
                    less_op = op
    if None in [greater_op, less_op]:
        return False, {}
    # find multiply
    for op in layers:
        if op["type"] == "multiply":
            if op["inputs"][0]["name"] in [
                greater_op["outputs"][0]["name"],
                less_op["outputs"][0]["name"],
            ] and op["inputs"][1]["name"] in [
                greater_op["outputs"][0]["name"],
                less_op["outputs"][0]["name"],
            ]:
                multiply_op = op
                break
    if None in [multiply_op]:
        return False, {}
    # find where
    for op in layers:
        if op["type"] == "where":
            if op["inputs"][0]["name"] in multiply_op["outputs"][0]["name"]:
                where_op = op
                break
    if where_op is None:
        return False, {}
    op_dict = {
        "cast": cast_op,
        "greater": greater_op,
        "less": less_op,
        "multiply": multiply_op,
        # "where": where_op,
        # "zeros": zeros_op,
        # "ones": ones_op,
    }
    return True, op_dict

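# Illustrative usage (a sketch; "model_ops" and "conv_output_name" are hypothetical):
#
#     matched, ops = _findBinMaskPattern(model_ops, conv_output_name)
#     if matched:
#         # ops holds the "cast", "greater", "less", and "multiply" ops that make up
#         # the binary-mask pattern; a caller would typically fuse or remove them.
#         pass
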
# this is for int8
# check if we need to have binary mask for this conv2d
# conv2d (int32) -> greater/less -> multiply -> binary mask
def _findBinMaskPatternint8(layers, output_idx):
    children = 0
    f_ops = []
    for op in layers:
        for input_tensor in op["inputs"]:
            if input_tensor["name"] == output_idx:
                children += 1
                f_ops.append(op)
    # see if we have greater/less ops
    greater_op = less_op = multiply_op = where_op = None
    if children >= 2:
        for op in f_ops:
            if op["type"] in {"greater", "greater_equal"}:
                greater_op = op
            elif op["type"] in {"less", "less_equal"}:
                less_op = op
    if None in [greater_op, less_op]:
        return False, {}
    # find multiply
    for op in layers:
        if op["type"] == "multiply":
            if op["inputs"][0]["name"] in [
                greater_op["outputs"][0]["name"],
                less_op["outputs"][0]["name"],
            ] and op["inputs"][1]["name"] in [
                greater_op["outputs"][0]["name"],
                less_op["outputs"][0]["name"],
            ]:
                multiply_op = op
                break
    if None in [multiply_op]:
        return False, {}
    # find where
    for op in layers:
        if op["type"] == "where":
            if op["inputs"][0]["name"] in multiply_op["outputs"][0]["name"]:
                where_op = op
                break
    if where_op is None:
        return False, {}
    # packing return ops
    op_dict = {
        "greater": greater_op,
        "less": less_op,
        "multiply": multiply_op,
    }
    return True, op_dict

# check the graph and see if we can fuse the cast into transpose conv2d
def _castisFusable(layers, cast_op):
    # find the output
    fp_output_idx = cast_op["outputs"][0]["name"]
    children = 0
    following_op = None
    for op in layers:
        for input_tensor in op["inputs"]:
            if input_tensor["name"] == fp_output_idx:
                children += 1
                following_op = op
    # checking the graph matches the pattern
    # int8 -> fp32
    # only used as the weight of some transpose conv
    if (
        cast_op["inputs"][0]["dtype"] == "int8"
        and cast_op["outputs"][0]["dtype"] == "float32"
        and children == 1
        and following_op["type"] == "nn.conv2d_transpose"
        and following_op["inputs"][1]["name"] == fp_output_idx
    ):
        return True, following_op
    return False, following_op

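# Illustrative usage (a sketch; names are hypothetical): when a cast is only used as
# the weight of a transpose conv, the caller can fold it into that conv.
#
#     fusable, tconv = _castisFusable(model_ops, cast_op)
#     if fusable:
#         # tconv is the nn.conv2d_transpose that consumes the cast output as weight
#         pass
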
# check the graph and see if we can fuse the cast into group conv2d
# cast -> reshape -> group_conv
def _castisFusable_for_gconv(layers, cast_op):
    # find the output
    idx, f_op_1 = find_following_link_op(layers, cast_op)
    if f_op_1["type"] != "reshape":
        return False, None
    idx, f_op_2 = find_following_link_op(layers, f_op_1)
    if (
        f_op_2["type"] == "nn.conv2d"
        and cast_op["inputs"][0]["dtype"] == "int8"
        and cast_op["outputs"][0]["dtype"] == "float32"
        and f_op_2["inputs"][0]["name"] == f_op_1["outputs"][0]["name"]
    ):
        # remove reshape
        # layers.remove(f_op_1)
        return True, f_op_2
    return False, f_op_2

# check the graph and see if we can fuse the cast into slice conv2d
# Note: It seems we don't need this
def _castSliceisFusable(layers, cast_op):
    # find the output
    fp_output_idx = cast_op["outputs"][0]["name"]
    children = 0
    slice_op = reshape_op = g_conv_op = None
    for op in layers:
        for input_tensor in op["inputs"]:
            if input_tensor["name"] == fp_output_idx:
                children += 1
                slice_op = op
    # checking the graph matches the pattern
    # cast -> slice -> reshape -> transpose_conv
    # find slice
    if not (children == 1 and slice_op["type"] == "strided_slice"):
        return False, slice_op, reshape_op, g_conv_op
    # find reshape conv
    children = 0
    for op in layers:
        for input_tensor in op["inputs"]:
            if input_tensor["name"] == slice_op["outputs"][0]["name"] and op["type"] == "reshape":
                children += 1
                reshape_op = op
    if not (children == 1 and reshape_op["type"] == "reshape"):
        return False, slice_op, reshape_op, g_conv_op
    # find g conv
    children = 0
    for op in layers:
        if op["type"] == "nn.conv2d" and op["inputs"][0]["name"] == reshape_op["outputs"][0]["name"]:
            children += 1
            g_conv_op = op
    if not (
        children == 1
        and g_conv_op["attrs"]["groups"] > 1
        and cast_op["inputs"][0]["dtype"] == "int8"
        and cast_op["outputs"][0]["dtype"] == "float32"
    ):
        return False, slice_op, reshape_op, g_conv_op
    # only used as the weight of some transpose conv
    return True, slice_op, reshape_op, g_conv_op

# check if we need to fuse ops for tile
# ########## tile -> reshape -> conv2d (which takes it as weights)
#    fusion | ------------------------|
def _fileTileRepAsWeights(layers, tile_op):
    reshape_op = conv2d_op = None
    children = 0
    for op in layers:
        for input_tensor in op["inputs"]:
            if input_tensor["name"] == tile_op["outputs"][0]["name"]:
                children += 1
    assert children == 1, "output of tile is used by other layers"
    # find reshape op
    for op in layers:
        for input_tensor in op["inputs"]:
            if input_tensor["name"] == tile_op["outputs"][0]["name"]:
                if op["type"] == "reshape":
                    reshape_op = op
    if None in [reshape_op]:
        return False, {}
    # find conv2d
    for op in layers:
        if op["type"] == "nn.conv2d":
            if op["inputs"][1]["name"] == reshape_op["outputs"][0]["name"]:
                conv2d_op = op
                break
    if None in [conv2d_op]:
        return False, {}
    op_dict = {
        "tile": tile_op,
        "reshape": reshape_op,
        "conv2d": conv2d_op,
    }
    return True, op_dict

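# Illustrative usage (a sketch; "model_ops" and "tile_op" are hypothetical): when the
# tile output is only reshaped and then consumed as conv2d weights, the whole chain
# can be folded into the conv's weight tensor.
#
#     matched, ops = _fileTileRepAsWeights(model_ops, tile_op)
#     if matched:
#         # ops["tile"], ops["reshape"], ops["conv2d"] describe the fusable chain
#         pass
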
def _findWhereTheTensorIdxFrom(model, tensor_idx):
    for op in model:
        if op["outputs"][0]["name"] == tensor_idx:
            return op
    return None

def _updateOutputDtype(layer, origin_idx, type):
    for prev_op in layer:
        if prev_op.params["output_idx"] == origin_idx:
            prev_op.params["output_dtype"] = type
            prev_op.output_tensors[0].dtype = type

def _findConv2dwithScaleName(model, layers, scale_idx):
    for op in model:
        if op["type"] == "nn.mcuconv2d":
            if _findKeyinTensors(op["inputs"], "scale")["name"] == scale_idx:
                # find the layer
                for layer in layers:
                    if layer.params["op"] in op_name_translation["nn.mcuconv2d"]:
                        if layer.params["weight_name"] == _findKeyinTensors(op["inputs"], "weight")["name"]:
                            return layer
    raise AssertionError

def _updateIdxParameter(json, origin_idx, new_idx):
    # update input json
    for op in json:
        for t in op["inputs"]:
            if t["name"] == new_idx:
                t["name"] = origin_idx
    # comment this since we should not need to trace back to the tensor
    # for _, prev_op in enumerate(layer):
    #     # get all tensors for this layer
    #     for t_cnt, t in enumerate(prev_op.input_tensors):
    #         if t_cnt == 0 and prev_op.params["input_idx"] == new_idx:
    #             prev_op.params["input_idx"] = origin_idx
    #             t.graph_idx = origin_idx
    #         elif t_cnt == 1 and "input2_idx" in prev_op.params and prev_op.params["input2_idx"] == new_idx:
    #             prev_op.params["input2_idx"] = origin_idx
    #             t.graph_idx = origin_idx
    #         elif t_cnt == 2 and "input3_idx" in prev_op.params and prev_op.params["input3_idx"] == new_idx:
    #             prev_op.params["input3_idx"] = origin_idx
    #             t.graph_idx = origin_idx
    #     if prev_op.params["output_idx"] == new_idx:
    #         prev_op.params["output_idx"] = origin_idx
    #         prev_op.output_tensors[0].graph_idx = origin_idx
    #     if "weight_name" in prev_op.params and prev_op.params["weight_name"] == new_idx:
    #         prev_op.params["weight_name"] = origin_idx
    #     if "weight_value" in prev_op.params and prev_op.params["weight_value"] == new_idx:
    #         prev_op.params["weight_value"] = origin_idx

def _updateIdx(json, layer, origin_idx, new_idx):
    # update input json
    for op in json:
        for t in op["inputs"]:
            if t["name"] == origin_idx:
                t["name"] = new_idx
        for t in op["outputs"]:
            if t["name"] == origin_idx:
                t["name"] = new_idx
    # update idx for trainable tensors
    # trace back to the tensor
    for prev_op in layer:
        # get all tensors for this layer
        for t_cnt, t in enumerate(prev_op.input_tensors):
            if t_cnt == 0 and prev_op.params["input_idx"] == origin_idx:
                prev_op.params["input_idx"] = new_idx
                t.graph_idx = new_idx
            elif t_cnt == 1 and "input2_idx" in prev_op.params and prev_op.params["input2_idx"] == origin_idx:
                prev_op.params["input2_idx"] = new_idx
                t.graph_idx = new_idx
            elif t_cnt == 2 and "input3_idx" in prev_op.params and prev_op.params["input3_idx"] == origin_idx:
                prev_op.params["input3_idx"] = new_idx
                t.graph_idx = new_idx
        if prev_op.params["output_idx"] == origin_idx and len(prev_op.output_tensors) > 0:
            prev_op.params["output_idx"] = new_idx
            prev_op.output_tensors[0].graph_idx = new_idx
        if "weight_name" in prev_op.params and prev_op.params["weight_name"] == origin_idx:
            prev_op.params["weight_name"] = new_idx
        if (
            "weight_value" in prev_op.params
            and isinstance(prev_op.params["weight_value"], str)
            and prev_op.params["weight_value"] == origin_idx
        ):
            prev_op.params["weight_value"] = new_idx

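# Illustrative usage (a sketch; "graph_json" and "code_layers" are hypothetical):
# rename tensor "t5" to "t3" everywhere it appears, both in the imported graph json
# and in the already-generated layer objects.
#
#     _updateIdx(graph_json, code_layers, origin_idx="t5", new_idx="t3")
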
# Purpose is to fuse "transpose" -> [abs -> max -> divide -> divide -> cast (int8 bp)]
def _findTransposeMaxDivideDivide(model, transpose_op):
    abs_op = None
    max_op = None
    divide0 = None
    divide1 = None
    cast_op = None

    abs_op = _findNextOpTakeInputName(model, transpose_op["outputs"][0]["name"])
    if not abs_op or abs_op["type"] != "abs":
        return False, {}
    max_op = _findNextOpTakeInputName(model, abs_op["outputs"][0]["name"])
    if not max_op or max_op["type"] != "max":
        return False, {}
    divide0 = _findNextOpTakeInputName(model, max_op["outputs"][0]["name"])
    if not divide0 or divide0["type"] != "divide":
        return False, {}
    divide1 = _findNextOpTakeInputName(model, divide0["outputs"][0]["name"])
    if not divide1 or divide1["type"] != "divide":
        return False, {}
    cast_op = _findNextOpTakeInputName(model, divide1["outputs"][0]["name"])
    if not cast_op or cast_op["type"] != "cast":
        return False, {}

    return True, {"abs": abs_op, "max": max_op, "divide_0": divide0, "divide_1": divide1, "cast": cast_op}

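# Illustrative usage (a sketch; names are hypothetical): detect the quantize chain
# emitted after a transpose so it can be fused away for int8 backpropagation.
#
#     found, ops = _findTransposeMaxDivideDivide(model_ops, transpose_op)
#     if found:
#         # ops["abs"], ops["max"], ops["divide_0"], ops["divide_1"], ops["cast"]
#         pass
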
# Purpose is to fuse for int8 bp
#               -> (cast1) -> multiply
# transpose ->                           -> divide -> cast
#               -> abs -> max -> (cast2)
def _findTransposeMultiplyAbsMaxDivide(model, transpose_op):
    multiply_op = None
    cast_1 = None
    abs_op = None
    max_op = None
    cast_2 = None
    divide_op = None
    cast_op = None

    ops = _findNextOpListTakeInputName(model, transpose_op["outputs"][0]["name"])

    if len(ops) == 2:
        if ops[0]["type"] == "abs" and ops[1]["type"] in ["multiply", "cast"]:
            abs_op = ops[0]
            # cast1
            if ops[1]["type"] == "cast":
                cast_1 = ops[1]
                multiply_op = _findNextOpTakeInputName(model, cast_1["outputs"][0]["name"])
            else:
                multiply_op = ops[1]
        else:
            abs_op = ops[1]
            if ops[0]["type"] == "cast":
                cast_1 = ops[0]
                multiply_op = _findNextOpTakeInputName(model, cast_1["outputs"][0]["name"])
            else:
                multiply_op = ops[0]
    else:
        return False, {}

    if abs_op["type"] == "abs":
        # max
        max_op = _findNextOpTakeInputName(model, abs_op["outputs"][0]["name"])
        if not max_op or max_op["type"] != "max":
            return False, {}
        next_of_max = _findNextOpTakeInputName(model, max_op["outputs"][0]["name"])
        # -> (cast2) -> divide or divide
        if next_of_max["type"] == "cast":
            cast_2 = next_of_max
            divide_op = _findNextOpTakeInputName(model, cast_2["outputs"][0]["name"])
        else:
            divide_op = _findNextOpTakeInputName(model, max_op["outputs"][0]["name"])
        if not divide_op or divide_op["type"] != "divide":
            return False, {}
        # -> cast
        cast_op = _findNextOpTakeInputName(model, divide_op["outputs"][0]["name"])
        if not cast_op or cast_op["type"] != "cast":
            return False, {}
    if multiply_op["type"] != "multiply":
        return False, {}

    op_dict = {
        "multiply_op": multiply_op,
        "abs": abs_op,
        "max": max_op,
        "divide": divide_op,
        "cast": cast_op,
    }
    if cast_1:
        op_dict["cast_1"] = cast_1
    if cast_2:
        op_dict["cast_2"] = cast_2

    return True, op_dict

# Find the following for int8 bp
#             -> (cast1) -> multiply
# from op ->                           -> divide -> cast
#             -> abs -> max -> (cast2)
def _findMultiplyAbsMaxDivide(model, abs_op):
    multiply_op = None
    max_op = None
    divide_op = None
    cast_op = None
    cast_1 = None
    cast_2 = None

    _, from_op = find_previous_link_op(model, abs_op)
    ops = _findNextOpListTakeInputName(model, from_op["outputs"][0]["name"])

    if len(ops) == 2:
        if ops[0]["type"] == "abs" and ops[1]["type"] in ["multiply", "cast"]:
            abs_op = ops[0]
            # cast1
            if ops[1]["type"] == "cast":
                cast_1 = ops[1]
                multiply_op = _findNextOpTakeInputName(model, cast_1["outputs"][0]["name"])
            else:
                multiply_op = ops[1]
        else:
            abs_op = ops[1]
            if ops[0]["type"] == "cast":
                cast_1 = ops[0]
                multiply_op = _findNextOpTakeInputName(model, cast_1["outputs"][0]["name"])
            else:
                multiply_op = ops[0]
    else:
        return False, {}

    if abs_op["type"] == "abs":
        # max
        max_op = _findNextOpTakeInputName(model, abs_op["outputs"][0]["name"])
        if not max_op or max_op["type"] != "max":
            return False, {}
        next_of_max = _findNextOpTakeInputName(model, max_op["outputs"][0]["name"])
        # -> (cast2) -> divide or divide
        if next_of_max["type"] == "cast":
            cast_2 = next_of_max
            divide_op = _findNextOpTakeInputName(model, cast_2["outputs"][0]["name"])
        else:
            divide_op = _findNextOpTakeInputName(model, max_op["outputs"][0]["name"])
        if not divide_op or divide_op["type"] != "divide":
            return False, {}
        # -> cast
        cast_op = _findNextOpTakeInputName(model, divide_op["outputs"][0]["name"])
        if not cast_op or cast_op["type"] != "cast":
            return False, {}
    if multiply_op["type"] != "multiply":
        return False, {}

    op_dict = {"multiply_op": multiply_op, "abs": abs_op, "max": max_op, "divide": divide_op, "cast": cast_op}
    if cast_1:
        op_dict["cast_1"] = cast_1
    if cast_2:
        op_dict["cast_2"] = cast_2

    return True, op_dict

# find [abs -> max -> divide -> divide (int8 bp)]
def _findAbsMaxDivideDivide(model, abs_op):
    max_op = None
    divide0 = None
    divide1 = None
    cast_op = None

    max_op = _findNextOpTakeInputName(model, abs_op["outputs"][0]["name"])
    if not max_op or max_op["type"] != "max":
        return False, {}
    divide0 = _findNextOpTakeInputName(model, max_op["outputs"][0]["name"])
    if not divide0 or divide0["type"] != "divide":
        return False, {}
    divide1 = _findNextOpTakeInputName(model, divide0["outputs"][0]["name"])
    if not divide1 or divide1["type"] != "divide":
        return False, {}
    cast_op = _findNextOpTakeInputName(model, divide1["outputs"][0]["name"])
    if not cast_op or cast_op["type"] != "cast":
        return False, {}

    return True, {"max": max_op, "divide_0": divide0, "divide_1": divide1, "cast": cast_op}

class trainableTensor:
    def __init__(self, name=None, access_cnt=None) -> None:
        self.name = name
        self.access_cnt = access_cnt
        self.allocated_name = None
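

# Illustrative usage (a sketch; the tensor name "conv1_weight" is hypothetical):
# _accessTrainable bumps the access count of a tracked trainable tensor, or starts
# tracking it with a count of 0 if it has not been seen before.
#
#     table = []
#     _accessTrainable(table, "conv1_weight")  # first sighting: appended with cnt 0
#     _accessTrainable(table, "conv1_weight")  # second sighting: cnt becomes 1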