tinyengine/code_generator/FusionUtil.py
from .constant import op_name_translation
from .GraphReorder import find_following_link_op, find_previous_link_op
__all__ = [
"_accessTrainable",
"_findKeyinTensors",
"_removeLayers",
"_findTargetWeightforGconv",
"_findBinMaskPattern",
"_findBinMaskPatternint8",
"_castisFusable",
"_castisFusable_for_gconv",
"_castSliceisFusable",
"_fileTileRepAsWeights",
"_updateOutputDtype",
"_findConv2dwithScaleName",
"_updateIdxParameter",
"_updateIdx",
"_findPartialConv",
"_findWhereTensorFrom",
"_findWhereTheTensorIdxFrom",
"_findTransposeMaxDivideDivide",
"_findNextOpTakeInputName",
"_findAbsMaxDivideDivide",
"_findTransposeMultiplyAbsMaxDivide",
"_findMultiplyAbsMaxDivide",
]
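# Most helpers below take `layers`/`model` as a list of graph-op dicts. An
# illustrative (hypothetical) entry, based on the fields this file accesses,
# looks roughly like:
#   {"type": "nn.conv2d",
#    "attrs": {"groups": 1},
#    "inputs": [{"name": "t0", "dtype": "int8"}, ...],
#    "outputs": [{"name": "t1", "dtype": "int32",
#                 "meta": {"children": 1, "output_info": "..."}}]}
# A few helpers (_findWhereTensorFrom, _findPartialConv, _updateIdx, ...) instead
# take code-generator layer objects that expose `params` (e.g. "input_idx",
# "output_idx", "weight_name") and `input_tensors`/`output_tensors`.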
def _findWhereTensorFrom(layers, tensor_idx):
for op in layers:
if op.params["output_idx"] == tensor_idx:
return op
return None
def _findPartialConv(layers, weight_name):
for op in layers:
if "weight_name" in op.params and op.params["input_dtype"] == "int8":
if op.params["weight_name"] == weight_name:
return op
    raise AssertionError(f"no int8 partial conv layer found with weight_name {weight_name}")
def _findKeyinTensors(inputs, key):
for _, inp in enumerate(inputs):
if key in inp["name"]:
return inp
    raise KeyError(f"no input tensor whose name contains {key!r}")
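# record an access to a trainable tensor: bump its counter if `name` is already in
# `table`, otherwise register a new trainableTensor (defined at the bottom of this file)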
def _accessTrainable(table, name):
in_table = False
for t in table:
        if t.name == name:
in_table = True
t.access_cnt += 1
break
if not in_table:
table.append(trainableTensor(name, 0))
def _removeLayers(layers, target_dict):
for k in target_dict:
layers.remove(target_dict[k])
return layers
# find gconv -> reshape -> sum -> transpose (weight_idx)
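# e.g. (hypothetical variable names)
#   weight_ref = _findTargetWeightforGconv(model_json, gconv_op["outputs"][0]["name"])
# returns the "output_info" string of the tensor that serves as the target weight
# (prefixed with "v" when needed), or None if the pattern is not found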
def _findTargetWeightforGconv(layers, output_idx):
    children = 0
    reshape_op = None
for op in layers:
for input_tensor in op["inputs"]:
if input_tensor["name"] == output_idx:
children += 1
reshape_op = op
    # see if the only consumer is a reshape op
if not (children == 1 and reshape_op["type"] == "reshape"):
return None
# reshape -> sum
sum_op = None
children = 0
for op in layers:
for input_tensor in op["inputs"]:
if input_tensor["name"] == reshape_op["outputs"][0]["name"]:
children += 1
sum_op = op
if not (children == 1 and sum_op["type"] == "sum"):
return None
# sum -> transpose
transpose_op = None
children = 0
for op in layers:
for input_tensor in op["inputs"]:
if input_tensor["name"] == sum_op["outputs"][0]["name"]:
children += 1
transpose_op = op
if not (children == 1 and transpose_op["type"] == "transpose"):
return None
if transpose_op["outputs"][0]["meta"]["children"] == 0:
if transpose_op["outputs"][0]["meta"]["output_info"][0] == "v":
return transpose_op["outputs"][0]["meta"]["output_info"]
return "v" + transpose_op["outputs"][0]["meta"]["output_info"]
else:
        # for int8 bp we may need abs -> max -> divide (127) -> divide (output activation) -> cast for clipping
last_op = transpose_op
op_start = _findNextOpTakeInputName(layers, last_op["outputs"][0]["name"])
# if abs["type"] == "abs":
# op_start = _findNextOpTakeInputName(layers, abs["outputs"][0]["name"])
# if op0["type"] == "max":
# op1 = _findNextOpTakeInputName(layers, op0["outputs"][0]["name"])
# if op1["type"] == "divide":
# op2 = _findNextOpTakeInputName(layers, op1["outputs"][0]["name"])
# if op2["type"] == "divide":
# cast = _findNextOpTakeInputName(layers, op2["outputs"][0]["name"])
# if cast["type"] == "cast":
# if cast["outputs"][0]["meta"]["output_info"][0] == "v":
# return cast["outputs"][0]["meta"]["output_info"]
# return "v" + cast["outputs"][0]["meta"]["output_info"]
# else:
# last_op = cast
# for int8 bp we may have the following pattern
# -> (cast1) -> multiply
# from op -> -> divide -> cast
# -> abs -> max -> (cast2)
if op_start["type"] == "multiply":
op0 = _findNextOpTakeInputName(layers, op_start["outputs"][0]["name"])
if op0["type"] == "divide":
cast = _findNextOpTakeInputName(layers, op0["outputs"][0]["name"])
if cast["type"] == "cast":
if cast["outputs"][0]["meta"]["output_info"][0] == "v":
return cast["outputs"][0]["meta"]["output_info"]
return "v" + cast["outputs"][0]["meta"]["output_info"]
else:
last_op = cast
elif op_start["type"] == "cast":
op_0 = _findNextOpTakeInputName(layers, op_start["outputs"][0]["name"])
if op_0["type"] == "multiply":
op1 = _findNextOpTakeInputName(layers, op_0["outputs"][0]["name"])
if op1["type"] == "divide":
cast = _findNextOpTakeInputName(layers, op1["outputs"][0]["name"])
if cast["type"] == "cast":
if cast["outputs"][0]["meta"]["output_info"][0] == "v":
return cast["outputs"][0]["meta"]["output_info"]
return "v" + cast["outputs"][0]["meta"]["output_info"]
else:
last_op = cast
        elif op_start["type"] == "abs":
            op0 = _findNextOpTakeInputName(layers, op_start["outputs"][0]["name"])
            if op0["type"] == "max":
                op1 = _findNextOpTakeInputName(layers, op0["outputs"][0]["name"])
                # abs -> max -> divide -> cast
                if op1["type"] == "divide":
                    cast = _findNextOpTakeInputName(layers, op1["outputs"][0]["name"])
                    if cast["type"] == "cast":
                        if cast["outputs"][0]["meta"]["output_info"][0] == "v":
                            return cast["outputs"][0]["meta"]["output_info"]
                        return "v" + cast["outputs"][0]["meta"]["output_info"]
                    else:
                        last_op = cast
                # abs -> max -> cast -> divide -> cast
                elif op1["type"] == "cast":
                    op2 = _findNextOpTakeInputName(layers, op1["outputs"][0]["name"])
                    if op2["type"] == "divide":
                        cast = _findNextOpTakeInputName(layers, op2["outputs"][0]["name"])
                        if cast["type"] == "cast":
                            if cast["outputs"][0]["meta"]["output_info"][0] == "v":
                                return cast["outputs"][0]["meta"]["output_info"]
                            return "v" + cast["outputs"][0]["meta"]["output_info"]
                        else:
                            last_op = cast
# we may have slice op
slice_op = None
for op in layers:
for input_tensor in op["inputs"]:
if input_tensor["name"] == last_op["outputs"][0]["name"]:
children += 1
slice_op = op
assert slice_op["outputs"][0]["meta"]["children"] == 0
if slice_op["outputs"][0]["meta"]["output_info"][0] == "v":
return slice_op["outputs"][0]["meta"]["output_info"]
return "v" + slice_op["outputs"][0]["meta"]["output_info"]
def _findNextOpTakeInputName(layers, input_name):
for op in layers:
for input_tensor in op["inputs"]:
if input_tensor["name"] == input_name:
return op
return None
def _findNextOpListTakeInputName(layers, input_name):
ops = []
for op in layers:
for input_tensor in op["inputs"]:
if input_tensor["name"] == input_name:
ops.append(op)
return ops
# check if we need to have binary mask for this conv2d
# conv2d (int32) -> cast -> greater/less -> multiply -> binary mask
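# e.g. (hypothetical variable names)
#   matched, ops = _findBinMaskPattern(model_json, conv_op["outputs"][0]["name"])
# ops holds the "cast", "greater", "less" and "multiply" ops to fuse when matched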
def _findBinMaskPattern(layers, output_idx):
children = 0
f_ops = []
for op in layers:
for input_tensor in op["inputs"]:
if input_tensor["name"] == output_idx:
children += 1
f_ops.append(op)
    # see if we have a cast op
cast_op = greater_op = less_op = multiply_op = where_op = None
if children >= 2:
for op in f_ops:
if op["type"] == "cast":
cast_op = op
break
    if cast_op is None:
        return False, {}
# find greater/less op
for op in layers:
for input_tensor in op["inputs"]:
if input_tensor["name"] == cast_op["outputs"][0]["name"]:
if op["type"] in {"greater", "greater_equal"}:
greater_op = op
elif op["type"] in {"less", "less_equal"}:
less_op = op
if None in [greater_op, less_op]:
return False, {}
# find multiply
for op in layers:
if op["type"] == "multiply":
if op["inputs"][0]["name"] in [greater_op["outputs"][0]["name"], less_op["outputs"][0]["name"]] and op[
"inputs"
][1]["name"] in [
greater_op["outputs"][0]["name"],
less_op["outputs"][0]["name"],
]:
multiply_op = op
break
    if multiply_op is None:
        return False, {}
# find where
for op in layers:
if op["type"] == "where":
if op["inputs"][0]["name"] in multiply_op["outputs"][0]["name"]:
where_op = op
break
if where_op is None:
return False, {}
op_dict = {
"cast": cast_op,
"greater": greater_op,
"less": less_op,
"multiply": multiply_op,
# "where": where_op,
# "zeros": zeros_op,
# "ones": ones_op,
}
return True, op_dict
# this is for int8
# check if we need to have binary mask for this conv2d
# conv2d (int32) -> greater/less -> multiply -> binary mask
def _findBinMaskPatternint8(layers, output_idx):
children = 0
f_ops = []
for op in layers:
for input_tensor in op["inputs"]:
if input_tensor["name"] == output_idx:
children += 1
f_ops.append(op)
    # see if we have greater/less ops
greater_op = less_op = multiply_op = where_op = None
if children >= 2:
for op in f_ops:
if op["type"] in {"greater", "greater_equal"}:
greater_op = op
elif op["type"] in {"less", "less_equal"}:
less_op = op
if None in [greater_op, less_op]:
return False, {}
# find multiply
for op in layers:
if op["type"] == "multiply":
if op["inputs"][0]["name"] in [greater_op["outputs"][0]["name"], less_op["outputs"][0]["name"]] and op[
"inputs"
][1]["name"] in [
greater_op["outputs"][0]["name"],
less_op["outputs"][0]["name"],
]:
multiply_op = op
break
    if multiply_op is None:
        return False, {}
# find where
for op in layers:
if op["type"] == "where":
if op["inputs"][0]["name"] in multiply_op["outputs"][0]["name"]:
where_op = op
break
if where_op is None:
return False, {}
# packing return ops
op_dict = {
"greater": greater_op,
"less": less_op,
"multiply": multiply_op,
}
return True, op_dict
# check the graph and see if we can fuse the cast into transpose conv2d
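# e.g. (hypothetical variable names)
#   fusable, tconv_op = _castisFusable(model_json, cast_op)
# True only when the int8 -> float32 cast output is consumed solely as the weight
# (second input) of a single nn.conv2d_transpose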
def _castisFusable(layers, cast_op):
# find the output
fp_output_idx = cast_op["outputs"][0]["name"]
children = 0
following_op = None
for op in layers:
for input_tensor in op["inputs"]:
if input_tensor["name"] == fp_output_idx:
children += 1
following_op = op
    # check that the graph matches the pattern
# int8 -> fp32
# only used as the weight of some transpose conv
if (
cast_op["inputs"][0]["dtype"] == "int8"
and cast_op["outputs"][0]["dtype"] == "float32"
and children == 1
and following_op["type"] == "nn.conv2d_transpose"
and following_op["inputs"][1]["name"] == fp_output_idx
):
return True, following_op
return False, following_op
# check the graph and see if we can fuse the cast into group conv2d
# cast -> reshape -> group_conv
def _castisFusable_for_gconv(layers, cast_op):
# find the output
idx, f_op_1 = find_following_link_op(layers, cast_op)
if f_op_1["type"] != "reshape":
return False, None
idx, f_op_2 = find_following_link_op(layers, f_op_1)
if (
f_op_2["type"] == "nn.conv2d"
and cast_op["inputs"][0]["dtype"] == "int8"
and cast_op["outputs"][0]["dtype"] == "float32"
and f_op_2["inputs"][0]["name"] == f_op_1["outputs"][0]["name"]
):
# remove reshape
# layers.remove(f_op_1)
return True, f_op_2
return False, f_op_2
# check the graph and see if we can fuse the cast into slice + group conv2d
# Note: It seems we don't need this
def _castSliceisFusable(layers, cast_op):
# find the output
fp_output_idx = cast_op["outputs"][0]["name"]
children = 0
slice_op = reshape_op = g_conv_op = None
for op in layers:
for input_tensor in op["inputs"]:
if input_tensor["name"] == fp_output_idx:
children += 1
slice_op = op
    # check that the graph matches the pattern
    # cast -> strided_slice -> reshape -> group conv2d
# find slice
if not (children == 1 and slice_op["type"] == "strided_slice"):
return False, slice_op, reshape_op, g_conv_op
# find reshape conv
children = 0
for op in layers:
for input_tensor in op["inputs"]:
if input_tensor["name"] == slice_op["outputs"][0]["name"] and op["type"] == "reshape":
children += 1
reshape_op = op
if not (children == 1 and reshape_op["type"] == "reshape"):
return False, slice_op, reshape_op, g_conv_op
# find g conv
children = 0
for op in layers:
if op["type"] == "nn.conv2d" and op["inputs"][0]["name"] == reshape_op["outputs"][0]["name"]:
children += 1
g_conv_op = op
if not (
children == 1
and g_conv_op["attrs"]["groups"] > 1
and cast_op["inputs"][0]["dtype"] == "int8"
and cast_op["outputs"][0]["dtype"] == "float32"
):
return False, slice_op, reshape_op, g_conv_op
# only used as the weight of some transpose conv
return True, slice_op, reshape_op, g_conv_op
# check if we need to fuse ops for tile
# ########## tile -> reshape -> conv2d (which takes it as weights)
# fusion | ------------------------|
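# e.g. (hypothetical variable names)
#   fusable, ops = _fileTileRepAsWeights(model_json, tile_op)
# ops maps "tile"/"reshape"/"conv2d" to the matched ops when the tile output is only
# used (via a reshape) as the weight of a conv2d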
def _fileTileRepAsWeights(layers, tile_op):
reshape_op = conv2d_op = None
children = 0
for op in layers:
for input_tensor in op["inputs"]:
if input_tensor["name"] == tile_op["outputs"][0]["name"]:
children += 1
assert children == 1, "output of tile is used by other layers"
# find reshape op
for op in layers:
for input_tensor in op["inputs"]:
if input_tensor["name"] == tile_op["outputs"][0]["name"]:
if op["type"] == "reshape":
reshape_op = op
    if reshape_op is None:
        return False, {}
# find conv2d
for op in layers:
if op["type"] == "nn.conv2d":
if op["inputs"][1]["name"] == reshape_op["outputs"][0]["name"]:
conv2d_op = op
break
    if conv2d_op is None:
        return False, {}
op_dict = {
"tile": tile_op,
"reshape": reshape_op,
"conv2d": conv2d_op,
}
return True, op_dict
def _findWhereTheTensorIdxFrom(model, tensor_idx):
for _, op in enumerate(model):
if op["outputs"][0]["name"] == tensor_idx:
return op
return None
def _updateOutputDtype(layer, origin_idx, type):
for _, prev_op in enumerate(layer):
if prev_op.params["output_idx"] == origin_idx:
prev_op.params["output_dtype"] = type
prev_op.output_tensors[0].dtype = type
def _findConv2dwithScaleName(model, layers, scale_idx):
for _, op in enumerate(model):
if op["type"] == "nn.mcuconv2d":
if _findKeyinTensors(op["inputs"], "scale")["name"] == scale_idx:
# find the layer
for layer in layers:
if layer.params["op"] in op_name_translation["nn.mcuconv2d"]:
if layer.params["weight_name"] == _findKeyinTensors(op["inputs"], "weight")["name"]:
return layer
    raise AssertionError(f"no mcuconv2d layer found using scale tensor {scale_idx}")
def _updateIdxParameter(json, origin_idx, new_idx):
# update input json
for _, op in enumerate(json):
for t in op["inputs"]:
if t["name"] == new_idx:
t["name"] = origin_idx
    # commented out since we should not need to trace back to the tensor
# for _, prev_op in enumerate(layer):
# # get all tensors for this layer
# for t_cnt, t in enumerate(prev_op.input_tensors):
# if t_cnt == 0 and prev_op.params["input_idx"] == new_idx:
# prev_op.params["input_idx"] = origin_idx
# t.graph_idx = origin_idx
# elif t_cnt == 1 and "input2_idx" in prev_op.params and prev_op.params["input2_idx"] == new_idx:
# prev_op.params["input2_idx"] = origin_idx
# t.graph_idx = origin_idx
# elif t_cnt == 2 and "input3_idx" in prev_op.params and prev_op.params["input3_idx"] == new_idx:
# prev_op.params["input3_idx"] = origin_idx
# t.graph_idx = origin_idx
# if prev_op.params["output_idx"] == new_idx:
# prev_op.params["output_idx"] = origin_idx
# prev_op.output_tensors[0].graph_idx = origin_idx
# if "weight_name" in prev_op.params and prev_op.params["weight_name"] == new_idx:
# prev_op.params["weight_name"] = origin_idx
# if "weight_value" in prev_op.params and prev_op.params["weight_value"] == new_idx:
# prev_op.params["weight_value"] = origin_idx
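# rename a tensor index everywhere: in the inputs/outputs of the graph JSON ops and
# in the params / tensors of the already-generated layer objects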
def _updateIdx(json, layer, origin_idx, new_idx):
# update input json
for _, op in enumerate(json):
for t in op["inputs"]:
if t["name"] == origin_idx:
t["name"] = new_idx
for t in op["outputs"]:
if t["name"] == origin_idx:
t["name"] = new_idx
# update idx for trainable tensors
# traceback to the tensor
for _, prev_op in enumerate(layer):
# get all tensors for this layer
for t_cnt, t in enumerate(prev_op.input_tensors):
if t_cnt == 0 and prev_op.params["input_idx"] == origin_idx:
prev_op.params["input_idx"] = new_idx
t.graph_idx = new_idx
elif t_cnt == 1 and "input2_idx" in prev_op.params and prev_op.params["input2_idx"] == origin_idx:
prev_op.params["input2_idx"] = new_idx
t.graph_idx = new_idx
elif t_cnt == 2 and "input3_idx" in prev_op.params and prev_op.params["input3_idx"] == origin_idx:
prev_op.params["input3_idx"] = new_idx
t.graph_idx = new_idx
if prev_op.params["output_idx"] == origin_idx and len(prev_op.output_tensors) > 0:
prev_op.params["output_idx"] = new_idx
prev_op.output_tensors[0].graph_idx = new_idx
if "weight_name" in prev_op.params and prev_op.params["weight_name"] == origin_idx:
prev_op.params["weight_name"] = new_idx
if (
"weight_value" in prev_op.params
and isinstance(prev_op.params["weight_value"], str)
and prev_op.params["weight_value"] == origin_idx
):
prev_op.params["weight_value"] = new_idx
# Purpose is to fuse "transpose" -> [abs -> max -> divide -> divide -> cast] (int8 bp)
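# e.g. (hypothetical variable names)
#   matched, ops = _findTransposeMaxDivideDivide(model_json, transpose_op)
# ops has keys "abs", "max", "divide_0", "divide_1", "cast" when the chain is found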
def _findTransposeMaxDivideDivide(model, transpose_op):
abs_op = None
max_op = None
divide0 = None
divide1 = None
cast_op = None
abs_op = _findNextOpTakeInputName(model, transpose_op["outputs"][0]["name"])
if not abs_op or abs_op["type"] != "abs":
return False, {}
max_op = _findNextOpTakeInputName(model, abs_op["outputs"][0]["name"])
if not max_op or max_op["type"] != "max":
return False, {}
divide0 = _findNextOpTakeInputName(model, max_op["outputs"][0]["name"])
if not divide0 or divide0["type"] != "divide":
return False, {}
divide1 = _findNextOpTakeInputName(model, divide0["outputs"][0]["name"])
if not divide1 or divide1["type"] != "divide":
return False, {}
cast_op = _findNextOpTakeInputName(model, divide1["outputs"][0]["name"])
if not cast_op or cast_op["type"] != "cast":
return False, {}
return True, {"abs": abs_op, "max": max_op, "divide_0": divide0, "divide_1": divide1, "cast": cast_op}
# Purpose is to fuse for int8 bp
# -> (cast1) -> multiply
# transpose -> -> divide -> cast
# -> abs -> max -> (cast2)
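# returns (True, op_dict) with keys "multiply_op", "abs", "max", "divide", "cast"
# (plus optional "cast_1"/"cast_2") when this fan-out pattern follows the transpose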
def _findTransposeMultiplyAbsMaxDivide(model, transpose_op):
multiply_op = None
cast_1 = None
abs_op = None
max_op = None
cast_2 = None
divide_op = None
cast_op = None
ops = _findNextOpListTakeInputName(model, transpose_op["outputs"][0]["name"])
if len(ops) == 2:
if ops[0]["type"] == "abs" and ops[1]["type"] in ["multiply", "cast"]:
abs_op = ops[0]
# cast1
if ops[1]["type"] == "cast":
cast_1 = ops[1]
multiply_op = _findNextOpTakeInputName(model, cast_1["outputs"][0]["name"])
else:
multiply_op = ops[1]
else:
abs_op = ops[1]
if ops[0]["type"] == "cast":
cast_1 = ops[0]
multiply_op = _findNextOpTakeInputName(model, cast_1["outputs"][0]["name"])
else:
multiply_op = ops[0]
else:
return False, {}
if abs_op["type"] == "abs":
# max
max_op = _findNextOpTakeInputName(model, abs_op["outputs"][0]["name"])
if not max_op or max_op["type"] != "max":
return False, {}
next_of_max = _findNextOpTakeInputName(model, max_op["outputs"][0]["name"])
# -> (cast2) -> divide or divide
if next_of_max["type"] == "cast":
cast_2 = next_of_max
divide_op = _findNextOpTakeInputName(model, cast_2["outputs"][0]["name"])
else:
divide_op = _findNextOpTakeInputName(model, max_op["outputs"][0]["name"])
if not divide_op or divide_op["type"] != "divide":
return False, {}
# -> cast
cast_op = _findNextOpTakeInputName(model, divide_op["outputs"][0]["name"])
if not cast_op or cast_op["type"] != "cast":
return False, {}
    else:
        return False, {}
    if not multiply_op or multiply_op["type"] != "multiply":
return False, {}
    op_dict = {
        "multiply_op": multiply_op,
        "abs": abs_op,
        "max": max_op,
        "divide": divide_op,
        "cast": cast_op,
    }
if cast_1:
op_dict["cast_1"] = cast_1
if cast_2:
op_dict["cast_2"] = cast_2
return True, op_dict
# Find the following for int8 bp
# -> (cast1) -> multiply
# from op -> -> divide -> cast
# -> abs -> max -> (cast2)
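# same matching as _findTransposeMultiplyAbsMaxDivide, but anchored on the abs op:
# walk back to the producing op, then match its two fan-out branches shown above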
def _findMultiplyAbsMaxDivide(model, abs_op):
multiply_op = None
max_op = None
divide_op = None
cast_op = None
cast_1 = None
cast_2 = None
_, from_op = find_previous_link_op(model, abs_op)
ops = _findNextOpListTakeInputName(model, from_op["outputs"][0]["name"])
if len(ops) == 2:
if ops[0]["type"] == "abs" and ops[1]["type"] in ["multiply", "cast"]:
abs_op = ops[0]
# cast1
if ops[1]["type"] == "cast":
cast_1 = ops[1]
multiply_op = _findNextOpTakeInputName(model, cast_1["outputs"][0]["name"])
else:
multiply_op = ops[1]
else:
abs_op = ops[1]
if ops[0]["type"] == "cast":
cast_1 = ops[0]
multiply_op = _findNextOpTakeInputName(model, cast_1["outputs"][0]["name"])
else:
multiply_op = ops[0]
else:
return False, {}
if abs_op["type"] == "abs":
# max
max_op = _findNextOpTakeInputName(model, abs_op["outputs"][0]["name"])
if not max_op or max_op["type"] != "max":
return False, {}
next_of_max = _findNextOpTakeInputName(model, max_op["outputs"][0]["name"])
# -> (cast2) -> divide or divide
if next_of_max["type"] == "cast":
cast_2 = next_of_max
divide_op = _findNextOpTakeInputName(model, cast_2["outputs"][0]["name"])
else:
divide_op = _findNextOpTakeInputName(model, max_op["outputs"][0]["name"])
if not divide_op or divide_op["type"] != "divide":
return False, {}
# -> cast
cast_op = _findNextOpTakeInputName(model, divide_op["outputs"][0]["name"])
if not cast_op or cast_op["type"] != "cast":
return False, {}
    else:
        return False, {}
    if not multiply_op or multiply_op["type"] != "multiply":
return False, {}
op_dict = {"multiply_op": multiply_op, "abs": abs_op, "max": max_op, "divide": divide_op, "cast": cast_op}
if cast_1:
op_dict["cast_1"] = cast_1
if cast_2:
op_dict["cast_2"] = cast_2
return True, op_dict
# find [abs -> max -> divide -> divide -> cast] (int8 bp)
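# e.g. (hypothetical variable names)
#   matched, ops = _findAbsMaxDivideDivide(model_json, abs_op)
# ops has keys "max", "divide_0", "divide_1", "cast" when the chain follows the abs op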
def _findAbsMaxDivideDivide(model, abs_op):
max_op = None
divide0 = None
divide1 = None
cast_op = None
max_op = _findNextOpTakeInputName(model, abs_op["outputs"][0]["name"])
if not max_op or max_op["type"] != "max":
return False, {}
divide0 = _findNextOpTakeInputName(model, max_op["outputs"][0]["name"])
if not divide0 or divide0["type"] != "divide":
return False, {}
divide1 = _findNextOpTakeInputName(model, divide0["outputs"][0]["name"])
if not divide1 or divide1["type"] != "divide":
return False, {}
cast_op = _findNextOpTakeInputName(model, divide1["outputs"][0]["name"])
if not cast_op or cast_op["type"] != "cast":
return False, {}
return True, {"max": max_op, "divide_0": divide0, "divide_1": divide1, "cast": cast_op}
class trainableTensor:
def __init__(self, name=None, access_cnt=None) -> None:
self.name = name
self.access_cnt = access_cnt
self.allocated_name = None