lzhani 2021-05-17 22:28:59 +08:00
Parent 28e982e3ac
Commit 4ca17507a1
59 changed files: 6026 additions and 2 deletions

@@ -1,7 +1,7 @@
from Latencypredictor.predictors.predict_by_kernel import*
from prediction.predictors.predict_by_kernel import*
from kerneldetection.kernel_detector import*
import pickle,sys,os
import argparse

demo_with_converter.py (new file, 34 lines)

@@ -0,0 +1,34 @@
from ir_converters.model_to_grapher import*
from prediction.predictors.predict_by_kernel import*
from kerneldetection.kernel_detector import*
import pickle,sys,os
import argparse
parser = argparse.ArgumentParser("predict model latency on device")
parser.add_argument('--hardware', type=str, default='cpu')
parser.add_argument('--mf', type=str, default='alexnet')
parser.add_argument('--input_models', type=str, required=True, help='Path to input models. Either json or pb.')
parser.add_argument('--save_dir', type=str, default='results', help='Directory to save the kernel splitting results.')
parser.add_argument('--rule_dir', type=str, default='data/fusionrules', help='Directory containing the fusion rule files.')
args=parser.parse_args()
hardware=args.hardware
input_models=args.input_models
for hardware in ['cpu','gpu','gpu1','vpu']:
print('current hardware',hardware)
if hardware=='gpu1':
hw='gpu'
else:
hw=hardware
latency_file="data/model_latency/"+hardware+"/"+args.mf+"-log.csv"
kernel_types,kernel_with_features=split_model_into_kernels(input_models,hw,args.save_dir)
rmse,rmspe,error,acc5,acc10=main_kernel_predict(hardware,kernel_with_features,latency_file)

@@ -0,0 +1 @@
from .utils import model_to_grapher, model_file_to_grapher

@@ -0,0 +1 @@
from .frozenpb_converter import FrozenPbConverter

@@ -0,0 +1,38 @@
import sys
import os
import numpy as np
from .grapher_tool import Grapher
from .frozenpb_parser import FrozenPbParser
from .shape_inference import ShapeInference
class FrozenPbConverter:
def __init__(self, file_name):
self.graph = Grapher()
# Parse pb to graph
parser = FrozenPbParser(file_name)
parser.parse_graph(self.graph)
        # Change split to a more friendly naming scheme
parser.fix_split_naming(self.graph)
# Get the static shape
ShapeInference(self.graph)
        # Strip constant and identity nodes
parser.strip_useless_nodes(self.graph)
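    # Convert numpy arrays and bytes stored in node attributes to
    # JSON-serializable Python types in place, then return the graph dict.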
def get_flatten_grapher(self):
def np_encoder(d):
for k, v in d.items():
if isinstance(v, dict):
np_encoder(v)
else:
if isinstance(v, np.ndarray):
d[k] = v.tolist()
if isinstance(v, (bytes, bytearray)):
d[k] = v.decode('utf-8')
np_encoder(self.graph.get_graph())
return self.graph.get_graph()

@@ -0,0 +1,181 @@
from .protobuf_helper import ProtobufHelper
from .shape_fetcher import ShapeFetcher
from tensorflow import io
from tensorflow import gfile
from google.protobuf import text_format
import tensorflow as tf
import copy
import re
import logging
logging = logging.getLogger(__name__)
class FrozenPbParser:
def __init__(self, pb_file):
        with open(pb_file, 'rb') as f:
            graph = tf.GraphDef()
            graph.ParseFromString(f.read())
self.graph = graph
@staticmethod
def strip_useless_nodes(graph_helper):
stripped_nodes_type = ['Const', 'Identity']
stripped_nodes_keywords = ['/weight', '/weight/read',
'/ReadVariableOp',
'/kernel', '/gamma',
'/beta', '/moving_mean',
'/moving_variance',
'/bias', '/reduction_indices',
'/shape', '/split_dim', '/axis']
graph = graph_helper.get_graph()
removed_node = []
for key, value in graph.items():
if 'attr' in value.keys():
if value['attr']['type'] in stripped_nodes_type:
for kw in stripped_nodes_keywords:
if kw in key:
removed_node.append(key)
break
for key in removed_node:
del graph[key]
graph_helper.refresh()
graph_helper.refresh()
graph_helper.refresh()
graph_helper.refresh()
graph_helper.refresh()
@staticmethod
def fix_split_naming(graph_helper):
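        # TensorFlow names the outputs of a Split op "<name>:1", "<name>:2", ...
        # Fold those per-output child nodes back into the main Split node so the
        # graph keeps a single Split op that owns all outbound edges.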
graph = graph_helper.get_graph()
graph_nodes = copy.deepcopy(list(graph.keys()))
remove_node_list = []
for graph_node in graph_nodes:
if graph_node in graph.keys():
if 'attr' in graph[graph_node].keys():
if graph[graph_node]['attr']['type'] == 'Split' and ':' not in graph_node:
logging.info('Find split main node %s.' % graph_node)
split_node_name = graph_node
split_node_child = []
for node_name in graph.keys():
idx = re.findall(
r'%s:(\d+)' %
split_node_name, node_name)
if len(idx) > 0:
idx = int(idx[0])
logging.info(
'Find split child node %s.' % node_name)
graph[graph_node]['outbounds'] += graph[node_name]['outbounds']
graph[graph[node_name]['outbounds']
[0]]['inbounds'] += [graph_node]
remove_node_list.append(node_name)
for node in remove_node_list:
del graph[node]
graph_helper.refresh()
graph_helper.refresh()
graph_helper.refresh()
graph_helper.refresh()
graph_helper.refresh()
def fetch_attr_to_dict(self, node, shape_fetcher):
attr_dict = {}
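        # In a frozen pb, some op parameters (split_dim, axis, reshape target
        # shape, reduction indices) are stored in separate Const input nodes
        # rather than in node.attr; attr_as_node records, per op type, where to
        # find that Const node and how to decode its tensor value.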
attr_as_node = {
'Split': {
'node_name': lambda x: x + '/split_dim',
'attr_name': 'split_dim',
'node_value': lambda x: ProtobufHelper.get_tensor_value(x)
},
'Mean': {
'node_name': lambda x: x + '/reduction_indices',
'attr_name': 'reduction_indices',
'node_value': lambda x: ProtobufHelper.pkg42dec(x.tensor_content)
},
'Reshape': {
'node_name': lambda x: x + '/shape',
'attr_name': 'shape',
'node_value': lambda x: ProtobufHelper.pkg42dec(x.tensor_content)
},
'Concat': {
'node_name': lambda x: x + '/axis',
'attr_name': 'axis',
'node_value': lambda x: ProtobufHelper.get_tensor_value(x)
},
'ConcatV2': {
'node_name': lambda x: x + '/axis',
'attr_name': 'axis',
'node_value': lambda x: ProtobufHelper.get_tensor_value(x)
},
'Const': {
'node_name': lambda x: x,
'attr_name': 'constant',
'node_value': lambda x: ProtobufHelper.get_tensor_value(x)
}
}
list_i_nodes = ['dilations', 'strides', 'ksize']
str_nodes = ['padding', 'data_format']
for attr_name in node.attr.keys():
if attr_name in list_i_nodes:
attr_dict[attr_name] = [
int(a) for a in node.attr[attr_name].list.i]
continue
if attr_name in str_nodes:
attr_dict[attr_name] = node.attr[attr_name].s
continue
if attr_name == 'value':
shape = []
for dim in node.attr[attr_name].tensor.tensor_shape.dim:
shape.append(dim.size)
attr_dict['tensor_shape'] = list(map(int, shape))
continue
if attr_name == 'shape':
shape = []
for dim in node.attr[attr_name].shape.dim:
shape.append(dim.size)
attr_dict['shape'] = list(map(int, shape))
continue
if node.op in attr_as_node.keys():
for target_node in self.graph.node:
if target_node.name == attr_as_node[node.op]['node_name'](
node.name):
for attr_name in target_node.attr.keys():
if attr_name == 'value' and 'weight' not in node.name and 'BatchNorm' not in node.name and 'kernel' not in node.name:
# print(target_node.attr[attr_name].tensor)
attr_dict[attr_as_node[node.op]['attr_name']] = \
attr_as_node[node.op]['node_value'](target_node.attr[attr_name].tensor)
# # attr_dict['weight_shape'] = self.find_weights_root(node, shape_fetcher)
# print(node.name, attr_dict)
# print('------------------')
return attr_dict
def parse_graph(
self,
graph_helper,
required_shape=False,
insert_node=False):
if required_shape:
shape_fetcher = ShapeFetcher(self.graph)
for node in self.graph.node:
graph_helper.node(str(node.name), list(map(str, node.input)))
graph_helper.set_node_attr(
node.name, {
'name': str(node.name),
'type': str(node.op),
'output_shape': shape_fetcher.shape_results[node.name + ':0'] if required_shape else [],
'attr': self.fetch_attr_to_dict(node, shape_fetcher if required_shape else None),
# 'node': node if insert_node else None
})
# return shape_fetcher

@@ -0,0 +1,255 @@
import numpy as np
import json
class NumpyEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, np.ndarray):
return obj.tolist()
if isinstance(obj, (bytes, bytearray)):
return obj.decode('utf-8')
return json.JSONEncoder.default(self, obj)
class Grapher:
def __init__(self, filename=None):
if filename is not None:
self.graph = json.load(open(filename, 'r'))
else:
self.graph = {}
def node(self, name, inbound_nodes=None):
self.graph[name] = {}
if inbound_nodes is not None:
self.graph[name]['inbounds'] = inbound_nodes
for node in inbound_nodes:
if node not in self.graph.keys():
self.graph[node] = {}
if 'outbounds' not in self.graph[node].keys():
self.graph[node]['outbounds'] = []
self.graph[node]['outbounds'].append(name)
def refresh(self):
for name in self.graph.keys():
self.graph[name]['outbounds'] = []
for name in self.graph.keys():
if 'inbounds' in self.graph[name].keys():
for node in self.graph[name]['inbounds']:
if node not in self.graph.keys():
while node in self.graph[name]['inbounds']:
self.graph[name]['inbounds'].remove(node)
else:
if 'outbounds' not in self.graph[node].keys():
self.graph[node]['outbounds'] = []
self.graph[node]['outbounds'].append(name)
spare_nodes = []
for name in self.graph.keys():
if len(self.graph[name]['outbounds']) == 0 and \
len(self.graph[name]['inbounds']) == 0:
spare_nodes.append(name)
for removing_node_name in spare_nodes:
del self.graph[removing_node_name]
def get_graph(self):
return self.graph
def get_node_inbounds(self, name):
if 'inbounds' in self.graph[name]:
return self.graph[name]['inbounds']
else:
return []
def get_node_outbounds(self, name):
if 'outbounds' in self.graph[name]:
return self.graph[name]['outbounds']
else:
return []
def set_node_inbounds(self, name, inbounds):
self.graph[name]['inbounds'] = inbounds
def set_node_outbounds(self, name, outbounds):
self.graph[name]['outbounds'] = outbounds
def remove_node_inbounds(self, name, inbound):
if inbound in self.graph[name]['inbounds']:
self.graph[name]['inbounds'].remove(inbound)
def remove_node_outbounds(self, name, outbound):
        if outbound in self.graph[name]['outbounds']:
self.graph[name]['outbounds'].remove(outbound)
def add_node_inbounds(self, name, inbound):
self.graph[name]['inbounds'].append(inbound)
def add_node_outbounds(self, name, outbound):
self.graph[name]['outbounds'].append(outbound)
def get_graph_head(self):
self.heads = []
for (key, value) in self.graph.items():
if 'inbounds' not in value.keys()\
or len(value['inbounds']) == 0:
self.heads.append(key)
return self.heads
def get_graph_tail(self):
self.tails = []
for (key, value) in self.graph.items():
if 'outbounds' not in value.keys()\
or len(value['outbounds']) == 0:
self.tails.append(key)
return self.tails
def add_node_attr(self, name, attr_key, attr_value):
if name not in self.graph.keys():
self.graph[name] = {}
self.graph[name]['attr']['attr'][attr_key] = attr_value
def set_node_attr(self, name, attr):
if name not in self.graph.keys():
self.graph[name] = {}
self.graph[name]['attr'] = attr
def get_node_attr(self, name):
if name in self.graph.keys():
return self.graph[name]['attr']
else:
return None
def get_node_type(self, name):
if name in self.graph.keys() and 'attr' in self.graph[name].keys():
return self.graph[name]['attr']['type']
else:
print(name, self.graph[name])
return None
def plot_graphs(self, comment='Network Grapher View'):
from graphviz import Digraph
dot = Digraph(comment=comment)
for (key, value) in self.graph.items():
dot.node(key, key)
if 'inbounds' in value.keys():
for node in value['inbounds']:
dot.edge(node, key, label=', '.join(str(x)
for x in value['attr']['output_shape']))
dot.render('graph.gv', view=False)
def plot_networkx_graph(self):
import matplotlib.pyplot as plt
import networkx as nx
plt.subplot(121)
nx.draw(
self.get_networkx_graph(),
with_labels=True,
font_weight='bold')
plt.show()
def get_networkx_graph(self):
import networkx as nx
G = nx.MultiDiGraph()
for (key, value) in self.graph.items():
G.add_node(
key,
type=value['attr']['type'],
**value['attr']['attr'])
if 'inbounds' in value.keys():
for node in value['inbounds']:
G.add_edge(node, key)
self.graphx = G
return G
def match_isomorph_vf2(self):
pass
def find_subgraphs(self, sub_graph, match_func):
from networkx.algorithms import isomorphism as iso
GM = iso.MultiDiGraphMatcher(
self.get_networkx_graph(),
sub_graph.get_networkx_graph(),
node_match=match_func)
return list(GM.subgraph_isomorphisms_iter())
def find_weight_roots(self, layer_name):
weight_roots = []
weights_nodes = []
for inbound in self.graph[layer_name]['inbounds']:
if self.graph[inbound]['attr']['type'] == 'Identity' \
and len(self.graph[inbound]['inbounds']) == 1:
if self.graph[self.graph[inbound]['inbounds']
[0]]['attr']['type'] == 'Const':
weight_roots.append(inbound)
weights_nodes.append(inbound)
weights_nodes.append(self.graph[inbound]['inbounds'][0])
if self.graph[inbound]['attr']['type'] == 'Const' \
and len(self.graph[inbound]['inbounds']) == 0:
weight_roots.append(inbound)
weights_nodes.append(inbound)
return weight_roots, weights_nodes
def get_subgraphs(self, sub_graph, match_func):
import tensorflow as tf
import copy
fetched_subgraphs = self.find_subgraphs(sub_graph, match_func)
tar_sub_graphs = []
for sub_fetch_graph in fetched_subgraphs:
tar_sub_graphs.append(tf.GraphDef())
for op_entry in sub_fetch_graph.keys():
                # --- Replace dummy op ---
if sub_graph.get_graph()[
sub_fetch_graph[op_entry]]['attr']['type'] == 'dummy':
dummy_op = tar_sub_graphs[-1].node.add()
dummy_op.op = "Identity"
dummy_op.name = sub_fetch_graph[op_entry]
dummy_op.input.extend(
sub_graph.get_graph()[
sub_fetch_graph[op_entry]]['inbounds'])
dummy_op.attr['T'].type = 1
# if 'graph_head' in sub_graph.get_graph()[sub_fetch_graph[op_entry]]['attr']['attr']:
# dummy_op.attr['shape'] = []
# dummy_op.attr['shape'].dim = list(map(int, sub_graph.get_graph()[sub_fetch_graph[op_entry]]['attr']['attr']['graph_head'].split(',')))
# print(dummy_op)
else:
# --- Fetch the main op ---
node = copy.deepcopy(self.graph[op_entry]['attr']['node'])
node.name = sub_fetch_graph[op_entry]
del node.input[:]
node.input.extend(
sub_graph.get_graph()[
sub_fetch_graph[op_entry]]['inbounds'])
# --- Fetch the constant op ---
roots, nodes = self.find_weight_roots(op_entry)
for weight_root in roots:
node.input.append(weight_root)
for weight_node in nodes:
tar_sub_graphs[-1].node.append(
self.graph[weight_node]['attr']['node'])
tar_sub_graphs[-1].node.append(node)
#tf.io.write_graph(tar_sub_graphs[-1], '', 'a.pb')
return tar_sub_graphs
def dump_json(self, filename):
with open(filename, 'w+') as fp:
json.dump(
self.graph,
fp,
indent=4,
skipkeys=True,
sort_keys=True,
cls=NumpyEncoder)

@@ -0,0 +1,103 @@
import logging
logging = logging.getLogger(__name__)
class ProtobufHelper:
@staticmethod
def get_w(x):
l = len(x)
if l == 4:
return x[1]
if l == 2:
return x[0]
return None
@staticmethod
def get_h(x):
l = len(x)
if l == 4:
return x[2]
if l == 2:
return x[1]
return None
@staticmethod
def find_weights_root(graph, node):
NODE_WEIGHT_LUT = {
'Conv2D': [
lambda x: x.replace('/Conv2D', '/weight'),
lambda x: x.replace('/Conv2D', '/kernel'),
],
'DepthwiseConv2dNative': [
lambda x: x.replace('/depthwise', '/weight')
],
'BiasAdd': [
lambda x: x.replace('/BiasAdd', '/bias'),
],
'FusedBatchNorm': [
lambda x: x.replace('/FusedBatchNormV3', '/gamma'),
lambda x: x.replace('/FusedBatchNormV3', '/beta'),
lambda x: x.replace('/FusedBatchNormV3', '/moving_mean'),
lambda x: x.replace('/FusedBatchNormV3', '/moving_variance')
],
'MatMul': [
lambda x: x.replace('/MatMul', '/weight'),
]
}
weight_name = []
if node['attr']['type'] in NODE_WEIGHT_LUT.keys():
for lut_lamba in NODE_WEIGHT_LUT[node['attr']['type']]:
weight_op = lut_lamba(node['attr']['name'])
if weight_op in graph.keys(
) and graph[weight_op]['attr']['type'] != 'Identity':
logging.info(
'Find node %s with its weight op %s.' %
(node['attr']['name'], weight_op))
weight_name.append(weight_op)
return weight_name
@staticmethod
def get_graph_seq(x):
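        # Iterative DFS starting from every graph head; returns a rough
        # topological ordering of node names that ShapeInference follows when
        # propagating shapes through the graph.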
graph_head = x.get_graph_head()
seen = set()
stack = []
order = []
q = [graph_head[0]]
for head in graph_head:
q = [head]
while q:
v = q.pop()
if v not in seen:
seen.add(v)
q.extend(x.get_node_outbounds(v))
while stack and v not in x.get_node_outbounds(
stack[-1]):
order.append(stack.pop())
stack.append(v)
return stack + order[::-1]
@staticmethod
def pkg42dec(x):
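        # Decode raw tensor_content bytes as little-endian 4-byte integers;
        # the value 4294967295 (0xFFFFFFFF) is mapped to -1, e.g. the wildcard
        # dimension of a Reshape target shape.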
total_byte = len(x) // 4
assert(total_byte * 4 == len(x))
num = []
for idx in range(total_byte):
num.append(0)
for i in range(4):
num[-1] += x[idx * 4 + i] << (i * 8)
if num[-1] == 4294967295:
num[-1] = -1
return num
@staticmethod
def get_tensor_value(x):
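        # DTYPE_ENUM maps TensorFlow DataType enum values to extractors:
        # 1 = DT_FLOAT -> float_val, 3 = DT_INT32 -> int_val.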
DTYPE_ENUM = {
1: lambda x: list(map(float, x.float_val)),
3: lambda x: list(map(int, x.int_val))
}
return DTYPE_ENUM[x.dtype](x)

@@ -0,0 +1,49 @@
import tensorflow as tf
import numpy as np
from typing import List
class ShapeFetcher:
def get_nodes_with_input_tensor(self, tensor):
return list(filter(
lambda op: (tensor in op.inputs) and (op.type not in ["Shape"]),
self.graph.get_operations()
))
def __init__(self, input_graph):
tf.compat.v1.disable_eager_execution()
graph = tf.Graph()
with graph.as_default():
tf.import_graph_def(graph_def=input_graph, name="")
ops = graph.get_operations()
placeholders = list(filter(lambda op: op.type == "Placeholder", ops))
assert len(placeholders) == 1
graph_input_tensor = placeholders[0].outputs[0]
graph_input_tensor_shape = graph_input_tensor.get_shape().as_list()
assert graph_input_tensor_shape[1] == graph_input_tensor_shape[2]
assert graph_input_tensor_shape[3] == 3
self.imsize = graph_input_tensor_shape[1]
self.graph: tf.Graph = graph
tensors_to_fetch: List[tf.Tensor] = []
for op in filter(lambda op: op.type not in [], ops):
tensors_to_fetch.extend(op.inputs)
tensors_to_fetch.extend(op.outputs)
shape_tensors = dict()
for tensor in tensors_to_fetch:
shape_tensors[tensor.name] = tf.compat.v1.shape(tensor)
self.shape_results = dict()
with tf.compat.v1.Session(graph=graph) as sess:
fake_input = np.random.randn(1, self.imsize, self.imsize, 3)
for tensor_name, shape_tensor in shape_tensors.items():
self.shape_results[tensor_name] = sess.run(
shape_tensor, feed_dict={
graph_input_tensor: fake_input
}
)

@@ -0,0 +1,495 @@
from .protobuf_helper import ProtobufHelper as ph
from functools import reduce
import copy
import logging
logging = logging.getLogger(__name__)
class ShapeInference:
@staticmethod
def eval_prodcast(grapher, node):
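        # Evaluate the broadcast output shape for element-wise ops (Add, Mul):
        # pick the input with the highest rank and, for inputs of equal rank,
        # take the element-wise maximum of their shapes.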
input_nodes = node['inbounds']
if len(input_nodes) < 2:
            logging.warning(
                'Invalid number of inputs for broadcast op %s' %
                (node['attr']['name']))
if len(input_nodes) == 1:
return grapher[node['inbounds'][0]]['attr']['output_shape'][0]
else:
return None
target_dim = -1
target_shape = [1]
input_shape_list = []
for node_name in input_nodes:
input_shape = grapher[node_name]['attr']['output_shape'][0]
input_shape_list.append(input_shape)
if target_dim < len(input_shape):
target_dim = len(input_shape)
target_shape = input_shape
elif target_dim == len(input_shape):
for i in range(target_dim):
if target_shape[i] < input_shape[i]:
target_shape[i] = input_shape[i]
# if target_dim < len(input_shape):
# for i in range(len(input_shape)):
# if target_shape[i] == 1 or target_shape[i] == input_shape[i]:
# target_dim = len(input_shape)
# target_shape = input_shape
# else:
# logging.warn('Invalid prodcast shape between %s and %s(%s).'
# % (str(target_shape), str(input_shape), node_name))
# return None
# logging.warn('Prodcast from %s to %s(%s).' % (str(target_shape), str(input_shape), node_name))
# for node_name in input_nodes:
# input_shape = grapher[node_name]['attr']['output_shape'][0]
# if largest_dim == len(input_shape):
# prodcast_shape[node_name] = input_shape
# for node_name, shape in prodcast_shape.items():
# if shape != prodcast_shape[prodcast_node_name]:
# logging.warn('Invalid prodcast shape between %s(%s) and %s(%s).'
# % (node_name, str(shape),
# prodcast_node_name, prodcast_shape[prodcast_node_name]))
# return None
return input_shape_list, [target_shape]
@staticmethod
def Const_get_shape(grapher, node):
return [], [node['attr']['attr']['tensor_shape']]
@staticmethod
def Identity_get_shape(grapher, node):
return [], [grapher[node['inbounds'][0]]['attr']['output_shape'][0]]
@staticmethod
def propogate_shape(grapher, node):
in_shape = [grapher[node['inbounds'][0]]['attr']['output_shape'][0]]
return in_shape, in_shape
@staticmethod
def FusedBatchNorm_get_shape(grapher, node):
return ShapeInference.propogate_shape(grapher, node)
@staticmethod
def BiasAdd_get_shape(grapher, node):
return ShapeInference.propogate_shape(grapher, node)
@staticmethod
def Relu_get_shape(grapher, node):
return ShapeInference.propogate_shape(grapher, node)
@staticmethod
def Relu6_get_shape(grapher, node):
return ShapeInference.propogate_shape(grapher, node)
@staticmethod
def LeakyReLU_get_shape(grapher, node):
return ShapeInference.propogate_shape(grapher, node)
@staticmethod
def Add_get_shape(grapher, node):
return ShapeInference.eval_prodcast(grapher, node)
@staticmethod
def Mul_get_shape(grapher, node):
return ShapeInference.eval_prodcast(grapher, node)
@staticmethod
def Pool_get_shape(grapher, node):
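        # Pooling output size: out = (in - ksize) / stride + 1; for 'SAME'
        # padding a padded shape is additionally computed and logged.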
if len(node['inbounds']) != 1:
logging.warning(
'Failed to get input node of %s.' %
(node['attr']['name']))
logging.info(node)
return
input_shape = grapher[node['inbounds'][0]]['attr']['output_shape'][0]
logging.info(
'Get input shape of %s from %s, input shape:%s.' %
(node['attr']['name'], node['inbounds'][0], input_shape))
k_size = node['attr']['attr']['ksize']
if node['attr']['attr']['strides'][::3] != [1, 1]:
logging.warning(
'Invalid strides %s of node %s.' %
(str(
node['attr']['attr']['strides']),
node['attr']['name']))
logging.info(node)
return
strides = node['attr']['attr']['strides']
padding = node['attr']['attr']['padding'].decode('utf-8')
logging.info('Op:%s, stride:%s, padding:%s.' %
(node['attr']['name'], str(strides), str(padding)))
if padding == 'SAME':
wpad = ph.get_w(strides) - 1
hpad = ph.get_h(strides) - 1
else:
wpad = 0
hpad = 0
padded_shape = [
ph.get_w(input_shape) + wpad,
ph.get_h(input_shape) + hpad]
logging.info('Op:%s, padding:%s, padded shape:%s.' %
(node['attr']['name'], str([wpad, hpad]), str(padded_shape)))
outw = int(ph.get_w(input_shape) - ph.get_w(k_size)) / \
ph.get_w(strides) + 1
outh = int(ph.get_h(input_shape) - ph.get_w(k_size)) / \
ph.get_h(strides) + 1
output_shape = list(
map(int, [input_shape[0], outh, outw, input_shape[3]]))
return [input_shape], [output_shape]
@staticmethod
def AvgPool_get_shape(grapher, node):
return ShapeInference.Pool_get_shape(grapher, node)
@staticmethod
def AveragePooling2D_get_shape(grapher, node):
return ShapeInference.Pool_get_shape(grapher, node)
@staticmethod
def MaxPool_get_shape(grapher, node):
return ShapeInference.Pool_get_shape(grapher, node)
@staticmethod
def MaxPooling2D_get_shape(grapher, node):
return ShapeInference.Pool_get_shape(grapher, node)
@staticmethod
def Placeholder_get_shape(grapher, node):
return [], [node['attr']['attr']['shape']]
@staticmethod
def Conv2D_get_shape(grapher, node):
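        # The weight tensor layout is [kernel_h, kernel_w, cin, cout]; the output
        # spatial size is computed as out = (in - kernel_extent) / stride + 1.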
weight_node = ph.find_weights_root(grapher, node)
if len(weight_node) != 1:
logging.warning(
'Failed to get shape of node %s.' %
(node['attr']['name']))
logging.info(node)
return
input_node = [x for x in node['inbounds'] if x != weight_node]
input_node = [x for x in input_node if grapher[x]
['attr']['type'] != 'Identity']
if len(input_node) != 1:
logging.warning(
'Failed to get input node of %s.' %
(node['attr']['name']))
logging.info(node)
return
input_shape = grapher[input_node[0]]['attr']['output_shape'][0]
logging.info(
'Get input shape of %s from %s, input shape:%s.' %
(node['attr']['name'], input_node[0], input_shape))
weight_shape = grapher[weight_node[0]]['attr']['attr']['tensor_shape']
if len(weight_shape) != 4:
logging.warning(
'Failed to parse weight shape %s of node %s.' %
(str(weight_shape), node['attr']['name']))
logging.info(node)
return
logging.info(
'Get weight shape of %s from %s, input shape:%s.' %
(node['attr']['name'], weight_node, weight_shape))
k_size = weight_shape[:2]
cin = weight_shape[2]
cout = weight_shape[3]
if node['attr']['attr']['strides'][::3] != [1, 1]:
logging.warning(
'Invalid strides %s of node %s.' %
(str(
node['attr']['attr']['strides']),
node['attr']['name']))
logging.info(node)
return
strides = node['attr']['attr']['strides']
dilation = node['attr']['attr']['dilations']
padding = node['attr']['attr']['padding'].decode('utf-8')
logging.info(
'Op:%s, stride:%s, dilation:%s, padding:%s.' %
(node['attr']['name'], str(strides), str(dilation), str(padding)))
kernel_extent_w = ph.get_w(dilation) * (ph.get_w(strides) - 1) + 1
kernel_extent_h = ph.get_h(dilation) * (ph.get_h(strides) - 1) + 1
if padding == 'SAME':
wpad = kernel_extent_w + int((ph.get_w(input_shape) - 1) / ph.get_w(
dilation)) * ph.get_w(dilation) - ph.get_w(input_shape)
hpad = kernel_extent_h + int((ph.get_h(input_shape) - 1) / ph.get_h(
dilation)) * ph.get_h(dilation) - ph.get_h(input_shape)
else:
wpad = 0
hpad = 0
padded_shape = [
ph.get_w(input_shape) + wpad,
ph.get_h(input_shape) + hpad]
logging.info('Op:%s, kernel_extent:%s, padding:%s, padded shape:%s.' % (node['attr']['name'], str(
[kernel_extent_w, kernel_extent_h]), str([wpad, hpad]), str(padded_shape)))
outw = int(ph.get_w(input_shape) - kernel_extent_w) / \
ph.get_w(strides) + 1
outh = int(ph.get_h(input_shape) - kernel_extent_h) / \
ph.get_h(strides) + 1
output_shape = list(map(int, [input_shape[0], outh, outw, cout]))
return [input_shape], [output_shape]
@staticmethod
def DepthwiseConv2dNative_get_shape(grapher, node):
weight_node = ph.find_weights_root(grapher, node)
if len(weight_node) != 1:
logging.warning(
'Failed to get shape of node %s.' %
(node['attr']['name']))
logging.info(node)
return
input_node = [x for x in node['inbounds'] if x != weight_node]
input_node = [x for x in input_node if grapher[x]
['attr']['type'] != 'Identity']
if len(input_node) != 1:
logging.warning(
'Failed to get input node of %s.' %
(node['attr']['name']))
logging.info(node)
return
input_shape = grapher[input_node[0]]['attr']['output_shape'][0]
logging.info(
'Get input shape of %s from %s, input shape:%s.' %
(node['attr']['name'], input_node[0], input_shape))
weight_shape = grapher[weight_node[0]]['attr']['attr']['tensor_shape']
if len(weight_shape) != 4:
logging.warning(
'Failed to parse weight shape %s of node %s.' %
(str(weight_shape), node['attr']['name']))
logging.info(node)
return
logging.info(
'Get weight shape of %s from %s, input shape:%s.' %
(node['attr']['name'], weight_node, weight_shape))
k_size = weight_shape[:2]
cin = weight_shape[2]
if node['attr']['attr']['strides'][::3] != [1, 1]:
logging.warning(
'Invalid strides %s of node %s.' %
(str(
node['attr']['attr']['strides']),
node['attr']['name']))
logging.info(node)
return
strides = node['attr']['attr']['strides']
dilation = node['attr']['attr']['dilations']
padding = node['attr']['attr']['padding'].decode('utf-8')
logging.info(
'Op:%s, stride:%s, dilation:%s, padding:%s.' %
(node['attr']['name'], str(strides), str(dilation), str(padding)))
kernel_extent_w = ph.get_w(dilation) * (ph.get_w(strides) - 1) + 1
kernel_extent_h = ph.get_h(dilation) * (ph.get_h(strides) - 1) + 1
if padding == 'SAME':
wpad = kernel_extent_w + int((ph.get_w(input_shape) - 1) / ph.get_w(
dilation)) * ph.get_w(dilation) - ph.get_w(input_shape)
hpad = kernel_extent_h + int((ph.get_h(input_shape) - 1) / ph.get_h(
dilation)) * ph.get_h(dilation) - ph.get_h(input_shape)
else:
wpad = 0
hpad = 0
padded_shape = [
ph.get_w(input_shape) + wpad,
ph.get_h(input_shape) + hpad]
logging.info('Op:%s, kernel_extent:%s, padding:%s, padded shape:%s.' % (node['attr']['name'], str(
[kernel_extent_w, kernel_extent_h]), str([wpad, hpad]), str(padded_shape)))
outw = int(ph.get_w(input_shape) - kernel_extent_w) / \
ph.get_w(strides) + 1
outh = int(ph.get_h(input_shape) - kernel_extent_h) / \
ph.get_h(strides) + 1
output_shape = list(map(int, [input_shape[0], outh, outw, cin]))
return [input_shape], [output_shape]
@staticmethod
def Reduce_get_shape(grapher, node):
input_shape = grapher[node['inbounds'][0]]['attr']['output_shape'][0]
        output_shape = copy.deepcopy(input_shape)  # copy so the input shape is not mutated below
logging.info(
'Get input shape of %s from %s, input shape:%s.' %
(node['attr']['name'], node['inbounds'][0], output_shape))
output_shape[1] = 0
output_shape[2] = 0
reduction_indices = node['attr']['attr']['reduction_indices']
logging.info('Get Reduction Indices %s.', str(reduction_indices))
reduction_cnt = 0
for reduction in sorted(reduction_indices):
del output_shape[reduction - reduction_cnt]
reduction_cnt += 1
return [input_shape], [output_shape]
@staticmethod
def Mean_get_shape(grapher, node):
return ShapeInference.Reduce_get_shape(grapher, node)
@staticmethod
def GlobalAveragePooling2D_get_shape(grapher, node):
return ShapeInference.Reduce_get_shape(grapher, node)
@staticmethod
def GlobalMaxPooling2D_get_shape(grapher, node):
return ShapeInference.Reduce_get_shape(grapher, node)
@staticmethod
def MatMul_get_shape(grapher, node):
weight_node = ph.find_weights_root(grapher, node)
if len(weight_node) != 1:
logging.warning(
'Failed to get shape of node %s.' %
(node['attr']['name']))
logging.info(node)
return
weight_shape = grapher[weight_node[0]]['attr']['attr']['tensor_shape']
if len(weight_shape) != 2:
logging.warning(
'Failed to parse weight shape %s of node %s.' %
(str(weight_shape), node['attr']['name']))
logging.info(node)
return
logging.info(
'Get weight shape of %s from %s, input shape:%s.' %
(node['attr']['name'], weight_node, weight_shape))
input_node = [x for x in node['inbounds'] if x != weight_node]
input_node = [x for x in input_node if grapher[x]
['attr']['type'] != 'Identity']
if len(input_node) != 1:
logging.warning(
'Failed to get input node of %s.' %
(node['attr']['name']))
logging.info(node)
return
input_shape = copy.deepcopy(
grapher[input_node[0]]['attr']['output_shape'][0])
logging.info(
'Get input shape of %s from %s, input shape:%s.' %
(node['attr']['name'], input_node[0], input_shape))
if weight_shape[0] != input_shape[1]:
logging.warning(
'Weight shape and input shape not matched for %s.' %
(node['attr']['name']))
logging.info(node)
return
output_shape = copy.deepcopy(input_shape)
output_shape[1] = weight_shape[1]
return [input_shape], [output_shape]
@staticmethod
def Reshape_get_shape(grapher, node):
input_shape = grapher[node['inbounds'][0]]['attr']['output_shape'][0]
exp_output_shape = node['attr']['attr']['shape']
input_elements = abs(reduce(lambda x, y: x * y, input_shape))
exp_output_shape_elements = abs(
reduce(lambda x, y: x * y, exp_output_shape))
if input_elements != exp_output_shape_elements:
            logging.warning('Input shape %s and output shape %s not matched for %s.' % (
                str(input_shape), str(exp_output_shape), node['attr']['name']))
return [input_shape], [exp_output_shape]
@staticmethod
def Concat_get_shape(grapher, node):
input_shape = []
for in_node in node['inbounds']:
in_shape = grapher[in_node]['attr']['output_shape'][0]
if in_shape != []:
input_shape.append(in_shape)
logging.info('Get input shape of %s from %s, input shape:%s.' % (
node['attr']['name'], in_node, input_shape[-1]))
axis = node['attr']['attr']['axis'][0]
output_shape = copy.deepcopy(input_shape[0])
for in_shape in input_shape[1:]:
output_shape[axis] += in_shape[axis]
return [input_shape], [output_shape]
@staticmethod
def Concatenate_get_shape(grapher, node):
return ShapeInference.Concat_get_shape(grapher, node)
@staticmethod
def ConcatV2_get_shape(grapher, node):
return ShapeInference.Concat_get_shape(grapher, node)
@staticmethod
def Split_get_shape(grapher, node):
raise NotImplementedError
@staticmethod
def StridedSlice_get_shape(grapher, node):
return None, None
@staticmethod
def Pack_get_shape(grapher, node):
return None, None
def __init__(self, grapher):
seq = ph.get_graph_seq(grapher)
graph = grapher.get_graph()
for node_name in seq:
node_get_shape_name = grapher.get_node_type(
node_name) + '_get_shape'
if node_get_shape_name in dir(self):
                input_shape, output_shape = getattr(
                    self, node_get_shape_name)(graph, graph[node_name])
if output_shape is not None:
graph[node_name]['attr']['output_shape'] = output_shape
if input_shape is not None:
graph[node_name]['attr']['input_shape'] = input_shape
logging.info(
'Input shape of %s op is %s.' %
(node_name, str(input_shape)))
logging.info(
'Output shape of %s op is %s.' %
(node_name, str(output_shape)))
else:
logging.warning(
                    'Op %s is not supported, ignored!' %
grapher.get_node_type(node_name))

@@ -0,0 +1,23 @@
from utils import model_file_to_grapher
import argparse
import json
import sys
def main(input_model, output_path):
result = model_file_to_grapher(input_model)
if output_path:
with open(output_path, 'w') as fp:
json.dump(result, fp, indent=4)
else:
json.dump(result, sys.stdout, indent=4)
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input_model', type=str, required=True)
    parser.add_argument('-o', '--output_path', type=str, required=False)
    args = parser.parse_args()
    main(args.input_model, args.output_path)

@@ -0,0 +1 @@
from .converter import OnnxConverter

@@ -0,0 +1,48 @@
CONV_TYPE = 'Conv'
BN_TYPE = 'BatchNormalization'
SLICE_TYPE = 'Slice'
CONCAT_TYPE = 'Concat'
MAXPOOL_TYPE = 'MaxPool'
AVGPOOL_TYPE = 'AveragePool'
RELU_TYPE = 'Relu'
ADD_TYPE = 'Add'
FC_TYPE = 'Gemm'
RESHAPE_TYPE = 'Reshape'
GAP_TYPE = 'GlobalAveragePool'
CLIP_TYPE = 'Clip'
MUL_TYPE = 'Mul'
DIV_TYPE = 'Div'
HARDSIGMOID_TYPE = 'HardSigmoid'
FLATTEN_TYPE = 'Flatten'
TRANSPOSE_TYPE = 'Transpose'
REDUCEMEAN_TYPE = 'ReduceMean'
SPLIT_TYPE = 'Split'
PAD_TYPE = 'Pad'
OP_ALIAS = {
CONV_TYPE: 'conv',
BN_TYPE: 'bn',
SLICE_TYPE: 'split',
CONCAT_TYPE: 'concat',
MAXPOOL_TYPE: 'maxpool',
AVGPOOL_TYPE: 'avgpool',
RELU_TYPE: 'relu',
ADD_TYPE: 'add',
FC_TYPE: 'fc',
RESHAPE_TYPE: 'reshape',
GAP_TYPE: 'gap',
CLIP_TYPE: 'clip',
MUL_TYPE: 'mul',
DIV_TYPE: 'div',
HARDSIGMOID_TYPE: 'hardsigmoid',
FLATTEN_TYPE: 'flatten',
TRANSPOSE_TYPE: 'transpose',
REDUCEMEAN_TYPE: 'reducemean',
SPLIT_TYPE: 'split',
PAD_TYPE: 'pad',
}
ATTR_ALIAS = {
'pads': ('padding', '__all__'),
'axis': ('split_dim', ['split']),
}

@@ -0,0 +1,137 @@
import networkx as nx
from .utils import get_tensor_shape, convert_attr
from .constants import *
from itertools import chain
import logging
from onnx import AttributeProto, shape_inference
class OnnxConverter:
def __init__(self, model):
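        # Run ONNX shape inference first so every intermediate tensor in
        # graph.value_info carries a static shape, then index all tensors by
        # name together with their producer and consumer nodes.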
inferred_model = shape_inference.infer_shapes(model)
self.graph = inferred_model.graph
self.tensors = {}
for tensor in chain(self.graph.input, self.graph.value_info, self.graph.output):
self.tensors[tensor.name] = {
'shape': get_tensor_shape(tensor),
'inputs': [],
'outputs': [],
}
for node in self.graph.node:
for input_name in node.input:
if input_name in self.tensors:
self.tensors[input_name]['outputs'].append(node)
for output_name in node.output:
if output_name in self.tensors:
self.tensors[output_name]['inputs'].append(node)
self.G = self.to_networkx()
def to_networkx(self):
G = nx.DiGraph()
sliced_tensors = set()
selected_slice = set()
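        # A channel split shows up in ONNX as several sibling Slice nodes over
        # the same input tensor; keep only the first Slice per tensor and later
        # attach its siblings' outputs to it via _get_sibling_slice_output_tensors.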
for node in self.graph.node:
if node.op_type == SLICE_TYPE:
tensor = node.input[0]
if tensor in sliced_tensors:
continue
else:
sliced_tensors.add(tensor)
selected_slice.add(node.name)
G.add_node(node.name, **self.fetch_attrs(node))
for node in self.graph.node:
if node.op_type == SLICE_TYPE and node.name not in selected_slice:
continue
for input_name in node.input:
if input_name in self.tensors: # remove dummy ops
G.add_edge(input_name, node.name)
for output_name in node.output:
if output_name in self.tensors:
G.add_edge(node.name, output_name)
if node.op_type == SLICE_TYPE:
for tensor_name in self._get_sibling_slice_output_tensors(node):
G.add_edge(node.name, tensor_name)
return G
def fetch_attrs(self, node):
attrs = {}
input_tensors = []
for input_name in node.input:
if input_name in self.tensors:
input_tensors.append(self.tensors[input_name]['shape'])
output_tensors = []
for output_name in node.output:
if output_name in self.tensors:
output_tensors.append(self.tensors[output_name]['shape'])
if node.op_type == SLICE_TYPE:
for tensor_name in self._get_sibling_slice_output_tensors(node):
output_tensors.append(self.tensors[tensor_name]['shape'])
if len(input_tensors) == 0 or len(input_tensors[0]) <= 1 or len(output_tensors) == 0 or len(output_tensors[0]) <= 1:
return attrs
if node.op_type not in OP_ALIAS:
logging.warning(f'Unsupported OP: {node.op_type}')
attrs['attr'] = {}
attrs['type'] = OP_ALIAS.get(node.op_type, node.op_type)
attrs['input_shape'] = input_tensors
attrs['output_shape'] = output_tensors
for attr in node.attribute:
if attr.type == AttributeProto.FLOAT:
attrs['attr'][attr.name] = attr.f
elif attr.type == AttributeProto.INT:
attrs['attr'][attr.name] = attr.i
elif attr.type == AttributeProto.INTS:
attrs['attr'][attr.name] = list(attr.ints)
elif attr.type == AttributeProto.STRING:
attrs['attr'][attr.name] = str(attr.s)
else:
logging.warning(f'Unsupported attributes type: {attr.type}')
return attrs
def convert(self):
result = {}
for node in self.G.nodes:
node_attrs = self.G.nodes[node]
if node in self.tensors or not node_attrs:
continue
node_attrs['attr'] = convert_attr(node_attrs['attr'], node_attrs['type'])
outbounds = []
inbounds = []
for successor in self.G.successors(node):
try:
outbounds.append(next(self.G.successors(successor)))
except StopIteration:
pass
for predecessor in self.G.predecessors(node):
try:
inbounds.append(next(self.G.predecessors(predecessor)))
except StopIteration:
pass
result[node] = {
'attr': node_attrs,
'outbounds': outbounds,
'inbounds': inbounds,
}
return result
def _get_sibling_slice_output_tensors(self, node):
output_tensors = []
for slice in self.tensors[node.input[0]]['outputs']:
if slice.name != node.name and slice.op_type == SLICE_TYPE:
for output_name in slice.output:
if output_name in self.tensors:
output_tensors.append(output_name)
return output_tensors

@@ -0,0 +1,31 @@
from .constants import *
def get_tensor_shape(tensor):
shape = []
for dim in tensor.type.tensor_type.shape.dim:
shape.append(dim.dim_value)
if len(shape) == 4:
shape = [shape[0], shape[2], shape[3], shape[1]]
return shape
def convert_attr(attr, type):
def is_type(type, ts):
if ts is None:
return False
elif ts == '__all__':
return True
else:
return type in ts
new_attr = {}
for name, value in attr.items():
new_name, ts = ATTR_ALIAS.get(name, (name, None))
if is_type(type, ts):
new_attr[new_name] = value
else:
new_attr[name] = value
return new_attr

ir_converters/utils.py (new file, 41 lines)

@@ -0,0 +1,41 @@
import onnx
from onnx_converter import OnnxConverter
from frozenpb_converter import FrozenPbConverter
def model_to_grapher(model, model_type=None):
if model_type is None:
if isinstance(model, onnx.ModelProto):
model_type = 'onnx'
else:
raise ValueError(f'Invalid model: {type(model)}')
if model_type == 'onnx':
converter = OnnxConverter(model)
result = converter.convert()
elif model_type == 'pb':
raise NotImplementedError
else:
raise ValueError(f'Unsupported model type: {model_type}')
return result
def model_file_to_grapher(filename, model_type=None):
if model_type is None:
if filename.endswith('.onnx'):
model_type = 'onnx'
elif filename.endswith('.pb'):
converter = FrozenPbConverter(filename)
return converter.get_flatten_grapher()
else:
raise ValueError(f'Unknown file type: {filename}')
if model_type == 'onnx':
model = onnx.load(filename)
elif model_type == 'pb':
raise NotImplementedError
else:
raise ValueError(f'Unsupported model type: {model_type}')
return model_to_grapher(model, model_type)

kerneldetection/README.md (new file, 21 lines)

@@ -0,0 +1,21 @@
# DAGSplitter
Split pb models into kernels on a given device.
## Prerequisite
Please first use our `ruletest` tool to generate the rule files (or handcraft them), and replace the contents of `rulelib/rules` (default rule files are provided there).
## Installation
```
pip install -r requirements.txt
```
## Usage
Input models can be either json or pb. Please refer to `/data/raw.json` for the json format.
To output readable results:
```
python main.py -i INPUT_MODELS [INPUT_MODELS ...] -f readable
```

@@ -0,0 +1,79 @@
import tensorflow as tf
from google.protobuf import text_format
from tensorflow import gfile
from tensorflow import io
from shape_fetcher import ShapeFetcher
class FrozenPbParser:
def __init__(self, pb_file):
        with open(pb_file, 'rb') as f:
            graph = tf.GraphDef()
            graph.ParseFromString(f.read())
self.graph = graph
def find_weights_root(self, node, shape_fetcher):
        if shape_fetcher is None:
return None
if node.op == 'Conv2D':
weight_name = [node.name.replace('/Conv2D', '/weight/read'), node.name.replace('/Conv2D', '/kernel')]
elif node.op == 'DepthwiseConv2dNative':
weight_name = [node.name.replace('/depthwise', '/weight/read')]
elif node.op == 'MatMul':
            weight_name = [node.name.replace('/MatMul', '/weight/read')]
else:
return None
for target_node in self.graph.node:
if target_node.name in weight_name:
return [int(i) for i in shape_fetcher.shape_results[target_node.name + ':0']]
def fetch_attr_to_dict(self, node, shape_fetcher):
attr_dict = {}
list_i_nodes = ['dilations', 'strides', 'ksize']
str_nodes = ['padding', 'data_format']
for attr_name in node.attr.keys():
if attr_name in list_i_nodes:
attr_dict[attr_name] = [int(a) for a in node.attr[attr_name].list.i]
continue
if attr_name in str_nodes:
attr_dict[attr_name] = str(node.attr[attr_name].s)
continue
if attr_name == 'value':
shape = []
for dim in node.attr[attr_name].tensor.tensor_shape.dim:
shape.append(dim.size)
attr_dict['tensor_shape'] = list(map(int, shape))
continue
if attr_name == 'shape':
shape = []
for dim in node.attr[attr_name].shape.dim:
shape.append(dim.size)
attr_dict['shape'] = list(map(int, shape))
continue
attr_dict['weight_shape'] = self.find_weights_root(node, shape_fetcher)
return attr_dict
def parse_graph(self, graph_helper, required_shape=False, insert_node=False):
if required_shape:
shape_fetcher = ShapeFetcher(self.graph)
for node in self.graph.node:
graph_helper.node(str(node.name), list(map(str,node.input)))
graph_helper.set_node_attr(node.name, {
'type': str(node.op),
'output_shape': [int(i) for i in shape_fetcher.shape_results[node.name + ':0']] if required_shape else [],
'attr': self.fetch_attr_to_dict(node, shape_fetcher if required_shape else None),
'node': node if insert_node else None
})

@@ -0,0 +1,20 @@
{
"add_1": {
"attr": {
"type": "Add",
"attr": {}
},
"inbounds": [
]
},
"relu_2": {
"attr": {
"type": "ReLU",
"attr": {}
},
"inbounds": [
"add_1"
]
}
}

@@ -0,0 +1,20 @@
{
"bn_1": {
"attr": {
"type": "FusedBatchNorm",
"attr": {}
},
"inbounds": [
]
},
"relu_2": {
"attr": {
"type": "ReLU",
"attr": {}
},
"inbounds": [
"bn_1"
]
}
}

@@ -0,0 +1,112 @@
{
"Reshape": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"shape": [],
"type": "Reshape"
},
"inbounds": [
"packed"
],
"outbounds": [
"transpose"
]
},
"packed": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"shape": [],
"type": "Pack"
},
"inbounds": [
"strided_slice"
],
"outbounds": [
"Reshape"
]
},
"strided_slice": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"shape": [],
"type": "StridedSlice"
},
"inbounds": [
],
"outbounds": [
"packed"
]
},
"transpose": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"shape": [],
"type": "Transpose"
},
"inbounds": [
"Reshape"
],
"outbounds": [
"Reshape_1"
]
},
"Reshape_1": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"shape": [],
"type": "Reshape"
},
"inbounds": [
"transpose",
"packed_1"
],
"outbounds": [
]
},
"packed_1": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"shape": [],
"type": "Pack"
},
"inbounds": [
"strided_slice_1"
],
"outbounds": [
"Reshape_1"
]
},
"strided_slice_1": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"shape": [],
"type": "StridedSlice"
},
"inbounds": [
],
"outbounds": [
"packed_1"
]
}
}

@@ -0,0 +1,30 @@
{
"conv_1": {
"attr": {
"type": "Conv2D",
"attr": {}
},
"inbounds": [
]
},
"bn_2": {
"attr": {
"type": "FusedBatchNorm",
"attr": {}
},
"inbounds": [
"conv_1"
]
},
"relu_3": {
"attr": {
"type": "ReLU",
"attr": {}
},
"inbounds": [
"bn_1"
]
}
}

@@ -0,0 +1,20 @@
{
"conv_1": {
"attr": {
"type": "Conv2D",
"attr": {}
},
"inbounds": [
]
},
"bn_2": {
"attr": {
"type": "FusedBatchNorm",
"attr": {}
},
"inbounds": [
"conv_1"
]
}
}

@@ -0,0 +1,30 @@
{
"dwconv_1": {
"attr": {
"type": "DepthwiseConv2dNative",
"attr": {}
},
"inbounds": [
]
},
"bn_2": {
"attr": {
"type": "FusedBatchNorm",
"attr": {}
},
"inbounds": [
"dwconv_1"
]
},
"relu_3": {
"attr": {
"type": "ReLU",
"attr": {}
},
"inbounds": [
"bn_1"
]
}
}

@@ -0,0 +1,20 @@
{
"Elewise_1": {
"attr": {
"type": "Elewise",
"attr": {}
},
"inbounds": [
]
},
"Elewise_2": {
"attr": {
"type": "Elewise",
"attr": {}
},
"inbounds": [
"Elewise_1"
]
}
}

@@ -0,0 +1,20 @@
{
"mean_1": {
"attr": {
"type": "Mean",
"attr": {}
},
"inbounds": [
]
},
"reshape_2": {
"attr": {
"type": "Reshape",
"attr": {}
},
"inbounds": [
"mean_1"
]
}
}

@@ -0,0 +1,53 @@
{
"conv1.hswish.hswish/Relu6": {
"attr": {
"attr": {},
"type": "Relu6"
},
"inbounds": [
"conv1.hswish.hswish/add"
],
"outbounds": [
"conv1.hswish.hswish/mul"
]
},
"conv1.hswish.hswish/add": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"shape": [],
"type": "Add"
},
"inbounds": [
],
"outbounds": [
"conv1.hswish.hswish/Relu6"
]
},
"conv1.hswish.hswish/mul": {
"attr": {
"attr": {},
"type": "Mul"
},
"inbounds": [
"conv1.hswish.hswish/Relu6"
],
"outbounds": [
"conv1.hswish.hswish/mul_1"
]
},
"conv1.hswish.hswish/mul_1": {
"attr": {
"attr": {},
"type": "Mul"
},
"inbounds": [
"conv1.hswish.hswish/mul"
],
"outbounds": [
"layer2.1.conv/Conv2D"
]
}
}

@@ -0,0 +1,145 @@
{
"dummy_input": {
"attr": {
"attr": {},
"type": "dummy"
},
"inbounds": [],
"outbounds": [
"mul_1",
"SE/AvgPool"
]
},
"Add_1": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"type": "Add"
},
"inbounds": [
"SE/conv2d_1/BiasAdd"
],
"outbounds": [
"Relu6"
]
},
"Relu6": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"type": "Relu6"
},
"inbounds": [
"Add_1"
],
"outbounds": [
"mul"
]
},
"mul": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"type": "Mul"
},
"inbounds": [
"Relu6"
],
"outbounds": [
"mul_1"
]
},
"mul_1": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"type": "Mul"
},
"inbounds": [
"mul",
"dummy_input"
],
"outbounds": [
]
},
"SE/AvgPool": {
"attr": {
"attr": {},
"type": "AvgPool"
},
"inbounds": [
"dummy_input"
],
"outbounds": [
"SE/conv2d/Conv2D"
]
},
"SE/Relu": {
"attr": {
"attr": {},
"type": "Relu"
},
"inbounds": [
"SE/conv2d/BiasAdd"
],
"outbounds": [
"SE/conv2d_1/Conv2D"
]
},
"SE/conv2d/BiasAdd": {
"attr": {
"attr": {},
"type": "BiasAdd"
},
"inbounds": [
"SE/conv2d/Conv2D"
],
"outbounds": [
"SE/Relu"
]
},
"SE/conv2d/Conv2D": {
"attr": {
"attr": {},
"type": "Conv2D"
},
"inbounds": [
"SE/AvgPool"
],
"outbounds": [
"SE/conv2d/BiasAdd"
]
},
"SE/conv2d_1/BiasAdd": {
"attr": {
"attr": {},
"type": "BiasAdd"
},
"inbounds": [
"SE/conv2d_1/Conv2D"
],
"outbounds": [
"Add_1"
]
},
"SE/conv2d_1/Conv2D": {
"attr": {
"attr": {},
"type": "Conv2D"
},
"inbounds": [
"SE/Relu"
],
"outbounds": [
"SE/conv2d_1/BiasAdd"
]
}
}

@@ -0,0 +1,9 @@
import os
from grapher_tool import Grapher
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
def get_fusion_unit(name):
filename = os.path.join(BASE_DIR, f'{name}_fusionunit.json')
return Grapher(filename)

@@ -0,0 +1,301 @@
import json
import copy
class Grapher:
def __init__(self, filename=None, graph=None):
if filename is not None:
self.graph = json.load(open(filename, 'r'))
elif graph is not None:
self.graph = copy.deepcopy(graph)
else:
self.graph = {}
self.refresh()
def node(self, name, inbound_nodes=None):
self.graph[name] = {}
if inbound_nodes is not None:
self.graph[name]['inbounds'] = inbound_nodes
for node in inbound_nodes:
if node not in self.graph.keys():
self.graph[node] = {}
if 'outbounds' not in self.graph[node].keys():
self.graph[node]['outbounds'] = []
self.graph[node]['outbounds'].append(name)
def refresh(self):
for name in self.graph.keys():
self.graph[name]['outbounds'] = []
for name in self.graph.keys():
if 'inbounds' not in self.graph[name].keys():
self.graph[name]['inbounds'] = []
inbounds = self.graph[name]['inbounds'][:]
for node in inbounds:
if node not in self.graph.keys():
while node in self.graph[name]['inbounds']:
self.graph[name]['inbounds'].remove(node)
print(node)
else:
if 'outbounds' not in self.graph[node].keys():
self.graph[node]['outbounds'] = []
self.graph[node]['outbounds'].append(name)
def get_graph(self):
return self.graph
def get_node_inbounds(self, name):
if 'inbounds' in self.graph[name]:
return self.graph[name]['inbounds']
else:
return []
def get_node_outbounds(self, name):
if 'outbounds' in self.graph[name]:
return self.graph[name]['outbounds']
else:
return []
def set_node_inbounds(self, name, inbounds):
self.graph[name]['inbounds'] = inbounds
def set_node_outbounds(self, name, outbounds):
self.graph[name]['outbounds'] = outbounds
def remove_node_inbounds(self, name, inbound):
try:
self.graph[name]['inbounds'].remove(inbound)
except (ValueError, KeyError):
pass
def remove_node_outbounds(self, name, outbound):
try:
self.graph[name]['outbounds'].remove(outbound)
except (ValueError, KeyError):
pass
def add_node_inbounds(self, name, inbound):
try:
self.graph[name]['inbounds'].append(inbound)
except (ValueError, KeyError):
pass
def add_node_outbounds(self, name, outbound):
try:
self.graph[name]['outbounds'].append(outbound)
except (ValueError, KeyError):
pass
def get_graph_heads(self):
self.heads = []
for (key, value) in self.graph.items():
if "graph_head" in value['attr']['attr']:
self.heads.append(key)
return self.heads
def get_graph_tails(self):
self.tails = []
for (key, value) in self.graph.items():
if "graph_tail" in value['attr']['attr']:
self.tails.append(key)
return self.tails
def add_node_attr(self, name, attr_key, attr_value):
if name not in self.graph.keys():
self.graph[name] = {}
self.graph[name]['attr']['attr'][attr_key] = attr_value
def set_node_attr(self, name, attr):
if name not in self.graph.keys():
self.graph[name] = {}
self.graph[name]['attr'] = attr
def get_node_attr(self, name):
if name in self.graph.keys():
return self.graph[name]['attr']
else:
return None
def get_node_type(self, name):
try:
if name in self.graph.keys():
return self.graph[name]['attr']['type']
else:
return None
except:
return None
def set_node_type(self, name, type):
if name not in self.graph.keys():
self.graph[name] = {}
self.graph[name]['attr']['type'] = type
def fuse(self, subgraph, type, name=None, attr=None, is_block=True):
        '''
        subgraph: a list of node names.
        Nothing is done (returns False) if the subgraph does not fully exist in self.
        '''
for node in subgraph:
if node not in self.graph:
return False
if name is None:
name = ';'.join(subgraph)
if attr is None:
attr = {'attr': {}}
attr['type'] = type
if is_block:
attr['attr']['primitive_nodes'] = list(subgraph)
self.graph[name] = {
'attr': attr,
'inbounds': [],
'outbounds': [],
}
for node in subgraph:
for inbound in self.get_node_inbounds(node):
if inbound not in subgraph:
if inbound not in self.get_node_inbounds(name):
self.add_node_inbounds(name, inbound)
self.remove_node_outbounds(inbound, node)
if name not in self.get_node_outbounds(inbound):
self.add_node_outbounds(inbound, name)
for outbound in self.get_node_outbounds(node):
if outbound not in subgraph:
if outbound not in self.get_node_outbounds(name):
self.add_node_outbounds(name, outbound)
self.remove_node_inbounds(outbound, node)
if name not in self.get_node_inbounds(outbound):
self.add_node_inbounds(outbound, name)
for node in subgraph:
del self.graph[node]
return True
def get_primitive_nodes(self, name):
try:
return self.graph[name]['attr']['attr']['primitive_nodes']
except KeyError:
return [name]
def plot_graphs(self, comment='Network Grapher View'):
from graphviz import Digraph
dot = Digraph(comment=comment)
for (key, value) in self.graph.items():
dot.node(key, key)
if 'inbounds' in value.keys():
for node in value['inbounds']:
dot.edge(node, key, label=', '.join(str(x) for x in value['attr']['shape']))
dot.render('graph.gv', view=False)
def plot_networkx_graph(self):
import matplotlib.pyplot as plt
import networkx as nx
plt.subplot(121)
nx.draw(self.get_networkx_graph(), with_labels=True, font_weight='bold')
plt.show()
def get_networkx_graph(self):
import networkx as nx
G = nx.MultiDiGraph()
for (key, value) in self.graph.items():
try:
G.add_node(key, type=value['attr']['type'], **value['attr']['attr'])
if 'inbounds' in value.keys():
for node in value['inbounds']:
G.add_edge(node, key)
except:
continue
self.graphx = G
return G
def match_isomorph_vf2(self):
pass
def find_subgraphs(self, sub_graph, match_func):
from networkx.algorithms import isomorphism as iso
GM = iso.MultiDiGraphMatcher(self.get_networkx_graph(), sub_graph.get_networkx_graph(), node_match=match_func)
matches = []
for match in GM.subgraph_isomorphisms_iter():
matches.append({
key: value
for key, value in match.items()
if sub_graph.get_node_type(value) != 'dummy'
})
return matches
def find_weight_roots(self, layer_name):
weight_roots = []
weights_nodes = []
for inbound in self.graph[layer_name]['inbounds']:
if self.graph[inbound]['attr']['type'] == 'Identity' \
and len(self.graph[inbound]['inbounds']) == 1:
if self.graph[self.graph[inbound]['inbounds'][0]]['attr']['type'] == 'Const':
weight_roots.append(inbound)
weights_nodes.append(inbound)
weights_nodes.append(self.graph[inbound]['inbounds'][0])
if self.graph[inbound]['attr']['type'] == 'Const' \
and len(self.graph[inbound]['inbounds']) == 0:
weight_roots.append(inbound)
weights_nodes.append(inbound)
return weight_roots, weights_nodes
def get_subgraphs(self, sub_graph, match_func):
import tensorflow as tf
import copy
fetched_subgraphs = self.find_subgraphs(sub_graph, match_func)
tar_sub_graphs = []
for sub_fetch_graph in fetched_subgraphs:
tar_sub_graphs.append(tf.GraphDef())
for op_entry in sub_fetch_graph.keys():
                # --- Replace dummy op ---
if sub_graph.get_graph()[sub_fetch_graph[op_entry]]['attr']['type'] == 'dummy':
dummy_op = tar_sub_graphs[-1].node.add()
dummy_op.op = "Identity"
dummy_op.name = sub_fetch_graph[op_entry]
dummy_op.input.extend(sub_graph.get_graph()[sub_fetch_graph[op_entry]]['inbounds'])
dummy_op.attr['T'].type = 1
# if 'graph_head' in sub_graph.get_graph()[sub_fetch_graph[op_entry]]['attr']['attr']:
# dummy_op.attr['shape'] = []
# dummy_op.attr['shape'].dim = list(map(int, sub_graph.get_graph()[sub_fetch_graph[op_entry]]['attr']['attr']['graph_head'].split(',')))
print(dummy_op)
else:
# --- Fetch the main op ---
node = copy.deepcopy(self.graph[op_entry]['attr']['node'])
node.name = sub_fetch_graph[op_entry]
del node.input[:]
node.input.extend(sub_graph.get_graph()[sub_fetch_graph[op_entry]]['inbounds'])
# --- Fetch the constant op ---
roots, nodes = self.find_weight_roots(op_entry)
for weight_root in roots:
node.input.append(weight_root)
for weight_node in nodes:
tar_sub_graphs[-1].node.append(self.graph[weight_node]['attr']['node'])
tar_sub_graphs[-1].node.append(node)
#tf.io.write_graph(tar_sub_graphs[-1], '', 'a.pb')
return tar_sub_graphs
def dump_json(self, filename):
with open(filename, 'w+') as fp:
try:
json.dump(self.graph, fp, indent=4, sort_keys=True)
            except TypeError:
print('Find unsupport field when dumping to json, skipped.')
json.dump(self.graph, fp, indent=4, skipkeys=True, sort_keys=True)

@@ -0,0 +1,108 @@
import json
import h5py
class H5dfParser:
def __init__(self, h5_file):
f = h5py.File(h5_file, mode='r')
self.f = f
model_config_raw = f.attrs.get('model_config')
self.model_config = json.loads(model_config_raw.decode('utf-8'))
self.keras_version = self.get_keras_version()
def get_h5df_file(self):
return self.f
def get_model_config(self):
return self.model_config
def get_keras_version(self):
if 'keras_version' in self.f['model_weights'].attrs:
original_keras_version = self.f['model_weights']\
.attrs['keras_version'].decode('utf8')
return original_keras_version
else:
return '1'
def get_backend_version(self):
if 'backend' in self.f['model_weights'].attrs:
original_backend = self.f['model_weights']\
.attrs['backend'].decode('utf8')
return original_backend
else:
return None
def find_weights_root(self, layer_name):
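        # Descend through nested HDF5 groups starting at layer_name until a
        # group holding the actual weight datasets (or multiple children) is reached.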
if self.keras_version != '1':
layer = self.f['model_weights']
else:
layer = self.f
while True:
layer = layer[layer_name]
if (not hasattr(layer, "keys")) or len(layer.keys()) > 1:
break
layer_keys = list(layer.keys())
if len(layer_keys) < 1:
return None
else:
layer_name = list(layer.keys())[0]
return layer
def get_if_sequential(self):
if self.model_config['class_name'] == 'Sequential':
return True
else:
return False
def join_inbound_nodes(self, layer):
inbound_nodes = []
if 'inbound_nodes' in layer.keys():
if len(layer['inbound_nodes']) > 0:
for inbound in layer['inbound_nodes'][0]:
inbound_nodes.append(inbound[0])
return inbound_nodes
def parse_graph(self, graph_helper):
if self.get_if_sequential():
self.parse_sequential_graph(graph_helper)
else:
self.parse_model_graph(
self.get_model_config()['config']['layers'],
graph_helper)
def parse_sequential_graph(self, graph_helper):
self.joined_layers = []
for layers in self.model_config['config']['layers']:
if layers['class_name'] == 'Model':
self.parse_model_graph(
layers['config']['layers'], graph_helper)
else:
if layers['class_name'] + '_helper' in dir(KerasParser):
tails = graph_helper.get_graph_tail()
if len(tails) != 1:
raise NotImplementedError
else:
graph_helper.node(layers['config']['name'], tails)
graph_helper.set_node_attr(
layers['config']['name'], {
'type': layers['class_name'],
'shape': [],
'attr': layers['config'],
#'node': layers
})
else:
raise NotImplementedError
def parse_model_graph(self, model_layers, graph_helper):
for layer in model_layers:
inbound_nodes = self.join_inbound_nodes(layer)
graph_helper.node(layer['name'], inbound_nodes)
graph_helper.set_node_attr(
layer['config']['name'], {
'type': layer['class_name'],
'shape': [],
'attr': layer['config'],
#'node': layer
})

Просмотреть файл

@ -0,0 +1,245 @@
import sys
sys.path.append("kerneldetection")
from rulelib.rule_reader import RuleReader
from rulelib.rule_splitter import RuleSplitter
from frozenpb_parser import FrozenPbParser
from grapher_tool import Grapher
import json
import os
import pandas as pd
import argparse
import copy
from itertools import groupby
backend_maps = {
"cpu":"tflite_cpu",
"gpu":"tflite_gpu",
"vpu":"vpu"
}
dummy_types = [
'Const',
'Identity',
'Placeholder',
]
op_alias = {
'Relu6': 'relu',
'Relu': 'relu',
'Add': 'add',
'Biasadd': 'add',
'Conv2D': 'conv',
'Reshape': 'reshape',
'FusedBatchNorm': 'bn',
'FusedBatchNormV3': 'bn',
'MatMul': 'fc',
'MaxPool': 'maxpool',
'AvgPool': 'avgpool',
'Mean': 'gap',
'Mul': 'mul',
'DepthwiseConv2dNative': 'dwconv',
'ConcatV2': 'concat',
'Split': 'split',
}
fusion_map = {
'SE': 'mul-avgpool-conv-add-relu-conv-add-add-relu-mul',
'hswish': 'relu-mul-mul-add',
'bn':"bnV3",
'channelshuffle': 'reshape-Transpose-reshape-Pack-StridedSlice-Pack-StridedSlice',
'global-avgpool': 'gap-reshape',
}
def get_input_tensors(node, graph):
input_tensors = []
for inbound in graph.get_node_inbounds(node):
try:
shape = graph.get_node_attr(inbound)['output_shape']
type = graph.get_node_type(node)
if shape and type not in dummy_types:
if graph.get_node_type(inbound) == 'Split':
outbounds = graph.get_node_outbounds(inbound)
shapes = shape
for outbound, shape in zip(outbounds, shapes):
if outbound == node:
input_tensors.append(shape)
else:
input_tensors.append(shape)
except:
pass
return input_tensors
def bb_to_kernel(bb, graph):
types = [graph.get_node_type(node) for node in bb]
#print(types)
types = [t for t in types if t and t not in dummy_types]
for old, new in op_alias.items():
for i in range(len(types)):
types[i] = types[i].replace(old, new)
if types:
type = '-'.join(types)
for block, ops in fusion_map.items():
type = type.replace(ops, block)
kernel = {
'op': type,
}
layer = bb[0]
type = types[0]
attr = graph.get_node_attr(layer)['attr']
shape = graph.get_node_attr(layer)['output_shape']
if type in ['conv', 'dwconv']:
weight_shape = attr['weight_shape']
try:
kernel['ks'] = weight_shape[0:2]
kernel['cin'] = weight_shape[2]
kernel['cout'] = weight_shape[3]
kernel['strides'] = attr['strides']
if type=='dwconv':
kernel['cout']=kernel['cin']
except:
print(bb)
elif type in ['maxpool', 'avgpool']:
kernel['ks'] = attr['ksize']
kernel['cin'] = shape[3]
kernel['cout'] = shape[3]
kernel['strides'] = attr['strides']
elif type == 'fc':
kernel['cin'] = shape[1]
kernel['cout'] = shape[1]
elif type == 'gap':
kernel['cin'] = shape[3]
kernel['cout'] = shape[3]
elif type in ['relu','hswish']:
kernel['cin'] = shape[-1]
kernel['cout'] = shape[-1]
input_tensors = get_input_tensors(layer, graph)
kernel['input_tensors'] = input_tensors
#print(type,input_tensors)
if type not in ['relu','bn', 'fc', 'reshape', 'Pack', 'StridedSlice','split']:
input_shape = input_tensors[0]
kernel['inputh'] = input_shape[1]
kernel['inputw'] = input_shape[2]
elif type in ['fc']:
input_shape = input_tensors[0]
kernel['cin']=input_shape[1]
if type == 'split':
kernel['split_dim'] = attr['split_dim']
kernel['output_tensors'] = shape
return kernel
else:
return None
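# Illustrative example of the kernel dict returned for a fused conv block
# (all values below are hypothetical, not taken from a real model):
# {'op': 'conv-bn-relu', 'ks': [3, 3], 'cin': 32, 'cout': 64,
#  'strides': [1, 2, 2, 1], 'inputh': 56, 'inputw': 56,
#  'input_tensors': [[1, 56, 56, 32]]}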
def merge_split(graph: Grapher):
split_nodes = [node for node in graph.get_graph().keys() if graph.get_node_type(node) == 'Split']
# groupby only merges consecutive items, so sort by the grouping key first
split_nodes.sort(key=lambda name: name.split('/')[0])
for name, group in groupby(split_nodes, lambda name: name.split('/')[0]):
group = list(group)
group.sort(key=lambda name: name.split('/')[1])
split_dim = graph.get_node_attr(group[0])['attr']['split_dim']
inbounds = graph.get_node_inbounds(group[0])
output_shapes = [graph.get_node_attr(node)['output_shape'] for node in group]
# assert
for i, node in enumerate(group):
assert graph.get_node_attr(node)['attr']['idx'] == i
assert graph.get_node_inbounds(node) == inbounds
assert graph.get_node_attr(node)['attr']['split_dim'] == split_dim
graph.fuse(group, 'Split', name, {
'attr': {
'split_dim': split_dim,
},
'output_shape': output_shapes,
}, is_block=False)
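# Note (interpretation of the code above): the parser emits one Split node per
# output branch; merge_split groups those siblings by scope name and fuses them
# back into a single Split node whose output_shape lists each branch's shape.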
def split_model_into_kernels(input_models,hardware,save_dir,rule_dir='data/fusionrules'):
if hardware in backend_maps:
backend=backend_maps[hardware]
else:
raise ValueError('Unsupported hardware')
splitter = RuleSplitter(RuleReader())
kernel_types = {}
print(input_models)
mname=input_models.split('/')[-1].replace(".json","")
input_models=json.load(open(input_models,'r'))
with pd.ExcelWriter(save_dir+'/'+mname+'_result.xlsx', engine='xlsxwriter', mode='w') as writer:
indexes = []
counts = []
kernel_types[backend] = set({})
reader = RuleReader(rule_dir+f'/rule_{backend}.json')
splitter = RuleSplitter(reader)
mdicts={}
for mid in input_models:
model_name=mid
fname=mid.split('_')[0]
model=input_models[model_name]
graph = Grapher(graph=model)
merge_split(graph)
tmp_graph = copy.deepcopy(graph)
result = splitter.split(tmp_graph)
bb_types = {}
kernels = []
for bb in result:
kernel = bb_to_kernel(bb, graph)
if kernel is not None:
type = kernel['op']
bb_types[type] = bb_types.get(type, 0) + 1
kernels.append(kernel)
output = {model_name: kernels}
mdicts[model_name]=kernels
for type, count in bb_types.items():
kernel_types[backend].add(type)
indexes.append((model_name, type))
counts.append(count)
#sys.exit()
#break
index = pd.MultiIndex.from_tuples(indexes, names=['model', 'type'])
df = pd.DataFrame(counts, index=index, columns=['Count'])
df.to_excel(writer, sheet_name=backend)
kernel_types[backend] = list(kernel_types[backend])
filename = os.path.join(save_dir, f'{hardware}_{fname}.json')
os.makedirs(save_dir, exist_ok=True)
with open(filename, 'w', encoding='utf-8') as fp:
json.dump(mdicts, fp,indent=2)
print(json.dumps(kernel_types))
return kernel_types,mdicts
if __name__=="__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--hardware', type=str, default='cpu')
parser.add_argument('-i', '--input_models', type=str, required=True, help='Path to input models. Either json or pb.')
parser.add_argument('-dir', '--save_dir', type=str, default='results', help='Directory where the detected kernels and the per-model summary are saved.')
parser.add_argument('-ruledir', '--rule_dir', type=str, default='data/fusionrules', help='Directory containing the fusion rule json files (rule_<backend>.json).')
#parser.add_argument('-t', '--input_type', type=str, choices=['multi-m','single-m'], default='multi-m', help='input file type: multi-m or single-m')
#parser.add_argument('-backend', '--backend', type=str, choices=['tflite_cpu','tflite_gpu','vpu'], default='tflite_cpu', help='Default preserve the original layer names. Readable will assign new kernel names according to types of the layers.')
args = parser.parse_args()
split_model_into_kernels(args.input_models,args.hardware,args.save_dir,rule_dir=args.rule_dir)
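# Example invocation (script path and model file name are illustrative):
#   python kerneldetection/kernel_detector.py -i data/testmodels/alexnet_0.json --hardware cpu -dir results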

Просмотреть файл

@ -0,0 +1,81 @@
class MatchHelper:
base_type_table = {
'ReLU': [
'Relu',
'Relu6',
'ReLU',
'ReLU6',
],
'BatchNorm': [
'BatchNorm',
'FusedBatchNorm',
'FusedBatchNormV2',
'FusedBatchNormV3',
],
'TwoInputElementWise': [
'BiasAdd',
'Add',
'Mul',
],
'DepthwiseConv2D': [
'DepthwiseConv2dNative',
],
'FC': [
'MatMul',
]
}
@classmethod
def get_base_type(cls, node_type):
for key, value in cls.base_type_table.items():
if node_type in value:
return key
return node_type
@classmethod
def op_type_matcher(cls, node_1, node_2):
def get_ast_by_op(op_name):
for key, value in cls.base_type_table.items():
if op_name in value:
return key
return op_name
if 'type' in node_1 and 'type' in node_2:
if '_tagged' in node_1 or '_tagged' in node_2:
return False
if node_1['type'] == 'dummy' or node_2['type'] == 'dummy':
return True
return get_ast_by_op(node_1['type']) == get_ast_by_op(node_2['type'])
else:
return False
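# Example: op_type_matcher({'type': 'Relu6'}, {'type': 'ReLU'}) returns True because
# both resolve to the 'ReLU' base type; any node carrying '_tagged' never matches.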
@staticmethod
def strip_useless_nodes(graph_helper):
stripped_nodes = ['Const', 'Identity']
graph = graph_helper.get_graph()
removed_node = []
for key, value in graph.items():
if value['attr']['type'] in stripped_nodes:
removed_node.append(key)
for key in removed_node:
del graph[key]
graph_helper.refresh()
@staticmethod
def tag_matched_nodes(grapher, matched_subgraph):
for matched_unit in matched_subgraph:
for node_name in matched_unit.keys():
grapher.add_node_attr(node_name, '_tagged', '')
@staticmethod
def get_untagged_nodes(grapher):
untagged_node = []
for node in grapher.get_graph().keys():
if '_tagged' not in grapher.get_node_attr(node)['attr']:
untagged_node.append(node)
return untagged_node

Просмотреть файл

@ -0,0 +1,33 @@
absl-py==0.11.0
astor==0.8.1
cached-property==1.5.2
decorator==4.4.2
et-xmlfile==1.0.1
gast==0.2.2
google-pasta==0.2.0
grpcio==1.33.2
h5py==3.1.0
importlib-metadata==3.1.0
jdcal==1.4.1
Keras-Applications==1.0.8
Keras-Preprocessing==1.1.2
Markdown==3.3.3
networkx==2.5
numpy==1.19.4
openpyxl==3.0.5
opt-einsum==3.3.0
pandas==1.1.4
protobuf==3.14.0
python-dateutil==2.8.1
pytz==2020.4
six==1.15.0
tensorboard==1.15.0
tensorflow==1.15.0
tensorflow-estimator==1.15.1
termcolor==1.1.0
Werkzeug==1.0.1
wrapt==1.12.1
XlsxWriter==1.3.7
zipp==3.4.0
onnx==1.9.0

Просмотреть файл

Просмотреть файл

@ -0,0 +1,69 @@
import json
from match_helper import MatchHelper
from fusion_lib.utils import get_fusion_unit
from grapher_tool import Grapher
class RuleReader:
op_map = {
'relu': 'ReLU',
'reshape': 'Reshape',
'conv': 'Conv2D',
'dwconv': 'DepthwiseConv2D',
'dense': 'FC',
'add': 'TwoInputElementWise',
'bn': 'BatchNorm',
}
rules_default = {
'MON': 0,
'RT': True,
'FN': True,
}
multiop_blocks = ['se', 'hswish', 'channelshuffle','global-avgpool']
def __init__(self, rule_file=None):
self.rules = {}
if rule_file:
with open(rule_file, 'r') as fp:
self.rules = json.load(fp)
self._extract_fusible()
self._parse_multiop_block()
def is_fusible(self, node_type, outnode_type):
node_base_type = MatchHelper.get_base_type(node_type)
outnode_base_type = MatchHelper.get_base_type(outnode_type)
return (node_base_type, outnode_base_type) in self.fusible
def query_rule(self, rule):
if rule not in self.rules or self.rules[rule]['obey'] is None:
return self.rules_default[rule]
else:
return self.rules[rule]['obey']
def _extract_fusible(self):
self.fusible = []
self.fusion_units = {}
for name, rule in self.rules.items():
if rule['obey'] and name.startswith('BF'):
ops = name.split('_')[1:]
if len(ops) == 2:
self.fusible.append((self.op_map.get(ops[0], ops[0]), self.op_map.get(ops[1], ops[1])))
elif len(ops) > 2:
fusion_unit = {}
get_name = lambda i: f'{ops[i]}_{i}'
for i in range(0, len(ops)):
fusion_unit[get_name(i)] = {
'attr': {
'type': self.op_map.get(ops[i], ops[i]),
'attr': {},
},
'inbounds': [get_name(i - 1)] if i > 0 else [],
'outbounds': [get_name(i + 1)] if i < len(ops) - 1 else [],
}
self.fusion_units['_'.join(ops)] = Grapher(graph=fusion_unit)
def _parse_multiop_block(self):
for block in self.multiop_blocks:
self.fusion_units[block] = get_fusion_unit(block)
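# Worked example (following the rule file format above): a rule named
# 'BF_conv_bn_relu' with obey=true produces fusion_units['conv_bn_relu'],
# a three-node chain conv_0 (Conv2D) -> bn_1 (BatchNorm) -> relu_2 (ReLU)
# that is later used for subgraph matching.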

Просмотреть файл

@ -0,0 +1,69 @@
from grapher_tool import Grapher
import networkx as nx
from .rule_reader import RuleReader
from utils.fusio_aware_graph import FusionAwareGraph
from match_helper import MatchHelper
class RuleSplitter:
def __init__(self, rule_reader: RuleReader):
self.rule_reader = rule_reader
def fuse_multiop_blocks(self, graph: Grapher):
for type, block in self.rule_reader.fusion_units.items():
subgraphs = graph.find_subgraphs(block, MatchHelper.op_type_matcher)
MatchHelper.tag_matched_nodes(graph, subgraphs)
for subgraph in subgraphs:
graph.fuse(subgraph.keys(), type)
def split(self, graph: Grapher):
"""
Apply rules to graph
"""
self.preprocess(graph)
fag = FusionAwareGraph(graph)
i = -1
while i < len(fag) - 1:
i += 1
if fag.is_fused(i):
continue
fag.mark_ready(i)
if not fag.get_outbounds(i):
continue
# MON
mon = self.rule_reader.query_rule('MON')
if mon == 0: # can't fuse if having multiple out node
if len(fag.get_outbounds(i)) > 1:
continue
# FN: TODO: which one is the first node
fused = False
for j in fag.get_outbounds(i):
if fag.is_fused(j):
continue
outnode_type = fag.get_type(j)
node_type = fag.get_type(i)
if not self.rule_reader.is_fusible(node_type, outnode_type):
continue
# RT
if self.rule_reader.query_rule('RT'):
if not fag.is_ready(j):
continue
# fuse node
if mon == 0:
fag.fuse(i, j)
else:
fag.fuse(i, j, True)
fag.mark_ready(j)
fused = True
if mon == 1: # only fused to first outnode
break
if fused:
i -= 1
return fag.get_basicblocks()
def preprocess(self, graph: Grapher):
self.fuse_multiop_blocks(graph)

Просмотреть файл

@ -0,0 +1,716 @@
{
"RBC": {
"latency": {},
"obey": null
},
"CBC": {
"latency": {},
"obey": null
},
"BF_bn_relu": {
"obey": true
},
"BF_conv_bn": {
"obey": true
},
"BF_dwconv_bn": {
"obey": true
},
"BF_se_relu": {
"latency": {
"se": "25.8403 +- 4.0",
"relu": "2.70696 +- 0.0",
"block": "25.6244 +- 1.0",
"ops": "28.547259999999998 +- 4.0"
},
"obey": true
},
"BF_conv_se": {
"latency": {
"conv": "6954.15 +- 498.0",
"se": "25.7455 +- 1.0",
"block": "7055.32 +- 1061.0",
"ops": "6979.8955 +- 498.00100401505216"
},
"obey": false
},
"BF_conv_hswish": {
"latency": {
"conv": "7102.15 +- 1125.0",
"hswish": "7.28969 +- 1.0",
"block": "7016.94 +- 273.0",
"ops": "7109.439689999999 +- 1125.0004444443566"
},
"obey": true
},
"BF_conv_bn_relu": {
"obey": true
},
"BF_dwconv_bn_relu": {
"obey": true
},
"BF_pooling_pooling": {
"latency": {
"pooling_1": "23.8169 +- 2.0",
"pooling_2": "6.09348 +- 1.0",
"block": "30.3321 +- 3.0",
"ops": "29.91038 +- 2.23606797749979"
},
"obey": false
},
"BF_pooling_convtrans": {
"latency": {
"pooling": "23.5467 +- 1.0",
"convtrans": "233.437 +- 11.0",
"block": "258.664 +- 13.0",
"ops": "256.9837 +- 11.045361017187261"
},
"obey": false
},
"BF_pooling_concat": {
"latency": {
"pooling": "23.6651 +- 3.0",
"concat": "4.31221 +- 1.0",
"block": "27.7833 +- 1.0",
"ops": "27.97731 +- 3.1622776601683795"
},
"obey": false
},
"BF_pooling_conv": {
"latency": {
"pooling": "24.0433 +- 3.0",
"conv": "234.217 +- 8.0",
"block": "259.306 +- 10.0",
"ops": "258.26030000000003 +- 8.54400374531753"
},
"obey": false
},
"BF_pooling_add": {
"latency": {
"pooling": "23.7197 +- 3.0",
"add": "1.08937 +- 0.0",
"block": "25.1265 +- 2.0",
"ops": "24.80907 +- 3.0"
},
"obey": false
},
"BF_pooling_relu": {
"latency": {
"pooling": "23.6031 +- 2.0",
"relu": "0.813832 +- 0.0",
"block": "24.4819 +- 3.0",
"ops": "24.416932000000003 +- 2.0"
},
"obey": false
},
"BF_pooling_dwconv": {
"latency": {
"pooling": "24.0672 +- 3.0",
"dwconv": "12.2742 +- 1.0",
"block": "36.1315 +- 3.0",
"ops": "36.3414 +- 3.1622776601683795"
},
"obey": false
},
"BF_pooling_reshape": {
"latency": {
"pooling": "24.0557 +- 2.0",
"reshape": "0.546111 +- 0.0",
"block": "24.2792 +- 3.0",
"ops": "24.601811 +- 2.0"
},
"obey": true
},
"BF_dense_dense": {
"latency": {
"dense_1": "63.0928 +- 3.0",
"dense_2": "63.1484 +- 3.0",
"block": "126.057 +- 4.0",
"ops": "126.24119999999999 +- 4.242640687119285"
},
"obey": false
},
"BF_dense_concat": {
"latency": {
"dense": "63.7282 +- 3.0",
"concat": "0.726432 +- 0.0",
"block": "63.88 +- 3.0",
"ops": "64.454632 +- 3.0"
},
"obey": true
},
"BF_dense_add": {
"latency": {
"dense": "63.747 +- 3.0",
"add": "0.395219 +- 0.0",
"block": "63.1243 +- 3.0",
"ops": "64.142219 +- 3.0"
},
"obey": true
},
"BF_dense_relu": {
"latency": {
"dense": "63.0826 +- 3.0",
"relu": "0.29903 +- 0.0",
"block": "63.1227 +- 3.0",
"ops": "63.38163 +- 3.0"
},
"obey": true
},
"BF_dense_reshape": {
"latency": {
"dense": "63.8388 +- 3.0",
"reshape": "0.252402 +- 0.0",
"block": "61.1986 +- 2.0",
"ops": "64.091202 +- 3.0"
},
"obey": false
},
"BF_convtrans_pooling": {
"latency": {
"convtrans": "365.728 +- 22.0",
"pooling": "23.6671 +- 2.0",
"block": "390.897 +- 23.0",
"ops": "389.3951 +- 22.090722034374522"
},
"obey": false
},
"BF_convtrans_convtrans": {
"latency": {
"convtrans_1": "364.971 +- 43.0",
"convtrans_2": "367.671 +- 11.0",
"block": "739.779 +- 18.0",
"ops": "732.642 +- 44.384682042344295"
},
"obey": false
},
"BF_convtrans_concat": {
"latency": {
"convtrans": "367.681 +- 12.0",
"concat": "14.6945 +- 1.0",
"block": "386.843 +- 11.0",
"ops": "382.3755 +- 12.041594578792296"
},
"obey": false
},
"BF_convtrans_conv": {
"latency": {
"convtrans": "367.905 +- 17.0",
"conv": "7055.8 +- 74.0",
"block": "7689.96 +- 3269.0",
"ops": "7423.705 +- 75.92759709091287"
},
"obey": false
},
"BF_convtrans_add": {
"latency": {
"convtrans": "369.521 +- 12.0",
"add": "3.76198 +- 0.0",
"block": "375.818 +- 11.0",
"ops": "373.28298 +- 12.0"
},
"obey": false
},
"BF_convtrans_relu": {
"latency": {
"convtrans": "368.784 +- 11.0",
"relu": "2.64562 +- 0.0",
"block": "370.992 +- 24.0",
"ops": "371.42962 +- 11.0"
},
"obey": false
},
"BF_convtrans_dwconv": {
"latency": {
"convtrans": "368.08 +- 13.0",
"dwconv": "46.1423 +- 2.0",
"block": "414.965 +- 13.0",
"ops": "414.22229999999996 +- 13.152946437965905"
},
"obey": false
},
"BF_convtrans_reshape": {
"latency": {
"convtrans": "366.354 +- 33.0",
"reshape": "4.00439 +- 2.0",
"block": "372.361 +- 12.0",
"ops": "370.35839 +- 33.06055050963308"
},
"obey": false
},
"BF_concat_pooling": {
"latency": {
"concat": "14.8744 +- 1.0",
"pooling": "36.0377 +- 2.0",
"block": "50.8331 +- 2.0",
"ops": "50.9121 +- 2.23606797749979"
},
"obey": false
},
"BF_concat_dense": {
"latency": {
"concat": "0.707233 +- 0.0",
"dense": "288.586 +- 7.0",
"block": "284.009 +- 6.0",
"ops": "289.293233 +- 7.0"
},
"obey": true
},
"BF_concat_convtrans": {
"latency": {
"concat": "14.705 +- 1.0",
"convtrans": "2285.62 +- 29.0",
"block": "2293.91 +- 97.0",
"ops": "2300.325 +- 29.017236257093817"
},
"obey": false
},
"BF_concat_concat": {
"latency": {
"concat_1": "14.601 +- 1.0",
"concat_2": "22.8767 +- 1.0",
"block": "37.6837 +- 2.0",
"ops": "37.4777 +- 1.4142135623730951"
},
"obey": false
},
"BF_concat_conv": {
"latency": {
"concat": "14.6481 +- 1.0",
"conv": "4099.34 +- 489.0",
"block": "4111.11 +- 61.0",
"ops": "4113.9881000000005 +- 489.00102249381854"
},
"obey": false
},
"BF_concat_add": {
"latency": {
"concat": "14.755 +- 1.0",
"add": "8.23119 +- 0.0",
"block": "23.5586 +- 1.0",
"ops": "22.98619 +- 1.0"
},
"obey": false
},
"BF_concat_relu": {
"latency": {
"concat": "14.7214 +- 1.0",
"relu": "4.89513 +- 0.0",
"block": "20.003 +- 1.0",
"ops": "19.616529999999997 +- 1.0"
},
"obey": false
},
"BF_concat_dwconv": {
"latency": {
"concat": "14.7328 +- 1.0",
"dwconv": "75.1136 +- 2.0",
"block": "92.6577 +- 3.0",
"ops": "89.8464 +- 2.23606797749979"
},
"obey": false
},
"BF_concat_reshape": {
"latency": {
"concat": "14.9525 +- 1.0",
"reshape": "3.84862 +- 0.0",
"block": "19.3578 +- 1.0",
"ops": "18.80112 +- 1.0"
},
"obey": false
},
"BF_conv_pooling": {
"latency": {
"conv": "7032.12 +- 40.0",
"pooling": "23.4528 +- 2.0",
"block": "6967.84 +- 528.0",
"ops": "7055.5728 +- 40.049968789001575"
},
"obey": true
},
"BF_conv_convtrans": {
"latency": {
"conv": "7159.94 +- 1234.0",
"convtrans": "367.24 +- 15.0",
"block": "7362.82 +- 529.0",
"ops": "7527.179999999999 +- 1234.0911635693694"
},
"obey": false
},
"BF_conv_concat": {
"latency": {
"conv": "7017.15 +- 135.0",
"concat": "15.1527 +- 1.0",
"block": "7096.96 +- 41.0",
"ops": "7032.302699999999 +- 135.00370365289984"
},
"obey": false
},
"BF_conv_conv": {
"latency": {
"conv_1": "7044.99 +- 134.0",
"conv_2": "6972.37 +- 728.0",
"block": "14111.7 +- 219.0",
"ops": "14017.36 +- 740.2296940815061"
},
"obey": false
},
"BF_conv_add": {
"latency": {
"conv": "7040.01 +- 109.0",
"add": "3.75556 +- 0.0",
"block": "7053.88 +- 425.0",
"ops": "7043.76556 +- 109.0"
},
"obey": false
},
"BF_conv_relu": {
"latency": {
"conv": "7080.23 +- 49.0",
"relu": "2.58677 +- 0.0",
"block": "6997.21 +- 658.0",
"ops": "7082.816769999999 +- 49.0"
},
"obey": true
},
"BF_conv_dwconv": {
"latency": {
"conv": "7104.57 +- 315.0",
"dwconv": "46.2128 +- 3.0",
"block": "7084.94 +- 114.0",
"ops": "7150.7828 +- 315.0142853903613"
},
"obey": false
},
"BF_conv_reshape": {
"latency": {
"conv": "6950.35 +- 819.0",
"reshape": "1.90676 +- 0.0",
"block": "7001.77 +- 506.0",
"ops": "6952.25676 +- 819.0"
},
"obey": false
},
"BF_add_pooling": {
"latency": {
"add": "3.91771 +- 0.0",
"pooling": "23.9784 +- 1.0",
"block": "27.4255 +- 2.0",
"ops": "27.89611 +- 1.0"
},
"obey": false
},
"BF_add_dense": {
"latency": {
"add": "0.393166 +- 0.0",
"dense": "62.4733 +- 3.0",
"block": "63.6806 +- 3.0",
"ops": "62.866466 +- 3.0"
},
"obey": false
},
"BF_add_convtrans": {
"latency": {
"add": "4.04221 +- 0.0",
"convtrans": "368.715 +- 23.0",
"block": "375.488 +- 11.0",
"ops": "372.75721 +- 23.0"
},
"obey": false
},
"BF_add_concat": {
"latency": {
"add": "3.94385 +- 0.0",
"concat": "15.0988 +- 1.0",
"block": "19.3268 +- 1.0",
"ops": "19.042650000000002 +- 1.0"
},
"obey": false
},
"BF_add_conv": {
"latency": {
"add": "3.84333 +- 0.0",
"conv": "7029.83 +- 550.0",
"block": "7065.58 +- 809.0",
"ops": "7033.67333 +- 550.0"
},
"obey": false
},
"BF_add_add": {
"latency": {
"add_1": "3.77704 +- 0.0",
"add_2": "3.77403 +- 0.0",
"block": "8.49954 +- 1.0",
"ops": "7.55107 +- 0.0"
},
"obey": false
},
"BF_add_relu": {
"latency": {
"add": "3.76701 +- 0.0",
"relu": "2.80635 +- 1.0",
"block": "3.72089 +- 0.0",
"ops": "6.57336 +- 1.0"
},
"obey": true
},
"BF_add_dwconv": {
"latency": {
"add": "3.98569 +- 0.0",
"dwconv": "45.8759 +- 2.0",
"block": "50.57 +- 2.0",
"ops": "49.86159 +- 2.0"
},
"obey": false
},
"BF_add_reshape": {
"latency": {
"add": "3.77753 +- 0.0",
"reshape": "1.88071 +- 0.0",
"block": "6.155 +- 1.0",
"ops": "5.65824 +- 0.0"
},
"obey": false
},
"BF_relu_pooling": {
"latency": {
"relu": "3.27098 +- 1.0",
"pooling": "23.6779 +- 2.0",
"block": "26.2079 +- 1.0",
"ops": "26.948880000000003 +- 2.23606797749979"
},
"obey": false
},
"BF_relu_dense": {
"latency": {
"relu": "0.295931 +- 0.0",
"dense": "63.3644 +- 3.0",
"block": "62.6156 +- 2.0",
"ops": "63.660331000000006 +- 3.0"
},
"obey": true
},
"BF_relu_convtrans": {
"latency": {
"relu": "2.56089 +- 0.0",
"convtrans": "368.91 +- 21.0",
"block": "373.776 +- 12.0",
"ops": "371.47089 +- 21.0"
},
"obey": false
},
"BF_relu_concat": {
"latency": {
"relu": "3.0442 +- 1.0",
"concat": "14.5228 +- 1.0",
"block": "17.5088 +- 1.0",
"ops": "17.567 +- 1.4142135623730951"
},
"obey": false
},
"BF_relu_conv": {
"latency": {
"relu": "2.54642 +- 0.0",
"conv": "7029.89 +- 621.0",
"block": "7002.7 +- 891.0",
"ops": "7032.43642 +- 621.0"
},
"obey": false
},
"BF_relu_add": {
"latency": {
"relu": "3.46085 +- 1.0",
"add": "3.84525 +- 0.0",
"block": "6.27844 +- 1.0",
"ops": "7.306100000000001 +- 1.0"
},
"obey": false
},
"BF_relu_relu": {
"latency": {
"relu_1": "4.02953 +- 2.0",
"relu_2": "2.61024 +- 0.0",
"block": "5.15875 +- 0.0",
"ops": "6.63977 +- 2.0"
},
"obey": true
},
"BF_relu_dwconv": {
"latency": {
"relu": "2.56401 +- 2.0",
"dwconv": "46.2256 +- 2.0",
"block": "49.4145 +- 2.0",
"ops": "48.78961 +- 2.8284271247461903"
},
"obey": false
},
"BF_relu_reshape": {
"latency": {
"relu": "2.6582 +- 1.0",
"reshape": "1.93132 +- 0.0",
"block": "4.50433 +- 1.0",
"ops": "4.58952 +- 1.0"
},
"obey": false
},
"BF_dwconv_pooling": {
"latency": {
"dwconv": "48.1859 +- 74.0",
"pooling": "23.5845 +- 2.0",
"block": "70.8907 +- 3.0",
"ops": "71.7704 +- 74.02702209328699"
},
"obey": false
},
"BF_dwconv_convtrans": {
"latency": {
"dwconv": "45.9235 +- 3.0",
"convtrans": "334.602 +- 81.0",
"block": "431.082 +- 63.0",
"ops": "380.52549999999997 +- 81.05553651663777"
},
"obey": false
},
"BF_dwconv_concat": {
"latency": {
"dwconv": "45.8703 +- 3.0",
"concat": "14.7931 +- 1.0",
"block": "61.2615 +- 3.0",
"ops": "60.6634 +- 3.1622776601683795"
},
"obey": false
},
"BF_dwconv_conv": {
"latency": {
"dwconv": "46.0654 +- 3.0",
"conv": "7009.06 +- 899.0",
"block": "7144.07 +- 1303.0",
"ops": "7055.125400000001 +- 899.0050055478"
},
"obey": false
},
"BF_dwconv_add": {
"latency": {
"dwconv": "46.4447 +- 3.0",
"add": "3.81886 +- 1.0",
"block": "50.0171 +- 4.0",
"ops": "50.26356 +- 3.1622776601683795"
},
"obey": false
},
"BF_dwconv_relu": {
"latency": {
"dwconv": "46.6762 +- 3.0",
"relu": "3.28271 +- 1.0",
"block": "46.9826 +- 4.0",
"ops": "49.95891 +- 3.1622776601683795"
},
"obey": true
},
"MON": {
"latency": {},
"obey": null
},
"RT": {
"obey": false
},
"BF_dwconv_dwconv": {
"latency": {
"dwconv_1": "46.6927 +- 2.0",
"dwconv_2": "46.324 +- 2.0",
"block": "94.1014 +- 5.0",
"ops": "93.0167 +- 2.8284271247461903"
},
"obey": false
},
"BF_dwconv_reshape": {
"latency": {
"dwconv": "47.0474 +- 3.0",
"reshape": "1.96371 +- 0.0",
"block": "50.2357 +- 10.0",
"ops": "49.01111 +- 3.0"
},
"obey": false
},
"BF_reshape_pooling": {
"latency": {
"reshape": "1.96997 +- 0.0",
"pooling": "19.5529 +- 2.0",
"block": "21.3999 +- 1.0",
"ops": "21.52287 +- 2.0"
},
"obey": false
},
"BF_reshape_dense": {
"latency": {
"reshape": "0.25092 +- 0.0",
"dense": "194.359 +- 94.0",
"block": "186.171 +- 106.0",
"ops": "194.60992000000002 +- 94.0"
},
"obey": false
},
"BF_reshape_convtrans": {
"latency": {
"reshape": "2.23773 +- 1.0",
"convtrans": "442.038 +- 17.0",
"block": "442.948 +- 11.0",
"ops": "444.27573 +- 17.029386365926403"
},
"obey": true
},
"BF_reshape_concat": {
"latency": {
"reshape": "1.88479 +- 0.0",
"concat": "12.2494 +- 1.0",
"block": "15.1344 +- 1.0",
"ops": "14.13419 +- 1.0"
},
"obey": false
},
"BF_reshape_conv": {
"latency": {
"reshape": "1.87476 +- 0.0",
"conv": "2053.83 +- 58.0",
"block": "2045.64 +- 143.0",
"ops": "2055.70476 +- 58.0"
},
"obey": true
},
"BF_reshape_add": {
"latency": {
"reshape": "1.93808 +- 0.0",
"add": "3.89689 +- 0.0",
"block": "5.85239 +- 0.0",
"ops": "5.83497 +- 0.0"
},
"obey": false
},
"BF_reshape_relu": {
"latency": {
"reshape": "4.05953 +- 2.0",
"relu": "3.52503 +- 1.0",
"block": "4.44828 +- 0.0",
"ops": "7.58456 +- 2.23606797749979"
},
"obey": true
},
"BF_reshape_dwconv": {
"latency": {
"reshape": "3.03105 +- 1.0",
"dwconv": "66.6583 +- 2.0",
"block": "68.8001 +- 2.0",
"ops": "69.68934999999999 +- 2.23606797749979"
},
"obey": false
},
"BF_reshape_reshape": {
"latency": {
"reshape_1": "1.93799 +- 0.0",
"reshape_2": "3.64003 +- 2.0",
"block": "3.35055 +- 2.0",
"ops": "5.57802 +- 2.0"
},
"obey": true
}
}

Просмотреть файл

@ -0,0 +1,716 @@
{
"RBC": {
"latency": {},
"obey": null
},
"CBC": {
"latency": {},
"obey": null
},
"BF_bn_relu": {
"obey": true
},
"BF_conv_bn": {
"obey": true
},
"BF_dwconv_bn": {
"obey": true
},
"BF_se_relu": {
"latency": {
"se": "0.20546791 +- 0.0010381392776949536",
"relu": "0.00500238 +- 0.00014314",
"block": "0.20545152 +- 0.0013197414924435015",
"ops": "0.21047029 +- 0.0010479609818561948"
},
"obey": true
},
"BF_conv_se": {
"latency": {
"conv": "0.043619 +- 0.000513079",
"se": "0.20576057 +- 0.001196602298561222",
"block": "0.25050117 +- 0.0011426133287074853",
"ops": "0.24937957 +- 0.0013019627956139914"
},
"obey": false
},
"BF_conv_hswish": {
"latency": {
"conv": "0.0436766 +- 0.000507171",
"hswish": "0.00499965 +- 0.000132542",
"block": "0.0442943 +- 0.00049569",
"ops": "0.048676250000000004 +- 0.0005242039727100512"
},
"obey": true
},
"BF_conv_bn_relu": {
"obey": true
},
"BF_dwconv_bn_relu": {
"obey": true
},
"BF_pooling_pooling": {
"latency": {
"pooling_1": "0.00517836 +- 0.000403584",
"pooling_2": "0.00493464 +- 0.000294832",
"block": "0.010243559999999999 +- 0.0005576737501667081",
"ops": "0.010113 +- 0.00049980591561125"
},
"obey": false
},
"BF_pooling_convtrans": {
"latency": {
"pooling": "0.00510839 +- 0.000360885",
"convtrans": "0.0380542 +- 0.000474613",
"block": "0.043171709999999995 +- 0.0005396229393057712",
"ops": "0.04316259 +- 0.0005962344194979019"
},
"obey": false
},
"BF_pooling_concat": {
"latency": {
"pooling": "0.00508688 +- 0.000309648",
"concat": "0.00496677 +- 0.000206225",
"block": "0.01011169 +- 0.0003852795290863505",
"ops": "0.01005365 +- 0.0003720357973757364"
},
"obey": false
},
"BF_pooling_conv": {
"latency": {
"pooling": "0.00507052 +- 0.000289717",
"conv": "0.0348806 +- 0.000393985",
"block": "0.03989553 +- 0.0007281222099544829",
"ops": "0.03995112 +- 0.0004890399986851792"
},
"obey": false
},
"BF_pooling_add": {
"latency": {
"pooling": "0.00508284 +- 0.0003208",
"add": "0.00349847 +- 0.000527459",
"block": "0.00599754 +- 0.000192551",
"ops": "0.00858131 +- 0.0006173537370754307"
},
"obey": true
},
"BF_pooling_relu": {
"latency": {
"pooling": "0.00510526 +- 0.000338044",
"relu": "0.00313837 +- 0.000388286",
"block": "0.00516427 +- 0.000393513",
"ops": "0.00824363 +- 0.0005148201275513614"
},
"obey": true
},
"BF_pooling_dwconv": {
"latency": {
"pooling": "0.00505895 +- 0.000294403",
"dwconv": "0.00613411 +- 0.00037201",
"block": "0.01139611 +- 0.0006041011822286065",
"ops": "0.011193060000000001 +- 0.0004744097032196959"
},
"obey": false
},
"BF_pooling_reshape": {
"latency": {
"pooling": "0.00510159 +- 0.00032655",
"reshape": "0.0160079 +- 0.000176716",
"block": "0.02115855 +- 0.0004302313569476311",
"ops": "0.021109489999999998 +- 0.0003712996729812726"
},
"obey": false
},
"BF_dense_dense": {
"latency": {
"dense_1": "0.0248513 +- 0.000906386",
"dense_2": "0.0246249 +- 0.000890953",
"block": "0.0502878 +- 0.0011456834597780487",
"ops": "0.0494762 +- 0.0012709574458670912"
},
"obey": false
},
"BF_dense_concat": {
"latency": {
"dense": "0.0245874 +- 0.000837659",
"concat": "0.0804531 +- 0.00438398",
"block": "0.1095315 +- 0.0035880003786188486",
"ops": "0.1050405 +- 0.004463289508947521"
},
"obey": false
},
"BF_dense_add": {
"latency": {
"dense": "0.0245446 +- 0.000906167",
"add": "0.00515361 +- 0.000385467",
"block": "0.0254636 +- 0.00102568",
"ops": "0.02969821 +- 0.0009847453680916708"
},
"obey": true
},
"BF_dense_relu": {
"latency": {
"dense": "0.024372 +- 0.000984813",
"relu": "0.00503688 +- 0.000239273",
"block": "0.024237 +- 0.000854059",
"ops": "0.029408880000000002 +- 0.0010134634741804955"
},
"obey": true
},
"BF_dense_reshape": {
"latency": {
"dense": "0.0245385 +- 0.000853008",
"reshape": "0.0249279 +- 0.00030802",
"block": "0.0497557 +- 0.000854099739939663",
"ops": "0.0494664 +- 0.0009069172886564684"
},
"obey": false
},
"BF_convtrans_pooling": {
"latency": {
"convtrans": "0.123148 +- 0.000602757",
"pooling": "0.00515623 +- 0.000383871",
"block": "0.12923091 +- 0.0006884228276321174",
"ops": "0.12830423 +- 0.0007146138437575919"
},
"obey": false
},
"BF_convtrans_convtrans": {
"latency": {
"convtrans_1": "0.123144 +- 0.00059031",
"convtrans_2": "0.123214 +- 0.000633372",
"block": "0.24737399999999998 +- 0.0009688145368918655",
"ops": "0.24635800000000002 +- 0.0008658094400524864"
},
"obey": false
},
"BF_convtrans_concat": {
"latency": {
"convtrans": "0.123182 +- 0.000615809",
"concat": "0.00550231 +- 0.000523855",
"block": "0.12930803000000002 +- 0.0007706232627944734",
"ops": "0.12868431 +- 0.0008084830149768145"
},
"obey": false
},
"BF_convtrans_conv": {
"latency": {
"convtrans": "0.123158 +- 0.000627148",
"conv": "0.0436494 +- 0.000518309",
"block": "0.1673439 +- 0.0007921346331034896",
"ops": "0.1668074 +- 0.0008136085258802295"
},
"obey": false
},
"BF_convtrans_add": {
"latency": {
"convtrans": "0.123173 +- 0.000633334",
"add": "0.0049953 +- 0.000160723",
"block": "0.123828 +- 0.000712515",
"ops": "0.1281683 +- 0.0006534093956203876"
},
"obey": true
},
"BF_convtrans_relu": {
"latency": {
"convtrans": "0.123229 +- 0.000623858",
"relu": "0.00500887 +- 0.00013384",
"block": "0.123235 +- 0.000622105",
"ops": "0.12823787 +- 0.0006380532499439212"
},
"obey": true
},
"BF_convtrans_dwconv": {
"latency": {
"convtrans": "0.12316 +- 0.000624764",
"dwconv": "0.0108685 +- 0.000383578",
"block": "0.1343174 +- 0.0007225331664505098",
"ops": "0.1340285 +- 0.0007331180926562923"
},
"obey": false
},
"BF_convtrans_reshape": {
"latency": {
"convtrans": "0.123187 +- 0.00060614",
"reshape": "0.00800019 +- 0.000169824",
"block": "0.13139309 +- 0.0007902118087506665",
"ops": "0.13118719 +- 0.0006294806514707184"
},
"obey": false
},
"BF_concat_pooling": {
"latency": {
"concat": "0.00544406 +- 0.000524792",
"pooling": "0.00878036 +- 0.000450214",
"block": "0.014310719999999999 +- 0.0006994305464261623",
"ops": "0.014224419999999998 +- 0.0006914472424270705"
},
"obey": false
},
"BF_concat_dense": {
"latency": {
"concat": "0.0843655 +- 0.00202977",
"dense": "0.0505264 +- 0.00201611",
"block": "0.1435292 +- 0.002650163298232771",
"ops": "0.1348919 +- 0.0028608854896692393"
},
"obey": false
},
"BF_concat_convtrans": {
"latency": {
"concat": "0.00541139 +- 0.000519419",
"convtrans": "0.404577 +- 0.00578874",
"block": "0.40915066 +- 0.00515087769447703",
"ops": "0.40998839000000004 +- 0.0058119968070501375"
},
"obey": false
},
"BF_concat_concat": {
"latency": {
"concat_1": "0.00543931 +- 0.000511373",
"concat_2": "0.00801947 +- 0.000324597",
"block": "0.014109899999999998 +- 0.0007889001145658429",
"ops": "0.01345878 +- 0.0006056942772868174"
},
"obey": false
},
"BF_concat_conv": {
"latency": {
"concat": "0.00541134 +- 0.000510702",
"conv": "0.145109 +- 0.00070727",
"block": "0.15137509 +- 0.000962184447409643",
"ops": "0.15052033999999997 +- 0.0008723802987825893"
},
"obey": false
},
"BF_concat_add": {
"latency": {
"concat": "0.00541042 +- 0.000523619",
"add": "0.00701791 +- 0.00019829",
"block": "0.00700348 +- 0.000173022",
"ops": "0.01242833 +- 0.0005599069398221458"
},
"obey": true
},
"BF_concat_relu": {
"latency": {
"concat": "0.00579558 +- 0.000437018",
"relu": "0.00700077 +- 0.0001365",
"block": "0.00588983 +- 0.000336043",
"ops": "0.01279635 +- 0.00045783947222143267"
},
"obey": true
},
"BF_concat_dwconv": {
"latency": {
"concat": "0.00557311 +- 0.000515421",
"dwconv": "0.0164397 +- 0.000862599",
"block": "0.02252003 +- 0.0011098063311447632",
"ops": "0.02201281 +- 0.0010048561300216067"
},
"obey": false
},
"BF_concat_reshape": {
"latency": {
"concat": "0.00544831 +- 0.000521227",
"reshape": "0.012018 +- 0.000199775",
"block": "0.01774575 +- 0.0005549274290031084",
"ops": "0.01746631 +- 0.0005582003548494035"
},
"obey": false
},
"BF_conv_pooling": {
"latency": {
"conv": "0.0436832 +- 0.000521865",
"pooling": "0.00511251 +- 0.000340626",
"block": "0.049763230000000006 +- 0.0005371574666426968",
"ops": "0.04879571 +- 0.0006231927070345096"
},
"obey": false
},
"BF_conv_convtrans": {
"latency": {
"conv": "0.0436621 +- 0.000515679",
"convtrans": "0.123132 +- 0.000628296",
"block": "0.167333 +- 0.0008554384902966432",
"ops": "0.1667941 +- 0.00081282267110176"
},
"obey": false
},
"BF_conv_concat": {
"latency": {
"conv": "0.0436326 +- 0.000507907",
"concat": "0.00540178 +- 0.000511658",
"block": "0.04982702 +- 0.0005887968798235941",
"ops": "0.04903438 +- 0.0007209462043821301"
},
"obey": false
},
"BF_conv_conv": {
"latency": {
"conv_1": "0.0437074 +- 0.000503405",
"conv_2": "0.0436574 +- 0.000516884",
"block": "0.0878567 +- 0.0007299562448174547",
"ops": "0.08736479999999999 +- 0.0007215162253761172"
},
"obey": false
},
"BF_conv_add": {
"latency": {
"conv": "0.0436893 +- 0.000528903",
"add": "0.00501416 +- 0.000136391",
"block": "0.0453509 +- 0.000496553",
"ops": "0.048703460000000004 +- 0.0005462059028333545"
},
"obey": true
},
"BF_conv_relu": {
"latency": {
"conv": "0.0436254 +- 0.00053131",
"relu": "0.00499777 +- 0.000132624",
"block": "0.0439324 +- 0.00044468",
"ops": "0.04862317 +- 0.0005476124920744596"
},
"obey": true
},
"BF_conv_dwconv": {
"latency": {
"conv": "0.0436282 +- 0.000528856",
"dwconv": "0.0108807 +- 0.000366771",
"block": "0.0546827 +- 0.0005934267926349467",
"ops": "0.0545089 +- 0.0006435912019108093"
},
"obey": false
},
"BF_conv_reshape": {
"latency": {
"conv": "0.0436532 +- 0.000519697",
"reshape": "0.00800178 +- 0.000167079",
"block": "0.05173697 +- 0.0006043371006598883",
"ops": "0.05165498 +- 0.0005458940960021458"
},
"obey": false
},
"BF_add_pooling": {
"latency": {
"add": "0.00500961 +- 0.00014225",
"pooling": "0.00508343 +- 0.000293978",
"block": "0.010272900000000001 +- 0.00048713193367300406",
"ops": "0.010093040000000001 +- 0.00032658555844372546"
},
"obey": false
},
"BF_add_dense": {
"latency": {
"add": "0.00507172 +- 0.000300373",
"dense": "0.0244215 +- 0.000864502",
"block": "0.029971010000000003 +- 0.0009793448909658946",
"ops": "0.02949322 +- 0.0009151981463776026"
},
"obey": false
},
"BF_add_convtrans": {
"latency": {
"add": "0.00500467 +- 0.000147258",
"convtrans": "0.123141 +- 0.000617588",
"block": "0.12836417 +- 0.0006771971006450042",
"ops": "0.12814567 +- 0.0006349014540131404"
},
"obey": false
},
"BF_add_concat": {
"latency": {
"add": "0.00500241 +- 0.000140413",
"concat": "0.00545847 +- 0.000520087",
"block": "0.01099076 +- 0.00023884178143700066",
"ops": "0.010460879999999999 +- 0.0005387079896734409"
},
"obey": false
},
"BF_add_conv": {
"latency": {
"add": "0.00501048 +- 0.000145612",
"conv": "0.043651 +- 0.00053472",
"block": "0.04854047 +- 0.0005625849267666171",
"ops": "0.04866148 +- 0.0005541916030977012"
},
"obey": false
},
"BF_add_add": {
"latency": {
"add_1": "0.0050004 +- 0.000140548",
"add_2": "0.00499632 +- 0.000143006",
"block": "0.00587968 +- 0.00040072",
"ops": "0.00999672 +- 0.00020051048935155487"
},
"obey": true
},
"BF_add_relu": {
"latency": {
"add": "0.00500674 +- 0.000137519",
"relu": "0.00500133 +- 0.000132795",
"block": "0.00499715 +- 0.000142207",
"ops": "0.01000807 +- 0.00019117004834963034"
},
"obey": true
},
"BF_add_dwconv": {
"latency": {
"add": "0.00499906 +- 0.000136903",
"dwconv": "0.0108789 +- 0.000353781",
"block": "0.01599437 +- 0.0002550740937100434",
"ops": "0.01587796 +- 0.0003793460522662652"
},
"obey": false
},
"BF_add_reshape": {
"latency": {
"add": "0.00500962 +- 0.000141982",
"reshape": "0.00799501 +- 0.000171203",
"block": "0.013011769999999999 +- 0.00022114984205284888",
"ops": "0.01300463 +- 0.00022241707563269508"
},
"obey": false
},
"BF_relu_pooling": {
"latency": {
"relu": "0.00500302 +- 0.000132311",
"pooling": "0.00508761 +- 0.000316002",
"block": "0.01013795 +- 0.00039791231820590823",
"ops": "0.01009063 +- 0.0003425835149638698"
},
"obey": false
},
"BF_relu_dense": {
"latency": {
"relu": "0.00500257 +- 0.000184527",
"dense": "0.0242465 +- 0.000879434",
"block": "0.02926351 +- 0.0008648418951854726",
"ops": "0.029249070000000002 +- 0.0008985846504837483"
},
"obey": false
},
"BF_relu_convtrans": {
"latency": {
"relu": "0.00500699 +- 0.000140648",
"convtrans": "0.123201 +- 0.00061019",
"block": "0.12815026999999998 +- 0.0006484082899423789",
"ops": "0.12820799 +- 0.0006261898242577885"
},
"obey": false
},
"BF_relu_concat": {
"latency": {
"relu": "0.0049987 +- 0.000131135",
"concat": "0.00551427 +- 0.000517141",
"block": "0.010183629999999999 +- 0.0004210801642454795",
"ops": "0.01051297 +- 0.0005335083899115365"
},
"obey": false
},
"BF_relu_conv": {
"latency": {
"relu": "0.00500086 +- 0.000144196",
"conv": "0.0436385 +- 0.000524813",
"block": "0.04874789 +- 0.000499098166748787",
"ops": "0.04863936 +- 0.0005442620429397956"
},
"obey": false
},
"BF_relu_add": {
"latency": {
"relu": "0.00500077 +- 0.000145203",
"add": "0.00499902 +- 0.000145347",
"block": "0.00500272 +- 0.000137891",
"ops": "0.00999979 +- 0.0002054499005061818"
},
"obey": true
},
"BF_relu_relu": {
"latency": {
"relu_1": "0.00499975 +- 0.000137148",
"relu_2": "0.00499752 +- 0.000146408",
"block": "0.0050061 +- 0.000135588",
"ops": "0.009997269999999999 +- 0.00020061125683271117"
},
"obey": true
},
"BF_relu_dwconv": {
"latency": {
"relu": "0.00500203 +- 0.000142735",
"dwconv": "0.0108665 +- 0.00037116",
"block": "0.015849719999999998 +- 0.000418786789205438",
"ops": "0.01586853 +- 0.0003976594344725144"
},
"obey": false
},
"BF_relu_reshape": {
"latency": {
"relu": "0.00499816 +- 0.000135118",
"reshape": "0.00800168 +- 0.000170518",
"block": "0.01299387 +- 0.00021684074637622882",
"ops": "0.01299984 +- 0.00021756208825988042"
},
"obey": false
},
"BF_dwconv_pooling": {
"latency": {
"dwconv": "0.0108575 +- 0.00037334",
"pooling": "0.00509818 +- 0.000323136",
"block": "0.016139439999999998 +- 0.000536163170056467",
"ops": "0.01595568 +- 0.0004937607012470717"
},
"obey": false
},
"BF_dwconv_convtrans": {
"latency": {
"dwconv": "0.0110189 +- 0.000265511",
"convtrans": "0.123212 +- 0.000618252",
"block": "0.1340673 +- 0.0007118831562131528",
"ops": "0.13423090000000001 +- 0.0006728533470415377"
},
"obey": false
},
"BF_dwconv_concat": {
"latency": {
"dwconv": "0.0108675 +- 0.00035683",
"concat": "0.00545375 +- 0.000520546",
"block": "0.01610423 +- 0.0005516837465160633",
"ops": "0.016321250000000002 +- 0.0006311067952541788"
},
"obey": false
},
"BF_dwconv_conv": {
"latency": {
"dwconv": "0.0108792 +- 0.000359171",
"conv": "0.0436777 +- 0.000525727",
"block": "0.054527400000000004 +- 0.0006290891967161731",
"ops": "0.0545569 +- 0.0006367045513972709"
},
"obey": false
},
"BF_dwconv_add": {
"latency": {
"dwconv": "0.010869 +- 0.000359517",
"add": "0.00500023 +- 0.000143863",
"block": "0.012286 +- 0.000500271",
"ops": "0.015869229999999998 +- 0.0003872325348650343"
},
"obey": true
},
"BF_dwconv_relu": {
"latency": {
"dwconv": "0.0109065 +- 0.000330342",
"relu": "0.0049961 +- 0.000135195",
"block": "0.0110057 +- 0.000157802",
"ops": "0.0159026 +- 0.00035693630382604687"
},
"obey": true
},
"MON": {
"latency": {},
"obey": null
},
"RT": {
"obey": false
},
"BF_dwconv_dwconv": {
"latency": {
"dwconv_1": "0.0108816 +- 0.000343614",
"dwconv_2": "0.0108651 +- 0.000379447",
"block": "0.021771600000000002 +- 0.0004903100730813105",
"ops": "0.0217467 +- 0.0005119087875832959"
},
"obey": false
},
"BF_dwconv_reshape": {
"latency": {
"dwconv": "0.0108739 +- 0.000357921",
"reshape": "0.00800842 +- 0.000166909",
"block": "0.01889845 +- 0.0003814191652237732",
"ops": "0.01888232 +- 0.00039492538095442785"
},
"obey": false
},
"BF_reshape_pooling": {
"latency": {
"reshape": "0.00799864 +- 0.000175056",
"pooling": "0.00502314 +- 0.000202911",
"block": "0.01305359 +- 0.0003216878493897461",
"ops": "0.01302178 +- 0.0002679878300539038"
},
"obey": false
},
"BF_reshape_dense": {
"latency": {
"reshape": "0.024968 +- 0.000223766",
"dense": "0 +- 0",
"block": "0.09324928 +- 0.0008030103657599944",
"ops": "0.024968 +- 0.000223766"
},
"obey": false
},
"BF_reshape_convtrans": {
"latency": {
"reshape": "0.00799118 +- 0.000168458",
"convtrans": "0.197468 +- 0.00105919",
"block": "0.20527044 +- 0.0011521050967615758",
"ops": "0.20545918000000002 +- 0.0010725024726610192"
},
"obey": false
},
"BF_reshape_concat": {
"latency": {
"reshape": "0.00800374 +- 0.000169287",
"concat": "0.00799201 +- 0.000195183",
"block": "0.01600312 +- 0.00026718165490355057",
"ops": "0.015995750000000003 +- 0.0002583689065232115"
},
"obey": false
},
"BF_reshape_conv": {
"latency": {
"reshape": "0.00799733 +- 0.000172346",
"conv": "0.0711989 +- 0.000561079",
"block": "0.07908414 +- 0.0005806590306901289",
"ops": "0.07919622999999999 +- 0.0005869521172608546"
},
"obey": false
},
"BF_reshape_add": {
"latency": {
"reshape": "0.00800295 +- 0.000180739",
"add": "0.00500133 +- 0.000145174",
"block": "0.0080051 +- 0.000179223",
"ops": "0.01300428 +- 0.00023182337327586275"
},
"obey": true
},
"BF_reshape_relu": {
"latency": {
"reshape": "0.00799506 +- 0.000165155",
"relu": "0.004999 +- 0.000132581",
"block": "0.012994970000000002 +- 0.0002163020089828109",
"ops": "0.01299406 +- 0.0002117873829716964"
},
"obey": false
},
"BF_reshape_dwconv": {
"latency": {
"reshape": "0.00801043 +- 0.000167245",
"dwconv": "0.0104327 +- 0.000517514",
"block": "0.01839616 +- 0.0005324881596298644",
"ops": "0.018443130000000002 +- 0.0005438672910012147"
},
"obey": false
},
"BF_reshape_reshape": {
"latency": {
"reshape_1": "0.00799734 +- 0.000163646",
"reshape_2": "0.00700027 +- 0.000132258",
"block": "0.00704122 +- 0.000225084",
"ops": "0.014997610000000001 +- 0.0002104095812457218"
},
"obey": true
}
}

Просмотреть файл

@ -0,0 +1,713 @@
{
"RBC": {
"latency": {},
"obey": null
},
"CBC": {
"latency": {},
"obey": null
},
"BF_conv_bn": {
"obey": true
},
"BF_dwconv_bn": {
"obey": true
},
"BF_conv_bn_relu": {
"obey": true
},
"BF_dwconv_bn_relu": {
"obey": true
},
"BF_se_relu": {
"latency": {
"se": "0.236 +- 0.0",
"relu": "0.029 +- 0",
"block": "0.23399999999999999 +- 0.0",
"ops": "0.265 +- 0.0"
},
"obey": true
},
"BF_conv_se": {
"latency": {
"conv": "0.072 +- 0",
"se": "0.236 +- 0.0",
"block": "0.30000000000000004 +- 0.0",
"ops": "0.308 +- 0.0"
},
"obey": false
},
"BF_conv_hswish": {
"latency": {
"conv": "0.069 +- 0",
"hswish": "0.08499999999999999 +- 0.0",
"block": "0.11099999999999999 +- 0.0",
"ops": "0.154 +- 0.0"
},
"obey": true
},
"BF_pooling_pooling": {
"latency": {
"pooling_1": "0.013 +- 0",
"pooling_2": "0.01 +- 0",
"block": "0.019 +- 0.0",
"ops": "0.023 +- 0.0"
},
"obey": false
},
"BF_pooling_convtrans": {
"latency": {
"pooling": "0.014 +- 0",
"convtrans": "0.08 +- 0",
"block": "0.098 +- 0.0",
"ops": "0.094 +- 0.0"
},
"obey": false
},
"BF_pooling_concat": {
"latency": {
"pooling": "0.014 +- 0",
"concat": "0.0 +- 0",
"block": "0.035 +- 0.0",
"ops": "0.014 +- 0.0"
},
"obey": false
},
"BF_pooling_conv": {
"latency": {
"pooling": "0.013 +- 0",
"conv": "0.028 +- 0",
"block": "0.038 +- 0.0",
"ops": "0.041 +- 0.0"
},
"obey": false
},
"BF_pooling_add": {
"latency": {
"pooling": "0.014 +- 0",
"add": "0.096 +- 0",
"block": "0.10200000000000001 +- 0.0",
"ops": "0.11 +- 0.0"
},
"obey": true
},
"BF_pooling_relu": {
"latency": {
"pooling": "0.013 +- 0",
"relu": "0.026 +- 0",
"block": "0.016 +- 0.0",
"ops": "0.039 +- 0.0"
},
"obey": true
},
"BF_pooling_dwconv": {
"latency": {
"pooling": "0.013 +- 0",
"dwconv": "0.042 +- 0",
"block": "0.056 +- 0.0",
"ops": "0.055 +- 0.0"
},
"obey": false
},
"BF_pooling_reshape": {
"latency": {
"pooling": "0.013 +- 0",
"reshape": "0.117 +- 0.0",
"block": "0.152 +- 0.0",
"ops": "0.13 +- 0.0"
},
"obey": false
},
"BF_dense_dense": {
"latency": {
"dense_1": "0.118 +- 0",
"dense_2": "0.125 +- 0",
"block": "0.218 +- 0.0",
"ops": "0.243 +- 0.0"
},
"obey": false
},
"BF_dense_concat": {
"latency": {
"dense": "0.127 +- 0",
"concat": "0.0 +- 0",
"block": "0.134 +- 0.0",
"ops": "0.127 +- 0.0"
},
"obey": false
},
"BF_dense_add": {
"latency": {
"dense": "0.119 +- 0",
"add": "0.093 +- 0",
"block": "0.188 +- 0.0",
"ops": "0.212 +- 0.0"
},
"obey": false
},
"BF_dense_relu": {
"latency": {
"dense": "0.119 +- 0",
"relu": "0.039 +- 0",
"block": "0.158 +- 0.0",
"ops": "0.158 +- 0.0"
},
"obey": false
},
"BF_dense_reshape": {
"latency": {
"dense": "0.121 +- 0",
"reshape": "0.046 +- 0.0",
"block": "0.17099999999999999 +- 0.0",
"ops": "0.16699999999999998 +- 0.0"
},
"obey": false
},
"BF_convtrans_pooling": {
"latency": {
"convtrans": "0.184 +- 0",
"pooling": "0.014 +- 0",
"block": "0.199 +- 0.0",
"ops": "0.198 +- 0.0"
},
"obey": false
},
"BF_convtrans_convtrans": {
"latency": {
"convtrans_1": "0.185 +- 0",
"convtrans_2": "0.185 +- 0",
"block": "0.364 +- 0.0",
"ops": "0.37 +- 0.0"
},
"obey": false
},
"BF_convtrans_concat": {
"latency": {
"convtrans": "0.187 +- 0",
"concat": "0.0 +- 0",
"block": "0.184 +- 0.0",
"ops": "0.187 +- 0.0"
},
"obey": true
},
"BF_convtrans_conv": {
"latency": {
"convtrans": "0.185 +- 0",
"conv": "0.069 +- 0",
"block": "0.253 +- 0.0",
"ops": "0.254 +- 0.0"
},
"obey": false
},
"BF_convtrans_add": {
"latency": {
"convtrans": "0.185 +- 0",
"add": "0.099 +- 0",
"block": "0.249 +- 0.0",
"ops": "0.28400000000000003 +- 0.0"
},
"obey": false
},
"BF_convtrans_relu": {
"latency": {
"convtrans": "0.185 +- 0",
"relu": "0.031 +- 0",
"block": "0.193 +- 0.0",
"ops": "0.216 +- 0.0"
},
"obey": true
},
"BF_convtrans_dwconv": {
"latency": {
"convtrans": "0.186 +- 0",
"dwconv": "0.114 +- 0",
"block": "0.28600000000000003 +- 0.0",
"ops": "0.3 +- 0.0"
},
"obey": false
},
"BF_convtrans_reshape": {
"latency": {
"convtrans": "0.183 +- 0",
"reshape": "0.16899999999999998 +- 0.0",
"block": "0.441 +- 0.0",
"ops": "0.352 +- 0.0"
},
"obey": false
},
"BF_concat_pooling": {
"latency": {
"concat": "0.0 +- 0",
"pooling": "0.019 +- 0",
"block": "0.019 +- 0.0",
"ops": "0.019 +- 0.0"
},
"obey": false
},
"BF_concat_dense": {
"latency": {
"concat": "0 +- 0",
"dense": "0.345 +- 0",
"block": "0.336 +- 0.0",
"ops": "0.345 +- 0.0"
},
"obey": true
},
"BF_concat_convtrans": {
"latency": {
"concat": "0.0 +- 0",
"convtrans": "0.301 +- 0",
"block": "0.301 +- 0.0",
"ops": "0.301 +- 0.0"
},
"obey": false
},
"BF_concat_concat": {
"latency": {
"concat_1": "0.0 +- 0",
"concat_2": "0.0 +- 0",
"block": "0.0 +- 0.0",
"ops": "0.0 +- 0.0"
},
"obey": false
},
"BF_concat_conv": {
"latency": {
"concat": "0.0 +- 0",
"conv": "0.092 +- 0",
"block": "0.092 +- 0.0",
"ops": "0.092 +- 0.0"
},
"obey": false
},
"BF_concat_add": {
"latency": {
"concat": "0.0 +- 0",
"add": "0.103 +- 0",
"block": "0.104 +- 0.0",
"ops": "0.103 +- 0.0"
},
"obey": false
},
"BF_concat_relu": {
"latency": {
"concat": "0.0 +- 0",
"relu": "0.035 +- 0",
"block": "0.057 +- 0.0",
"ops": "0.035 +- 0.0"
},
"obey": false
},
"BF_concat_dwconv": {
"latency": {
"concat": "0.0 +- 0",
"dwconv": "0.137 +- 0",
"block": "0.135 +- 0.0",
"ops": "0.137 +- 0.0"
},
"obey": true
},
"BF_concat_reshape": {
"latency": {
"concat": "0.0 +- 0",
"reshape": "0.23099999999999998 +- 0.0",
"block": "0.23199999999999998 +- 0.0",
"ops": "0.23099999999999998 +- 0.0"
},
"obey": false
},
"BF_conv_pooling": {
"latency": {
"conv": "0.072 +- 0",
"pooling": "0.014 +- 0",
"block": "0.078 +- 0.0",
"ops": "0.086 +- 0.0"
},
"obey": true
},
"BF_conv_convtrans": {
"latency": {
"conv": "0.069 +- 0",
"convtrans": "0.185 +- 0",
"block": "0.258 +- 0.0",
"ops": "0.254 +- 0.0"
},
"obey": false
},
"BF_conv_concat": {
"latency": {
"conv": "0.069 +- 0",
"concat": "0.0 +- 0",
"block": "0.069 +- 0.0",
"ops": "0.069 +- 0.0"
},
"obey": false
},
"BF_conv_conv": {
"latency": {
"conv_1": "0.072 +- 0",
"conv_2": "0.068 +- 0",
"block": "0.136 +- 0.0",
"ops": "0.14 +- 0.0"
},
"obey": false
},
"BF_conv_add": {
"latency": {
"conv": "0.069 +- 0",
"add": "0.1 +- 0",
"block": "0.146 +- 0.0",
"ops": "0.169 +- 0.0"
},
"obey": false
},
"BF_conv_relu": {
"latency": {
"conv": "0.073 +- 0",
"relu": "0.029 +- 0",
"block": "0.074 +- 0.0",
"ops": "0.102 +- 0.0"
},
"obey": true
},
"BF_conv_dwconv": {
"latency": {
"conv": "0.071 +- 0",
"dwconv": "0.115 +- 0",
"block": "0.177 +- 0.0",
"ops": "0.186 +- 0.0"
},
"obey": false
},
"BF_conv_reshape": {
"latency": {
"conv": "0.069 +- 0",
"reshape": "0.17099999999999999 +- 0.0",
"block": "0.336 +- 0.0",
"ops": "0.24 +- 0.0"
},
"obey": false
},
"BF_add_pooling": {
"latency": {
"add": "0.098 +- 0",
"pooling": "0.013 +- 0",
"block": "0.088 +- 0.0",
"ops": "0.111 +- 0.0"
},
"obey": true
},
"BF_add_dense": {
"latency": {
"add": "0.092 +- 0",
"dense": "0.122 +- 0",
"block": "0.215 +- 0.0",
"ops": "0.214 +- 0.0"
},
"obey": false
},
"BF_add_convtrans": {
"latency": {
"add": "0.096 +- 0",
"convtrans": "0.184 +- 0",
"block": "0.271 +- 0.0",
"ops": "0.28 +- 0.0"
},
"obey": false
},
"BF_add_concat": {
"latency": {
"add": "0.096 +- 0",
"concat": "0.0 +- 0",
"block": "0.102 +- 0.0",
"ops": "0.096 +- 0.0"
},
"obey": false
},
"BF_add_conv": {
"latency": {
"add": "0.096 +- 0",
"conv": "0.069 +- 0",
"block": "0.14400000000000002 +- 0.0",
"ops": "0.165 +- 0.0"
},
"obey": false
},
"BF_add_add": {
"latency": {
"add_1": "0.097 +- 0",
"add_2": "0.097 +- 0",
"block": "0.16899999999999998 +- 0.0",
"ops": "0.194 +- 0.0"
},
"obey": false
},
"BF_add_relu": {
"latency": {
"add": "0.099 +- 0",
"relu": "0.03 +- 0",
"block": "0.1 +- 0.0",
"ops": "0.129 +- 0.0"
},
"obey": true
},
"BF_add_dwconv": {
"latency": {
"add": "0.096 +- 0",
"dwconv": "0.114 +- 0",
"block": "0.216 +- 0.0",
"ops": "0.21000000000000002 +- 0.0"
},
"obey": false
},
"BF_add_reshape": {
"latency": {
"add": "0.099 +- 0",
"reshape": "0.172 +- 0.0",
"block": "0.253 +- 0.0",
"ops": "0.271 +- 0.0"
},
"obey": false
},
"BF_relu_pooling": {
"latency": {
"relu": "0.031 +- 0",
"pooling": "0.014 +- 0",
"block": "0.046 +- 0.0",
"ops": "0.045 +- 0.0"
},
"obey": false
},
"BF_relu_dense": {
"latency": {
"relu": "0.039 +- 0",
"dense": "0.121 +- 0",
"block": "0.158 +- 0.0",
"ops": "0.16 +- 0.0"
},
"obey": false
},
"BF_relu_convtrans": {
"latency": {
"relu": "0.03 +- 0",
"convtrans": "0.184 +- 0",
"block": "0.215 +- 0.0",
"ops": "0.214 +- 0.0"
},
"obey": false
},
"BF_relu_concat": {
"latency": {
"relu": "0.03 +- 0",
"concat": "0.0 +- 0",
"block": "0.03 +- 0.0",
"ops": "0.03 +- 0.0"
},
"obey": false
},
"BF_relu_conv": {
"latency": {
"relu": "0.03 +- 0",
"conv": "0.069 +- 0",
"block": "0.1 +- 0.0",
"ops": "0.099 +- 0.0"
},
"obey": false
},
"BF_relu_add": {
"latency": {
"relu": "0.031 +- 0",
"add": "0.096 +- 0",
"block": "0.13 +- 0.0",
"ops": "0.127 +- 0.0"
},
"obey": false
},
"BF_relu_relu": {
"latency": {
"relu_1": "0.03 +- 0",
"relu_2": "0.03 +- 0",
"block": "0.054 +- 0.0",
"ops": "0.06 +- 0.0"
},
"obey": false
},
"BF_relu_dwconv": {
"latency": {
"relu": "0.031 +- 0",
"dwconv": "0.115 +- 0",
"block": "0.14500000000000002 +- 0.0",
"ops": "0.14600000000000002 +- 0.0"
},
"obey": false
},
"BF_relu_reshape": {
"latency": {
"relu": "0.031 +- 0",
"reshape": "0.16699999999999998 +- 0.0",
"block": "0.201 +- 0.0",
"ops": "0.19799999999999998 +- 0.0"
},
"obey": false
},
"BF_dwconv_pooling": {
"latency": {
"dwconv": "0.113 +- 0",
"pooling": "0.014 +- 0",
"block": "0.125 +- 0.0",
"ops": "0.127 +- 0.0"
},
"obey": false
},
"BF_dwconv_convtrans": {
"latency": {
"dwconv": "0.114 +- 0",
"convtrans": "0.185 +- 0",
"block": "0.307 +- 0.0",
"ops": "0.299 +- 0.0"
},
"obey": false
},
"BF_dwconv_concat": {
"latency": {
"dwconv": "0.115 +- 0",
"concat": "0.0 +- 0",
"block": "0.107 +- 0.0",
"ops": "0.115 +- 0.0"
},
"obey": true
},
"BF_dwconv_conv": {
"latency": {
"dwconv": "0.115 +- 0",
"conv": "0.071 +- 0",
"block": "0.18 +- 0.0",
"ops": "0.186 +- 0.0"
},
"obey": false
},
"BF_dwconv_add": {
"latency": {
"dwconv": "0.114 +- 0",
"add": "0.097 +- 0",
"block": "0.191 +- 0.0",
"ops": "0.21100000000000002 +- 0.0"
},
"obey": false
},
"BF_dwconv_relu": {
"latency": {
"dwconv": "0.114 +- 0",
"relu": "0.03 +- 0",
"block": "0.125 +- 0.0",
"ops": "0.14400000000000002 +- 0.0"
},
"obey": true
},
"MON": {
"latency": {},
"obey": null
},
"RT": {
"obey": false
},
"BF_dwconv_dwconv": {
"latency": {
"dwconv_1": "0.114 +- 0",
"dwconv_2": "0.115 +- 0",
"block": "0.221 +- 0.0",
"ops": "0.229 +- 0.0"
},
"obey": false
},
"BF_dwconv_reshape": {
"latency": {
"dwconv": "0.112 +- 0",
"reshape": "0.16999999999999998 +- 0.0",
"block": "0.28500000000000003 +- 0.0",
"ops": "0.282 +- 0.0"
},
"obey": false
},
"BF_reshape_pooling": {
"latency": {
"reshape": "0.16999999999999998 +- 0.0",
"pooling": "0.016 +- 0",
"block": "0.252 +- 0.0",
"ops": "0.186 +- 0.0"
},
"obey": false
},
"BF_reshape_dense": {
"latency": {
"reshape": "0.046 +- 0.0",
"dense": "0.14600000000000002 +- 0.0",
"block": "0.093 +- 0.0",
"ops": "0.192 +- 0.0"
},
"obey": false
},
"BF_reshape_convtrans": {
"latency": {
"reshape": "0.17099999999999999 +- 0.0",
"convtrans": "0.183 +- 0",
"block": "0.34099999999999997 +- 0.0",
"ops": "0.354 +- 0.0"
},
"obey": false
},
"BF_reshape_concat": {
"latency": {
"reshape": "0.16799999999999998 +- 0.0",
"concat": "0.0 +- 0",
"block": "0.16899999999999998 +- 0.0",
"ops": "0.16799999999999998 +- 0.0"
},
"obey": false
},
"BF_reshape_conv": {
"latency": {
"reshape": "0.16899999999999998 +- 0.0",
"conv": "0.061 +- 0",
"block": "0.23099999999999998 +- 0.0",
"ops": "0.22999999999999998 +- 0.0"
},
"obey": false
},
"BF_reshape_add": {
"latency": {
"reshape": "0.178 +- 0.0",
"add": "0.097 +- 0",
"block": "0.252 +- 0.0",
"ops": "0.275 +- 0.0"
},
"obey": false
},
"BF_reshape_relu": {
"latency": {
"reshape": "0.16999999999999998 +- 0.0",
"relu": "0.031 +- 0",
"block": "0.20299999999999999 +- 0.0",
"ops": "0.20099999999999998 +- 0.0"
},
"obey": false
},
"BF_reshape_dwconv": {
"latency": {
"reshape": "0.16799999999999998 +- 0.0",
"dwconv": "0.115 +- 0",
"block": "0.27999999999999997 +- 0.0",
"ops": "0.283 +- 0.0"
},
"obey": false
},
"BF_reshape_reshape": {
"latency": {
"reshape_1": "0.16999999999999998 +- 0.0",
"reshape_2": "0.172 +- 0.0",
"block": "0.172 +- 0.0",
"ops": "0.34199999999999997 +- 0.0"
},
"obey": true
}
}

Просмотреть файл

@ -0,0 +1,50 @@
import tensorflow as tf
import numpy as np
from typing import List


class ShapeFetcher:
    def get_nodes_with_input_tensor(self, tensor):
        return list(filter(
            lambda op: (tensor in op.inputs) and (op.type not in ["Shape"]),
            self.graph.get_operations()
        ))

    def __init__(self, input_graph):
        tf.compat.v1.disable_eager_execution()
        graph = tf.Graph()
        with graph.as_default():
            tf.import_graph_def(graph_def=input_graph, name="")
        ops = graph.get_operations()

        # The frozen graph is expected to have a single square RGB input placeholder.
        placeholders = list(filter(lambda op: op.type == "Placeholder", ops))
        assert len(placeholders) == 1
        graph_input_tensor = placeholders[0].outputs[0]
        graph_input_tensor_shape = graph_input_tensor.get_shape().as_list()
        assert graph_input_tensor_shape[1] == graph_input_tensor_shape[2]
        assert graph_input_tensor_shape[3] == 3
        self.imsize = graph_input_tensor_shape[1]
        self.graph: tf.Graph = graph

        # Collect every input and output tensor of every op and build a Shape op for each.
        tensors_to_fetch: List[tf.Tensor] = []
        for op in ops:
            tensors_to_fetch.extend(op.inputs)
            tensors_to_fetch.extend(op.outputs)
        shape_tensors = dict()
        for tensor in tensors_to_fetch:
            shape_tensors[tensor.name] = tf.compat.v1.shape(tensor)

        # Evaluate the Shape ops once on a random input to obtain concrete shapes.
        self.shape_results = dict()
        with tf.compat.v1.Session(graph=graph) as sess:
            fake_input = np.random.randn(1, self.imsize, self.imsize, 3)
            for tensor_name, shape_tensor in shape_tensors.items():
                self.shape_results[tensor_name] = sess.run(
                    shape_tensor, feed_dict={
                        graph_input_tensor: fake_input
                    }
                )

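For reference, a hypothetical usage sketch of ShapeFetcher above; the model path and GraphDef loading are assumptions, not part of this commit:

import tensorflow as tf

graph_def = tf.compat.v1.GraphDef()
with open("models/alexnet.pb", "rb") as f:   # hypothetical path
    graph_def.ParseFromString(f.read())

fetcher = ShapeFetcher(graph_def)
for tensor_name, shape in fetcher.shape_results.items():
    print(tensor_name, list(shape))          # e.g. [1, 27, 27, 64]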
Просмотреть файл

Просмотреть файл

@@ -0,0 +1,86 @@
from grapher_tool import Grapher
from .union_find import UF
import networkx as nx


class FusionAwareGraph:
    def __init__(self, graph: Grapher):
        self._graph = graph
        # Node names in topological order over the underlying networkx graph
        self._dag = list(nx.topological_sort(graph.get_networkx_graph()))
        self._uf = UF(len(self._dag))
        reverse = {}
        for index, name in enumerate(self._dag):
            reverse[name] = index
        outbounds = []
        inbounds = []
        for index, name in enumerate(self._dag):
            outbounds.append({reverse[outbound] for outbound in self._graph.get_node_outbounds(name)})
            inbounds.append({reverse[inbound] for inbound in self._graph.get_node_inbounds(name)})
        self._outbounds = outbounds
        self._inbounds = inbounds
        # Nodes without inbound edges are ready from the start
        self._ready = [not inbounds[i] for i in range(0, len(self))]
        self._types = [graph.get_node_type(name) for name in self._dag]

    @property
    def nodes(self):
        return self._dag

    def __len__(self):
        return len(self._dag)

    def __getitem__(self, key):
        return self._dag[key]

    def fuse(self, node, outnode, update=False):
        """
        node should be root, outnode should be an unfused single node
        """
        self._uf.union(node, outnode)
        if not update:
            self._outbounds[node] = self._outbounds[outnode]
        else:
            self._outbounds[node].update(self._outbounds[outnode])

    def mark_ready(self, node):
        self._ready[node] = True

    def is_ready(self, node):
        for inbound in self._inbounds[node]:
            if not self._ready[inbound]:
                return False
        return True

    def is_visited(self, node):
        return self._ready[node]

    def get_outbounds(self, node):
        return self._outbounds[node]

    def get_inbounds(self, node):
        return self._inbounds[node]

    def get_type(self, node):
        return self._types[node]

    def get_basicblocks(self):
        bbs = []
        for _ in range(0, len(self)):
            bbs.append([])
        for i in range(0, len(self)):
            root = self._uf.find(i)
            for node in self._graph.get_primitive_nodes(self[i]):
                bbs[root].append(node)
        bbs = [bb for bb in bbs if bb]
        return bbs

    def is_fused(self, node):
        return self._uf.find(node) != node

    def is_connected(self, p, q):
        return self._uf.connected(p, q)

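A simplified sketch of how FusionAwareGraph could be driven to group fusable kernels; the can_fuse predicate and the greedy single-successor policy are assumptions for illustration, not the repository's rule engine:

def group_kernels(fag, can_fuse):
    # Walk the topological order; fuse a root node into its only successor
    # when the (assumed) can_fuse predicate allows it, then read back groups.
    for i in range(len(fag)):
        if fag.is_fused(i):
            continue
        outs = fag.get_outbounds(i)
        if len(outs) == 1:
            succ = next(iter(outs))
            if not fag.is_fused(succ) and can_fuse(fag.get_type(i), fag.get_type(succ)):
                fag.fuse(i, succ)
        fag.mark_ready(i)
    return fag.get_basicblocks()

# e.g. group_kernels(FusionAwareGraph(grapher), lambda a, b: (a, b) == ("conv", "relu"))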
Просмотреть файл

@@ -0,0 +1,20 @@
class UF:
    """
    UnionFind implemented with path compression optimization
    """

    def __init__(self, N):
        self._parent = list(range(0, N))

    def find(self, p):
        # Path halving: point each visited node at its grandparent while walking up
        while p != self._parent[p]:
            self._parent[p] = self._parent[self._parent[p]]
            p = self._parent[p]
        return p

    def union(self, p, q):
        p = self.find(p)
        q = self.find(q)
        self._parent[q] = p

    def connected(self, p, q):
        return self.find(p) == self.find(q)

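A quick usage example for UF:

uf = UF(5)
uf.union(0, 1)
uf.union(1, 2)
print(uf.connected(0, 2))  # True
print(uf.connected(0, 3))  # False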
Просмотреть файл

@@ -1,5 +1,5 @@
import sys
sys.path.append("Latencypredictor")
sys.path.append("prediction")
from predictors.utils import*
from predictors.extract_feature import*

Просмотреть файл

Двоичные данные
results/alexnet_0_result.xlsx Normal file

Двоичный файл не отображается.

258
results/cpu_alexnet.json Normal file
Просмотреть файл

@@ -0,0 +1,258 @@
{
"alexnet_0": [
{
"op": "conv-relu",
"ks": [
11,
11
],
"cin": 3,
"cout": 64,
"strides": [
1,
4,
4,
1
],
"input_tensors": [
[
1,
224,
224,
3
]
],
"inputh": 224,
"inputw": 224
},
{
"op": "maxpool",
"ks": [
1,
3,
3,
1
],
"cin": 64,
"cout": 64,
"strides": [
1,
2,
2,
1
],
"input_tensors": [
[
1,
54,
54,
64
]
],
"inputh": 54,
"inputw": 54
},
{
"op": "conv-relu",
"ks": [
5,
5
],
"cin": 64,
"cout": 192,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
27,
27,
64
]
],
"inputh": 27,
"inputw": 27
},
{
"op": "maxpool",
"ks": [
1,
3,
3,
1
],
"cin": 192,
"cout": 192,
"strides": [
1,
2,
2,
1
],
"input_tensors": [
[
1,
27,
27,
192
]
],
"inputh": 27,
"inputw": 27
},
{
"op": "conv-relu",
"ks": [
3,
3
],
"cin": 192,
"cout": 384,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
13,
13,
192
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "conv-relu",
"ks": [
3,
3
],
"cin": 384,
"cout": 256,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
13,
13,
384
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "conv-relu",
"ks": [
3,
3
],
"cin": 256,
"cout": 256,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
13,
13,
256
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "maxpool",
"ks": [
1,
3,
3,
1
],
"cin": 256,
"cout": 256,
"strides": [
1,
2,
2,
1
],
"input_tensors": [
[
1,
13,
13,
256
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "global-avgpool",
"cin": 256,
"cout": 256,
"input_tensors": [
[
1,
6,
6,
256
]
],
"inputh": 6,
"inputw": 6
},
{
"op": "fc-relu",
"cin": 256,
"cout": 4096,
"input_tensors": [
[
1,
256
]
]
},
{
"op": "fc-relu",
"cin": 4096,
"cout": 4096,
"input_tensors": [
[
1,
4096
]
]
},
{
"op": "fc",
"cin": 4096,
"cout": 1000,
"input_tensors": [
[
1,
4096
]
]
}
]
}
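A hypothetical sketch of consuming a kernel list like the one above: load the records and sum per-kernel latency predictions. predict_kernel stands in for the project's per-kernel predictor and is not a real API in this commit:

import json

def predict_model_latency(json_path, predict_kernel):
    with open(json_path) as f:
        kernels_by_model = json.load(f)
    totals = {}
    for model_name, kernels in kernels_by_model.items():
        # Each record carries the op type plus its shape/config features.
        totals[model_name] = sum(predict_kernel(k["op"], k) for k in kernels)
    return totals

# e.g. predict_model_latency("results/cpu_alexnet.json", my_predictor)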

258
results/gpu_alexnet.json Normal file
Просмотреть файл

@@ -0,0 +1,258 @@
{
"alexnet_0": [
{
"op": "conv-relu",
"ks": [
11,
11
],
"cin": 3,
"cout": 64,
"strides": [
1,
4,
4,
1
],
"input_tensors": [
[
1,
224,
224,
3
]
],
"inputh": 224,
"inputw": 224
},
{
"op": "maxpool",
"ks": [
1,
3,
3,
1
],
"cin": 64,
"cout": 64,
"strides": [
1,
2,
2,
1
],
"input_tensors": [
[
1,
54,
54,
64
]
],
"inputh": 54,
"inputw": 54
},
{
"op": "conv-relu",
"ks": [
5,
5
],
"cin": 64,
"cout": 192,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
27,
27,
64
]
],
"inputh": 27,
"inputw": 27
},
{
"op": "maxpool",
"ks": [
1,
3,
3,
1
],
"cin": 192,
"cout": 192,
"strides": [
1,
2,
2,
1
],
"input_tensors": [
[
1,
27,
27,
192
]
],
"inputh": 27,
"inputw": 27
},
{
"op": "conv-relu",
"ks": [
3,
3
],
"cin": 192,
"cout": 384,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
13,
13,
192
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "conv-relu",
"ks": [
3,
3
],
"cin": 384,
"cout": 256,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
13,
13,
384
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "conv-relu",
"ks": [
3,
3
],
"cin": 256,
"cout": 256,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
13,
13,
256
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "maxpool",
"ks": [
1,
3,
3,
1
],
"cin": 256,
"cout": 256,
"strides": [
1,
2,
2,
1
],
"input_tensors": [
[
1,
13,
13,
256
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "global-avgpool",
"cin": 256,
"cout": 256,
"input_tensors": [
[
1,
6,
6,
256
]
],
"inputh": 6,
"inputw": 6
},
{
"op": "fc-relu",
"cin": 256,
"cout": 4096,
"input_tensors": [
[
1,
256
]
]
},
{
"op": "fc-relu",
"cin": 4096,
"cout": 4096,
"input_tensors": [
[
1,
4096
]
]
},
{
"op": "fc",
"cin": 4096,
"cout": 1000,
"input_tensors": [
[
1,
4096
]
]
}
]
}

Просмотреть файл

@@ -0,0 +1 @@
alexnet_0,83.3216703389321,84.5299,0.014293518164198748

Просмотреть файл

@@ -0,0 +1 @@
alexnet_0,13.085394437987977,11.9766,0.09258006763087838

Просмотреть файл

@@ -0,0 +1 @@
alexnet_0,16.36299839769042,16.185405,0.01097244076934863

Просмотреть файл

@@ -0,0 +1 @@
alexnet_0,12.104382488684852,10.134,0.19443284869595928

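The result rows in these files read as model name, predicted latency, measured latency, and relative error; a minimal check of that reading (an interpretation, not documented in this commit):

def relative_error(predicted, measured):
    return abs(predicted - measured) / measured

print(relative_error(83.3216703389321, 84.5299))  # ~0.01429, matches the last column above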
280
results/vpu_alexnet.json Normal file
Просмотреть файл

@@ -0,0 +1,280 @@
{
"alexnet_0": [
{
"op": "conv-relu",
"ks": [
11,
11
],
"cin": 3,
"cout": 64,
"strides": [
1,
4,
4,
1
],
"input_tensors": [
[
1,
224,
224,
3
]
],
"inputh": 224,
"inputw": 224
},
{
"op": "maxpool",
"ks": [
1,
3,
3,
1
],
"cin": 64,
"cout": 64,
"strides": [
1,
2,
2,
1
],
"input_tensors": [
[
1,
54,
54,
64
]
],
"inputh": 54,
"inputw": 54
},
{
"op": "conv-relu",
"ks": [
5,
5
],
"cin": 64,
"cout": 192,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
27,
27,
64
]
],
"inputh": 27,
"inputw": 27
},
{
"op": "maxpool",
"ks": [
1,
3,
3,
1
],
"cin": 192,
"cout": 192,
"strides": [
1,
2,
2,
1
],
"input_tensors": [
[
1,
27,
27,
192
]
],
"inputh": 27,
"inputw": 27
},
{
"op": "conv-relu",
"ks": [
3,
3
],
"cin": 192,
"cout": 384,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
13,
13,
192
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "conv-relu",
"ks": [
3,
3
],
"cin": 384,
"cout": 256,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
13,
13,
384
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "conv-relu",
"ks": [
3,
3
],
"cin": 256,
"cout": 256,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
13,
13,
256
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "maxpool",
"ks": [
1,
3,
3,
1
],
"cin": 256,
"cout": 256,
"strides": [
1,
2,
2,
1
],
"input_tensors": [
[
1,
13,
13,
256
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "global-avgpool",
"cin": 256,
"cout": 256,
"input_tensors": [
[
1,
6,
6,
256
]
],
"inputh": 6,
"inputw": 6
},
{
"op": "fc",
"cin": 256,
"cout": 4096,
"input_tensors": [
[
1,
256
]
]
},
{
"op": "relu",
"cin": 4096,
"cout": 4096,
"input_tensors": [
[
1,
4096
]
]
},
{
"op": "fc",
"cin": 4096,
"cout": 4096,
"input_tensors": [
[
1,
4096
]
]
},
{
"op": "relu",
"cin": 4096,
"cout": 4096,
"input_tensors": [
[
1,
4096
]
]
},
{
"op": "fc",
"cin": 4096,
"cout": 1000,
"input_tensors": [
[
1,
4096
]
]
}
]
}