lzhani 2021-05-17 22:28:59 +08:00
Parent 28e982e3ac
Commit 4ca17507a1
59 changed files: 6026 additions and 2 deletions

@@ -1,7 +1,7 @@
from Latencypredictor.predictors.predict_by_kernel import*
from prediction.predictors.predict_by_kernel import*
from kerneldetection.kernel_detector import*
import pickle,sys,os
import argparse

demo_with_converter.py (new file, 34 lines)

@@ -0,0 +1,34 @@
from ir_converters.model_to_grapher import*
from prediction.predictors.predict_by_kernel import*
from kerneldetection.kernel_detector import*
import pickle,sys,os
import argparse
parser = argparse.ArgumentParser("predict model latency on device")
parser.add_argument('--hardware', type=str, default='cpu')
parser.add_argument('--mf', type=str, default='alexnet')
parser.add_argument('--input_models', type=str, required=True, help='Path to input models. Either json or pb.')
parser.add_argument('--save_dir', type=str, default='results', help='Directory to save the kernel splitting results.')
parser.add_argument('--rule_dir', type=str, default='data/fusionrules', help='Directory containing the fusion rule files.')
args=parser.parse_args()
hardware=args.hardware
input_models=args.input_models
for hardware in ['cpu','gpu','gpu1','vpu']:
print('current hardware',hardware)
if hardware=='gpu1':
hw='gpu'
else:
hw=hardware
latency_file="data/model_latency/"+hardware+"/"+args.mf+"-log.csv"
kernel_types,kernel_with_features=split_model_into_kernels(input_models,hw,args.save_dir)
rmse,rmspe,error,acc5,acc10=main_kernel_predict(hardware,kernel_with_features,latency_file)

@@ -0,0 +1 @@
from .utils import model_to_grapher, model_file_to_grapher

@@ -0,0 +1 @@
from .frozenpb_converter import FrozenPbConverter

@@ -0,0 +1,38 @@
import sys
import os
import numpy as np
from .grapher_tool import Grapher
from .frozenpb_parser import FrozenPbParser
from .shape_inference import ShapeInference
class FrozenPbConverter:
def __init__(self, file_name):
self.graph = Grapher()
# Parse pb to graph
parser = FrozenPbParser(file_name)
parser.parse_graph(self.graph)
        # Change split to a more friendly naming scheme
parser.fix_split_naming(self.graph)
# Get the static shape
ShapeInference(self.graph)
        # Strip constant and identity nodes
parser.strip_useless_nodes(self.graph)
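    # Convert numpy arrays and bytes stored in node attributes to
    # JSON-serializable Python types in place, then return the graph dict.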
def get_flatten_grapher(self):
def np_encoder(d):
for k, v in d.items():
if isinstance(v, dict):
np_encoder(v)
else:
if isinstance(v, np.ndarray):
d[k] = v.tolist()
if isinstance(v, (bytes, bytearray)):
d[k] = v.decode('utf-8')
np_encoder(self.graph.get_graph())
return self.graph.get_graph()

@@ -0,0 +1,181 @@
from .protobuf_helper import ProtobufHelper
from .shape_fetcher import ShapeFetcher
from tensorflow import io
from tensorflow import gfile
from google.protobuf import text_format
import tensorflow as tf
import copy
import re
import logging
logging = logging.getLogger(__name__)
class FrozenPbParser:
def __init__(self, pb_file):
        with open(pb_file, 'rb') as f:
            graph = tf.GraphDef()
            graph.ParseFromString(f.read())
self.graph = graph
@staticmethod
def strip_useless_nodes(graph_helper):
stripped_nodes_type = ['Const', 'Identity']
stripped_nodes_keywords = ['/weight', '/weight/read',
'/ReadVariableOp',
'/kernel', '/gamma',
'/beta', '/moving_mean',
'/moving_variance',
'/bias', '/reduction_indices',
'/shape', '/split_dim', '/axis']
graph = graph_helper.get_graph()
removed_node = []
for key, value in graph.items():
if 'attr' in value.keys():
if value['attr']['type'] in stripped_nodes_type:
for kw in stripped_nodes_keywords:
if kw in key:
removed_node.append(key)
break
for key in removed_node:
del graph[key]
graph_helper.refresh()
graph_helper.refresh()
graph_helper.refresh()
graph_helper.refresh()
graph_helper.refresh()
@staticmethod
def fix_split_naming(graph_helper):
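        # TensorFlow names the outputs of a Split op "<name>:1", "<name>:2", ...
        # Fold those per-output child nodes back into the main Split node so the
        # graph keeps a single Split op that owns all outbound edges.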
graph = graph_helper.get_graph()
graph_nodes = copy.deepcopy(list(graph.keys()))
remove_node_list = []
for graph_node in graph_nodes:
if graph_node in graph.keys():
if 'attr' in graph[graph_node].keys():
if graph[graph_node]['attr']['type'] == 'Split' and ':' not in graph_node:
logging.info('Find split main node %s.' % graph_node)
split_node_name = graph_node
split_node_child = []
for node_name in graph.keys():
idx = re.findall(
r'%s:(\d+)' %
split_node_name, node_name)
if len(idx) > 0:
idx = int(idx[0])
logging.info(
'Find split child node %s.' % node_name)
graph[graph_node]['outbounds'] += graph[node_name]['outbounds']
graph[graph[node_name]['outbounds']
[0]]['inbounds'] += [graph_node]
remove_node_list.append(node_name)
for node in remove_node_list:
del graph[node]
graph_helper.refresh()
graph_helper.refresh()
graph_helper.refresh()
graph_helper.refresh()
graph_helper.refresh()
def fetch_attr_to_dict(self, node, shape_fetcher):
attr_dict = {}
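        # In a frozen pb, some op parameters (split_dim, axis, reshape target
        # shape, reduction indices) are stored in separate Const input nodes
        # rather than in node.attr; attr_as_node records, per op type, where to
        # find that Const node and how to decode its tensor value.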
attr_as_node = {
'Split': {
'node_name': lambda x: x + '/split_dim',
'attr_name': 'split_dim',
'node_value': lambda x: ProtobufHelper.get_tensor_value(x)
},
'Mean': {
'node_name': lambda x: x + '/reduction_indices',
'attr_name': 'reduction_indices',
'node_value': lambda x: ProtobufHelper.pkg42dec(x.tensor_content)
},
'Reshape': {
'node_name': lambda x: x + '/shape',
'attr_name': 'shape',
'node_value': lambda x: ProtobufHelper.pkg42dec(x.tensor_content)
},
'Concat': {
'node_name': lambda x: x + '/axis',
'attr_name': 'axis',
'node_value': lambda x: ProtobufHelper.get_tensor_value(x)
},
'ConcatV2': {
'node_name': lambda x: x + '/axis',
'attr_name': 'axis',
'node_value': lambda x: ProtobufHelper.get_tensor_value(x)
},
'Const': {
'node_name': lambda x: x,
'attr_name': 'constant',
'node_value': lambda x: ProtobufHelper.get_tensor_value(x)
}
}
list_i_nodes = ['dilations', 'strides', 'ksize']
str_nodes = ['padding', 'data_format']
for attr_name in node.attr.keys():
if attr_name in list_i_nodes:
attr_dict[attr_name] = [
int(a) for a in node.attr[attr_name].list.i]
continue
if attr_name in str_nodes:
attr_dict[attr_name] = node.attr[attr_name].s
continue
if attr_name == 'value':
shape = []
for dim in node.attr[attr_name].tensor.tensor_shape.dim:
shape.append(dim.size)
attr_dict['tensor_shape'] = list(map(int, shape))
continue
if attr_name == 'shape':
shape = []
for dim in node.attr[attr_name].shape.dim:
shape.append(dim.size)
attr_dict['shape'] = list(map(int, shape))
continue
if node.op in attr_as_node.keys():
for target_node in self.graph.node:
if target_node.name == attr_as_node[node.op]['node_name'](
node.name):
for attr_name in target_node.attr.keys():
if attr_name == 'value' and 'weight' not in node.name and 'BatchNorm' not in node.name and 'kernel' not in node.name:
# print(target_node.attr[attr_name].tensor)
attr_dict[attr_as_node[node.op]['attr_name']] = \
attr_as_node[node.op]['node_value'](target_node.attr[attr_name].tensor)
# # attr_dict['weight_shape'] = self.find_weights_root(node, shape_fetcher)
# print(node.name, attr_dict)
# print('------------------')
return attr_dict
def parse_graph(
self,
graph_helper,
required_shape=False,
insert_node=False):
if required_shape:
shape_fetcher = ShapeFetcher(self.graph)
for node in self.graph.node:
graph_helper.node(str(node.name), list(map(str, node.input)))
graph_helper.set_node_attr(
node.name, {
'name': str(node.name),
'type': str(node.op),
'output_shape': shape_fetcher.shape_results[node.name + ':0'] if required_shape else [],
'attr': self.fetch_attr_to_dict(node, shape_fetcher if required_shape else None),
# 'node': node if insert_node else None
})
# return shape_fetcher

@@ -0,0 +1,255 @@
import numpy as np
import json
class NumpyEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, np.ndarray):
return obj.tolist()
if isinstance(obj, (bytes, bytearray)):
return obj.decode('utf-8')
return json.JSONEncoder.default(self, obj)
class Grapher:
def __init__(self, filename=None):
if filename is not None:
self.graph = json.load(open(filename, 'r'))
else:
self.graph = {}
def node(self, name, inbound_nodes=None):
self.graph[name] = {}
if inbound_nodes is not None:
self.graph[name]['inbounds'] = inbound_nodes
for node in inbound_nodes:
if node not in self.graph.keys():
self.graph[node] = {}
if 'outbounds' not in self.graph[node].keys():
self.graph[node]['outbounds'] = []
self.graph[node]['outbounds'].append(name)
def refresh(self):
for name in self.graph.keys():
self.graph[name]['outbounds'] = []
for name in self.graph.keys():
if 'inbounds' in self.graph[name].keys():
for node in self.graph[name]['inbounds']:
if node not in self.graph.keys():
while node in self.graph[name]['inbounds']:
self.graph[name]['inbounds'].remove(node)
else:
if 'outbounds' not in self.graph[node].keys():
self.graph[node]['outbounds'] = []
self.graph[node]['outbounds'].append(name)
spare_nodes = []
for name in self.graph.keys():
if len(self.graph[name]['outbounds']) == 0 and \
len(self.graph[name]['inbounds']) == 0:
spare_nodes.append(name)
for removing_node_name in spare_nodes:
del self.graph[removing_node_name]
def get_graph(self):
return self.graph
def get_node_inbounds(self, name):
if 'inbounds' in self.graph[name]:
return self.graph[name]['inbounds']
else:
return []
def get_node_outbounds(self, name):
if 'outbounds' in self.graph[name]:
return self.graph[name]['outbounds']
else:
return []
def set_node_inbounds(self, name, inbounds):
self.graph[name]['inbounds'] = inbounds
def set_node_outbounds(self, name, outbounds):
self.graph[name]['outbounds'] = outbounds
def remove_node_inbounds(self, name, inbound):
if inbound in self.graph[name]['inbounds']:
self.graph[name]['inbounds'].remove(inbound)
def remove_node_outbounds(self, name, outbound):
        if outbound in self.graph[name]['outbounds']:
self.graph[name]['outbounds'].remove(outbound)
def add_node_inbounds(self, name, inbound):
self.graph[name]['inbounds'].append(inbound)
def add_node_outbounds(self, name, outbound):
self.graph[name]['outbounds'].append(outbound)
def get_graph_head(self):
self.heads = []
for (key, value) in self.graph.items():
if 'inbounds' not in value.keys()\
or len(value['inbounds']) == 0:
self.heads.append(key)
return self.heads
def get_graph_tail(self):
self.tails = []
for (key, value) in self.graph.items():
if 'outbounds' not in value.keys()\
or len(value['outbounds']) == 0:
self.tails.append(key)
return self.tails
def add_node_attr(self, name, attr_key, attr_value):
if name not in self.graph.keys():
self.graph[name] = {}
self.graph[name]['attr']['attr'][attr_key] = attr_value
def set_node_attr(self, name, attr):
if name not in self.graph.keys():
self.graph[name] = {}
self.graph[name]['attr'] = attr
def get_node_attr(self, name):
if name in self.graph.keys():
return self.graph[name]['attr']
else:
return None
def get_node_type(self, name):
if name in self.graph.keys() and 'attr' in self.graph[name].keys():
return self.graph[name]['attr']['type']
else:
print(name, self.graph[name])
return None
def plot_graphs(self, comment='Network Grapher View'):
from graphviz import Digraph
dot = Digraph(comment=comment)
for (key, value) in self.graph.items():
dot.node(key, key)
if 'inbounds' in value.keys():
for node in value['inbounds']:
dot.edge(node, key, label=', '.join(str(x)
for x in value['attr']['output_shape']))
dot.render('graph.gv', view=False)
def plot_networkx_graph(self):
import matplotlib.pyplot as plt
import networkx as nx
plt.subplot(121)
nx.draw(
self.get_networkx_graph(),
with_labels=True,
font_weight='bold')
plt.show()
def get_networkx_graph(self):
import networkx as nx
G = nx.MultiDiGraph()
for (key, value) in self.graph.items():
G.add_node(
key,
type=value['attr']['type'],
**value['attr']['attr'])
if 'inbounds' in value.keys():
for node in value['inbounds']:
G.add_edge(node, key)
self.graphx = G
return G
def match_isomorph_vf2(self):
pass
def find_subgraphs(self, sub_graph, match_func):
from networkx.algorithms import isomorphism as iso
GM = iso.MultiDiGraphMatcher(
self.get_networkx_graph(),
sub_graph.get_networkx_graph(),
node_match=match_func)
return list(GM.subgraph_isomorphisms_iter())
def find_weight_roots(self, layer_name):
weight_roots = []
weights_nodes = []
for inbound in self.graph[layer_name]['inbounds']:
if self.graph[inbound]['attr']['type'] == 'Identity' \
and len(self.graph[inbound]['inbounds']) == 1:
if self.graph[self.graph[inbound]['inbounds']
[0]]['attr']['type'] == 'Const':
weight_roots.append(inbound)
weights_nodes.append(inbound)
weights_nodes.append(self.graph[inbound]['inbounds'][0])
if self.graph[inbound]['attr']['type'] == 'Const' \
and len(self.graph[inbound]['inbounds']) == 0:
weight_roots.append(inbound)
weights_nodes.append(inbound)
return weight_roots, weights_nodes
def get_subgraphs(self, sub_graph, match_func):
import tensorflow as tf
import copy
fetched_subgraphs = self.find_subgraphs(sub_graph, match_func)
tar_sub_graphs = []
for sub_fetch_graph in fetched_subgraphs:
tar_sub_graphs.append(tf.GraphDef())
for op_entry in sub_fetch_graph.keys():
                # --- Replace dummy op ---
if sub_graph.get_graph()[
sub_fetch_graph[op_entry]]['attr']['type'] == 'dummy':
dummy_op = tar_sub_graphs[-1].node.add()
dummy_op.op = "Identity"
dummy_op.name = sub_fetch_graph[op_entry]
dummy_op.input.extend(
sub_graph.get_graph()[
sub_fetch_graph[op_entry]]['inbounds'])
dummy_op.attr['T'].type = 1
# if 'graph_head' in sub_graph.get_graph()[sub_fetch_graph[op_entry]]['attr']['attr']:
# dummy_op.attr['shape'] = []
# dummy_op.attr['shape'].dim = list(map(int, sub_graph.get_graph()[sub_fetch_graph[op_entry]]['attr']['attr']['graph_head'].split(',')))
# print(dummy_op)
else:
# --- Fetch the main op ---
node = copy.deepcopy(self.graph[op_entry]['attr']['node'])
node.name = sub_fetch_graph[op_entry]
del node.input[:]
node.input.extend(
sub_graph.get_graph()[
sub_fetch_graph[op_entry]]['inbounds'])
# --- Fetch the constant op ---
roots, nodes = self.find_weight_roots(op_entry)
for weight_root in roots:
node.input.append(weight_root)
for weight_node in nodes:
tar_sub_graphs[-1].node.append(
self.graph[weight_node]['attr']['node'])
tar_sub_graphs[-1].node.append(node)
#tf.io.write_graph(tar_sub_graphs[-1], '', 'a.pb')
return tar_sub_graphs
def dump_json(self, filename):
with open(filename, 'w+') as fp:
json.dump(
self.graph,
fp,
indent=4,
skipkeys=True,
sort_keys=True,
cls=NumpyEncoder)

@@ -0,0 +1,103 @@
import logging
logging = logging.getLogger(__name__)
class ProtobufHelper:
@staticmethod
def get_w(x):
l = len(x)
if l == 4:
return x[1]
if l == 2:
return x[0]
return None
@staticmethod
def get_h(x):
l = len(x)
if l == 4:
return x[2]
if l == 2:
return x[1]
return None
@staticmethod
def find_weights_root(graph, node):
NODE_WEIGHT_LUT = {
'Conv2D': [
lambda x: x.replace('/Conv2D', '/weight'),
lambda x: x.replace('/Conv2D', '/kernel'),
],
'DepthwiseConv2dNative': [
lambda x: x.replace('/depthwise', '/weight')
],
'BiasAdd': [
lambda x: x.replace('/BiasAdd', '/bias'),
],
'FusedBatchNorm': [
lambda x: x.replace('/FusedBatchNormV3', '/gamma'),
lambda x: x.replace('/FusedBatchNormV3', '/beta'),
lambda x: x.replace('/FusedBatchNormV3', '/moving_mean'),
lambda x: x.replace('/FusedBatchNormV3', '/moving_variance')
],
'MatMul': [
lambda x: x.replace('/MatMul', '/weight'),
]
}
weight_name = []
if node['attr']['type'] in NODE_WEIGHT_LUT.keys():
for lut_lamba in NODE_WEIGHT_LUT[node['attr']['type']]:
weight_op = lut_lamba(node['attr']['name'])
if weight_op in graph.keys(
) and graph[weight_op]['attr']['type'] != 'Identity':
logging.info(
'Find node %s with its weight op %s.' %
(node['attr']['name'], weight_op))
weight_name.append(weight_op)
return weight_name
@staticmethod
def get_graph_seq(x):
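        # Iterative DFS starting from every graph head; returns a rough
        # topological ordering of node names that ShapeInference follows when
        # propagating shapes through the graph.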
graph_head = x.get_graph_head()
seen = set()
stack = []
order = []
q = [graph_head[0]]
for head in graph_head:
q = [head]
while q:
v = q.pop()
if v not in seen:
seen.add(v)
q.extend(x.get_node_outbounds(v))
while stack and v not in x.get_node_outbounds(
stack[-1]):
order.append(stack.pop())
stack.append(v)
return stack + order[::-1]
@staticmethod
def pkg42dec(x):
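        # Decode raw tensor_content bytes as little-endian 4-byte integers;
        # the value 4294967295 (0xFFFFFFFF) is mapped to -1, e.g. the wildcard
        # dimension of a Reshape target shape.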
total_byte = len(x) // 4
assert(total_byte * 4 == len(x))
num = []
for idx in range(total_byte):
num.append(0)
for i in range(4):
num[-1] += x[idx * 4 + i] << (i * 8)
if num[-1] == 4294967295:
num[-1] = -1
return num
@staticmethod
def get_tensor_value(x):
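        # DTYPE_ENUM maps TensorFlow DataType enum values to extractors:
        # 1 = DT_FLOAT -> float_val, 3 = DT_INT32 -> int_val.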
DTYPE_ENUM = {
1: lambda x: list(map(float, x.float_val)),
3: lambda x: list(map(int, x.int_val))
}
return DTYPE_ENUM[x.dtype](x)

@@ -0,0 +1,49 @@
import tensorflow as tf
import numpy as np
from typing import List
class ShapeFetcher:
def get_nodes_with_input_tensor(self, tensor):
return list(filter(
lambda op: (tensor in op.inputs) and (op.type not in ["Shape"]),
self.graph.get_operations()
))
def __init__(self, input_graph):
tf.compat.v1.disable_eager_execution()
graph = tf.Graph()
with graph.as_default():
tf.import_graph_def(graph_def=input_graph, name="")
ops = graph.get_operations()
placeholders = list(filter(lambda op: op.type == "Placeholder", ops))
assert len(placeholders) == 1
graph_input_tensor = placeholders[0].outputs[0]
graph_input_tensor_shape = graph_input_tensor.get_shape().as_list()
assert graph_input_tensor_shape[1] == graph_input_tensor_shape[2]
assert graph_input_tensor_shape[3] == 3
self.imsize = graph_input_tensor_shape[1]
self.graph: tf.Graph = graph
tensors_to_fetch: List[tf.Tensor] = []
for op in filter(lambda op: op.type not in [], ops):
tensors_to_fetch.extend(op.inputs)
tensors_to_fetch.extend(op.outputs)
shape_tensors = dict()
for tensor in tensors_to_fetch:
shape_tensors[tensor.name] = tf.compat.v1.shape(tensor)
self.shape_results = dict()
with tf.compat.v1.Session(graph=graph) as sess:
fake_input = np.random.randn(1, self.imsize, self.imsize, 3)
for tensor_name, shape_tensor in shape_tensors.items():
self.shape_results[tensor_name] = sess.run(
shape_tensor, feed_dict={
graph_input_tensor: fake_input
}
)

@@ -0,0 +1,495 @@
from .protobuf_helper import ProtobufHelper as ph
from functools import reduce
import copy
import logging
logging = logging.getLogger(__name__)
class ShapeInference:
@staticmethod
def eval_prodcast(grapher, node):
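        # Evaluate the broadcast output shape for element-wise ops (Add, Mul):
        # pick the input with the highest rank and, for inputs of equal rank,
        # take the element-wise maximum of their shapes.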
input_nodes = node['inbounds']
if len(input_nodes) < 2:
            logging.warning(
                'Invalid number of inputs for broadcast op %s' %
                (node['attr']['name']))
if len(input_nodes) == 1:
return grapher[node['inbounds'][0]]['attr']['output_shape'][0]
else:
return None
target_dim = -1
target_shape = [1]
input_shape_list = []
for node_name in input_nodes:
input_shape = grapher[node_name]['attr']['output_shape'][0]
input_shape_list.append(input_shape)
if target_dim < len(input_shape):
target_dim = len(input_shape)
target_shape = input_shape
elif target_dim == len(input_shape):
for i in range(target_dim):
if target_shape[i] < input_shape[i]:
target_shape[i] = input_shape[i]
# if target_dim < len(input_shape):
# for i in range(len(input_shape)):
# if target_shape[i] == 1 or target_shape[i] == input_shape[i]:
# target_dim = len(input_shape)
# target_shape = input_shape
# else:
# logging.warn('Invalid prodcast shape between %s and %s(%s).'
# % (str(target_shape), str(input_shape), node_name))
# return None
# logging.warn('Prodcast from %s to %s(%s).' % (str(target_shape), str(input_shape), node_name))
# for node_name in input_nodes:
# input_shape = grapher[node_name]['attr']['output_shape'][0]
# if largest_dim == len(input_shape):
# prodcast_shape[node_name] = input_shape
# for node_name, shape in prodcast_shape.items():
# if shape != prodcast_shape[prodcast_node_name]:
# logging.warn('Invalid prodcast shape between %s(%s) and %s(%s).'
# % (node_name, str(shape),
# prodcast_node_name, prodcast_shape[prodcast_node_name]))
# return None
return input_shape_list, [target_shape]
@staticmethod
def Const_get_shape(grapher, node):
return [], [node['attr']['attr']['tensor_shape']]
@staticmethod
def Identity_get_shape(grapher, node):
return [], [grapher[node['inbounds'][0]]['attr']['output_shape'][0]]
@staticmethod
def propogate_shape(grapher, node):
in_shape = [grapher[node['inbounds'][0]]['attr']['output_shape'][0]]
return in_shape, in_shape
@staticmethod
def FusedBatchNorm_get_shape(grapher, node):
return ShapeInference.propogate_shape(grapher, node)
@staticmethod
def BiasAdd_get_shape(grapher, node):
return ShapeInference.propogate_shape(grapher, node)
@staticmethod
def Relu_get_shape(grapher, node):
return ShapeInference.propogate_shape(grapher, node)
@staticmethod
def Relu6_get_shape(grapher, node):
return ShapeInference.propogate_shape(grapher, node)
@staticmethod
def LeakyReLU_get_shape(grapher, node):
return ShapeInference.propogate_shape(grapher, node)
@staticmethod
def Add_get_shape(grapher, node):
return ShapeInference.eval_prodcast(grapher, node)
@staticmethod
def Mul_get_shape(grapher, node):
return ShapeInference.eval_prodcast(grapher, node)
@staticmethod
def Pool_get_shape(grapher, node):
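        # Pooling output size: out = (in - ksize) / stride + 1; for 'SAME'
        # padding a padded shape is additionally computed and logged.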
if len(node['inbounds']) != 1:
logging.warning(
'Failed to get input node of %s.' %
(node['attr']['name']))
logging.info(node)
return
input_shape = grapher[node['inbounds'][0]]['attr']['output_shape'][0]
logging.info(
'Get input shape of %s from %s, input shape:%s.' %
(node['attr']['name'], node['inbounds'][0], input_shape))
k_size = node['attr']['attr']['ksize']
if node['attr']['attr']['strides'][::3] != [1, 1]:
logging.warning(
'Invalid strides %s of node %s.' %
(str(
node['attr']['attr']['strides']),
node['attr']['name']))
logging.info(node)
return
strides = node['attr']['attr']['strides']
padding = node['attr']['attr']['padding'].decode('utf-8')
logging.info('Op:%s, stride:%s, padding:%s.' %
(node['attr']['name'], str(strides), str(padding)))
if padding == 'SAME':
wpad = ph.get_w(strides) - 1
hpad = ph.get_h(strides) - 1
else:
wpad = 0
hpad = 0
padded_shape = [
ph.get_w(input_shape) + wpad,
ph.get_h(input_shape) + hpad]
logging.info('Op:%s, padding:%s, padded shape:%s.' %
(node['attr']['name'], str([wpad, hpad]), str(padded_shape)))
outw = int(ph.get_w(input_shape) - ph.get_w(k_size)) / \
ph.get_w(strides) + 1
outh = int(ph.get_h(input_shape) - ph.get_w(k_size)) / \
ph.get_h(strides) + 1
output_shape = list(
map(int, [input_shape[0], outh, outw, input_shape[3]]))
return [input_shape], [output_shape]
@staticmethod
def AvgPool_get_shape(grapher, node):
return ShapeInference.Pool_get_shape(grapher, node)
@staticmethod
def AveragePooling2D_get_shape(grapher, node):
return ShapeInference.Pool_get_shape(grapher, node)
@staticmethod
def MaxPool_get_shape(grapher, node):
return ShapeInference.Pool_get_shape(grapher, node)
@staticmethod
def MaxPooling2D_get_shape(grapher, node):
return ShapeInference.Pool_get_shape(grapher, node)
@staticmethod
def Placeholder_get_shape(grapher, node):
return [], [node['attr']['attr']['shape']]
@staticmethod
def Conv2D_get_shape(grapher, node):
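        # The weight tensor layout is [kernel_h, kernel_w, cin, cout]; the output
        # spatial size is computed as out = (in - kernel_extent) / stride + 1.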
weight_node = ph.find_weights_root(grapher, node)
if len(weight_node) != 1:
logging.warning(
'Failed to get shape of node %s.' %
(node['attr']['name']))
logging.info(node)
return
input_node = [x for x in node['inbounds'] if x != weight_node]
input_node = [x for x in input_node if grapher[x]
['attr']['type'] != 'Identity']
if len(input_node) != 1:
logging.warning(
'Failed to get input node of %s.' %
(node['attr']['name']))
logging.info(node)
return
input_shape = grapher[input_node[0]]['attr']['output_shape'][0]
logging.info(
'Get input shape of %s from %s, input shape:%s.' %
(node['attr']['name'], input_node[0], input_shape))
weight_shape = grapher[weight_node[0]]['attr']['attr']['tensor_shape']
if len(weight_shape) != 4:
logging.warning(
'Failed to parse weight shape %s of node %s.' %
(str(weight_shape), node['attr']['name']))
logging.info(node)
return
logging.info(
'Get weight shape of %s from %s, input shape:%s.' %
(node['attr']['name'], weight_node, weight_shape))
k_size = weight_shape[:2]
cin = weight_shape[2]
cout = weight_shape[3]
if node['attr']['attr']['strides'][::3] != [1, 1]:
logging.warning(
'Invalid strides %s of node %s.' %
(str(
node['attr']['attr']['strides']),
node['attr']['name']))
logging.info(node)
return
strides = node['attr']['attr']['strides']
dilation = node['attr']['attr']['dilations']
padding = node['attr']['attr']['padding'].decode('utf-8')
logging.info(
'Op:%s, stride:%s, dilation:%s, padding:%s.' %
(node['attr']['name'], str(strides), str(dilation), str(padding)))
kernel_extent_w = ph.get_w(dilation) * (ph.get_w(strides) - 1) + 1
kernel_extent_h = ph.get_h(dilation) * (ph.get_h(strides) - 1) + 1
if padding == 'SAME':
wpad = kernel_extent_w + int((ph.get_w(input_shape) - 1) / ph.get_w(
dilation)) * ph.get_w(dilation) - ph.get_w(input_shape)
hpad = kernel_extent_h + int((ph.get_h(input_shape) - 1) / ph.get_h(
dilation)) * ph.get_h(dilation) - ph.get_h(input_shape)
else:
wpad = 0
hpad = 0
padded_shape = [
ph.get_w(input_shape) + wpad,
ph.get_h(input_shape) + hpad]
logging.info('Op:%s, kernel_extent:%s, padding:%s, padded shape:%s.' % (node['attr']['name'], str(
[kernel_extent_w, kernel_extent_h]), str([wpad, hpad]), str(padded_shape)))
outw = int(ph.get_w(input_shape) - kernel_extent_w) / \
ph.get_w(strides) + 1
outh = int(ph.get_h(input_shape) - kernel_extent_h) / \
ph.get_h(strides) + 1
output_shape = list(map(int, [input_shape[0], outh, outw, cout]))
return [input_shape], [output_shape]
@staticmethod
def DepthwiseConv2dNative_get_shape(grapher, node):
weight_node = ph.find_weights_root(grapher, node)
if len(weight_node) != 1:
logging.warning(
'Failed to get shape of node %s.' %
(node['attr']['name']))
logging.info(node)
return
input_node = [x for x in node['inbounds'] if x != weight_node]
input_node = [x for x in input_node if grapher[x]
['attr']['type'] != 'Identity']
if len(input_node) != 1:
logging.warning(
'Failed to get input node of %s.' %
(node['attr']['name']))
logging.info(node)
return
input_shape = grapher[input_node[0]]['attr']['output_shape'][0]
logging.info(
'Get input shape of %s from %s, input shape:%s.' %
(node['attr']['name'], input_node[0], input_shape))
weight_shape = grapher[weight_node[0]]['attr']['attr']['tensor_shape']
if len(weight_shape) != 4:
logging.warning(
'Failed to parse weight shape %s of node %s.' %
(str(weight_shape), node['attr']['name']))
logging.info(node)
return
logging.info(
'Get weight shape of %s from %s, input shape:%s.' %
(node['attr']['name'], weight_node, weight_shape))
k_size = weight_shape[:2]
cin = weight_shape[2]
if node['attr']['attr']['strides'][::3] != [1, 1]:
logging.warning(
'Invalid strides %s of node %s.' %
(str(
node['attr']['attr']['strides']),
node['attr']['name']))
logging.info(node)
return
strides = node['attr']['attr']['strides']
dilation = node['attr']['attr']['dilations']
padding = node['attr']['attr']['padding'].decode('utf-8')
logging.info(
'Op:%s, stride:%s, dilation:%s, padding:%s.' %
(node['attr']['name'], str(strides), str(dilation), str(padding)))
kernel_extent_w = ph.get_w(dilation) * (ph.get_w(strides) - 1) + 1
kernel_extent_h = ph.get_h(dilation) * (ph.get_h(strides) - 1) + 1
if padding == 'SAME':
wpad = kernel_extent_w + int((ph.get_w(input_shape) - 1) / ph.get_w(
dilation)) * ph.get_w(dilation) - ph.get_w(input_shape)
hpad = kernel_extent_h + int((ph.get_h(input_shape) - 1) / ph.get_h(
dilation)) * ph.get_h(dilation) - ph.get_h(input_shape)
else:
wpad = 0
hpad = 0
padded_shape = [
ph.get_w(input_shape) + wpad,
ph.get_h(input_shape) + hpad]
logging.info('Op:%s, kernel_extent:%s, padding:%s, padded shape:%s.' % (node['attr']['name'], str(
[kernel_extent_w, kernel_extent_h]), str([wpad, hpad]), str(padded_shape)))
outw = int(ph.get_w(input_shape) - kernel_extent_w) / \
ph.get_w(strides) + 1
outh = int(ph.get_h(input_shape) - kernel_extent_h) / \
ph.get_h(strides) + 1
output_shape = list(map(int, [input_shape[0], outh, outw, cin]))
return [input_shape], [output_shape]
@staticmethod
def Reduce_get_shape(grapher, node):
input_shape = grapher[node['inbounds'][0]]['attr']['output_shape'][0]
        output_shape = copy.deepcopy(input_shape)  # copy so the input shape is not mutated below
logging.info(
'Get input shape of %s from %s, input shape:%s.' %
(node['attr']['name'], node['inbounds'][0], output_shape))
output_shape[1] = 0
output_shape[2] = 0
reduction_indices = node['attr']['attr']['reduction_indices']
logging.info('Get Reduction Indices %s.', str(reduction_indices))
reduction_cnt = 0
for reduction in sorted(reduction_indices):
del output_shape[reduction - reduction_cnt]
reduction_cnt += 1
return [input_shape], [output_shape]
@staticmethod
def Mean_get_shape(grapher, node):
return ShapeInference.Reduce_get_shape(grapher, node)
@staticmethod
def GlobalAveragePooling2D_get_shape(grapher, node):
return ShapeInference.Reduce_get_shape(grapher, node)
@staticmethod
def GlobalMaxPooling2D_get_shape(grapher, node):
return ShapeInference.Reduce_get_shape(grapher, node)
@staticmethod
def MatMul_get_shape(grapher, node):
weight_node = ph.find_weights_root(grapher, node)
if len(weight_node) != 1:
logging.warning(
'Failed to get shape of node %s.' %
(node['attr']['name']))
logging.info(node)
return
weight_shape = grapher[weight_node[0]]['attr']['attr']['tensor_shape']
if len(weight_shape) != 2:
logging.warning(
'Failed to parse weight shape %s of node %s.' %
(str(weight_shape), node['attr']['name']))
logging.info(node)
return
logging.info(
'Get weight shape of %s from %s, input shape:%s.' %
(node['attr']['name'], weight_node, weight_shape))
input_node = [x for x in node['inbounds'] if x != weight_node]
input_node = [x for x in input_node if grapher[x]
['attr']['type'] != 'Identity']
if len(input_node) != 1:
logging.warning(
'Failed to get input node of %s.' %
(node['attr']['name']))
logging.info(node)
return
input_shape = copy.deepcopy(
grapher[input_node[0]]['attr']['output_shape'][0])
logging.info(
'Get input shape of %s from %s, input shape:%s.' %
(node['attr']['name'], input_node[0], input_shape))
if weight_shape[0] != input_shape[1]:
logging.warning(
'Weight shape and input shape not matched for %s.' %
(node['attr']['name']))
logging.info(node)
return
output_shape = copy.deepcopy(input_shape)
output_shape[1] = weight_shape[1]
return [input_shape], [output_shape]
@staticmethod
def Reshape_get_shape(grapher, node):
input_shape = grapher[node['inbounds'][0]]['attr']['output_shape'][0]
exp_output_shape = node['attr']['attr']['shape']
input_elements = abs(reduce(lambda x, y: x * y, input_shape))
exp_output_shape_elements = abs(
reduce(lambda x, y: x * y, exp_output_shape))
if input_elements != exp_output_shape_elements:
            logging.warning('Input shape %s and output shape %s not matched for %s.' % (
                str(input_shape), str(exp_output_shape), node['attr']['name']))
return [input_shape], [exp_output_shape]
@staticmethod
def Concat_get_shape(grapher, node):
input_shape = []
for in_node in node['inbounds']:
in_shape = grapher[in_node]['attr']['output_shape'][0]
if in_shape != []:
input_shape.append(in_shape)
logging.info('Get input shape of %s from %s, input shape:%s.' % (
node['attr']['name'], in_node, input_shape[-1]))
axis = node['attr']['attr']['axis'][0]
output_shape = copy.deepcopy(input_shape[0])
for in_shape in input_shape[1:]:
output_shape[axis] += in_shape[axis]
return [input_shape], [output_shape]
@staticmethod
def Concatenate_get_shape(grapher, node):
return ShapeInference.Concat_get_shape(grapher, node)
@staticmethod
def ConcatV2_get_shape(grapher, node):
return ShapeInference.Concat_get_shape(grapher, node)
@staticmethod
def Split_get_shape(grapher, node):
raise NotImplementedError
@staticmethod
def StridedSlice_get_shape(grapher, node):
return None, None
@staticmethod
def Pack_get_shape(grapher, node):
return None, None
def __init__(self, grapher):
seq = ph.get_graph_seq(grapher)
graph = grapher.get_graph()
for node_name in seq:
node_get_shape_name = grapher.get_node_type(
node_name) + '_get_shape'
if node_get_shape_name in dir(self):
                input_shape, output_shape = getattr(
                    self, node_get_shape_name)(graph, graph[node_name])
if output_shape is not None:
graph[node_name]['attr']['output_shape'] = output_shape
if input_shape is not None:
graph[node_name]['attr']['input_shape'] = input_shape
logging.info(
'Input shape of %s op is %s.' %
(node_name, str(input_shape)))
logging.info(
'Output shape of %s op is %s.' %
(node_name, str(output_shape)))
else:
logging.warning(
                    'Op %s is not supported, ignored!' %
grapher.get_node_type(node_name))

@@ -0,0 +1,23 @@
from utils import model_file_to_grapher
import argparse
import json
import sys
def main(input_model, output_path):
result = model_file_to_grapher(input_model)
if output_path:
with open(output_path, 'w') as fp:
json.dump(result, fp, indent=4)
else:
json.dump(result, sys.stdout, indent=4)
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input_model', type=str, required=True)
    parser.add_argument('-o', '--output_path', type=str, required=False)
    args = parser.parse_args()
    main(args.input_model, args.output_path)

@@ -0,0 +1 @@
from .converter import OnnxConverter

@@ -0,0 +1,48 @@
CONV_TYPE = 'Conv'
BN_TYPE = 'BatchNormalization'
SLICE_TYPE = 'Slice'
CONCAT_TYPE = 'Concat'
MAXPOOL_TYPE = 'MaxPool'
AVGPOOL_TYPE = 'AveragePool'
RELU_TYPE = 'Relu'
ADD_TYPE = 'Add'
FC_TYPE = 'Gemm'
RESHAPE_TYPE = 'Reshape'
GAP_TYPE = 'GlobalAveragePool'
CLIP_TYPE = 'Clip'
MUL_TYPE = 'Mul'
DIV_TYPE = 'Div'
HARDSIGMOID_TYPE = 'HardSigmoid'
FLATTEN_TYPE = 'Flatten'
TRANSPOSE_TYPE = 'Transpose'
REDUCEMEAN_TYPE = 'ReduceMean'
SPLIT_TYPE = 'Split'
PAD_TYPE = 'Pad'
OP_ALIAS = {
CONV_TYPE: 'conv',
BN_TYPE: 'bn',
SLICE_TYPE: 'split',
CONCAT_TYPE: 'concat',
MAXPOOL_TYPE: 'maxpool',
AVGPOOL_TYPE: 'avgpool',
RELU_TYPE: 'relu',
ADD_TYPE: 'add',
FC_TYPE: 'fc',
RESHAPE_TYPE: 'reshape',
GAP_TYPE: 'gap',
CLIP_TYPE: 'clip',
MUL_TYPE: 'mul',
DIV_TYPE: 'div',
HARDSIGMOID_TYPE: 'hardsigmoid',
FLATTEN_TYPE: 'flatten',
TRANSPOSE_TYPE: 'transpose',
REDUCEMEAN_TYPE: 'reducemean',
SPLIT_TYPE: 'split',
PAD_TYPE: 'pad',
}
ATTR_ALIAS = {
'pads': ('padding', '__all__'),
'axis': ('split_dim', ['split']),
}

@@ -0,0 +1,137 @@
import networkx as nx
from .utils import get_tensor_shape, convert_attr
from .constants import *
from itertools import chain
import logging
from onnx import AttributeProto, shape_inference
class OnnxConverter:
def __init__(self, model):
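        # Run ONNX shape inference first so every intermediate tensor in
        # graph.value_info carries a static shape, then index all tensors by
        # name together with their producer and consumer nodes.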
inferred_model = shape_inference.infer_shapes(model)
self.graph = inferred_model.graph
self.tensors = {}
for tensor in chain(self.graph.input, self.graph.value_info, self.graph.output):
self.tensors[tensor.name] = {
'shape': get_tensor_shape(tensor),
'inputs': [],
'outputs': [],
}
for node in self.graph.node:
for input_name in node.input:
if input_name in self.tensors:
self.tensors[input_name]['outputs'].append(node)
for output_name in node.output:
if output_name in self.tensors:
self.tensors[output_name]['inputs'].append(node)
self.G = self.to_networkx()
def to_networkx(self):
G = nx.DiGraph()
sliced_tensors = set()
selected_slice = set()
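        # A channel split shows up in ONNX as several sibling Slice nodes over
        # the same input tensor; keep only the first Slice per tensor and later
        # attach its siblings' outputs to it via _get_sibling_slice_output_tensors.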
for node in self.graph.node:
if node.op_type == SLICE_TYPE:
tensor = node.input[0]
if tensor in sliced_tensors:
continue
else:
sliced_tensors.add(tensor)
selected_slice.add(node.name)
G.add_node(node.name, **self.fetch_attrs(node))
for node in self.graph.node:
if node.op_type == SLICE_TYPE and node.name not in selected_slice:
continue
for input_name in node.input:
if input_name in self.tensors: # remove dummy ops
G.add_edge(input_name, node.name)
for output_name in node.output:
if output_name in self.tensors:
G.add_edge(node.name, output_name)
if node.op_type == SLICE_TYPE:
for tensor_name in self._get_sibling_slice_output_tensors(node):
G.add_edge(node.name, tensor_name)
return G
def fetch_attrs(self, node):
attrs = {}
input_tensors = []
for input_name in node.input:
if input_name in self.tensors:
input_tensors.append(self.tensors[input_name]['shape'])
output_tensors = []
for output_name in node.output:
if output_name in self.tensors:
output_tensors.append(self.tensors[output_name]['shape'])
if node.op_type == SLICE_TYPE:
for tensor_name in self._get_sibling_slice_output_tensors(node):
output_tensors.append(self.tensors[tensor_name]['shape'])
if len(input_tensors) == 0 or len(input_tensors[0]) <= 1 or len(output_tensors) == 0 or len(output_tensors[0]) <= 1:
return attrs
if node.op_type not in OP_ALIAS:
logging.warning(f'Unsupported OP: {node.op_type}')
attrs['attr'] = {}
attrs['type'] = OP_ALIAS.get(node.op_type, node.op_type)
attrs['input_shape'] = input_tensors
attrs['output_shape'] = output_tensors
for attr in node.attribute:
if attr.type == AttributeProto.FLOAT:
attrs['attr'][attr.name] = attr.f
elif attr.type == AttributeProto.INT:
attrs['attr'][attr.name] = attr.i
elif attr.type == AttributeProto.INTS:
attrs['attr'][attr.name] = list(attr.ints)
elif attr.type == AttributeProto.STRING:
attrs['attr'][attr.name] = str(attr.s)
else:
logging.warning(f'Unsupported attributes type: {attr.type}')
return attrs
def convert(self):
result = {}
for node in self.G.nodes:
node_attrs = self.G.nodes[node]
if node in self.tensors or not node_attrs:
continue
node_attrs['attr'] = convert_attr(node_attrs['attr'], node_attrs['type'])
outbounds = []
inbounds = []
for successor in self.G.successors(node):
try:
outbounds.append(next(self.G.successors(successor)))
except StopIteration:
pass
for predecessor in self.G.predecessors(node):
try:
inbounds.append(next(self.G.predecessors(predecessor)))
except StopIteration:
pass
result[node] = {
'attr': node_attrs,
'outbounds': outbounds,
'inbounds': inbounds,
}
return result
def _get_sibling_slice_output_tensors(self, node):
output_tensors = []
for slice in self.tensors[node.input[0]]['outputs']:
if slice.name != node.name and slice.op_type == SLICE_TYPE:
for output_name in slice.output:
if output_name in self.tensors:
output_tensors.append(output_name)
return output_tensors

@@ -0,0 +1,31 @@
from .constants import *
def get_tensor_shape(tensor):
shape = []
for dim in tensor.type.tensor_type.shape.dim:
shape.append(dim.dim_value)
if len(shape) == 4:
shape = [shape[0], shape[2], shape[3], shape[1]]
return shape
def convert_attr(attr, type):
def is_type(type, ts):
if ts is None:
return False
elif ts == '__all__':
return True
else:
return type in ts
new_attr = {}
for name, value in attr.items():
new_name, ts = ATTR_ALIAS.get(name, (name, None))
if is_type(type, ts):
new_attr[new_name] = value
else:
new_attr[name] = value
return new_attr

ir_converters/utils.py (new file, 41 lines)

@@ -0,0 +1,41 @@
import onnx
from onnx_converter import OnnxConverter
from frozenpb_converter import FrozenPbConverter
def model_to_grapher(model, model_type=None):
if model_type is None:
if isinstance(model, onnx.ModelProto):
model_type = 'onnx'
else:
raise ValueError(f'Invalid model: {type(model)}')
if model_type == 'onnx':
converter = OnnxConverter(model)
result = converter.convert()
elif model_type == 'pb':
raise NotImplementedError
else:
raise ValueError(f'Unsupported model type: {model_type}')
return result
def model_file_to_grapher(filename, model_type=None):
if model_type is None:
if filename.endswith('.onnx'):
model_type = 'onnx'
elif filename.endswith('.pb'):
converter = FrozenPbConverter(filename)
return converter.get_flatten_grapher()
else:
raise ValueError(f'Unknown file type: {filename}')
if model_type == 'onnx':
model = onnx.load(filename)
elif model_type == 'pb':
raise NotImplementedError
else:
raise ValueError(f'Unsupported model type: {model_type}')
return model_to_grapher(model, model_type)

kerneldetection/README.md (new file, 21 lines)

@@ -0,0 +1,21 @@
# DAGSplitter
Split pb models into kernels on a given device.
## Prerequisite
Please first use our `ruletest` tool to generate the rule files (or handcraft them), and replace the contents of `rulelib/rules` (default rule files are provided there).
## Installation
```
pip install -r requirements.txt
```
## Usage
Input models can be either json or pb. Please refer to `/data/raw.json` for the json format.
To output readable results:
```
python main.py -i INPUT_MODELS [INPUT_MODELS ...] -f readable
```

@@ -0,0 +1,79 @@
import tensorflow as tf
from google.protobuf import text_format
from tensorflow import gfile
from tensorflow import io
from shape_fetcher import ShapeFetcher
class FrozenPbParser:
def __init__(self, pb_file):
        with open(pb_file, 'rb') as f:
            graph = tf.GraphDef()
            graph.ParseFromString(f.read())
self.graph = graph
def find_weights_root(self, node, shape_fetcher):
        if shape_fetcher is None:
return None
if node.op == 'Conv2D':
weight_name = [node.name.replace('/Conv2D', '/weight/read'), node.name.replace('/Conv2D', '/kernel')]
elif node.op == 'DepthwiseConv2dNative':
weight_name = [node.name.replace('/depthwise', '/weight/read')]
elif node.op == 'MatMul':
            weight_name = [node.name.replace('/MatMul', '/weight/read')]
else:
return None
for target_node in self.graph.node:
if target_node.name in weight_name:
return [int(i) for i in shape_fetcher.shape_results[target_node.name + ':0']]
def fetch_attr_to_dict(self, node, shape_fetcher):
attr_dict = {}
list_i_nodes = ['dilations', 'strides', 'ksize']
str_nodes = ['padding', 'data_format']
for attr_name in node.attr.keys():
if attr_name in list_i_nodes:
attr_dict[attr_name] = [int(a) for a in node.attr[attr_name].list.i]
continue
if attr_name in str_nodes:
attr_dict[attr_name] = str(node.attr[attr_name].s)
continue
if attr_name == 'value':
shape = []
for dim in node.attr[attr_name].tensor.tensor_shape.dim:
shape.append(dim.size)
attr_dict['tensor_shape'] = list(map(int, shape))
continue
if attr_name == 'shape':
shape = []
for dim in node.attr[attr_name].shape.dim:
shape.append(dim.size)
attr_dict['shape'] = list(map(int, shape))
continue
attr_dict['weight_shape'] = self.find_weights_root(node, shape_fetcher)
return attr_dict
def parse_graph(self, graph_helper, required_shape=False, insert_node=False):
if required_shape:
shape_fetcher = ShapeFetcher(self.graph)
for node in self.graph.node:
graph_helper.node(str(node.name), list(map(str,node.input)))
graph_helper.set_node_attr(node.name, {
'type': str(node.op),
'output_shape': [int(i) for i in shape_fetcher.shape_results[node.name + ':0']] if required_shape else [],
'attr': self.fetch_attr_to_dict(node, shape_fetcher if required_shape else None),
'node': node if insert_node else None
})

@@ -0,0 +1,20 @@
{
"add_1": {
"attr": {
"type": "Add",
"attr": {}
},
"inbounds": [
]
},
"relu_2": {
"attr": {
"type": "ReLU",
"attr": {}
},
"inbounds": [
"add_1"
]
}
}

@@ -0,0 +1,20 @@
{
"bn_1": {
"attr": {
"type": "FusedBatchNorm",
"attr": {}
},
"inbounds": [
]
},
"relu_2": {
"attr": {
"type": "ReLU",
"attr": {}
},
"inbounds": [
"bn_1"
]
}
}

@@ -0,0 +1,112 @@
{
"Reshape": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"shape": [],
"type": "Reshape"
},
"inbounds": [
"packed"
],
"outbounds": [
"transpose"
]
},
"packed": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"shape": [],
"type": "Pack"
},
"inbounds": [
"strided_slice"
],
"outbounds": [
"Reshape"
]
},
"strided_slice": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"shape": [],
"type": "StridedSlice"
},
"inbounds": [
],
"outbounds": [
"packed"
]
},
"transpose": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"shape": [],
"type": "Transpose"
},
"inbounds": [
"Reshape"
],
"outbounds": [
"Reshape_1"
]
},
"Reshape_1": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"shape": [],
"type": "Reshape"
},
"inbounds": [
"transpose",
"packed_1"
],
"outbounds": [
]
},
"packed_1": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"shape": [],
"type": "Pack"
},
"inbounds": [
"strided_slice_1"
],
"outbounds": [
"Reshape_1"
]
},
"strided_slice_1": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"shape": [],
"type": "StridedSlice"
},
"inbounds": [
],
"outbounds": [
"packed_1"
]
}
}

@@ -0,0 +1,30 @@
{
"conv_1": {
"attr": {
"type": "Conv2D",
"attr": {}
},
"inbounds": [
]
},
"bn_2": {
"attr": {
"type": "FusedBatchNorm",
"attr": {}
},
"inbounds": [
"conv_1"
]
},
"relu_3": {
"attr": {
"type": "ReLU",
"attr": {}
},
"inbounds": [
"bn_1"
]
}
}

@@ -0,0 +1,20 @@
{
"conv_1": {
"attr": {
"type": "Conv2D",
"attr": {}
},
"inbounds": [
]
},
"bn_2": {
"attr": {
"type": "FusedBatchNorm",
"attr": {}
},
"inbounds": [
"conv_1"
]
}
}

@@ -0,0 +1,30 @@
{
"dwconv_1": {
"attr": {
"type": "DepthwiseConv2dNative",
"attr": {}
},
"inbounds": [
]
},
"bn_2": {
"attr": {
"type": "FusedBatchNorm",
"attr": {}
},
"inbounds": [
"dwconv_1"
]
},
"relu_3": {
"attr": {
"type": "ReLU",
"attr": {}
},
"inbounds": [
"bn_1"
]
}
}

@@ -0,0 +1,20 @@
{
"Elewise_1": {
"attr": {
"type": "Elewise",
"attr": {}
},
"inbounds": [
]
},
"Elewise_2": {
"attr": {
"type": "Elewise",
"attr": {}
},
"inbounds": [
"Elewise_1"
]
}
}

@@ -0,0 +1,20 @@
{
"mean_1": {
"attr": {
"type": "Mean",
"attr": {}
},
"inbounds": [
]
},
"reshape_2": {
"attr": {
"type": "Reshape",
"attr": {}
},
"inbounds": [
"mean_1"
]
}
}

@@ -0,0 +1,53 @@
{
"conv1.hswish.hswish/Relu6": {
"attr": {
"attr": {},
"type": "Relu6"
},
"inbounds": [
"conv1.hswish.hswish/add"
],
"outbounds": [
"conv1.hswish.hswish/mul"
]
},
"conv1.hswish.hswish/add": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"shape": [],
"type": "Add"
},
"inbounds": [
],
"outbounds": [
"conv1.hswish.hswish/Relu6"
]
},
"conv1.hswish.hswish/mul": {
"attr": {
"attr": {},
"type": "Mul"
},
"inbounds": [
"conv1.hswish.hswish/Relu6"
],
"outbounds": [
"conv1.hswish.hswish/mul_1"
]
},
"conv1.hswish.hswish/mul_1": {
"attr": {
"attr": {},
"type": "Mul"
},
"inbounds": [
"conv1.hswish.hswish/mul"
],
"outbounds": [
"layer2.1.conv/Conv2D"
]
}
}

@@ -0,0 +1,145 @@
{
"dummy_input": {
"attr": {
"attr": {},
"type": "dummy"
},
"inbounds": [],
"outbounds": [
"mul_1",
"SE/AvgPool"
]
},
"Add_1": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"type": "Add"
},
"inbounds": [
"SE/conv2d_1/BiasAdd"
],
"outbounds": [
"Relu6"
]
},
"Relu6": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"type": "Relu6"
},
"inbounds": [
"Add_1"
],
"outbounds": [
"mul"
]
},
"mul": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"type": "Mul"
},
"inbounds": [
"Relu6"
],
"outbounds": [
"mul_1"
]
},
"mul_1": {
"attr": {
"attr": {
"weight_shape": null
},
"node": null,
"type": "Mul"
},
"inbounds": [
"mul",
"dummy_input"
],
"outbounds": [
]
},
"SE/AvgPool": {
"attr": {
"attr": {},
"type": "AvgPool"
},
"inbounds": [
"dummy_input"
],
"outbounds": [
"SE/conv2d/Conv2D"
]
},
"SE/Relu": {
"attr": {
"attr": {},
"type": "Relu"
},
"inbounds": [
"SE/conv2d/BiasAdd"
],
"outbounds": [
"SE/conv2d_1/Conv2D"
]
},
"SE/conv2d/BiasAdd": {
"attr": {
"attr": {},
"type": "BiasAdd"
},
"inbounds": [
"SE/conv2d/Conv2D"
],
"outbounds": [
"SE/Relu"
]
},
"SE/conv2d/Conv2D": {
"attr": {
"attr": {},
"type": "Conv2D"
},
"inbounds": [
"SE/AvgPool"
],
"outbounds": [
"SE/conv2d/BiasAdd"
]
},
"SE/conv2d_1/BiasAdd": {
"attr": {
"attr": {},
"type": "BiasAdd"
},
"inbounds": [
"SE/conv2d_1/Conv2D"
],
"outbounds": [
"Add_1"
]
},
"SE/conv2d_1/Conv2D": {
"attr": {
"attr": {},
"type": "Conv2D"
},
"inbounds": [
"SE/Relu"
],
"outbounds": [
"SE/conv2d_1/BiasAdd"
]
}
}

@@ -0,0 +1,9 @@
import os
from grapher_tool import Grapher
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
def get_fusion_unit(name):
filename = os.path.join(BASE_DIR, f'{name}_fusionunit.json')
return Grapher(filename)

@@ -0,0 +1,301 @@
import json
import copy
class Grapher:
def __init__(self, filename=None, graph=None):
if filename is not None:
self.graph = json.load(open(filename, 'r'))
elif graph is not None:
self.graph = copy.deepcopy(graph)
else:
self.graph = {}
self.refresh()
def node(self, name, inbound_nodes=None):
self.graph[name] = {}
if inbound_nodes is not None:
self.graph[name]['inbounds'] = inbound_nodes
for node in inbound_nodes:
if node not in self.graph.keys():
self.graph[node] = {}
if 'outbounds' not in self.graph[node].keys():
self.graph[node]['outbounds'] = []
self.graph[node]['outbounds'].append(name)
def refresh(self):
for name in self.graph.keys():
self.graph[name]['outbounds'] = []
for name in self.graph.keys():
if 'inbounds' not in self.graph[name].keys():
self.graph[name]['inbounds'] = []
inbounds = self.graph[name]['inbounds'][:]
for node in inbounds:
if node not in self.graph.keys():
while node in self.graph[name]['inbounds']:
self.graph[name]['inbounds'].remove(node)
print(node)
else:
if 'outbounds' not in self.graph[node].keys():
self.graph[node]['outbounds'] = []
self.graph[node]['outbounds'].append(name)
def get_graph(self):
return self.graph
def get_node_inbounds(self, name):
if 'inbounds' in self.graph[name]:
return self.graph[name]['inbounds']
else:
return []
def get_node_outbounds(self, name):
if 'outbounds' in self.graph[name]:
return self.graph[name]['outbounds']
else:
return []
def set_node_inbounds(self, name, inbounds):
self.graph[name]['inbounds'] = inbounds
def set_node_outbounds(self, name, outbounds):
self.graph[name]['outbounds'] = outbounds
def remove_node_inbounds(self, name, inbound):
try:
self.graph[name]['inbounds'].remove(inbound)
except (ValueError, KeyError):
pass
def remove_node_outbounds(self, name, outbound):
try:
self.graph[name]['outbounds'].remove(outbound)
except (ValueError, KeyError):
pass
def add_node_inbounds(self, name, inbound):
try:
self.graph[name]['inbounds'].append(inbound)
except (ValueError, KeyError):
pass
def add_node_outbounds(self, name, outbound):
try:
self.graph[name]['outbounds'].append(outbound)
except (ValueError, KeyError):
pass
def get_graph_heads(self):
self.heads = []
for (key, value) in self.graph.items():
if "graph_head" in value['attr']['attr']:
self.heads.append(key)
return self.heads
def get_graph_tails(self):
self.tails = []
for (key, value) in self.graph.items():
if "graph_tail" in value['attr']['attr']:
self.tails.append(key)
return self.tails
def add_node_attr(self, name, attr_key, attr_value):
if name not in self.graph.keys():
self.graph[name] = {}
self.graph[name]['attr']['attr'][attr_key] = attr_value
def set_node_attr(self, name, attr):
if name not in self.graph.keys():
self.graph[name] = {}
self.graph[name]['attr'] = attr
def get_node_attr(self, name):
if name in self.graph.keys():
return self.graph[name]['attr']
else:
return None
def get_node_type(self, name):
try:
if name in self.graph.keys():
return self.graph[name]['attr']['type']
else:
return None
except:
return None
def set_node_type(self, name, type):
if name not in self.graph.keys():
self.graph[name] = {}
self.graph[name]['attr']['type'] = type
def fuse(self, subgraph, type, name=None, attr=None, is_block=True):
        '''
        subgraph: a list of node names.
        Nothing is done (returns False) if the subgraph does not fully exist in self.
        '''
for node in subgraph:
if node not in self.graph:
return False
if name is None:
name = ';'.join(subgraph)
if attr is None:
attr = {'attr': {}}
attr['type'] = type
if is_block:
attr['attr']['primitive_nodes'] = list(subgraph)
self.graph[name] = {
'attr': attr,
'inbounds': [],
'outbounds': [],
}
for node in subgraph:
for inbound in self.get_node_inbounds(node):
if inbound not in subgraph:
if inbound not in self.get_node_inbounds(name):
self.add_node_inbounds(name, inbound)
self.remove_node_outbounds(inbound, node)
if name not in self.get_node_outbounds(inbound):
self.add_node_outbounds(inbound, name)
for outbound in self.get_node_outbounds(node):
if outbound not in subgraph:
if outbound not in self.get_node_outbounds(name):
self.add_node_outbounds(name, outbound)
self.remove_node_inbounds(outbound, node)
if name not in self.get_node_inbounds(outbound):
self.add_node_inbounds(outbound, name)
for node in subgraph:
del self.graph[node]
return True
def get_primitive_nodes(self, name):
try:
return self.graph[name]['attr']['attr']['primitive_nodes']
except KeyError:
return [name]
def plot_graphs(self, comment='Network Grapher View'):
from graphviz import Digraph
dot = Digraph(comment=comment)
for (key, value) in self.graph.items():
dot.node(key, key)
if 'inbounds' in value.keys():
for node in value['inbounds']:
dot.edge(node, key, label=', '.join(str(x) for x in value['attr']['shape']))
dot.render('graph.gv', view=False)
def plot_networkx_graph(self):
import matplotlib.pyplot as plt
import networkx as nx
plt.subplot(121)
nx.draw(self.get_networkx_graph(), with_labels=True, font_weight='bold')
plt.show()
def get_networkx_graph(self):
import networkx as nx
G = nx.MultiDiGraph()
for (key, value) in self.graph.items():
try:
G.add_node(key, type=value['attr']['type'], **value['attr']['attr'])
if 'inbounds' in value.keys():
for node in value['inbounds']:
G.add_edge(node, key)
except:
continue
self.graphx = G
return G
def match_isomorph_vf2(self):
pass
def find_subgraphs(self, sub_graph, match_func):
from networkx.algorithms import isomorphism as iso
GM = iso.MultiDiGraphMatcher(self.get_networkx_graph(), sub_graph.get_networkx_graph(), node_match=match_func)
matches = []
for match in GM.subgraph_isomorphisms_iter():
matches.append({
key: value
for key, value in match.items()
if sub_graph.get_node_type(value) != 'dummy'
})
return matches
def find_weight_roots(self, layer_name):
weight_roots = []
weights_nodes = []
for inbound in self.graph[layer_name]['inbounds']:
if self.graph[inbound]['attr']['type'] == 'Identity' \
and len(self.graph[inbound]['inbounds']) == 1:
if self.graph[self.graph[inbound]['inbounds'][0]]['attr']['type'] == 'Const':
weight_roots.append(inbound)
weights_nodes.append(inbound)
weights_nodes.append(self.graph[inbound]['inbounds'][0])
if self.graph[inbound]['attr']['type'] == 'Const' \
and len(self.graph[inbound]['inbounds']) == 0:
weight_roots.append(inbound)
weights_nodes.append(inbound)
return weight_roots, weights_nodes
def get_subgraphs(self, sub_graph, match_func):
import tensorflow as tf
import copy
fetched_subgraphs = self.find_subgraphs(sub_graph, match_func)
tar_sub_graphs = []
for sub_fetch_graph in fetched_subgraphs:
tar_sub_graphs.append(tf.GraphDef())
for op_entry in sub_fetch_graph.keys():
                # --- Replace dummy op ---
if sub_graph.get_graph()[sub_fetch_graph[op_entry]]['attr']['type'] == 'dummy':
dummy_op = tar_sub_graphs[-1].node.add()
dummy_op.op = "Identity"
dummy_op.name = sub_fetch_graph[op_entry]
dummy_op.input.extend(sub_graph.get_graph()[sub_fetch_graph[op_entry]]['inbounds'])
dummy_op.attr['T'].type = 1
# if 'graph_head' in sub_graph.get_graph()[sub_fetch_graph[op_entry]]['attr']['attr']:
# dummy_op.attr['shape'] = []
# dummy_op.attr['shape'].dim = list(map(int, sub_graph.get_graph()[sub_fetch_graph[op_entry]]['attr']['attr']['graph_head'].split(',')))
print(dummy_op)
else:
# --- Fetch the main op ---
node = copy.deepcopy(self.graph[op_entry]['attr']['node'])
node.name = sub_fetch_graph[op_entry]
del node.input[:]
node.input.extend(sub_graph.get_graph()[sub_fetch_graph[op_entry]]['inbounds'])
# --- Fetch the constant op ---
roots, nodes = self.find_weight_roots(op_entry)
for weight_root in roots:
node.input.append(weight_root)
for weight_node in nodes:
tar_sub_graphs[-1].node.append(self.graph[weight_node]['attr']['node'])
tar_sub_graphs[-1].node.append(node)
#tf.io.write_graph(tar_sub_graphs[-1], '', 'a.pb')
return tar_sub_graphs
def dump_json(self, filename):
with open(filename, 'w+') as fp:
try:
json.dump(self.graph, fp, indent=4, sort_keys=True)
            except TypeError:
print('Find unsupport field when dumping to json, skipped.')
json.dump(self.graph, fp, indent=4, skipkeys=True, sort_keys=True)

@@ -0,0 +1,108 @@
import json
import h5py
class H5dfParser:
def __init__(self, h5_file):
f = h5py.File(h5_file, mode='r')
self.f = f
model_config_raw = f.attrs.get('model_config')
self.model_config = json.loads(model_config_raw.decode('utf-8'))
self.keras_version = self.get_keras_version()
def get_h5df_file(self):
return self.f
def get_model_config(self):
return self.model_config
def get_keras_version(self):
if 'keras_version' in self.f['model_weights'].attrs:
original_keras_version = self.f['model_weights']\
.attrs['keras_version'].decode('utf8')
return original_keras_version
else:
return '1'
def get_backend_version(self):
if 'backend' in self.f['model_weights'].attrs:
original_backend = self.f['model_weights']\
.attrs['backend'].decode('utf8')
return original_backend
else:
return None
def find_weights_root(self, layer_name):
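        # Descend through nested HDF5 groups starting at layer_name until a
        # group holding the actual weight datasets (or multiple children) is reached.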
if self.keras_version != '1':
layer = self.f['model_weights']
else:
layer = self.f
while True:
layer = layer[layer_name]
if (not hasattr(layer, "keys")) or len(layer.keys()) > 1:
break
layer_keys = list(layer.keys())
if len(layer_keys) < 1:
return None
else:
layer_name = list(layer.keys())[0]
return layer
def get_if_sequential(self):
if self.model_config['class_name'] == 'Sequential':
return True
else:
return False
def join_inbound_nodes(self, layer):
inbound_nodes = []
if 'inbound_nodes' in layer.keys():
if len(layer['inbound_nodes']) > 0:
for inbound in layer['inbound_nodes'][0]:
inbound_nodes.append(inbound[0])
return inbound_nodes
def parse_graph(self, graph_helper):
if self.get_if_sequential():
self.parse_sequential_graph(graph_helper)
else:
self.parse_model_graph(
self.get_model_config()['config']['layers'],
graph_helper)
def parse_sequential_graph(self, graph_helper):
self.joined_layers = []
for layers in self.model_config['config']['layers']:
if layers['class_name'] == 'Model':
self.parse_model_graph(
layers['config']['layers'], graph_helper)
else:
if layers['class_name'] + '_helper' in dir(KerasParser):
tails = graph_helper.get_graph_tail()
if len(tails) != 1:
raise NotImplementedError
else:
graph_helper.node(layers['config']['name'], tails)
graph_helper.set_node_attr(
layers['config']['name'], {
'type': layers['class_name'],
'shape': [],
'attr': layers['config'],
#'node': layers
})
else:
raise NotImplementedError
def parse_model_graph(self, model_layers, graph_helper):
for layer in model_layers:
inbound_nodes = self.join_inbound_nodes(layer)
graph_helper.node(layer['name'], inbound_nodes)
graph_helper.set_node_attr(
layer['config']['name'], {
'type': layer['class_name'],
'shape': [],
'attr': layer['config'],
#'node': layer
})

Просмотреть файл

@ -0,0 +1,245 @@
import sys
sys.path.append("kerneldetection")
from rulelib.rule_reader import RuleReader
from rulelib.rule_splitter import RuleSplitter
from frozenpb_parser import FrozenPbParser
from grapher_tool import Grapher
import json
import os
import pandas as pd
import argparse
import copy
from itertools import groupby
backend_maps = {
"cpu":"tflite_cpu",
"gpu":"tflite_gpu",
"vpu":"vpu"
}
dummy_types = [
'Const',
'Identity',
'Placeholder',
]
op_alias = {
'Relu6': 'relu',
'Relu': 'relu',
'Add': 'add',
'Biasadd': 'add',
'Conv2D': 'conv',
'Reshape': 'reshape',
'FusedBatchNorm': 'bn',
'FusedBatchNormV3': 'bn',
'MatMul': 'fc',
'MaxPool': 'maxpool',
'AvgPool': 'avgpool',
'Mean': 'gap',
'Mul': 'mul',
'DepthwiseConv2dNative': 'dwconv',
'ConcatV2': 'concat',
'Split': 'split',
}
fusion_map = {
'SE': 'mul-avgpool-conv-add-relu-conv-add-add-relu-mul',
'hswish': 'relu-mul-mul-add',
'bn':"bnV3",
'channelshuffle': 'reshape-Transpose-reshape-Pack-StridedSlice-Pack-StridedSlice',
'global-avgpool': 'gap-reshape',
}
def get_input_tensors(node, graph):
input_tensors = []
for inbound in graph.get_node_inbounds(node):
try:
shape = graph.get_node_attr(inbound)['output_shape']
type = graph.get_node_type(node)
if shape and type not in dummy_types:
if graph.get_node_type(inbound) == 'Split':
outbounds = graph.get_node_outbounds(inbound)
shapes = shape
for outbound, shape in zip(outbounds, shapes):
if outbound == node:
input_tensors.append(shape)
else:
input_tensors.append(shape)
except:
pass
return input_tensors
def bb_to_kernel(bb, graph):
types = [graph.get_node_type(node) for node in bb]
#print(types)
types = [t for t in types if t and t not in dummy_types]
for old, new in op_alias.items():
for i in range(len(types)):
types[i] = types[i].replace(old, new)
if types:
type = '-'.join(types)
for block, ops in fusion_map.items():
type = type.replace(ops, block)
kernel = {
'op': type,
}
layer = bb[0]
type = types[0]
attr = graph.get_node_attr(layer)['attr']
shape = graph.get_node_attr(layer)['output_shape']
if type in ['conv', 'dwconv']:
weight_shape = attr['weight_shape']
try:
kernel['ks'] = weight_shape[0:2]
kernel['cin'] = weight_shape[2]
kernel['cout'] = weight_shape[3]
kernel['strides'] = attr['strides']
if type=='dwconv':
kernel['cout']=kernel['cin']
except:
print(bb)
elif type in ['maxpool', 'avgpool']:
kernel['ks'] = attr['ksize']
kernel['cin'] = shape[3]
kernel['cout'] = shape[3]
kernel['strides'] = attr['strides']
elif type == 'fc':
kernel['cin'] = shape[1]
kernel['cout'] = shape[1]
elif type == 'gap':
kernel['cin'] = shape[3]
kernel['cout'] = shape[3]
elif type in ['relu','hswish']:
kernel['cin'] = shape[-1]
kernel['cout'] = shape[-1]
input_tensors = get_input_tensors(layer, graph)
kernel['input_tensors'] = input_tensors
#print(type,input_tensors)
if type not in ['relu','bn', 'fc', 'reshape', 'Pack', 'StridedSlice','split']:
input_shape = input_tensors[0]
kernel['inputh'] = input_shape[1]
kernel['inputw'] = input_shape[2]
elif type in ['fc']:
input_shape = input_tensors[0]
kernel['cin']=input_shape[1]
if type == 'split':
kernel['split_dim'] = attr['split_dim']
kernel['output_tensors'] = shape
return kernel
else:
return None
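# Illustrative example of the kernel dict returned for a fused conv block
# (all values below are hypothetical, not taken from a real model):
# {'op': 'conv-bn-relu', 'ks': [3, 3], 'cin': 32, 'cout': 64,
#  'strides': [1, 2, 2, 1], 'inputh': 56, 'inputw': 56,
#  'input_tensors': [[1, 56, 56, 32]]}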
def merge_split(graph: Grapher):
split_nodes = [node for node in graph.get_graph().keys() if graph.get_node_type(node) == 'Split']
# groupby only merges consecutive items, so sort by the grouping key first
split_nodes.sort(key=lambda name: name.split('/')[0])
for name, group in groupby(split_nodes, lambda name: name.split('/')[0]):
group = list(group)
group.sort(key=lambda name: name.split('/')[1])
split_dim = graph.get_node_attr(group[0])['attr']['split_dim']
inbounds = graph.get_node_inbounds(group[0])
output_shapes = [graph.get_node_attr(node)['output_shape'] for node in group]
# assert
for i, node in enumerate(group):
assert graph.get_node_attr(node)['attr']['idx'] == i
assert graph.get_node_inbounds(node) == inbounds
assert graph.get_node_attr(node)['attr']['split_dim'] == split_dim
graph.fuse(group, 'Split', name, {
'attr': {
'split_dim': split_dim,
},
'output_shape': output_shapes,
}, is_block=False)
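# Note (interpretation of the code above): the parser emits one Split node per
# output branch; merge_split groups those siblings by scope name and fuses them
# back into a single Split node whose output_shape lists each branch's shape.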
def split_model_into_kernels(input_models,hardware,save_dir,rule_dir='data/fusionrules'):
if hardware in backend_maps:
backend=backend_maps[hardware]
else:
raise ValueError('Unsupported hardware')
splitter = RuleSplitter(RuleReader())
kernel_types = {}
print(input_models)
mname=input_models.split('/')[-1].replace(".json","")
input_models=json.load(open(input_models,'r'))
with pd.ExcelWriter(save_dir+'/'+mname+'_result.xlsx', engine='xlsxwriter', mode='w') as writer:
indexes = []
counts = []
kernel_types[backend] = set({})
reader = RuleReader(rule_dir+f'/rule_{backend}.json')
splitter = RuleSplitter(reader)
mdicts={}
for mid in input_models:
model_name=mid
fname=mid.split('_')[0]
model=input_models[model_name]
graph = Grapher(graph=model)
merge_split(graph)
tmp_graph = copy.deepcopy(graph)
result = splitter.split(tmp_graph)
bb_types = {}
kernels = []
for bb in result:
kernel = bb_to_kernel(bb, graph)
if kernel is not None:
type = kernel['op']
bb_types[type] = bb_types.get(type, 0) + 1
kernels.append(kernel)
output = {model_name: kernels}
mdicts[model_name]=kernels
for type, count in bb_types.items():
kernel_types[backend].add(type)
indexes.append((model_name, type))
counts.append(count)
#sys.exit()
#break
index = pd.MultiIndex.from_tuples(indexes, names=['model', 'type'])
df = pd.DataFrame(counts, index=index, columns=['Count'])
df.to_excel(writer, sheet_name=backend)
kernel_types[backend] = list(kernel_types[backend])
filename = os.path.join(save_dir, f'{hardware}_{fname}.json')
os.makedirs(save_dir, exist_ok=True)
with open(filename, 'w', encoding='utf-8') as fp:
json.dump(mdicts, fp,indent=2)
print(json.dumps(kernel_types))
return kernel_types,mdicts
if __name__=="__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--hardware', type=str, default='cpu')
parser.add_argument('-i', '--input_models', type=str, required=True, help='Path to input models. Either json or pb.')
parser.add_argument('-dir', '--save_dir', type=str, default='results', help='Directory where the detected kernels and the per-model summary are saved.')
parser.add_argument('-ruledir', '--rule_dir', type=str, default='data/fusionrules', help='Directory containing the fusion rule json files (rule_<backend>.json).')
#parser.add_argument('-t', '--input_type', type=str, choices=['multi-m','single-m'], default='multi-m', help='input file type: multi-m or single-m')
#parser.add_argument('-backend', '--backend', type=str, choices=['tflite_cpu','tflite_gpu','vpu'], default='tflite_cpu', help='Default preserve the original layer names. Readable will assign new kernel names according to types of the layers.')
args = parser.parse_args()
split_model_into_kernels(args.input_models,args.hardware,args.save_dir,rule_dir=args.rule_dir)
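# Example invocation (script path and model file name are illustrative):
#   python kerneldetection/kernel_detector.py -i data/testmodels/alexnet_0.json --hardware cpu -dir results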

Просмотреть файл

@ -0,0 +1,81 @@
class MatchHelper:
base_type_table = {
'ReLU': [
'Relu',
'Relu6',
'ReLU',
'ReLU6',
],
'BatchNorm': [
'BatchNorm',
'FusedBatchNorm',
'FusedBatchNormV2',
'FusedBatchNormV3',
],
'TwoInputElementWise': [
'BiasAdd',
'Add',
'Mul',
],
'DepthwiseConv2D': [
'DepthwiseConv2dNative',
],
'FC': [
'MatMul',
]
}
@classmethod
def get_base_type(cls, node_type):
for key, value in cls.base_type_table.items():
if node_type in value:
return key
return node_type
@classmethod
def op_type_matcher(cls, node_1, node_2):
def get_ast_by_op(op_name):
for key, value in cls.base_type_table.items():
if op_name in value:
return key
return op_name
if 'type' in node_1 and 'type' in node_2:
if '_tagged' in node_1 or '_tagged' in node_2:
return False
if node_1['type'] == 'dummy' or node_2['type'] == 'dummy':
return True
return get_ast_by_op(node_1['type']) == get_ast_by_op(node_2['type'])
else:
return False
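# Example: op_type_matcher({'type': 'Relu6'}, {'type': 'ReLU'}) returns True because
# both resolve to the 'ReLU' base type; any node carrying '_tagged' never matches.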
@staticmethod
def strip_useless_nodes(graph_helper):
stripped_nodes = ['Const', 'Identity']
graph = graph_helper.get_graph()
removed_node = []
for key, value in graph.items():
if value['attr']['type'] in stripped_nodes:
removed_node.append(key)
for key in removed_node:
del graph[key]
graph_helper.refresh()
@staticmethod
def tag_matched_nodes(grapher, matched_subgraph):
for matched_unit in matched_subgraph:
for node_name in matched_unit.keys():
grapher.add_node_attr(node_name, '_tagged', '')
@staticmethod
def get_untagged_nodes(grapher):
untagged_node = []
for node in grapher.get_graph().keys():
if '_tagged' not in grapher.get_node_attr(node)['attr']:
untagged_node.append(node)
return untagged_node

Просмотреть файл

@ -0,0 +1,33 @@
absl-py==0.11.0
astor==0.8.1
cached-property==1.5.2
decorator==4.4.2
et-xmlfile==1.0.1
gast==0.2.2
google-pasta==0.2.0
grpcio==1.33.2
h5py==3.1.0
importlib-metadata==3.1.0
jdcal==1.4.1
Keras-Applications==1.0.8
Keras-Preprocessing==1.1.2
Markdown==3.3.3
networkx==2.5
numpy==1.19.4
openpyxl==3.0.5
opt-einsum==3.3.0
pandas==1.1.4
protobuf==3.14.0
python-dateutil==2.8.1
pytz==2020.4
six==1.15.0
tensorboard==1.15.0
tensorflow==1.15.0
tensorflow-estimator==1.15.1
termcolor==1.1.0
Werkzeug==1.0.1
wrapt==1.12.1
XlsxWriter==1.3.7
zipp==3.4.0
onnx==1.9.0

Просмотреть файл

Просмотреть файл

@ -0,0 +1,69 @@
import json
from match_helper import MatchHelper
from fusion_lib.utils import get_fusion_unit
from grapher_tool import Grapher
class RuleReader:
op_map = {
'relu': 'ReLU',
'reshape': 'Reshape',
'conv': 'Conv2D',
'dwconv': 'DepthwiseConv2D',
'dense': 'FC',
'add': 'TwoInputElementWise',
'bn': 'BatchNorm',
}
rules_default = {
'MON': 0,
'RT': True,
'FN': True,
}
multiop_blocks = ['se', 'hswish', 'channelshuffle','global-avgpool']
def __init__(self, rule_file=None):
self.rules = {}
if rule_file:
with open(rule_file, 'r') as fp:
self.rules = json.load(fp)
self._extract_fusible()
self._parse_multiop_block()
def is_fusible(self, node_type, outnode_type):
node_base_type = MatchHelper.get_base_type(node_type)
outnode_base_type = MatchHelper.get_base_type(outnode_type)
return (node_base_type, outnode_base_type) in self.fusible
def query_rule(self, rule):
if rule not in self.rules or self.rules[rule]['obey'] is None:
return self.rules_default[rule]
else:
return self.rules[rule]['obey']
def _extract_fusible(self):
self.fusible = []
self.fusion_units = {}
for name, rule in self.rules.items():
if rule['obey'] and name.startswith('BF'):
ops = name.split('_')[1:]
if len(ops) == 2:
self.fusible.append((self.op_map.get(ops[0], ops[0]), self.op_map.get(ops[1], ops[1])))
elif len(ops) > 2:
fusion_unit = {}
get_name = lambda i: f'{ops[i]}_{i}'
for i in range(0, len(ops)):
fusion_unit[get_name(i)] = {
'attr': {
'type': self.op_map.get(ops[i], ops[i]),
'attr': {},
},
'inbounds': [get_name(i - 1)] if i > 0 else [],
'outbounds': [get_name(i + 1)] if i < len(ops) - 1 else [],
}
self.fusion_units['_'.join(ops)] = Grapher(graph=fusion_unit)
def _parse_multiop_block(self):
for block in self.multiop_blocks:
self.fusion_units[block] = get_fusion_unit(block)
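# Worked example (following the rule file format above): a rule named
# 'BF_conv_bn_relu' with obey=true produces fusion_units['conv_bn_relu'],
# a three-node chain conv_0 (Conv2D) -> bn_1 (BatchNorm) -> relu_2 (ReLU)
# that is later used for subgraph matching.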

Просмотреть файл

@ -0,0 +1,69 @@
from grapher_tool import Grapher
import networkx as nx
from .rule_reader import RuleReader
from utils.fusio_aware_graph import FusionAwareGraph
from match_helper import MatchHelper
class RuleSplitter:
def __init__(self, rule_reader: RuleReader):
self.rule_reader = rule_reader
def fuse_multiop_blocks(self, graph: Grapher):
for type, block in self.rule_reader.fusion_units.items():
subgraphs = graph.find_subgraphs(block, MatchHelper.op_type_matcher)
MatchHelper.tag_matched_nodes(graph, subgraphs)
for subgraph in subgraphs:
graph.fuse(subgraph.keys(), type)
def split(self, graph: Grapher):
"""
Apply rules to graph
"""
self.preprocess(graph)
fag = FusionAwareGraph(graph)
i = -1
while i < len(fag) - 1:
i += 1
if fag.is_fused(i):
continue
fag.mark_ready(i)
if not fag.get_outbounds(i):
continue
# MON
mon = self.rule_reader.query_rule('MON')
if mon == 0: # can't fuse if having multiple out node
if len(fag.get_outbounds(i)) > 1:
continue
# FN: TODO: which one is the first node
fused = False
for j in fag.get_outbounds(i):
if fag.is_fused(j):
continue
outnode_type = fag.get_type(j)
node_type = fag.get_type(i)
if not self.rule_reader.is_fusible(node_type, outnode_type):
continue
# RT
if self.rule_reader.query_rule('RT'):
if not fag.is_ready(j):
continue
# fuse node
if mon == 0:
fag.fuse(i, j)
else:
fag.fuse(i, j, True)
fag.mark_ready(j)
fused = True
if mon == 1: # only fused to first outnode
break
if fused:
i -= 1
return fag.get_basicblocks()
def preprocess(self, graph: Grapher):
self.fuse_multiop_blocks(graph)

Просмотреть файл

@ -0,0 +1,716 @@
{
"RBC": {
"latency": {},
"obey": null
},
"CBC": {
"latency": {},
"obey": null
},
"BF_bn_relu": {
"obey": true
},
"BF_conv_bn": {
"obey": true
},
"BF_dwconv_bn": {
"obey": true
},
"BF_se_relu": {
"latency": {
"se": "25.8403 +- 4.0",
"relu": "2.70696 +- 0.0",
"block": "25.6244 +- 1.0",
"ops": "28.547259999999998 +- 4.0"
},
"obey": true
},
"BF_conv_se": {
"latency": {
"conv": "6954.15 +- 498.0",
"se": "25.7455 +- 1.0",
"block": "7055.32 +- 1061.0",
"ops": "6979.8955 +- 498.00100401505216"
},
"obey": false
},
"BF_conv_hswish": {
"latency": {
"conv": "7102.15 +- 1125.0",
"hswish": "7.28969 +- 1.0",
"block": "7016.94 +- 273.0",
"ops": "7109.439689999999 +- 1125.0004444443566"
},
"obey": true
},
"BF_conv_bn_relu": {
"obey": true
},
"BF_dwconv_bn_relu": {
"obey": true
},
"BF_pooling_pooling": {
"latency": {
"pooling_1": "23.8169 +- 2.0",
"pooling_2": "6.09348 +- 1.0",
"block": "30.3321 +- 3.0",
"ops": "29.91038 +- 2.23606797749979"
},
"obey": false
},
"BF_pooling_convtrans": {
"latency": {
"pooling": "23.5467 +- 1.0",
"convtrans": "233.437 +- 11.0",
"block": "258.664 +- 13.0",
"ops": "256.9837 +- 11.045361017187261"
},
"obey": false
},
"BF_pooling_concat": {
"latency": {
"pooling": "23.6651 +- 3.0",
"concat": "4.31221 +- 1.0",
"block": "27.7833 +- 1.0",
"ops": "27.97731 +- 3.1622776601683795"
},
"obey": false
},
"BF_pooling_conv": {
"latency": {
"pooling": "24.0433 +- 3.0",
"conv": "234.217 +- 8.0",
"block": "259.306 +- 10.0",
"ops": "258.26030000000003 +- 8.54400374531753"
},
"obey": false
},
"BF_pooling_add": {
"latency": {
"pooling": "23.7197 +- 3.0",
"add": "1.08937 +- 0.0",
"block": "25.1265 +- 2.0",
"ops": "24.80907 +- 3.0"
},
"obey": false
},
"BF_pooling_relu": {
"latency": {
"pooling": "23.6031 +- 2.0",
"relu": "0.813832 +- 0.0",
"block": "24.4819 +- 3.0",
"ops": "24.416932000000003 +- 2.0"
},
"obey": false
},
"BF_pooling_dwconv": {
"latency": {
"pooling": "24.0672 +- 3.0",
"dwconv": "12.2742 +- 1.0",
"block": "36.1315 +- 3.0",
"ops": "36.3414 +- 3.1622776601683795"
},
"obey": false
},
"BF_pooling_reshape": {
"latency": {
"pooling": "24.0557 +- 2.0",
"reshape": "0.546111 +- 0.0",
"block": "24.2792 +- 3.0",
"ops": "24.601811 +- 2.0"
},
"obey": true
},
"BF_dense_dense": {
"latency": {
"dense_1": "63.0928 +- 3.0",
"dense_2": "63.1484 +- 3.0",
"block": "126.057 +- 4.0",
"ops": "126.24119999999999 +- 4.242640687119285"
},
"obey": false
},
"BF_dense_concat": {
"latency": {
"dense": "63.7282 +- 3.0",
"concat": "0.726432 +- 0.0",
"block": "63.88 +- 3.0",
"ops": "64.454632 +- 3.0"
},
"obey": true
},
"BF_dense_add": {
"latency": {
"dense": "63.747 +- 3.0",
"add": "0.395219 +- 0.0",
"block": "63.1243 +- 3.0",
"ops": "64.142219 +- 3.0"
},
"obey": true
},
"BF_dense_relu": {
"latency": {
"dense": "63.0826 +- 3.0",
"relu": "0.29903 +- 0.0",
"block": "63.1227 +- 3.0",
"ops": "63.38163 +- 3.0"
},
"obey": true
},
"BF_dense_reshape": {
"latency": {
"dense": "63.8388 +- 3.0",
"reshape": "0.252402 +- 0.0",
"block": "61.1986 +- 2.0",
"ops": "64.091202 +- 3.0"
},
"obey": false
},
"BF_convtrans_pooling": {
"latency": {
"convtrans": "365.728 +- 22.0",
"pooling": "23.6671 +- 2.0",
"block": "390.897 +- 23.0",
"ops": "389.3951 +- 22.090722034374522"
},
"obey": false
},
"BF_convtrans_convtrans": {
"latency": {
"convtrans_1": "364.971 +- 43.0",
"convtrans_2": "367.671 +- 11.0",
"block": "739.779 +- 18.0",
"ops": "732.642 +- 44.384682042344295"
},
"obey": false
},
"BF_convtrans_concat": {
"latency": {
"convtrans": "367.681 +- 12.0",
"concat": "14.6945 +- 1.0",
"block": "386.843 +- 11.0",
"ops": "382.3755 +- 12.041594578792296"
},
"obey": false
},
"BF_convtrans_conv": {
"latency": {
"convtrans": "367.905 +- 17.0",
"conv": "7055.8 +- 74.0",
"block": "7689.96 +- 3269.0",
"ops": "7423.705 +- 75.92759709091287"
},
"obey": false
},
"BF_convtrans_add": {
"latency": {
"convtrans": "369.521 +- 12.0",
"add": "3.76198 +- 0.0",
"block": "375.818 +- 11.0",
"ops": "373.28298 +- 12.0"
},
"obey": false
},
"BF_convtrans_relu": {
"latency": {
"convtrans": "368.784 +- 11.0",
"relu": "2.64562 +- 0.0",
"block": "370.992 +- 24.0",
"ops": "371.42962 +- 11.0"
},
"obey": false
},
"BF_convtrans_dwconv": {
"latency": {
"convtrans": "368.08 +- 13.0",
"dwconv": "46.1423 +- 2.0",
"block": "414.965 +- 13.0",
"ops": "414.22229999999996 +- 13.152946437965905"
},
"obey": false
},
"BF_convtrans_reshape": {
"latency": {
"convtrans": "366.354 +- 33.0",
"reshape": "4.00439 +- 2.0",
"block": "372.361 +- 12.0",
"ops": "370.35839 +- 33.06055050963308"
},
"obey": false
},
"BF_concat_pooling": {
"latency": {
"concat": "14.8744 +- 1.0",
"pooling": "36.0377 +- 2.0",
"block": "50.8331 +- 2.0",
"ops": "50.9121 +- 2.23606797749979"
},
"obey": false
},
"BF_concat_dense": {
"latency": {
"concat": "0.707233 +- 0.0",
"dense": "288.586 +- 7.0",
"block": "284.009 +- 6.0",
"ops": "289.293233 +- 7.0"
},
"obey": true
},
"BF_concat_convtrans": {
"latency": {
"concat": "14.705 +- 1.0",
"convtrans": "2285.62 +- 29.0",
"block": "2293.91 +- 97.0",
"ops": "2300.325 +- 29.017236257093817"
},
"obey": false
},
"BF_concat_concat": {
"latency": {
"concat_1": "14.601 +- 1.0",
"concat_2": "22.8767 +- 1.0",
"block": "37.6837 +- 2.0",
"ops": "37.4777 +- 1.4142135623730951"
},
"obey": false
},
"BF_concat_conv": {
"latency": {
"concat": "14.6481 +- 1.0",
"conv": "4099.34 +- 489.0",
"block": "4111.11 +- 61.0",
"ops": "4113.9881000000005 +- 489.00102249381854"
},
"obey": false
},
"BF_concat_add": {
"latency": {
"concat": "14.755 +- 1.0",
"add": "8.23119 +- 0.0",
"block": "23.5586 +- 1.0",
"ops": "22.98619 +- 1.0"
},
"obey": false
},
"BF_concat_relu": {
"latency": {
"concat": "14.7214 +- 1.0",
"relu": "4.89513 +- 0.0",
"block": "20.003 +- 1.0",
"ops": "19.616529999999997 +- 1.0"
},
"obey": false
},
"BF_concat_dwconv": {
"latency": {
"concat": "14.7328 +- 1.0",
"dwconv": "75.1136 +- 2.0",
"block": "92.6577 +- 3.0",
"ops": "89.8464 +- 2.23606797749979"
},
"obey": false
},
"BF_concat_reshape": {
"latency": {
"concat": "14.9525 +- 1.0",
"reshape": "3.84862 +- 0.0",
"block": "19.3578 +- 1.0",
"ops": "18.80112 +- 1.0"
},
"obey": false
},
"BF_conv_pooling": {
"latency": {
"conv": "7032.12 +- 40.0",
"pooling": "23.4528 +- 2.0",
"block": "6967.84 +- 528.0",
"ops": "7055.5728 +- 40.049968789001575"
},
"obey": true
},
"BF_conv_convtrans": {
"latency": {
"conv": "7159.94 +- 1234.0",
"convtrans": "367.24 +- 15.0",
"block": "7362.82 +- 529.0",
"ops": "7527.179999999999 +- 1234.0911635693694"
},
"obey": false
},
"BF_conv_concat": {
"latency": {
"conv": "7017.15 +- 135.0",
"concat": "15.1527 +- 1.0",
"block": "7096.96 +- 41.0",
"ops": "7032.302699999999 +- 135.00370365289984"
},
"obey": false
},
"BF_conv_conv": {
"latency": {
"conv_1": "7044.99 +- 134.0",
"conv_2": "6972.37 +- 728.0",
"block": "14111.7 +- 219.0",
"ops": "14017.36 +- 740.2296940815061"
},
"obey": false
},
"BF_conv_add": {
"latency": {
"conv": "7040.01 +- 109.0",
"add": "3.75556 +- 0.0",
"block": "7053.88 +- 425.0",
"ops": "7043.76556 +- 109.0"
},
"obey": false
},
"BF_conv_relu": {
"latency": {
"conv": "7080.23 +- 49.0",
"relu": "2.58677 +- 0.0",
"block": "6997.21 +- 658.0",
"ops": "7082.816769999999 +- 49.0"
},
"obey": true
},
"BF_conv_dwconv": {
"latency": {
"conv": "7104.57 +- 315.0",
"dwconv": "46.2128 +- 3.0",
"block": "7084.94 +- 114.0",
"ops": "7150.7828 +- 315.0142853903613"
},
"obey": false
},
"BF_conv_reshape": {
"latency": {
"conv": "6950.35 +- 819.0",
"reshape": "1.90676 +- 0.0",
"block": "7001.77 +- 506.0",
"ops": "6952.25676 +- 819.0"
},
"obey": false
},
"BF_add_pooling": {
"latency": {
"add": "3.91771 +- 0.0",
"pooling": "23.9784 +- 1.0",
"block": "27.4255 +- 2.0",
"ops": "27.89611 +- 1.0"
},
"obey": false
},
"BF_add_dense": {
"latency": {
"add": "0.393166 +- 0.0",
"dense": "62.4733 +- 3.0",
"block": "63.6806 +- 3.0",
"ops": "62.866466 +- 3.0"
},
"obey": false
},
"BF_add_convtrans": {
"latency": {
"add": "4.04221 +- 0.0",
"convtrans": "368.715 +- 23.0",
"block": "375.488 +- 11.0",
"ops": "372.75721 +- 23.0"
},
"obey": false
},
"BF_add_concat": {
"latency": {
"add": "3.94385 +- 0.0",
"concat": "15.0988 +- 1.0",
"block": "19.3268 +- 1.0",
"ops": "19.042650000000002 +- 1.0"
},
"obey": false
},
"BF_add_conv": {
"latency": {
"add": "3.84333 +- 0.0",
"conv": "7029.83 +- 550.0",
"block": "7065.58 +- 809.0",
"ops": "7033.67333 +- 550.0"
},
"obey": false
},
"BF_add_add": {
"latency": {
"add_1": "3.77704 +- 0.0",
"add_2": "3.77403 +- 0.0",
"block": "8.49954 +- 1.0",
"ops": "7.55107 +- 0.0"
},
"obey": false
},
"BF_add_relu": {
"latency": {
"add": "3.76701 +- 0.0",
"relu": "2.80635 +- 1.0",
"block": "3.72089 +- 0.0",
"ops": "6.57336 +- 1.0"
},
"obey": true
},
"BF_add_dwconv": {
"latency": {
"add": "3.98569 +- 0.0",
"dwconv": "45.8759 +- 2.0",
"block": "50.57 +- 2.0",
"ops": "49.86159 +- 2.0"
},
"obey": false
},
"BF_add_reshape": {
"latency": {
"add": "3.77753 +- 0.0",
"reshape": "1.88071 +- 0.0",
"block": "6.155 +- 1.0",
"ops": "5.65824 +- 0.0"
},
"obey": false
},
"BF_relu_pooling": {
"latency": {
"relu": "3.27098 +- 1.0",
"pooling": "23.6779 +- 2.0",
"block": "26.2079 +- 1.0",
"ops": "26.948880000000003 +- 2.23606797749979"
},
"obey": false
},
"BF_relu_dense": {
"latency": {
"relu": "0.295931 +- 0.0",
"dense": "63.3644 +- 3.0",
"block": "62.6156 +- 2.0",
"ops": "63.660331000000006 +- 3.0"
},
"obey": true
},
"BF_relu_convtrans": {
"latency": {
"relu": "2.56089 +- 0.0",
"convtrans": "368.91 +- 21.0",
"block": "373.776 +- 12.0",
"ops": "371.47089 +- 21.0"
},
"obey": false
},
"BF_relu_concat": {
"latency": {
"relu": "3.0442 +- 1.0",
"concat": "14.5228 +- 1.0",
"block": "17.5088 +- 1.0",
"ops": "17.567 +- 1.4142135623730951"
},
"obey": false
},
"BF_relu_conv": {
"latency": {
"relu": "2.54642 +- 0.0",
"conv": "7029.89 +- 621.0",
"block": "7002.7 +- 891.0",
"ops": "7032.43642 +- 621.0"
},
"obey": false
},
"BF_relu_add": {
"latency": {
"relu": "3.46085 +- 1.0",
"add": "3.84525 +- 0.0",
"block": "6.27844 +- 1.0",
"ops": "7.306100000000001 +- 1.0"
},
"obey": false
},
"BF_relu_relu": {
"latency": {
"relu_1": "4.02953 +- 2.0",
"relu_2": "2.61024 +- 0.0",
"block": "5.15875 +- 0.0",
"ops": "6.63977 +- 2.0"
},
"obey": true
},
"BF_relu_dwconv": {
"latency": {
"relu": "2.56401 +- 2.0",
"dwconv": "46.2256 +- 2.0",
"block": "49.4145 +- 2.0",
"ops": "48.78961 +- 2.8284271247461903"
},
"obey": false
},
"BF_relu_reshape": {
"latency": {
"relu": "2.6582 +- 1.0",
"reshape": "1.93132 +- 0.0",
"block": "4.50433 +- 1.0",
"ops": "4.58952 +- 1.0"
},
"obey": false
},
"BF_dwconv_pooling": {
"latency": {
"dwconv": "48.1859 +- 74.0",
"pooling": "23.5845 +- 2.0",
"block": "70.8907 +- 3.0",
"ops": "71.7704 +- 74.02702209328699"
},
"obey": false
},
"BF_dwconv_convtrans": {
"latency": {
"dwconv": "45.9235 +- 3.0",
"convtrans": "334.602 +- 81.0",
"block": "431.082 +- 63.0",
"ops": "380.52549999999997 +- 81.05553651663777"
},
"obey": false
},
"BF_dwconv_concat": {
"latency": {
"dwconv": "45.8703 +- 3.0",
"concat": "14.7931 +- 1.0",
"block": "61.2615 +- 3.0",
"ops": "60.6634 +- 3.1622776601683795"
},
"obey": false
},
"BF_dwconv_conv": {
"latency": {
"dwconv": "46.0654 +- 3.0",
"conv": "7009.06 +- 899.0",
"block": "7144.07 +- 1303.0",
"ops": "7055.125400000001 +- 899.0050055478"
},
"obey": false
},
"BF_dwconv_add": {
"latency": {
"dwconv": "46.4447 +- 3.0",
"add": "3.81886 +- 1.0",
"block": "50.0171 +- 4.0",
"ops": "50.26356 +- 3.1622776601683795"
},
"obey": false
},
"BF_dwconv_relu": {
"latency": {
"dwconv": "46.6762 +- 3.0",
"relu": "3.28271 +- 1.0",
"block": "46.9826 +- 4.0",
"ops": "49.95891 +- 3.1622776601683795"
},
"obey": true
},
"MON": {
"latency": {},
"obey": null
},
"RT": {
"obey": false
},
"BF_dwconv_dwconv": {
"latency": {
"dwconv_1": "46.6927 +- 2.0",
"dwconv_2": "46.324 +- 2.0",
"block": "94.1014 +- 5.0",
"ops": "93.0167 +- 2.8284271247461903"
},
"obey": false
},
"BF_dwconv_reshape": {
"latency": {
"dwconv": "47.0474 +- 3.0",
"reshape": "1.96371 +- 0.0",
"block": "50.2357 +- 10.0",
"ops": "49.01111 +- 3.0"
},
"obey": false
},
"BF_reshape_pooling": {
"latency": {
"reshape": "1.96997 +- 0.0",
"pooling": "19.5529 +- 2.0",
"block": "21.3999 +- 1.0",
"ops": "21.52287 +- 2.0"
},
"obey": false
},
"BF_reshape_dense": {
"latency": {
"reshape": "0.25092 +- 0.0",
"dense": "194.359 +- 94.0",
"block": "186.171 +- 106.0",
"ops": "194.60992000000002 +- 94.0"
},
"obey": false
},
"BF_reshape_convtrans": {
"latency": {
"reshape": "2.23773 +- 1.0",
"convtrans": "442.038 +- 17.0",
"block": "442.948 +- 11.0",
"ops": "444.27573 +- 17.029386365926403"
},
"obey": true
},
"BF_reshape_concat": {
"latency": {
"reshape": "1.88479 +- 0.0",
"concat": "12.2494 +- 1.0",
"block": "15.1344 +- 1.0",
"ops": "14.13419 +- 1.0"
},
"obey": false
},
"BF_reshape_conv": {
"latency": {
"reshape": "1.87476 +- 0.0",
"conv": "2053.83 +- 58.0",
"block": "2045.64 +- 143.0",
"ops": "2055.70476 +- 58.0"
},
"obey": true
},
"BF_reshape_add": {
"latency": {
"reshape": "1.93808 +- 0.0",
"add": "3.89689 +- 0.0",
"block": "5.85239 +- 0.0",
"ops": "5.83497 +- 0.0"
},
"obey": false
},
"BF_reshape_relu": {
"latency": {
"reshape": "4.05953 +- 2.0",
"relu": "3.52503 +- 1.0",
"block": "4.44828 +- 0.0",
"ops": "7.58456 +- 2.23606797749979"
},
"obey": true
},
"BF_reshape_dwconv": {
"latency": {
"reshape": "3.03105 +- 1.0",
"dwconv": "66.6583 +- 2.0",
"block": "68.8001 +- 2.0",
"ops": "69.68934999999999 +- 2.23606797749979"
},
"obey": false
},
"BF_reshape_reshape": {
"latency": {
"reshape_1": "1.93799 +- 0.0",
"reshape_2": "3.64003 +- 2.0",
"block": "3.35055 +- 2.0",
"ops": "5.57802 +- 2.0"
},
"obey": true
}
}

Просмотреть файл

@ -0,0 +1,716 @@
{
"RBC": {
"latency": {},
"obey": null
},
"CBC": {
"latency": {},
"obey": null
},
"BF_bn_relu": {
"obey": true
},
"BF_conv_bn": {
"obey": true
},
"BF_dwconv_bn": {
"obey": true
},
"BF_se_relu": {
"latency": {
"se": "0.20546791 +- 0.0010381392776949536",
"relu": "0.00500238 +- 0.00014314",
"block": "0.20545152 +- 0.0013197414924435015",
"ops": "0.21047029 +- 0.0010479609818561948"
},
"obey": true
},
"BF_conv_se": {
"latency": {
"conv": "0.043619 +- 0.000513079",
"se": "0.20576057 +- 0.001196602298561222",
"block": "0.25050117 +- 0.0011426133287074853",
"ops": "0.24937957 +- 0.0013019627956139914"
},
"obey": false
},
"BF_conv_hswish": {
"latency": {
"conv": "0.0436766 +- 0.000507171",
"hswish": "0.00499965 +- 0.000132542",
"block": "0.0442943 +- 0.00049569",
"ops": "0.048676250000000004 +- 0.0005242039727100512"
},
"obey": true
},
"BF_conv_bn_relu": {
"obey": true
},
"BF_dwconv_bn_relu": {
"obey": true
},
"BF_pooling_pooling": {
"latency": {
"pooling_1": "0.00517836 +- 0.000403584",
"pooling_2": "0.00493464 +- 0.000294832",
"block": "0.010243559999999999 +- 0.0005576737501667081",
"ops": "0.010113 +- 0.00049980591561125"
},
"obey": false
},
"BF_pooling_convtrans": {
"latency": {
"pooling": "0.00510839 +- 0.000360885",
"convtrans": "0.0380542 +- 0.000474613",
"block": "0.043171709999999995 +- 0.0005396229393057712",
"ops": "0.04316259 +- 0.0005962344194979019"
},
"obey": false
},
"BF_pooling_concat": {
"latency": {
"pooling": "0.00508688 +- 0.000309648",
"concat": "0.00496677 +- 0.000206225",
"block": "0.01011169 +- 0.0003852795290863505",
"ops": "0.01005365 +- 0.0003720357973757364"
},
"obey": false
},
"BF_pooling_conv": {
"latency": {
"pooling": "0.00507052 +- 0.000289717",
"conv": "0.0348806 +- 0.000393985",
"block": "0.03989553 +- 0.0007281222099544829",
"ops": "0.03995112 +- 0.0004890399986851792"
},
"obey": false
},
"BF_pooling_add": {
"latency": {
"pooling": "0.00508284 +- 0.0003208",
"add": "0.00349847 +- 0.000527459",
"block": "0.00599754 +- 0.000192551",
"ops": "0.00858131 +- 0.0006173537370754307"
},
"obey": true
},
"BF_pooling_relu": {
"latency": {
"pooling": "0.00510526 +- 0.000338044",
"relu": "0.00313837 +- 0.000388286",
"block": "0.00516427 +- 0.000393513",
"ops": "0.00824363 +- 0.0005148201275513614"
},
"obey": true
},
"BF_pooling_dwconv": {
"latency": {
"pooling": "0.00505895 +- 0.000294403",
"dwconv": "0.00613411 +- 0.00037201",
"block": "0.01139611 +- 0.0006041011822286065",
"ops": "0.011193060000000001 +- 0.0004744097032196959"
},
"obey": false
},
"BF_pooling_reshape": {
"latency": {
"pooling": "0.00510159 +- 0.00032655",
"reshape": "0.0160079 +- 0.000176716",
"block": "0.02115855 +- 0.0004302313569476311",
"ops": "0.021109489999999998 +- 0.0003712996729812726"
},
"obey": false
},
"BF_dense_dense": {
"latency": {
"dense_1": "0.0248513 +- 0.000906386",
"dense_2": "0.0246249 +- 0.000890953",
"block": "0.0502878 +- 0.0011456834597780487",
"ops": "0.0494762 +- 0.0012709574458670912"
},
"obey": false
},
"BF_dense_concat": {
"latency": {
"dense": "0.0245874 +- 0.000837659",
"concat": "0.0804531 +- 0.00438398",
"block": "0.1095315 +- 0.0035880003786188486",
"ops": "0.1050405 +- 0.004463289508947521"
},
"obey": false
},
"BF_dense_add": {
"latency": {
"dense": "0.0245446 +- 0.000906167",
"add": "0.00515361 +- 0.000385467",
"block": "0.0254636 +- 0.00102568",
"ops": "0.02969821 +- 0.0009847453680916708"
},
"obey": true
},
"BF_dense_relu": {
"latency": {
"dense": "0.024372 +- 0.000984813",
"relu": "0.00503688 +- 0.000239273",
"block": "0.024237 +- 0.000854059",
"ops": "0.029408880000000002 +- 0.0010134634741804955"
},
"obey": true
},
"BF_dense_reshape": {
"latency": {
"dense": "0.0245385 +- 0.000853008",
"reshape": "0.0249279 +- 0.00030802",
"block": "0.0497557 +- 0.000854099739939663",
"ops": "0.0494664 +- 0.0009069172886564684"
},
"obey": false
},
"BF_convtrans_pooling": {
"latency": {
"convtrans": "0.123148 +- 0.000602757",
"pooling": "0.00515623 +- 0.000383871",
"block": "0.12923091 +- 0.0006884228276321174",
"ops": "0.12830423 +- 0.0007146138437575919"
},
"obey": false
},
"BF_convtrans_convtrans": {
"latency": {
"convtrans_1": "0.123144 +- 0.00059031",
"convtrans_2": "0.123214 +- 0.000633372",
"block": "0.24737399999999998 +- 0.0009688145368918655",
"ops": "0.24635800000000002 +- 0.0008658094400524864"
},
"obey": false
},
"BF_convtrans_concat": {
"latency": {
"convtrans": "0.123182 +- 0.000615809",
"concat": "0.00550231 +- 0.000523855",
"block": "0.12930803000000002 +- 0.0007706232627944734",
"ops": "0.12868431 +- 0.0008084830149768145"
},
"obey": false
},
"BF_convtrans_conv": {
"latency": {
"convtrans": "0.123158 +- 0.000627148",
"conv": "0.0436494 +- 0.000518309",
"block": "0.1673439 +- 0.0007921346331034896",
"ops": "0.1668074 +- 0.0008136085258802295"
},
"obey": false
},
"BF_convtrans_add": {
"latency": {
"convtrans": "0.123173 +- 0.000633334",
"add": "0.0049953 +- 0.000160723",
"block": "0.123828 +- 0.000712515",
"ops": "0.1281683 +- 0.0006534093956203876"
},
"obey": true
},
"BF_convtrans_relu": {
"latency": {
"convtrans": "0.123229 +- 0.000623858",
"relu": "0.00500887 +- 0.00013384",
"block": "0.123235 +- 0.000622105",
"ops": "0.12823787 +- 0.0006380532499439212"
},
"obey": true
},
"BF_convtrans_dwconv": {
"latency": {
"convtrans": "0.12316 +- 0.000624764",
"dwconv": "0.0108685 +- 0.000383578",
"block": "0.1343174 +- 0.0007225331664505098",
"ops": "0.1340285 +- 0.0007331180926562923"
},
"obey": false
},
"BF_convtrans_reshape": {
"latency": {
"convtrans": "0.123187 +- 0.00060614",
"reshape": "0.00800019 +- 0.000169824",
"block": "0.13139309 +- 0.0007902118087506665",
"ops": "0.13118719 +- 0.0006294806514707184"
},
"obey": false
},
"BF_concat_pooling": {
"latency": {
"concat": "0.00544406 +- 0.000524792",
"pooling": "0.00878036 +- 0.000450214",
"block": "0.014310719999999999 +- 0.0006994305464261623",
"ops": "0.014224419999999998 +- 0.0006914472424270705"
},
"obey": false
},
"BF_concat_dense": {
"latency": {
"concat": "0.0843655 +- 0.00202977",
"dense": "0.0505264 +- 0.00201611",
"block": "0.1435292 +- 0.002650163298232771",
"ops": "0.1348919 +- 0.0028608854896692393"
},
"obey": false
},
"BF_concat_convtrans": {
"latency": {
"concat": "0.00541139 +- 0.000519419",
"convtrans": "0.404577 +- 0.00578874",
"block": "0.40915066 +- 0.00515087769447703",
"ops": "0.40998839000000004 +- 0.0058119968070501375"
},
"obey": false
},
"BF_concat_concat": {
"latency": {
"concat_1": "0.00543931 +- 0.000511373",
"concat_2": "0.00801947 +- 0.000324597",
"block": "0.014109899999999998 +- 0.0007889001145658429",
"ops": "0.01345878 +- 0.0006056942772868174"
},
"obey": false
},
"BF_concat_conv": {
"latency": {
"concat": "0.00541134 +- 0.000510702",
"conv": "0.145109 +- 0.00070727",
"block": "0.15137509 +- 0.000962184447409643",
"ops": "0.15052033999999997 +- 0.0008723802987825893"
},
"obey": false
},
"BF_concat_add": {
"latency": {
"concat": "0.00541042 +- 0.000523619",
"add": "0.00701791 +- 0.00019829",
"block": "0.00700348 +- 0.000173022",
"ops": "0.01242833 +- 0.0005599069398221458"
},
"obey": true
},
"BF_concat_relu": {
"latency": {
"concat": "0.00579558 +- 0.000437018",
"relu": "0.00700077 +- 0.0001365",
"block": "0.00588983 +- 0.000336043",
"ops": "0.01279635 +- 0.00045783947222143267"
},
"obey": true
},
"BF_concat_dwconv": {
"latency": {
"concat": "0.00557311 +- 0.000515421",
"dwconv": "0.0164397 +- 0.000862599",
"block": "0.02252003 +- 0.0011098063311447632",
"ops": "0.02201281 +- 0.0010048561300216067"
},
"obey": false
},
"BF_concat_reshape": {
"latency": {
"concat": "0.00544831 +- 0.000521227",
"reshape": "0.012018 +- 0.000199775",
"block": "0.01774575 +- 0.0005549274290031084",
"ops": "0.01746631 +- 0.0005582003548494035"
},
"obey": false
},
"BF_conv_pooling": {
"latency": {
"conv": "0.0436832 +- 0.000521865",
"pooling": "0.00511251 +- 0.000340626",
"block": "0.049763230000000006 +- 0.0005371574666426968",
"ops": "0.04879571 +- 0.0006231927070345096"
},
"obey": false
},
"BF_conv_convtrans": {
"latency": {
"conv": "0.0436621 +- 0.000515679",
"convtrans": "0.123132 +- 0.000628296",
"block": "0.167333 +- 0.0008554384902966432",
"ops": "0.1667941 +- 0.00081282267110176"
},
"obey": false
},
"BF_conv_concat": {
"latency": {
"conv": "0.0436326 +- 0.000507907",
"concat": "0.00540178 +- 0.000511658",
"block": "0.04982702 +- 0.0005887968798235941",
"ops": "0.04903438 +- 0.0007209462043821301"
},
"obey": false
},
"BF_conv_conv": {
"latency": {
"conv_1": "0.0437074 +- 0.000503405",
"conv_2": "0.0436574 +- 0.000516884",
"block": "0.0878567 +- 0.0007299562448174547",
"ops": "0.08736479999999999 +- 0.0007215162253761172"
},
"obey": false
},
"BF_conv_add": {
"latency": {
"conv": "0.0436893 +- 0.000528903",
"add": "0.00501416 +- 0.000136391",
"block": "0.0453509 +- 0.000496553",
"ops": "0.048703460000000004 +- 0.0005462059028333545"
},
"obey": true
},
"BF_conv_relu": {
"latency": {
"conv": "0.0436254 +- 0.00053131",
"relu": "0.00499777 +- 0.000132624",
"block": "0.0439324 +- 0.00044468",
"ops": "0.04862317 +- 0.0005476124920744596"
},
"obey": true
},
"BF_conv_dwconv": {
"latency": {
"conv": "0.0436282 +- 0.000528856",
"dwconv": "0.0108807 +- 0.000366771",
"block": "0.0546827 +- 0.0005934267926349467",
"ops": "0.0545089 +- 0.0006435912019108093"
},
"obey": false
},
"BF_conv_reshape": {
"latency": {
"conv": "0.0436532 +- 0.000519697",
"reshape": "0.00800178 +- 0.000167079",
"block": "0.05173697 +- 0.0006043371006598883",
"ops": "0.05165498 +- 0.0005458940960021458"
},
"obey": false
},
"BF_add_pooling": {
"latency": {
"add": "0.00500961 +- 0.00014225",
"pooling": "0.00508343 +- 0.000293978",
"block": "0.010272900000000001 +- 0.00048713193367300406",
"ops": "0.010093040000000001 +- 0.00032658555844372546"
},
"obey": false
},
"BF_add_dense": {
"latency": {
"add": "0.00507172 +- 0.000300373",
"dense": "0.0244215 +- 0.000864502",
"block": "0.029971010000000003 +- 0.0009793448909658946",
"ops": "0.02949322 +- 0.0009151981463776026"
},
"obey": false
},
"BF_add_convtrans": {
"latency": {
"add": "0.00500467 +- 0.000147258",
"convtrans": "0.123141 +- 0.000617588",
"block": "0.12836417 +- 0.0006771971006450042",
"ops": "0.12814567 +- 0.0006349014540131404"
},
"obey": false
},
"BF_add_concat": {
"latency": {
"add": "0.00500241 +- 0.000140413",
"concat": "0.00545847 +- 0.000520087",
"block": "0.01099076 +- 0.00023884178143700066",
"ops": "0.010460879999999999 +- 0.0005387079896734409"
},
"obey": false
},
"BF_add_conv": {
"latency": {
"add": "0.00501048 +- 0.000145612",
"conv": "0.043651 +- 0.00053472",
"block": "0.04854047 +- 0.0005625849267666171",
"ops": "0.04866148 +- 0.0005541916030977012"
},
"obey": false
},
"BF_add_add": {
"latency": {
"add_1": "0.0050004 +- 0.000140548",
"add_2": "0.00499632 +- 0.000143006",
"block": "0.00587968 +- 0.00040072",
"ops": "0.00999672 +- 0.00020051048935155487"
},
"obey": true
},
"BF_add_relu": {
"latency": {
"add": "0.00500674 +- 0.000137519",
"relu": "0.00500133 +- 0.000132795",
"block": "0.00499715 +- 0.000142207",
"ops": "0.01000807 +- 0.00019117004834963034"
},
"obey": true
},
"BF_add_dwconv": {
"latency": {
"add": "0.00499906 +- 0.000136903",
"dwconv": "0.0108789 +- 0.000353781",
"block": "0.01599437 +- 0.0002550740937100434",
"ops": "0.01587796 +- 0.0003793460522662652"
},
"obey": false
},
"BF_add_reshape": {
"latency": {
"add": "0.00500962 +- 0.000141982",
"reshape": "0.00799501 +- 0.000171203",
"block": "0.013011769999999999 +- 0.00022114984205284888",
"ops": "0.01300463 +- 0.00022241707563269508"
},
"obey": false
},
"BF_relu_pooling": {
"latency": {
"relu": "0.00500302 +- 0.000132311",
"pooling": "0.00508761 +- 0.000316002",
"block": "0.01013795 +- 0.00039791231820590823",
"ops": "0.01009063 +- 0.0003425835149638698"
},
"obey": false
},
"BF_relu_dense": {
"latency": {
"relu": "0.00500257 +- 0.000184527",
"dense": "0.0242465 +- 0.000879434",
"block": "0.02926351 +- 0.0008648418951854726",
"ops": "0.029249070000000002 +- 0.0008985846504837483"
},
"obey": false
},
"BF_relu_convtrans": {
"latency": {
"relu": "0.00500699 +- 0.000140648",
"convtrans": "0.123201 +- 0.00061019",
"block": "0.12815026999999998 +- 0.0006484082899423789",
"ops": "0.12820799 +- 0.0006261898242577885"
},
"obey": false
},
"BF_relu_concat": {
"latency": {
"relu": "0.0049987 +- 0.000131135",
"concat": "0.00551427 +- 0.000517141",
"block": "0.010183629999999999 +- 0.0004210801642454795",
"ops": "0.01051297 +- 0.0005335083899115365"
},
"obey": false
},
"BF_relu_conv": {
"latency": {
"relu": "0.00500086 +- 0.000144196",
"conv": "0.0436385 +- 0.000524813",
"block": "0.04874789 +- 0.000499098166748787",
"ops": "0.04863936 +- 0.0005442620429397956"
},
"obey": false
},
"BF_relu_add": {
"latency": {
"relu": "0.00500077 +- 0.000145203",
"add": "0.00499902 +- 0.000145347",
"block": "0.00500272 +- 0.000137891",
"ops": "0.00999979 +- 0.0002054499005061818"
},
"obey": true
},
"BF_relu_relu": {
"latency": {
"relu_1": "0.00499975 +- 0.000137148",
"relu_2": "0.00499752 +- 0.000146408",
"block": "0.0050061 +- 0.000135588",
"ops": "0.009997269999999999 +- 0.00020061125683271117"
},
"obey": true
},
"BF_relu_dwconv": {
"latency": {
"relu": "0.00500203 +- 0.000142735",
"dwconv": "0.0108665 +- 0.00037116",
"block": "0.015849719999999998 +- 0.000418786789205438",
"ops": "0.01586853 +- 0.0003976594344725144"
},
"obey": false
},
"BF_relu_reshape": {
"latency": {
"relu": "0.00499816 +- 0.000135118",
"reshape": "0.00800168 +- 0.000170518",
"block": "0.01299387 +- 0.00021684074637622882",
"ops": "0.01299984 +- 0.00021756208825988042"
},
"obey": false
},
"BF_dwconv_pooling": {
"latency": {
"dwconv": "0.0108575 +- 0.00037334",
"pooling": "0.00509818 +- 0.000323136",
"block": "0.016139439999999998 +- 0.000536163170056467",
"ops": "0.01595568 +- 0.0004937607012470717"
},
"obey": false
},
"BF_dwconv_convtrans": {
"latency": {
"dwconv": "0.0110189 +- 0.000265511",
"convtrans": "0.123212 +- 0.000618252",
"block": "0.1340673 +- 0.0007118831562131528",
"ops": "0.13423090000000001 +- 0.0006728533470415377"
},
"obey": false
},
"BF_dwconv_concat": {
"latency": {
"dwconv": "0.0108675 +- 0.00035683",
"concat": "0.00545375 +- 0.000520546",
"block": "0.01610423 +- 0.0005516837465160633",
"ops": "0.016321250000000002 +- 0.0006311067952541788"
},
"obey": false
},
"BF_dwconv_conv": {
"latency": {
"dwconv": "0.0108792 +- 0.000359171",
"conv": "0.0436777 +- 0.000525727",
"block": "0.054527400000000004 +- 0.0006290891967161731",
"ops": "0.0545569 +- 0.0006367045513972709"
},
"obey": false
},
"BF_dwconv_add": {
"latency": {
"dwconv": "0.010869 +- 0.000359517",
"add": "0.00500023 +- 0.000143863",
"block": "0.012286 +- 0.000500271",
"ops": "0.015869229999999998 +- 0.0003872325348650343"
},
"obey": true
},
"BF_dwconv_relu": {
"latency": {
"dwconv": "0.0109065 +- 0.000330342",
"relu": "0.0049961 +- 0.000135195",
"block": "0.0110057 +- 0.000157802",
"ops": "0.0159026 +- 0.00035693630382604687"
},
"obey": true
},
"MON": {
"latency": {},
"obey": null
},
"RT": {
"obey": false
},
"BF_dwconv_dwconv": {
"latency": {
"dwconv_1": "0.0108816 +- 0.000343614",
"dwconv_2": "0.0108651 +- 0.000379447",
"block": "0.021771600000000002 +- 0.0004903100730813105",
"ops": "0.0217467 +- 0.0005119087875832959"
},
"obey": false
},
"BF_dwconv_reshape": {
"latency": {
"dwconv": "0.0108739 +- 0.000357921",
"reshape": "0.00800842 +- 0.000166909",
"block": "0.01889845 +- 0.0003814191652237732",
"ops": "0.01888232 +- 0.00039492538095442785"
},
"obey": false
},
"BF_reshape_pooling": {
"latency": {
"reshape": "0.00799864 +- 0.000175056",
"pooling": "0.00502314 +- 0.000202911",
"block": "0.01305359 +- 0.0003216878493897461",
"ops": "0.01302178 +- 0.0002679878300539038"
},
"obey": false
},
"BF_reshape_dense": {
"latency": {
"reshape": "0.024968 +- 0.000223766",
"dense": "0 +- 0",
"block": "0.09324928 +- 0.0008030103657599944",
"ops": "0.024968 +- 0.000223766"
},
"obey": false
},
"BF_reshape_convtrans": {
"latency": {
"reshape": "0.00799118 +- 0.000168458",
"convtrans": "0.197468 +- 0.00105919",
"block": "0.20527044 +- 0.0011521050967615758",
"ops": "0.20545918000000002 +- 0.0010725024726610192"
},
"obey": false
},
"BF_reshape_concat": {
"latency": {
"reshape": "0.00800374 +- 0.000169287",
"concat": "0.00799201 +- 0.000195183",
"block": "0.01600312 +- 0.00026718165490355057",
"ops": "0.015995750000000003 +- 0.0002583689065232115"
},
"obey": false
},
"BF_reshape_conv": {
"latency": {
"reshape": "0.00799733 +- 0.000172346",
"conv": "0.0711989 +- 0.000561079",
"block": "0.07908414 +- 0.0005806590306901289",
"ops": "0.07919622999999999 +- 0.0005869521172608546"
},
"obey": false
},
"BF_reshape_add": {
"latency": {
"reshape": "0.00800295 +- 0.000180739",
"add": "0.00500133 +- 0.000145174",
"block": "0.0080051 +- 0.000179223",
"ops": "0.01300428 +- 0.00023182337327586275"
},
"obey": true
},
"BF_reshape_relu": {
"latency": {
"reshape": "0.00799506 +- 0.000165155",
"relu": "0.004999 +- 0.000132581",
"block": "0.012994970000000002 +- 0.0002163020089828109",
"ops": "0.01299406 +- 0.0002117873829716964"
},
"obey": false
},
"BF_reshape_dwconv": {
"latency": {
"reshape": "0.00801043 +- 0.000167245",
"dwconv": "0.0104327 +- 0.000517514",
"block": "0.01839616 +- 0.0005324881596298644",
"ops": "0.018443130000000002 +- 0.0005438672910012147"
},
"obey": false
},
"BF_reshape_reshape": {
"latency": {
"reshape_1": "0.00799734 +- 0.000163646",
"reshape_2": "0.00700027 +- 0.000132258",
"block": "0.00704122 +- 0.000225084",
"ops": "0.014997610000000001 +- 0.0002104095812457218"
},
"obey": true
}
}

Просмотреть файл

@ -0,0 +1,713 @@
{
"RBC": {
"latency": {},
"obey": null
},
"CBC": {
"latency": {},
"obey": null
},
"BF_conv_bn": {
"obey": true
},
"BF_dwconv_bn": {
"obey": true
},
"BF_conv_bn_relu": {
"obey": true
},
"BF_dwconv_bn_relu": {
"obey": true
},
"BF_se_relu": {
"latency": {
"se": "0.236 +- 0.0",
"relu": "0.029 +- 0",
"block": "0.23399999999999999 +- 0.0",
"ops": "0.265 +- 0.0"
},
"obey": true
},
"BF_conv_se": {
"latency": {
"conv": "0.072 +- 0",
"se": "0.236 +- 0.0",
"block": "0.30000000000000004 +- 0.0",
"ops": "0.308 +- 0.0"
},
"obey": false
},
"BF_conv_hswish": {
"latency": {
"conv": "0.069 +- 0",
"hswish": "0.08499999999999999 +- 0.0",
"block": "0.11099999999999999 +- 0.0",
"ops": "0.154 +- 0.0"
},
"obey": true
},
"BF_pooling_pooling": {
"latency": {
"pooling_1": "0.013 +- 0",
"pooling_2": "0.01 +- 0",
"block": "0.019 +- 0.0",
"ops": "0.023 +- 0.0"
},
"obey": false
},
"BF_pooling_convtrans": {
"latency": {
"pooling": "0.014 +- 0",
"convtrans": "0.08 +- 0",
"block": "0.098 +- 0.0",
"ops": "0.094 +- 0.0"
},
"obey": false
},
"BF_pooling_concat": {
"latency": {
"pooling": "0.014 +- 0",
"concat": "0.0 +- 0",
"block": "0.035 +- 0.0",
"ops": "0.014 +- 0.0"
},
"obey": false
},
"BF_pooling_conv": {
"latency": {
"pooling": "0.013 +- 0",
"conv": "0.028 +- 0",
"block": "0.038 +- 0.0",
"ops": "0.041 +- 0.0"
},
"obey": false
},
"BF_pooling_add": {
"latency": {
"pooling": "0.014 +- 0",
"add": "0.096 +- 0",
"block": "0.10200000000000001 +- 0.0",
"ops": "0.11 +- 0.0"
},
"obey": true
},
"BF_pooling_relu": {
"latency": {
"pooling": "0.013 +- 0",
"relu": "0.026 +- 0",
"block": "0.016 +- 0.0",
"ops": "0.039 +- 0.0"
},
"obey": true
},
"BF_pooling_dwconv": {
"latency": {
"pooling": "0.013 +- 0",
"dwconv": "0.042 +- 0",
"block": "0.056 +- 0.0",
"ops": "0.055 +- 0.0"
},
"obey": false
},
"BF_pooling_reshape": {
"latency": {
"pooling": "0.013 +- 0",
"reshape": "0.117 +- 0.0",
"block": "0.152 +- 0.0",
"ops": "0.13 +- 0.0"
},
"obey": false
},
"BF_dense_dense": {
"latency": {
"dense_1": "0.118 +- 0",
"dense_2": "0.125 +- 0",
"block": "0.218 +- 0.0",
"ops": "0.243 +- 0.0"
},
"obey": false
},
"BF_dense_concat": {
"latency": {
"dense": "0.127 +- 0",
"concat": "0.0 +- 0",
"block": "0.134 +- 0.0",
"ops": "0.127 +- 0.0"
},
"obey": false
},
"BF_dense_add": {
"latency": {
"dense": "0.119 +- 0",
"add": "0.093 +- 0",
"block": "0.188 +- 0.0",
"ops": "0.212 +- 0.0"
},
"obey": false
},
"BF_dense_relu": {
"latency": {
"dense": "0.119 +- 0",
"relu": "0.039 +- 0",
"block": "0.158 +- 0.0",
"ops": "0.158 +- 0.0"
},
"obey": false
},
"BF_dense_reshape": {
"latency": {
"dense": "0.121 +- 0",
"reshape": "0.046 +- 0.0",
"block": "0.17099999999999999 +- 0.0",
"ops": "0.16699999999999998 +- 0.0"
},
"obey": false
},
"BF_convtrans_pooling": {
"latency": {
"convtrans": "0.184 +- 0",
"pooling": "0.014 +- 0",
"block": "0.199 +- 0.0",
"ops": "0.198 +- 0.0"
},
"obey": false
},
"BF_convtrans_convtrans": {
"latency": {
"convtrans_1": "0.185 +- 0",
"convtrans_2": "0.185 +- 0",
"block": "0.364 +- 0.0",
"ops": "0.37 +- 0.0"
},
"obey": false
},
"BF_convtrans_concat": {
"latency": {
"convtrans": "0.187 +- 0",
"concat": "0.0 +- 0",
"block": "0.184 +- 0.0",
"ops": "0.187 +- 0.0"
},
"obey": true
},
"BF_convtrans_conv": {
"latency": {
"convtrans": "0.185 +- 0",
"conv": "0.069 +- 0",
"block": "0.253 +- 0.0",
"ops": "0.254 +- 0.0"
},
"obey": false
},
"BF_convtrans_add": {
"latency": {
"convtrans": "0.185 +- 0",
"add": "0.099 +- 0",
"block": "0.249 +- 0.0",
"ops": "0.28400000000000003 +- 0.0"
},
"obey": false
},
"BF_convtrans_relu": {
"latency": {
"convtrans": "0.185 +- 0",
"relu": "0.031 +- 0",
"block": "0.193 +- 0.0",
"ops": "0.216 +- 0.0"
},
"obey": true
},
"BF_convtrans_dwconv": {
"latency": {
"convtrans": "0.186 +- 0",
"dwconv": "0.114 +- 0",
"block": "0.28600000000000003 +- 0.0",
"ops": "0.3 +- 0.0"
},
"obey": false
},
"BF_convtrans_reshape": {
"latency": {
"convtrans": "0.183 +- 0",
"reshape": "0.16899999999999998 +- 0.0",
"block": "0.441 +- 0.0",
"ops": "0.352 +- 0.0"
},
"obey": false
},
"BF_concat_pooling": {
"latency": {
"concat": "0.0 +- 0",
"pooling": "0.019 +- 0",
"block": "0.019 +- 0.0",
"ops": "0.019 +- 0.0"
},
"obey": false
},
"BF_concat_dense": {
"latency": {
"concat": "0 +- 0",
"dense": "0.345 +- 0",
"block": "0.336 +- 0.0",
"ops": "0.345 +- 0.0"
},
"obey": true
},
"BF_concat_convtrans": {
"latency": {
"concat": "0.0 +- 0",
"convtrans": "0.301 +- 0",
"block": "0.301 +- 0.0",
"ops": "0.301 +- 0.0"
},
"obey": false
},
"BF_concat_concat": {
"latency": {
"concat_1": "0.0 +- 0",
"concat_2": "0.0 +- 0",
"block": "0.0 +- 0.0",
"ops": "0.0 +- 0.0"
},
"obey": false
},
"BF_concat_conv": {
"latency": {
"concat": "0.0 +- 0",
"conv": "0.092 +- 0",
"block": "0.092 +- 0.0",
"ops": "0.092 +- 0.0"
},
"obey": false
},
"BF_concat_add": {
"latency": {
"concat": "0.0 +- 0",
"add": "0.103 +- 0",
"block": "0.104 +- 0.0",
"ops": "0.103 +- 0.0"
},
"obey": false
},
"BF_concat_relu": {
"latency": {
"concat": "0.0 +- 0",
"relu": "0.035 +- 0",
"block": "0.057 +- 0.0",
"ops": "0.035 +- 0.0"
},
"obey": false
},
"BF_concat_dwconv": {
"latency": {
"concat": "0.0 +- 0",
"dwconv": "0.137 +- 0",
"block": "0.135 +- 0.0",
"ops": "0.137 +- 0.0"
},
"obey": true
},
"BF_concat_reshape": {
"latency": {
"concat": "0.0 +- 0",
"reshape": "0.23099999999999998 +- 0.0",
"block": "0.23199999999999998 +- 0.0",
"ops": "0.23099999999999998 +- 0.0"
},
"obey": false
},
"BF_conv_pooling": {
"latency": {
"conv": "0.072 +- 0",
"pooling": "0.014 +- 0",
"block": "0.078 +- 0.0",
"ops": "0.086 +- 0.0"
},
"obey": true
},
"BF_conv_convtrans": {
"latency": {
"conv": "0.069 +- 0",
"convtrans": "0.185 +- 0",
"block": "0.258 +- 0.0",
"ops": "0.254 +- 0.0"
},
"obey": false
},
"BF_conv_concat": {
"latency": {
"conv": "0.069 +- 0",
"concat": "0.0 +- 0",
"block": "0.069 +- 0.0",
"ops": "0.069 +- 0.0"
},
"obey": false
},
"BF_conv_conv": {
"latency": {
"conv_1": "0.072 +- 0",
"conv_2": "0.068 +- 0",
"block": "0.136 +- 0.0",
"ops": "0.14 +- 0.0"
},
"obey": false
},
"BF_conv_add": {
"latency": {
"conv": "0.069 +- 0",
"add": "0.1 +- 0",
"block": "0.146 +- 0.0",
"ops": "0.169 +- 0.0"
},
"obey": false
},
"BF_conv_relu": {
"latency": {
"conv": "0.073 +- 0",
"relu": "0.029 +- 0",
"block": "0.074 +- 0.0",
"ops": "0.102 +- 0.0"
},
"obey": true
},
"BF_conv_dwconv": {
"latency": {
"conv": "0.071 +- 0",
"dwconv": "0.115 +- 0",
"block": "0.177 +- 0.0",
"ops": "0.186 +- 0.0"
},
"obey": false
},
"BF_conv_reshape": {
"latency": {
"conv": "0.069 +- 0",
"reshape": "0.17099999999999999 +- 0.0",
"block": "0.336 +- 0.0",
"ops": "0.24 +- 0.0"
},
"obey": false
},
"BF_add_pooling": {
"latency": {
"add": "0.098 +- 0",
"pooling": "0.013 +- 0",
"block": "0.088 +- 0.0",
"ops": "0.111 +- 0.0"
},
"obey": true
},
"BF_add_dense": {
"latency": {
"add": "0.092 +- 0",
"dense": "0.122 +- 0",
"block": "0.215 +- 0.0",
"ops": "0.214 +- 0.0"
},
"obey": false
},
"BF_add_convtrans": {
"latency": {
"add": "0.096 +- 0",
"convtrans": "0.184 +- 0",
"block": "0.271 +- 0.0",
"ops": "0.28 +- 0.0"
},
"obey": false
},
"BF_add_concat": {
"latency": {
"add": "0.096 +- 0",
"concat": "0.0 +- 0",
"block": "0.102 +- 0.0",
"ops": "0.096 +- 0.0"
},
"obey": false
},
"BF_add_conv": {
"latency": {
"add": "0.096 +- 0",
"conv": "0.069 +- 0",
"block": "0.14400000000000002 +- 0.0",
"ops": "0.165 +- 0.0"
},
"obey": false
},
"BF_add_add": {
"latency": {
"add_1": "0.097 +- 0",
"add_2": "0.097 +- 0",
"block": "0.16899999999999998 +- 0.0",
"ops": "0.194 +- 0.0"
},
"obey": false
},
"BF_add_relu": {
"latency": {
"add": "0.099 +- 0",
"relu": "0.03 +- 0",
"block": "0.1 +- 0.0",
"ops": "0.129 +- 0.0"
},
"obey": true
},
"BF_add_dwconv": {
"latency": {
"add": "0.096 +- 0",
"dwconv": "0.114 +- 0",
"block": "0.216 +- 0.0",
"ops": "0.21000000000000002 +- 0.0"
},
"obey": false
},
"BF_add_reshape": {
"latency": {
"add": "0.099 +- 0",
"reshape": "0.172 +- 0.0",
"block": "0.253 +- 0.0",
"ops": "0.271 +- 0.0"
},
"obey": false
},
"BF_relu_pooling": {
"latency": {
"relu": "0.031 +- 0",
"pooling": "0.014 +- 0",
"block": "0.046 +- 0.0",
"ops": "0.045 +- 0.0"
},
"obey": false
},
"BF_relu_dense": {
"latency": {
"relu": "0.039 +- 0",
"dense": "0.121 +- 0",
"block": "0.158 +- 0.0",
"ops": "0.16 +- 0.0"
},
"obey": false
},
"BF_relu_convtrans": {
"latency": {
"relu": "0.03 +- 0",
"convtrans": "0.184 +- 0",
"block": "0.215 +- 0.0",
"ops": "0.214 +- 0.0"
},
"obey": false
},
"BF_relu_concat": {
"latency": {
"relu": "0.03 +- 0",
"concat": "0.0 +- 0",
"block": "0.03 +- 0.0",
"ops": "0.03 +- 0.0"
},
"obey": false
},
"BF_relu_conv": {
"latency": {
"relu": "0.03 +- 0",
"conv": "0.069 +- 0",
"block": "0.1 +- 0.0",
"ops": "0.099 +- 0.0"
},
"obey": false
},
"BF_relu_add": {
"latency": {
"relu": "0.031 +- 0",
"add": "0.096 +- 0",
"block": "0.13 +- 0.0",
"ops": "0.127 +- 0.0"
},
"obey": false
},
"BF_relu_relu": {
"latency": {
"relu_1": "0.03 +- 0",
"relu_2": "0.03 +- 0",
"block": "0.054 +- 0.0",
"ops": "0.06 +- 0.0"
},
"obey": false
},
"BF_relu_dwconv": {
"latency": {
"relu": "0.031 +- 0",
"dwconv": "0.115 +- 0",
"block": "0.14500000000000002 +- 0.0",
"ops": "0.14600000000000002 +- 0.0"
},
"obey": false
},
"BF_relu_reshape": {
"latency": {
"relu": "0.031 +- 0",
"reshape": "0.16699999999999998 +- 0.0",
"block": "0.201 +- 0.0",
"ops": "0.19799999999999998 +- 0.0"
},
"obey": false
},
"BF_dwconv_pooling": {
"latency": {
"dwconv": "0.113 +- 0",
"pooling": "0.014 +- 0",
"block": "0.125 +- 0.0",
"ops": "0.127 +- 0.0"
},
"obey": false
},
"BF_dwconv_convtrans": {
"latency": {
"dwconv": "0.114 +- 0",
"convtrans": "0.185 +- 0",
"block": "0.307 +- 0.0",
"ops": "0.299 +- 0.0"
},
"obey": false
},
"BF_dwconv_concat": {
"latency": {
"dwconv": "0.115 +- 0",
"concat": "0.0 +- 0",
"block": "0.107 +- 0.0",
"ops": "0.115 +- 0.0"
},
"obey": true
},
"BF_dwconv_conv": {
"latency": {
"dwconv": "0.115 +- 0",
"conv": "0.071 +- 0",
"block": "0.18 +- 0.0",
"ops": "0.186 +- 0.0"
},
"obey": false
},
"BF_dwconv_add": {
"latency": {
"dwconv": "0.114 +- 0",
"add": "0.097 +- 0",
"block": "0.191 +- 0.0",
"ops": "0.21100000000000002 +- 0.0"
},
"obey": false
},
"BF_dwconv_relu": {
"latency": {
"dwconv": "0.114 +- 0",
"relu": "0.03 +- 0",
"block": "0.125 +- 0.0",
"ops": "0.14400000000000002 +- 0.0"
},
"obey": true
},
"MON": {
"latency": {},
"obey": null
},
"RT": {
"obey": false
},
"BF_dwconv_dwconv": {
"latency": {
"dwconv_1": "0.114 +- 0",
"dwconv_2": "0.115 +- 0",
"block": "0.221 +- 0.0",
"ops": "0.229 +- 0.0"
},
"obey": false
},
"BF_dwconv_reshape": {
"latency": {
"dwconv": "0.112 +- 0",
"reshape": "0.16999999999999998 +- 0.0",
"block": "0.28500000000000003 +- 0.0",
"ops": "0.282 +- 0.0"
},
"obey": false
},
"BF_reshape_pooling": {
"latency": {
"reshape": "0.16999999999999998 +- 0.0",
"pooling": "0.016 +- 0",
"block": "0.252 +- 0.0",
"ops": "0.186 +- 0.0"
},
"obey": false
},
"BF_reshape_dense": {
"latency": {
"reshape": "0.046 +- 0.0",
"dense": "0.14600000000000002 +- 0.0",
"block": "0.093 +- 0.0",
"ops": "0.192 +- 0.0"
},
"obey": false
},
"BF_reshape_convtrans": {
"latency": {
"reshape": "0.17099999999999999 +- 0.0",
"convtrans": "0.183 +- 0",
"block": "0.34099999999999997 +- 0.0",
"ops": "0.354 +- 0.0"
},
"obey": false
},
"BF_reshape_concat": {
"latency": {
"reshape": "0.16799999999999998 +- 0.0",
"concat": "0.0 +- 0",
"block": "0.16899999999999998 +- 0.0",
"ops": "0.16799999999999998 +- 0.0"
},
"obey": false
},
"BF_reshape_conv": {
"latency": {
"reshape": "0.16899999999999998 +- 0.0",
"conv": "0.061 +- 0",
"block": "0.23099999999999998 +- 0.0",
"ops": "0.22999999999999998 +- 0.0"
},
"obey": false
},
"BF_reshape_add": {
"latency": {
"reshape": "0.178 +- 0.0",
"add": "0.097 +- 0",
"block": "0.252 +- 0.0",
"ops": "0.275 +- 0.0"
},
"obey": false
},
"BF_reshape_relu": {
"latency": {
"reshape": "0.16999999999999998 +- 0.0",
"relu": "0.031 +- 0",
"block": "0.20299999999999999 +- 0.0",
"ops": "0.20099999999999998 +- 0.0"
},
"obey": false
},
"BF_reshape_dwconv": {
"latency": {
"reshape": "0.16799999999999998 +- 0.0",
"dwconv": "0.115 +- 0",
"block": "0.27999999999999997 +- 0.0",
"ops": "0.283 +- 0.0"
},
"obey": false
},
"BF_reshape_reshape": {
"latency": {
"reshape_1": "0.16999999999999998 +- 0.0",
"reshape_2": "0.172 +- 0.0",
"block": "0.172 +- 0.0",
"ops": "0.34199999999999997 +- 0.0"
},
"obey": true
}
}

Просмотреть файл

@ -0,0 +1,50 @@
import tensorflow as tf
import numpy as np
from typing import List


class ShapeFetcher:
    def get_nodes_with_input_tensor(self, tensor):
        return list(filter(
            lambda op: (tensor in op.inputs) and (op.type not in ["Shape"]),
            self.graph.get_operations()
        ))

    def __init__(self, input_graph):
        tf.compat.v1.disable_eager_execution()
        graph = tf.Graph()
        with graph.as_default():
            tf.import_graph_def(graph_def=input_graph, name="")
        ops = graph.get_operations()

        # The frozen graph is expected to have a single square RGB input placeholder.
        placeholders = list(filter(lambda op: op.type == "Placeholder", ops))
        assert len(placeholders) == 1
        graph_input_tensor = placeholders[0].outputs[0]
        graph_input_tensor_shape = graph_input_tensor.get_shape().as_list()
        assert graph_input_tensor_shape[1] == graph_input_tensor_shape[2]
        assert graph_input_tensor_shape[3] == 3
        self.imsize = graph_input_tensor_shape[1]
        self.graph: tf.Graph = graph

        # Collect every input and output tensor of every op and build a Shape op for each.
        tensors_to_fetch: List[tf.Tensor] = []
        for op in ops:
            tensors_to_fetch.extend(op.inputs)
            tensors_to_fetch.extend(op.outputs)
        shape_tensors = dict()
        for tensor in tensors_to_fetch:
            shape_tensors[tensor.name] = tf.compat.v1.shape(tensor)

        # Evaluate the Shape ops once on a random input to obtain concrete shapes.
        self.shape_results = dict()
        with tf.compat.v1.Session(graph=graph) as sess:
            fake_input = np.random.randn(1, self.imsize, self.imsize, 3)
            for tensor_name, shape_tensor in shape_tensors.items():
                self.shape_results[tensor_name] = sess.run(
                    shape_tensor, feed_dict={
                        graph_input_tensor: fake_input
                    }
                )

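For reference, a hypothetical usage sketch of ShapeFetcher above; the model path and GraphDef loading are assumptions, not part of this commit:

import tensorflow as tf

graph_def = tf.compat.v1.GraphDef()
with open("models/alexnet.pb", "rb") as f:   # hypothetical path
    graph_def.ParseFromString(f.read())

fetcher = ShapeFetcher(graph_def)
for tensor_name, shape in fetcher.shape_results.items():
    print(tensor_name, list(shape))          # e.g. [1, 27, 27, 64]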
Просмотреть файл

Просмотреть файл

@@ -0,0 +1,86 @@
from grapher_tool import Grapher
from .union_find import UF
import networkx as nx


class FusionAwareGraph:
    def __init__(self, graph: Grapher):
        self._graph = graph
        # Node names in topological order over the underlying networkx graph
        self._dag = list(nx.topological_sort(graph.get_networkx_graph()))
        self._uf = UF(len(self._dag))
        reverse = {}
        for index, name in enumerate(self._dag):
            reverse[name] = index
        outbounds = []
        inbounds = []
        for index, name in enumerate(self._dag):
            outbounds.append({reverse[outbound] for outbound in self._graph.get_node_outbounds(name)})
            inbounds.append({reverse[inbound] for inbound in self._graph.get_node_inbounds(name)})
        self._outbounds = outbounds
        self._inbounds = inbounds
        # Nodes without inbound edges are ready from the start
        self._ready = [not inbounds[i] for i in range(0, len(self))]
        self._types = [graph.get_node_type(name) for name in self._dag]

    @property
    def nodes(self):
        return self._dag

    def __len__(self):
        return len(self._dag)

    def __getitem__(self, key):
        return self._dag[key]

    def fuse(self, node, outnode, update=False):
        """
        node should be root, outnode should be an unfused single node
        """
        self._uf.union(node, outnode)
        if not update:
            self._outbounds[node] = self._outbounds[outnode]
        else:
            self._outbounds[node].update(self._outbounds[outnode])

    def mark_ready(self, node):
        self._ready[node] = True

    def is_ready(self, node):
        for inbound in self._inbounds[node]:
            if not self._ready[inbound]:
                return False
        return True

    def is_visited(self, node):
        return self._ready[node]

    def get_outbounds(self, node):
        return self._outbounds[node]

    def get_inbounds(self, node):
        return self._inbounds[node]

    def get_type(self, node):
        return self._types[node]

    def get_basicblocks(self):
        bbs = []
        for _ in range(0, len(self)):
            bbs.append([])
        for i in range(0, len(self)):
            root = self._uf.find(i)
            for node in self._graph.get_primitive_nodes(self[i]):
                bbs[root].append(node)
        bbs = [bb for bb in bbs if bb]
        return bbs

    def is_fused(self, node):
        return self._uf.find(node) != node

    def is_connected(self, p, q):
        return self._uf.connected(p, q)

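A simplified sketch of how FusionAwareGraph could be driven to group fusable kernels; the can_fuse predicate and the greedy single-successor policy are assumptions for illustration, not the repository's rule engine:

def group_kernels(fag, can_fuse):
    # Walk the topological order; fuse a root node into its only successor
    # when the (assumed) can_fuse predicate allows it, then read back groups.
    for i in range(len(fag)):
        if fag.is_fused(i):
            continue
        outs = fag.get_outbounds(i)
        if len(outs) == 1:
            succ = next(iter(outs))
            if not fag.is_fused(succ) and can_fuse(fag.get_type(i), fag.get_type(succ)):
                fag.fuse(i, succ)
        fag.mark_ready(i)
    return fag.get_basicblocks()

# e.g. group_kernels(FusionAwareGraph(grapher), lambda a, b: (a, b) == ("conv", "relu"))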
Просмотреть файл

@@ -0,0 +1,20 @@
class UF:
    """
    UnionFind implemented with path compression optimization
    """

    def __init__(self, N):
        self._parent = list(range(0, N))

    def find(self, p):
        # Path halving: point each visited node at its grandparent while walking up
        while p != self._parent[p]:
            self._parent[p] = self._parent[self._parent[p]]
            p = self._parent[p]
        return p

    def union(self, p, q):
        p = self.find(p)
        q = self.find(q)
        self._parent[q] = p

    def connected(self, p, q):
        return self.find(p) == self.find(q)

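A quick usage example for UF:

uf = UF(5)
uf.union(0, 1)
uf.union(1, 2)
print(uf.connected(0, 2))  # True
print(uf.connected(0, 3))  # False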
Просмотреть файл

@@ -1,5 +1,5 @@
import sys
sys.path.append("Latencypredictor")
sys.path.append("prediction")
from predictors.utils import*
from predictors.extract_feature import*

Просмотреть файл

Двоичные данные
results/alexnet_0_result.xlsx Normal file

Двоичный файл не отображается.

258
results/cpu_alexnet.json Normal file
Просмотреть файл

@@ -0,0 +1,258 @@
{
"alexnet_0": [
{
"op": "conv-relu",
"ks": [
11,
11
],
"cin": 3,
"cout": 64,
"strides": [
1,
4,
4,
1
],
"input_tensors": [
[
1,
224,
224,
3
]
],
"inputh": 224,
"inputw": 224
},
{
"op": "maxpool",
"ks": [
1,
3,
3,
1
],
"cin": 64,
"cout": 64,
"strides": [
1,
2,
2,
1
],
"input_tensors": [
[
1,
54,
54,
64
]
],
"inputh": 54,
"inputw": 54
},
{
"op": "conv-relu",
"ks": [
5,
5
],
"cin": 64,
"cout": 192,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
27,
27,
64
]
],
"inputh": 27,
"inputw": 27
},
{
"op": "maxpool",
"ks": [
1,
3,
3,
1
],
"cin": 192,
"cout": 192,
"strides": [
1,
2,
2,
1
],
"input_tensors": [
[
1,
27,
27,
192
]
],
"inputh": 27,
"inputw": 27
},
{
"op": "conv-relu",
"ks": [
3,
3
],
"cin": 192,
"cout": 384,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
13,
13,
192
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "conv-relu",
"ks": [
3,
3
],
"cin": 384,
"cout": 256,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
13,
13,
384
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "conv-relu",
"ks": [
3,
3
],
"cin": 256,
"cout": 256,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
13,
13,
256
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "maxpool",
"ks": [
1,
3,
3,
1
],
"cin": 256,
"cout": 256,
"strides": [
1,
2,
2,
1
],
"input_tensors": [
[
1,
13,
13,
256
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "global-avgpool",
"cin": 256,
"cout": 256,
"input_tensors": [
[
1,
6,
6,
256
]
],
"inputh": 6,
"inputw": 6
},
{
"op": "fc-relu",
"cin": 256,
"cout": 4096,
"input_tensors": [
[
1,
256
]
]
},
{
"op": "fc-relu",
"cin": 4096,
"cout": 4096,
"input_tensors": [
[
1,
4096
]
]
},
{
"op": "fc",
"cin": 4096,
"cout": 1000,
"input_tensors": [
[
1,
4096
]
]
}
]
}
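A hypothetical sketch of consuming a kernel list like the one above: load the records and sum per-kernel latency predictions. predict_kernel stands in for the project's per-kernel predictor and is not a real API in this commit:

import json

def predict_model_latency(json_path, predict_kernel):
    with open(json_path) as f:
        kernels_by_model = json.load(f)
    totals = {}
    for model_name, kernels in kernels_by_model.items():
        # Each record carries the op type plus its shape/config features.
        totals[model_name] = sum(predict_kernel(k["op"], k) for k in kernels)
    return totals

# e.g. predict_model_latency("results/cpu_alexnet.json", my_predictor)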

258
results/gpu_alexnet.json Normal file
Просмотреть файл

@@ -0,0 +1,258 @@
{
"alexnet_0": [
{
"op": "conv-relu",
"ks": [
11,
11
],
"cin": 3,
"cout": 64,
"strides": [
1,
4,
4,
1
],
"input_tensors": [
[
1,
224,
224,
3
]
],
"inputh": 224,
"inputw": 224
},
{
"op": "maxpool",
"ks": [
1,
3,
3,
1
],
"cin": 64,
"cout": 64,
"strides": [
1,
2,
2,
1
],
"input_tensors": [
[
1,
54,
54,
64
]
],
"inputh": 54,
"inputw": 54
},
{
"op": "conv-relu",
"ks": [
5,
5
],
"cin": 64,
"cout": 192,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
27,
27,
64
]
],
"inputh": 27,
"inputw": 27
},
{
"op": "maxpool",
"ks": [
1,
3,
3,
1
],
"cin": 192,
"cout": 192,
"strides": [
1,
2,
2,
1
],
"input_tensors": [
[
1,
27,
27,
192
]
],
"inputh": 27,
"inputw": 27
},
{
"op": "conv-relu",
"ks": [
3,
3
],
"cin": 192,
"cout": 384,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
13,
13,
192
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "conv-relu",
"ks": [
3,
3
],
"cin": 384,
"cout": 256,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
13,
13,
384
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "conv-relu",
"ks": [
3,
3
],
"cin": 256,
"cout": 256,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
13,
13,
256
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "maxpool",
"ks": [
1,
3,
3,
1
],
"cin": 256,
"cout": 256,
"strides": [
1,
2,
2,
1
],
"input_tensors": [
[
1,
13,
13,
256
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "global-avgpool",
"cin": 256,
"cout": 256,
"input_tensors": [
[
1,
6,
6,
256
]
],
"inputh": 6,
"inputw": 6
},
{
"op": "fc-relu",
"cin": 256,
"cout": 4096,
"input_tensors": [
[
1,
256
]
]
},
{
"op": "fc-relu",
"cin": 4096,
"cout": 4096,
"input_tensors": [
[
1,
4096
]
]
},
{
"op": "fc",
"cin": 4096,
"cout": 1000,
"input_tensors": [
[
1,
4096
]
]
}
]
}

Просмотреть файл

@@ -0,0 +1 @@
alexnet_0,83.3216703389321,84.5299,0.014293518164198748

Просмотреть файл

@@ -0,0 +1 @@
alexnet_0,13.085394437987977,11.9766,0.09258006763087838

Просмотреть файл

@@ -0,0 +1 @@
alexnet_0,16.36299839769042,16.185405,0.01097244076934863

Просмотреть файл

@@ -0,0 +1 @@
alexnet_0,12.104382488684852,10.134,0.19443284869595928

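The result rows in these files read as model name, predicted latency, measured latency, and relative error; a minimal check of that reading (an interpretation, not documented in this commit):

def relative_error(predicted, measured):
    return abs(predicted - measured) / measured

print(relative_error(83.3216703389321, 84.5299))  # ~0.01429, matches the last column above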
280
results/vpu_alexnet.json Normal file
Просмотреть файл

@@ -0,0 +1,280 @@
{
"alexnet_0": [
{
"op": "conv-relu",
"ks": [
11,
11
],
"cin": 3,
"cout": 64,
"strides": [
1,
4,
4,
1
],
"input_tensors": [
[
1,
224,
224,
3
]
],
"inputh": 224,
"inputw": 224
},
{
"op": "maxpool",
"ks": [
1,
3,
3,
1
],
"cin": 64,
"cout": 64,
"strides": [
1,
2,
2,
1
],
"input_tensors": [
[
1,
54,
54,
64
]
],
"inputh": 54,
"inputw": 54
},
{
"op": "conv-relu",
"ks": [
5,
5
],
"cin": 64,
"cout": 192,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
27,
27,
64
]
],
"inputh": 27,
"inputw": 27
},
{
"op": "maxpool",
"ks": [
1,
3,
3,
1
],
"cin": 192,
"cout": 192,
"strides": [
1,
2,
2,
1
],
"input_tensors": [
[
1,
27,
27,
192
]
],
"inputh": 27,
"inputw": 27
},
{
"op": "conv-relu",
"ks": [
3,
3
],
"cin": 192,
"cout": 384,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
13,
13,
192
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "conv-relu",
"ks": [
3,
3
],
"cin": 384,
"cout": 256,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
13,
13,
384
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "conv-relu",
"ks": [
3,
3
],
"cin": 256,
"cout": 256,
"strides": [
1,
1,
1,
1
],
"input_tensors": [
[
1,
13,
13,
256
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "maxpool",
"ks": [
1,
3,
3,
1
],
"cin": 256,
"cout": 256,
"strides": [
1,
2,
2,
1
],
"input_tensors": [
[
1,
13,
13,
256
]
],
"inputh": 13,
"inputw": 13
},
{
"op": "global-avgpool",
"cin": 256,
"cout": 256,
"input_tensors": [
[
1,
6,
6,
256
]
],
"inputh": 6,
"inputw": 6
},
{
"op": "fc",
"cin": 256,
"cout": 4096,
"input_tensors": [
[
1,
256
]
]
},
{
"op": "relu",
"cin": 4096,
"cout": 4096,
"input_tensors": [
[
1,
4096
]
]
},
{
"op": "fc",
"cin": 4096,
"cout": 4096,
"input_tensors": [
[
1,
4096
]
]
},
{
"op": "relu",
"cin": 4096,
"cout": 4096,
"input_tensors": [
[
1,
4096
]
]
},
{
"op": "fc",
"cin": 4096,
"cout": 1000,
"input_tensors": [
[
1,
4096
]
]
}
]
}