Fix bug in TFLite GPU parser (#92)
This commit is contained in:
Родитель
f719fb21e9
Коммит
a1682dd2d2
|
@@ -18,27 +18,8 @@ class TFLiteGPULatencyParser(BaseParser):
|
|||
self.after_fused_graph = ''
|
||||
|
||||
def parse(self, content):
|
||||
result = self._parse_time(content)
|
||||
kernel_operation_map = self._parse_kernel_name(content)
|
||||
work_size = self._parse_work_size(content)
|
||||
self.realtime, self.block_name = self._parse_block(content)
|
||||
self.kernel_sum = sum(value[0] for key, value in result.items())
|
||||
self.kernels = [{}] * len(result)
|
||||
self.before_fused_graph, self.after_fused_graph = self._parse_graph(content)
|
||||
self.comp_avg, self.comp_std = self._parse_comp_time(content)
|
||||
self.nodes = self._parse_node_cpu_time(content)
|
||||
self.errors = self._parse_error(content)
|
||||
for key, value in result.items():
|
||||
self.kernels[key] = {
|
||||
'avg': value[0],
|
||||
'std': value[1],
|
||||
'work_size': work_size[key],
|
||||
'name': kernel_operation_map[key],
|
||||
}
|
||||
|
||||
self.comp_kernel_latency = sum((Latency(kernel['avg'], kernel['std']) for kernel in self.kernels if kernel['name'] != 'to/from tensor'), Latency())
|
||||
|
||||
self.raw_content = content
|
||||
self.comp_kernel_latency = Latency(self.comp_avg, self.comp_std)
|
||||
|
||||
return self
|
||||
|
||||
|
@@ -134,7 +115,7 @@ class TFLiteGPULatencyParser(BaseParser):
|
|||
return before_fused_graph, after_fused_graph
|
||||
|
||||
def _parse_comp_time(self, content):
|
||||
comp_time_regex = r'comp_avg_ms=([\d.e-]+) comp_std_ms=([\d.e-]+)'
|
||||
comp_time_regex = r'comp_avg_ms=([\d.\+e-]+) comp_std_ms=([\d.\+e-]+)'
|
||||
comp_avg, comp_std = 0, 0
|
||||
|
||||
for line in content.splitlines():
|
||||
|
|
|
@@ -50,7 +50,6 @@ class TFLiteProfiler(BaseProfiler):
|
|||
kernel_cmd = f'--kernel_path={self._dst_kernel_path}' if self._dst_kernel_path else ''
|
||||
close_xnnpack_cmd = f'--use_xnnpack=false' if close_xnnpack else ''
|
||||
res = device.shell(f' {taskset_cmd} {self._benchmark_model_path} {kernel_cmd} {close_xnnpack_cmd}' \
|
||||
f' {close_xnnpack_cmd}' \
|
||||
f' --num_threads={self._num_threads}' \
|
||||
f' --num_runs={self._num_runs}' \
|
||||
f' --warmup_runs={self._warm_ups}' \
|
||||
|
|
|
@@ -3,7 +3,7 @@ resnet18, torch, cortexA76cpu_tflite21, 1.0, 216.1971
|
|||
alexnet, torch, cortexA76cpu_tflite21, 1.0, 96.5713
|
||||
vgg16, torch, cortexA76cpu_tflite21, 1.0, 1668.5085
|
||||
squeezenet, torch, cortexA76cpu_tflite21, 1.0, 98.3766
|
||||
densenet161, torch, cortexA76cpu_tflite21, 1.0, 917.2059
|
||||
densenet161, torch, cortexA76cpu_tflite21, 1.0, 917.0045
|
||||
inception_v3, torch, cortexA76cpu_tflite21, 1.0, 586.2565
|
||||
googlenet, torch, cortexA76cpu_tflite21, 1.0, 167.5816
|
||||
shufflenet_v2, torch, cortexA76cpu_tflite21, 1.0, 21.3181
|
||||
|
@@ -15,7 +15,7 @@ resnet18, torch, adreno640gpu_tflite21, 1.0, 39.3235
|
|||
alexnet, torch, adreno640gpu_tflite21, 1.0, 13.1267
|
||||
vgg16, torch, adreno640gpu_tflite21, 1.0, 219.2648
|
||||
squeezenet, torch, adreno640gpu_tflite21, 1.0, 18.6742
|
||||
densenet161, torch, adreno640gpu_tflite21, 1.0, 186.5604
|
||||
densenet161, torch, adreno640gpu_tflite21, 1.0, 186.4237
|
||||
inception_v3, torch, adreno640gpu_tflite21, 1.0, 127.9842
|
||||
googlenet, torch, adreno640gpu_tflite21, 1.0, 32.7581
|
||||
shufflenet_v2, torch, adreno640gpu_tflite21, 1.0, 5.4239
|
||||
|
@@ -27,7 +27,7 @@ resnet18, torch, adreno630gpu_tflite21, 1.0, 49.4287
|
|||
alexnet, torch, adreno630gpu_tflite21, 1.0, 16.8673
|
||||
vgg16, torch, adreno630gpu_tflite21, 1.0, 286.5998
|
||||
squeezenet, torch, adreno630gpu_tflite21, 1.0, 21.095
|
||||
densenet161, torch, adreno630gpu_tflite21, 1.0, 193.0796
|
||||
densenet161, torch, adreno630gpu_tflite21, 1.0, 192.9225
|
||||
inception_v3, torch, adreno630gpu_tflite21, 1.0, 161.3187
|
||||
googlenet, torch, adreno630gpu_tflite21, 1.0, 37.9612
|
||||
shufflenet_v2, torch, adreno630gpu_tflite21, 1.0, 5.5792
|
||||
|
|
Загрузка…
Ссылка в новой задаче