From e977bbc17f1f9da268498408617d8ecf8ad90ead Mon Sep 17 00:00:00 2001
From: guoshzhao
Date: Wed, 19 May 2021 17:06:55 +0800
Subject: [PATCH] Benchmarks: Add Benchmark - Add kernel launch overhead benchmark. (#74)

* add kernel launch overhead benchmark.
---
 examples/benchmarks/kernel_launch_overhead.py |  22 ++++
 .../benchmarks/micro_benchmarks/__init__.py   |   5 +-
 .../kernel_launch_overhead.py                 | 109 ++++++++++++++++++
 .../test_kernel_launch_overhead.py            |  42 +++++++
 4 files changed, 177 insertions(+), 1 deletion(-)
 create mode 100644 examples/benchmarks/kernel_launch_overhead.py
 create mode 100644 superbench/benchmarks/micro_benchmarks/kernel_launch_overhead.py
 create mode 100644 tests/benchmarks/micro_benchmarks/test_kernel_launch_overhead.py

diff --git a/examples/benchmarks/kernel_launch_overhead.py b/examples/benchmarks/kernel_launch_overhead.py
new file mode 100644
index 00000000..67e51928
--- /dev/null
+++ b/examples/benchmarks/kernel_launch_overhead.py
@@ -0,0 +1,22 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""Micro benchmark example for kernel launch overhead.
+
+Commands to run:
+  python3 examples/benchmarks/kernel_launch_overhead.py
+"""
+
+from superbench.benchmarks import BenchmarkRegistry
+from superbench.common.utils import logger
+
+if __name__ == '__main__':
+    context = BenchmarkRegistry.create_benchmark_context('kernel-launch')
+
+    benchmark = BenchmarkRegistry.launch_benchmark(context)
+    if benchmark:
+        logger.info(
+            'benchmark: {}, return code: {}, result: {}'.format(
+                benchmark.name, benchmark.return_code, benchmark.result
+            )
+        )
diff --git a/superbench/benchmarks/micro_benchmarks/__init__.py b/superbench/benchmarks/micro_benchmarks/__init__.py
index 7dec1263..408ad2d0 100644
--- a/superbench/benchmarks/micro_benchmarks/__init__.py
+++ b/superbench/benchmarks/micro_benchmarks/__init__.py
@@ -6,5 +6,8 @@
 from superbench.benchmarks.micro_benchmarks.micro_base import MicroBenchmark, MicroBenchmarkWithInvoke
 from superbench.benchmarks.micro_benchmarks.sharding_matmul import ShardingMatmul
 from superbench.benchmarks.micro_benchmarks.computation_communication_overlap import ComputationCommunicationOverlap
+from superbench.benchmarks.micro_benchmarks.kernel_launch_overhead import KernelLaunch
 
-__all__ = ['MicroBenchmark', 'MicroBenchmarkWithInvoke', 'ShardingMatmul', 'ComputationCommunicationOverlap']
+__all__ = [
+    'MicroBenchmark', 'MicroBenchmarkWithInvoke', 'ShardingMatmul', 'ComputationCommunicationOverlap', 'KernelLaunch'
+]
diff --git a/superbench/benchmarks/micro_benchmarks/kernel_launch_overhead.py b/superbench/benchmarks/micro_benchmarks/kernel_launch_overhead.py
new file mode 100644
index 00000000..c60f05e9
--- /dev/null
+++ b/superbench/benchmarks/micro_benchmarks/kernel_launch_overhead.py
@@ -0,0 +1,109 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""Module of the Kernel Launch overhead benchmarks."""
+
+import os
+import re
+
+from superbench.common.utils import logger
+from superbench.benchmarks import BenchmarkRegistry
+from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke
+
+
+class KernelLaunch(MicroBenchmarkWithInvoke):
+    """The KernelLaunch overhead benchmark class."""
+    def __init__(self, name, parameters=''):
+        """Constructor.
+
+        Args:
+            name (str): benchmark name.
+            parameters (str): benchmark parameters.
+        """
+        super().__init__(name, parameters)
+
+        self._bin_name = 'kernel_launch_overhead'
+
+    def add_parser_arguments(self):
+        """Add the specified arguments."""
+        super().add_parser_arguments()
+
+        self._parser.add_argument(
+            '--num_warmup',
+            type=int,
+            default=100,
+            required=False,
+            help='The number of warmup steps.',
+        )
+        self._parser.add_argument(
+            '--num_steps',
+            type=int,
+            default=2000000,
+            required=False,
+            help='The number of test steps.',
+        )
+        self._parser.add_argument(
+            '--interval',
+            type=int,
+            default=2000,
+            required=False,
+            help='The interval between different kernel launch tests, in milliseconds.',
+        )
+
+    def _preprocess(self):
+        """Preprocess/preparation operations before the benchmarking.
+
+        Return:
+            True if _preprocess() succeeds.
+        """
+        if not super()._preprocess():
+            return False
+
+        command = os.path.join(self._args.bin_dir, self._bin_name)
+        command += (' -w ' + str(self._args.num_warmup))
+        command += (' -n ' + str(self._args.num_steps))
+        command += (' -i ' + str(self._args.interval))
+        self._commands.append(command)
+
+        return True
+
+    def _process_raw_result(self, cmd_idx, raw_output):
+        """Function to parse raw results and save the summarized results.
+
+        self._result.add_raw_data() and self._result.add_result() need to be called to save the results.
+
+        Args:
+            cmd_idx (int): the index of the command corresponding to the raw_output.
+            raw_output (str): raw output string of the micro-benchmark.
+
+        Return:
+            True if the raw output string is valid and the result can be extracted.
+        """
+        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)
+
+        pattern = r'\d+\.\d+'
+        result = re.findall(pattern, raw_output)
+        if len(result) != 2:
+            logger.error(
+                'Cannot extract kernel launch overhead in event and wall mode - round: {}, benchmark: {}, raw data: {}.'
+                .format(self._curr_run_index, self._name, raw_output)
+            )
+            return False
+
+        try:
+            result = [float(item) for item in result]
+        except BaseException as e:
+            logger.error(
+                'The result format is invalid - round: {}, benchmark: {}, result: {}, message: {}.'.format(
+                    self._curr_run_index, self._name, result, str(e)
+                )
+            )
+            return False
+
+        self._result.add_result('kernel_launch_overhead_event', result[0])
+        self._result.add_result('kernel_launch_overhead_wall', result[1])
+
+        return True
+
+
+BenchmarkRegistry.register_benchmark('kernel-launch', KernelLaunch)
diff --git a/tests/benchmarks/micro_benchmarks/test_kernel_launch_overhead.py b/tests/benchmarks/micro_benchmarks/test_kernel_launch_overhead.py
new file mode 100644
index 00000000..cb544add
--- /dev/null
+++ b/tests/benchmarks/micro_benchmarks/test_kernel_launch_overhead.py
@@ -0,0 +1,42 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""Tests for kernel-launch benchmark."""
+
+import numbers
+
+from tests.helper import decorator
+from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode
+
+
+@decorator.cuda_test
+def test_kernel_launch_overhead():
+    """Test kernel-launch benchmark."""
+    context = BenchmarkRegistry.create_benchmark_context(
+        'kernel-launch', parameters='--num_warmup 200 --num_steps 20000 --interval 100'
+    )
+
+    assert (BenchmarkRegistry.is_benchmark_context_valid(context))
+
+    benchmark = BenchmarkRegistry.launch_benchmark(context)
+
+    # Check basic information.
+    assert (benchmark)
+    assert (benchmark.name == 'kernel-launch')
+    assert (benchmark.type == BenchmarkType.MICRO)
+
+    # Check parameters specified in BenchmarkContext.
+ assert (benchmark._args.num_warmup == 200) + assert (benchmark._args.num_steps == 20000) + assert (benchmark._args.interval == 100) + + # Check results and metrics. + assert (benchmark.run_count == 1) + assert (benchmark.return_code == ReturnCode.SUCCESS) + assert ('raw_output_0' in benchmark.raw_data) + assert (len(benchmark.raw_data['raw_output_0']) == 1) + assert (isinstance(benchmark.raw_data['raw_output_0'][0], str)) + for metric in ['kernel_launch_overhead_event', 'kernel_launch_overhead_wall']: + assert (metric in benchmark.result) + assert (len(benchmark.result[metric]) == 1) + assert (isinstance(benchmark.result[metric][0], numbers.Number))
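
Note on the output contract assumed by _process_raw_result(): the patch does not include the kernel_launch_overhead binary itself; the parser only requires that the binary's stdout contain exactly two floating-point numbers, the event-timed overhead first and the wall-clock overhead second. The sketch below (not part of the patch) illustrates that contract with a made-up sample output; the real binary's wording and values may differ.

# Minimal sketch of the parsing step, assuming a hypothetical stdout format.
import re

sample_raw_output = (
    'Kernel launch overhead (event): 0.00583 ms\n'   # hypothetical wording
    'Kernel launch overhead (wall): 0.00681 ms\n'    # hypothetical wording
)

# Same regex as _process_raw_result(): grab every decimal number in the output.
values = [float(v) for v in re.findall(r'\d+\.\d+', sample_raw_output)]
assert len(values) == 2          # parser rejects anything but exactly two numbers
event_overhead, wall_overhead = values
print(event_overhead, wall_overhead)  # -> 0.00583 0.00681

These two values become the 'kernel_launch_overhead_event' and 'kernel_launch_overhead_wall' metrics checked by the test above.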