Benchmarks: Add benchmark - add micro benchmark for cudnn test (#89)

* add python related cudnn microbenchmark
2021-06-01 22:24:35 +08:00 · 2021-06-01 22:24:35 +08:00 · 83235433b2
--- a/examples/benchmarks/cudnn_function.py
+++ b/examples/benchmarks/cudnn_function.py
@ -0,0 +1,25 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""Micro benchmark example for cudnn performance benchmark.
+
+Commands to run:
+  python3 examples/benchmarks/cudnn_function.py
+"""
+
+from superbench.benchmarks import BenchmarkRegistry, Platform
+from superbench.common.utils import logger
+
+if __name__ == '__main__':
+    # Build the context for the 'cudnn-function' benchmark on the CUDA platform.
+    context = BenchmarkRegistry.create_benchmark_context(
+        'cudnn-function',
+        platform=Platform.CUDA,
+        parameters='--num_warmup 8 --num_steps 100 --num_in_step 1000',
+    )
+
+    # Launch the benchmark and report its outcome only when an object is returned.
+    benchmark = BenchmarkRegistry.launch_benchmark(context)
+    if benchmark:
+        summary = 'benchmark: {}, return code: {}, result: {}'.format(
+            benchmark.name, benchmark.return_code, benchmark.result
+        )
+        logger.info(summary)
--- a/superbench/benchmarks/micro_benchmarks/init.py
+++ b/superbench/benchmarks/micro_benchmarks/init.py
@ -8,8 +8,9 @@ from superbench.benchmarks.micro_benchmarks.sharding_matmul import ShardingMatmu
 from superbench.benchmarks.micro_benchmarks.computation_communication_overlap import ComputationCommunicationOverlap
 from superbench.benchmarks.micro_benchmarks.kernel_launch_overhead import KernelLaunch
 from superbench.benchmarks.micro_benchmarks.cublas_function import CublasBenchmark
+from superbench.benchmarks.micro_benchmarks.cudnn_function import CudnnBenchmark

 __all__ = [
    'MicroBenchmark', 'MicroBenchmarkWithInvoke', 'ShardingMatmul', 'ComputationCommunicationOverlap', 'KernelLaunch',
-    'CublasBenchmark'
+    'CublasBenchmark', 'CudnnBenchmark'
 ]
--- a/superbench/benchmarks/micro_benchmarks/cudnn_function.py
+++ b/superbench/benchmarks/micro_benchmarks/cudnn_function.py
@ -0,0 +1,441 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""Module of the cudnn functions benchmarks."""
+
+import os
+import json
+import shlex
+import yaml
+
+from superbench.common.utils import logger
+from superbench.benchmarks import Platform, BenchmarkRegistry, ReturnCode
+from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke
+
+
+class CudnnBenchmark(MicroBenchmarkWithInvoke):
+    """The cudnn performance benchmark class."""
+    def __init__(self, name, parameters=''):
+        """Constructor.
+
+        Initializes the base benchmark, then stores the built-in list of cudnn function
+        configurations and the name of the benchmark binary.
+
+        Args:
+            name (str): benchmark name.
+            parameters (str): benchmark parameters.
+        """
+        super().__init__(name, parameters)
+
+        # Built-in configurations used when no custom --config_json_str is supplied.
+        # Each dict is serialized to JSON and passed to the binary via --config_json,
+        # one command per entry (see _preprocess()).
+        # The list holds six entries each for cudnnConvolutionBackwardFilter,
+        # cudnnConvolutionBackwardData and cudnnConvolutionForward. Every shape appears twice:
+        # once with 'inputType' 0 / 'tensorOp' False and once with 'inputType' 2 / 'tensorOp' True.
+        # The stride lists equal the strides of a densely packed tensor with the given dims
+        # (e.g. dims [32, 128, 14, 14] -> strides [25088, 196, 14, 1]).
+        # NOTE(review): 'inputType'/'convType' presumably map to cudnnDataType_t (0 = float,
+        # 2 = half), 'mode' to cudnnConvolutionMode_t and 'algo' to the per-function cudnn
+        # algorithm enum - confirm against the benchmark binary.
+        self.__default_params_dict_list = [
+            {
+                # cudnnConvolutionBackwardFilter configurations.
+                'name': 'cudnnConvolutionBackwardFilter',
+                'algo': 0,
+                'inputDims': [32, 128, 14, 14],
+                'inputStride': [25088, 196, 14, 1],
+                'inputType': 0,
+                'outputDims': [32, 32, 14, 14],
+                'outputStride': [6272, 196, 14, 1],
+                'convType': 0,
+                'tensorOp': False,
+                'arrayLength': 2,
+                'padA': [1, 1],
+                'filterStrideA': [1, 1],
+                'dilationA': [1, 1],
+                'filterDims': [32, 128, 3, 3],
+                'mode': 1
+            }, {
+                'name': 'cudnnConvolutionBackwardFilter',
+                'algo': 1,
+                'inputDims': [32, 128, 14, 14],
+                'inputStride': [25088, 196, 14, 1],
+                'inputType': 2,
+                'outputDims': [32, 32, 14, 14],
+                'outputStride': [6272, 196, 14, 1],
+                'convType': 0,
+                'tensorOp': True,
+                'arrayLength': 2,
+                'padA': [1, 1],
+                'filterStrideA': [1, 1],
+                'dilationA': [1, 1],
+                'filterDims': [32, 128, 3, 3],
+                'mode': 1
+            }, {
+                'name': 'cudnnConvolutionBackwardFilter',
+                'algo': 1,
+                'inputDims': [32, 256, 14, 14],
+                'inputStride': [50176, 196, 14, 1],
+                'inputType': 0,
+                'outputDims': [32, 1024, 14, 14],
+                'outputStride': [200704, 196, 14, 1],
+                'convType': 0,
+                'tensorOp': False,
+                'arrayLength': 2,
+                'padA': [0, 0],
+                'filterStrideA': [1, 1],
+                'dilationA': [1, 1],
+                'filterDims': [1024, 256, 1, 1],
+                'mode': 1
+            }, {
+                'name': 'cudnnConvolutionBackwardFilter',
+                'algo': 1,
+                'inputDims': [32, 256, 14, 14],
+                'inputStride': [50176, 196, 14, 1],
+                'inputType': 2,
+                'outputDims': [32, 1024, 14, 14],
+                'outputStride': [200704, 196, 14, 1],
+                'convType': 0,
+                'tensorOp': True,
+                'arrayLength': 2,
+                'padA': [0, 0],
+                'filterStrideA': [1, 1],
+                'dilationA': [1, 1],
+                'filterDims': [1024, 256, 1, 1],
+                'mode': 1
+            }, {
+                'name': 'cudnnConvolutionBackwardFilter',
+                'algo': 1,
+                'inputDims': [32, 512, 14, 14],
+                'inputStride': [100352, 196, 14, 1],
+                'inputType': 0,
+                'outputDims': [32, 512, 14, 14],
+                'outputStride': [100352, 196, 14, 1],
+                'convType': 0,
+                'tensorOp': False,
+                'arrayLength': 2,
+                'padA': [1, 1],
+                'filterStrideA': [1, 1],
+                'dilationA': [1, 1],
+                'filterDims': [512, 512, 3, 3],
+                'mode': 1
+            }, {
+                'name': 'cudnnConvolutionBackwardFilter',
+                'algo': 1,
+                'inputDims': [32, 512, 14, 14],
+                'inputStride': [100352, 196, 14, 1],
+                'inputType': 2,
+                'outputDims': [32, 512, 14, 14],
+                'outputStride': [100352, 196, 14, 1],
+                'convType': 0,
+                'tensorOp': True,
+                'arrayLength': 2,
+                'padA': [1, 1],
+                'filterStrideA': [1, 1],
+                'dilationA': [1, 1],
+                'filterDims': [512, 512, 3, 3],
+                'mode': 1
+            }, {
+                # cudnnConvolutionBackwardData configurations.
+                'name': 'cudnnConvolutionBackwardData',
+                'algo': 1,
+                'filterDims': [32, 128, 3, 3],
+                'inputType': 2,
+                'inputDims': [32, 32, 14, 14],
+                'inputStride': [6272, 196, 14, 1],
+                'outputDims': [32, 128, 14, 14],
+                'outputStride': [25088, 196, 14, 1],
+                'convType': 0,
+                'tensorOp': True,
+                'arrayLength': 2,
+                'padA': [1, 1],
+                'dilationA': [1, 1],
+                'filterStrideA': [1, 1],
+                'mode': 1
+            }, {
+                'name': 'cudnnConvolutionBackwardData',
+                'algo': 4,
+                'filterDims': [32, 128, 3, 3],
+                'inputType': 0,
+                'inputDims': [32, 32, 14, 14],
+                'inputStride': [6272, 196, 14, 1],
+                'outputDims': [32, 128, 14, 14],
+                'outputStride': [25088, 196, 14, 1],
+                'convType': 0,
+                'tensorOp': False,
+                'arrayLength': 2,
+                'padA': [1, 1],
+                'dilationA': [1, 1],
+                'filterStrideA': [1, 1],
+                'mode': 1
+            }, {
+                'name': 'cudnnConvolutionBackwardData',
+                'algo': 1,
+                'filterDims': [1024, 256, 1, 1],
+                'inputType': 0,
+                'inputDims': [32, 1024, 14, 14],
+                'inputStride': [200704, 196, 14, 1],
+                'outputDims': [32, 256, 14, 14],
+                'outputStride': [50176, 196, 14, 1],
+                'convType': 0,
+                'tensorOp': False,
+                'arrayLength': 2,
+                'padA': [0, 0],
+                'dilationA': [1, 1],
+                'filterStrideA': [1, 1],
+                'mode': 1
+            }, {
+                'name': 'cudnnConvolutionBackwardData',
+                'algo': 1,
+                'filterDims': [1024, 256, 1, 1],
+                'inputType': 2,
+                'inputDims': [32, 1024, 14, 14],
+                'inputStride': [200704, 196, 14, 1],
+                'outputDims': [32, 256, 14, 14],
+                'outputStride': [50176, 196, 14, 1],
+                'convType': 0,
+                'tensorOp': True,
+                'arrayLength': 2,
+                'padA': [0, 0],
+                'dilationA': [1, 1],
+                'filterStrideA': [1, 1],
+                'mode': 1
+            }, {
+                'name': 'cudnnConvolutionBackwardData',
+                'algo': 1,
+                'filterDims': [512, 512, 3, 3],
+                'inputType': 0,
+                'inputDims': [32, 512, 14, 14],
+                'inputStride': [100352, 196, 14, 1],
+                'outputDims': [32, 512, 14, 14],
+                'outputStride': [100352, 196, 14, 1],
+                'convType': 0,
+                'tensorOp': False,
+                'arrayLength': 2,
+                'padA': [1, 1],
+                'dilationA': [1, 1],
+                'filterStrideA': [1, 1],
+                'mode': 1
+            }, {
+                'name': 'cudnnConvolutionBackwardData',
+                'algo': 1,
+                'filterDims': [512, 512, 3, 3],
+                'inputType': 2,
+                'inputDims': [32, 512, 14, 14],
+                'inputStride': [100352, 196, 14, 1],
+                'outputDims': [32, 512, 14, 14],
+                'outputStride': [100352, 196, 14, 1],
+                'convType': 0,
+                'tensorOp': True,
+                'arrayLength': 2,
+                'padA': [1, 1],
+                'dilationA': [1, 1],
+                'filterStrideA': [1, 1],
+                'mode': 1
+            }, {
+                # cudnnConvolutionForward configurations.
+                'name': 'cudnnConvolutionForward',
+                'inputDims': [32, 128, 14, 14],
+                'inputStride': [25088, 196, 14, 1],
+                'filterDims': [32, 128, 3, 3],
+                'outputDims': [32, 32, 14, 14],
+                'outputStride': [6272, 196, 14, 1],
+                'arrayLength': 2,
+                'padA': [1, 1],
+                'filterStrideA': [1, 1],
+                'dilationA': [1, 1],
+                'mode': 1,
+                'tensorOp': False,
+                'inputType': 0,
+                'convType': 0,
+                'algo': 6
+            }, {
+                'name': 'cudnnConvolutionForward',
+                'inputDims': [32, 128, 14, 14],
+                'inputStride': [25088, 196, 14, 1],
+                'filterDims': [32, 128, 3, 3],
+                'outputDims': [32, 32, 14, 14],
+                'outputStride': [6272, 196, 14, 1],
+                'arrayLength': 2,
+                'padA': [1, 1],
+                'filterStrideA': [1, 1],
+                'dilationA': [1, 1],
+                'mode': 1,
+                'tensorOp': True,
+                'inputType': 2,
+                'convType': 0,
+                'algo': 1
+            }, {
+                'name': 'cudnnConvolutionForward',
+                'inputDims': [32, 256, 14, 14],
+                'inputStride': [50176, 196, 14, 1],
+                'filterDims': [1024, 256, 1, 1],
+                'outputDims': [32, 1024, 14, 14],
+                'outputStride': [200704, 196, 14, 1],
+                'arrayLength': 2,
+                'padA': [0, 0],
+                'filterStrideA': [1, 1],
+                'dilationA': [1, 1],
+                'mode': 1,
+                'tensorOp': False,
+                'inputType': 0,
+                'convType': 0,
+                'algo': 1
+            }, {
+                'name': 'cudnnConvolutionForward',
+                'inputDims': [32, 256, 14, 14],
+                'inputStride': [50176, 196, 14, 1],
+                'filterDims': [1024, 256, 1, 1],
+                'outputDims': [32, 1024, 14, 14],
+                'outputStride': [200704, 196, 14, 1],
+                'arrayLength': 2,
+                'padA': [0, 0],
+                'filterStrideA': [1, 1],
+                'dilationA': [1, 1],
+                'mode': 1,
+                'tensorOp': True,
+                'inputType': 2,
+                'convType': 0,
+                'algo': 1
+            }, {
+                'name': 'cudnnConvolutionForward',
+                'inputDims': [32, 512, 14, 14],
+                'inputStride': [100352, 196, 14, 1],
+                'filterDims': [512, 512, 3, 3],
+                'outputDims': [32, 512, 14, 14],
+                'outputStride': [100352, 196, 14, 1],
+                'arrayLength': 2,
+                'padA': [1, 1],
+                'filterStrideA': [1, 1],
+                'dilationA': [1, 1],
+                'mode': 1,
+                'tensorOp': False,
+                'inputType': 0,
+                'convType': 0,
+                'algo': 1
+            }, {
+                'name': 'cudnnConvolutionForward',
+                'inputDims': [32, 512, 14, 14],
+                'inputStride': [100352, 196, 14, 1],
+                'filterDims': [512, 512, 3, 3],
+                'outputDims': [32, 512, 14, 14],
+                'outputStride': [100352, 196, 14, 1],
+                'arrayLength': 2,
+                'padA': [1, 1],
+                'filterStrideA': [1, 1],
+                'dilationA': [1, 1],
+                'mode': 1,
+                'tensorOp': True,
+                'inputType': 2,
+                'convType': 0,
+                'algo': 1
+            }
+        ]
+
+        # File name of the benchmark executable; _preprocess() joins it with bin_dir
+        # to form the command line.
+        self._bin_name = 'CudnnBenchmark'
+
+    def add_parser_arguments(self):
+        """Register the cudnn-function specific command line options.
+
+        The options of the parent class are registered first; afterwards every option listed
+        below is added as an optional argument with its own type, default value and help text.
+        """
+        super().add_parser_arguments()
+
+        # One row per option: (flag, value type, default value, help text).
+        option_specs = (
+            ('--num_warmup', int, 8, 'The number of warmup step.'),
+            ('--num_steps', int, 100, 'The number of test step.'),
+            ('--num_in_step', int, 1000, 'The number of functions in one step.'),
+            ('--random_seed', int, 33931, 'The random seed to fill in the data of the function.'),
+            ('--config_json_str', str, None, 'The custom json string defining the params in a cudnn function.'),
+        )
+        for flag, value_type, default_value, help_text in option_specs:
+            self._parser.add_argument(
+                flag,
+                type=value_type,
+                default=default_value,
+                required=False,
+                help=help_text,
+            )
+
+    def _preprocess(self):
+        """Preprocess/preparation operations before the benchmarking.
+
+        Builds the shell commands to run. Every command starts with the benchmark binary under
+        ``bin_dir`` followed by the step, warmup and random seed options. Without
+        ``--config_json_str`` one command is appended per built-in default configuration;
+        otherwise a single command is appended for the user-provided configuration.
+
+        Return:
+            True if _preprocess() succeed. False if the parent preprocessing fails, or if the
+            configuration cannot be parsed or serialized (the return code is then set to
+            INVALID_ARGUMENT).
+        """
+        if not super()._preprocess():
+            return False
+
+        command = os.path.join(self._args.bin_dir, self._bin_name)
+        command += (' --num_test ' + str(self._args.num_steps))
+        command += (' --warm_up ' + str(self._args.num_warmup))
+        command += (' --num_in_step ' + str(self._args.num_in_step))
+        command += (' --random_seed ' + str(self._args.random_seed))
+
+        try:
+            if self._args.config_json_str:
+                # A custom configuration replaces the whole default list.
+                configs = [yaml.safe_load(self._args.config_json_str)]
+            else:
+                configs = self.__default_params_dict_list
+
+            for config in configs:
+                # Compact separators produce whitespace-free JSON without touching spaces that
+                # are part of string values (a plain str.replace(' ', '') would strip those too).
+                config_json_str = json.dumps(config, separators=(',', ':'))
+                # shlex.quote wraps the JSON in single quotes and also escapes any single quote
+                # inside it, so the argument stays one shell word.
+                self._commands.append(command + ' --config_json ' + shlex.quote(config_json_str))
+        except Exception as e:
+            # Exception (not BaseException) so that KeyboardInterrupt/SystemExit still propagate.
+            logger.error('Invalid input params - benchmark: {},  message: {}'.format(self._name, str(e)))
+            self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
+            return False
+        return True
+
+    def _process_raw_result(self, cmd_idx, raw_output):
+        """Function to process raw results and save the summarized results.
+
+          self._result.add_raw_data() and self._result.add_result() need to be called to save the results.
+
+        The raw output is stored as-is, then scanned line by line: a '[function config]: ' line
+        sets the metric name, and a '[raw_data]: ' line carries comma-separated samples whose
+        mean is saved as the result and whose values are saved as raw data for that metric.
+
+        Args:
+            cmd_idx (int): the index of command corresponding with the raw_output.
+            raw_output (str): raw output string of the micro-benchmark.
+
+        Return:
+            True if the raw output string is valid and result can be extracted. False if parsing
+            fails, if any line contains 'Error', or if no '[raw_data]' line was found.
+        """
+        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)
+
+        config_tag = '[function config]: '
+        data_tag = '[raw_data]: '
+        metric = ''
+        error = False
+        extracted = False
+        try:
+            for line in raw_output.splitlines():
+                if '[function config]' in line:
+                    metric = line[line.index(config_tag) + len(config_tag):]
+                if '[raw_data]' in line:
+                    payload = line[line.index(data_tag) + len(data_tag):]
+                    # Skip empty fields instead of blindly dropping the last one: this handles the
+                    # trailing comma and does not lose a sample when the comma is absent.
+                    samples = [float(item) for item in payload.split(',') if item.strip()]
+                    # An empty sample list raises ZeroDivisionError, reported as a parse failure below.
+                    self._result.add_result(metric, sum(samples) / len(samples))
+                    self._result.add_raw_data(metric, samples)
+                    extracted = True
+                if 'Error' in line:
+                    error = True
+        except Exception as e:
+            # Exception (not BaseException) so that KeyboardInterrupt/SystemExit still propagate.
+            logger.error(
+                'Cannot extract results from cudnn functions - round: {}, index of cmd: {}, '
+                'benchmark: {}, raw data: {}, message: {}'.format(
+                    self._curr_run_index, cmd_idx, self._name, raw_output, str(e)
+                )
+            )
+            return False
+        if error:
+            logger.error(
+                'Error in running cudnn test - round: {}, index of cmd: {}, benchmark: {}, raw data: {}'.format(
+                    self._curr_run_index, cmd_idx, self._name, raw_output
+                )
+            )
+            return False
+        if not extracted:
+            # Output without any '[raw_data]' line carries no result, so it is not valid.
+            logger.error(
+                'Cannot extract results from cudnn functions - round: {}, index of cmd: {}, '
+                'benchmark: {}, raw data: {}, message: {}'.format(
+                    self._curr_run_index, cmd_idx, self._name, raw_output, 'no [raw_data] line found'
+                )
+            )
+            return False
+        return True
+
+
+# Register CudnnBenchmark under the name 'cudnn-function' for the CUDA platform.
+BenchmarkRegistry.register_benchmark(
+    'cudnn-function',
+    CudnnBenchmark,
+    platform=Platform.CUDA,
+)
--- a/tests/benchmarks/micro_benchmarks/test_cudnn_function.py
+++ b/tests/benchmarks/micro_benchmarks/test_cudnn_function.py
@ -0,0 +1,84 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""Tests for cudnn-function benchmark."""
+
+import numbers
+
+from tests.helper import decorator
+from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform
+
+
+def _check_run_basics(benchmark):
+    """Check identity, parsed arguments, run status and raw output of a finished run."""
+    # Check basic information.
+    assert (benchmark)
+    assert (benchmark.name == 'cudnn-function')
+    assert (benchmark.type == BenchmarkType.MICRO)
+
+    # Check parameters specified in BenchmarkContext.
+    assert (benchmark._args.num_warmup == 10)
+    assert (benchmark._args.num_steps == 10)
+    assert (benchmark._args.num_in_step == 100)
+
+    # Check results and metrics.
+    assert (benchmark.run_count == 1)
+    assert (benchmark.return_code == ReturnCode.SUCCESS)
+    assert ('raw_output_0' in benchmark.raw_data)
+    assert (len(benchmark.raw_data['raw_output_0']) == 1)
+    assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))
+
+
+def _check_metrics(benchmark):
+    """Check that every metric has one numeric result and num_steps raw samples."""
+    for metric in list(benchmark.result.keys()):
+        assert (len(benchmark.result[metric]) == 1)
+        assert (isinstance(benchmark.result[metric][0], numbers.Number))
+        assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
+
+
+@decorator.cuda_test
+def test_cudnn_functions():
+    """Test cudnn-function benchmark with the default and with a custom configuration."""
+    base_parameters = '--num_warmup 10 --num_steps 10 --num_in_step 100'
+
+    # Test for default configuration
+    context = BenchmarkRegistry.create_benchmark_context(
+        'cudnn-function', platform=Platform.CUDA, parameters=base_parameters
+    )
+    assert (BenchmarkRegistry.is_benchmark_context_valid(context))
+
+    benchmark = BenchmarkRegistry.launch_benchmark(context)
+    _check_run_basics(benchmark)
+    assert (18 <= len(benchmark.result))
+    _check_metrics(benchmark)
+
+    # Test for custom configuration
+    custom_config_str = (
+        '{"algo":0,"arrayLength":2,"convType":0,"dilationA":[1,1],"filterStrideA":[1,1],'
+        '"filterDims":[32,128,3,3],"inputDims":[32,128,14,14],"inputStride":[25088,196,14,1],"inputType":0,'
+        '"mode":1,"name":"cudnnConvolutionBackwardFilter","outputDims":[32,32,14,14],'
+        '"outputStride":[6272,196,14,1],"padA":[1,1],"tensorOp":false}'
+    )
+    context = BenchmarkRegistry.create_benchmark_context(
+        'cudnn-function',
+        platform=Platform.CUDA,
+        parameters=base_parameters + ' --config_json_str ' + custom_config_str
+    )
+    assert (BenchmarkRegistry.is_benchmark_context_valid(context))
+
+    benchmark = BenchmarkRegistry.launch_benchmark(context)
+    _check_run_basics(benchmark)
+    assert (1 == len(benchmark.result))
+    _check_metrics(benchmark)