Benchmarks: Add benchmark - add micro-benchmark for cuBLAS functions (#80)

* add benchmark for cublas test

* format

* revise error handling and test

* add interface to read json file, revise json file path and include .json in packaging

* add random_seed in arguments

* revise preprocess of cublas benchmark

* fix lint error and note error in source code

* update according to comments

* revise input argument from a json file to a custom string and convert the json file contents to a built-in dict list

* restore package config

* fix lint issue

* update platform and comments

* rename files to match the source code dir and fix comment errors

Co-authored-by: root <root@sb-validation-000001.51z1chmys5fuzfqyo4niepozre.bx.internal.cloudapp.net>
Yuting Jiang 2021-05-31 10:31:53 +08:00 committed by GitHub
Parent 8b4f613a76
Commit 18398fbaa2
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
6 changed files: 417 additions and 3 deletions


@@ -0,0 +1,25 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""Micro benchmark example for cublas performance benchmark.
Commands to run:
python3 examples/benchmarks/cublas_function.py
"""
from superbench.benchmarks import BenchmarkRegistry, Platform
from superbench.common.utils import logger
if __name__ == '__main__':
parameters = '--num_warmup 8 --num_steps 100 --num_in_step 1000'
context = BenchmarkRegistry.create_benchmark_context(
'cublas-function', platform=Platform.CUDA, parameters=parameters
)
benchmark = BenchmarkRegistry.launch_benchmark(context)
if benchmark:
logger.info(
'benchmark: {}, return code: {}, result: {}'.format(
benchmark.name, benchmark.return_code, benchmark.result
)
)
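
The example above runs the full built-in list of function configurations. The benchmark also accepts a `--config_json_str` argument (defined in `cublas_function.py` below); the following is a minimal sketch of launching a single custom cuBLAS GEMM configuration through the same registry API, with the config string borrowed from the unit test in this commit:

from superbench.benchmarks import BenchmarkRegistry, Platform
from superbench.common.utils import logger

if __name__ == '__main__':
    # Sketch: benchmark only one cuBLAS GEMM shape instead of the default config list.
    # The JSON string follows the same schema as the default config dicts in CublasBenchmark.
    custom_config = '{"name":"cublasCgemm","m":512,"n":512,"k":32,"transa":1,"transb":0}'
    parameters = '--num_warmup 8 --num_steps 100 --num_in_step 1000 --config_json_str ' + custom_config
    context = BenchmarkRegistry.create_benchmark_context(
        'cublas-function', platform=Platform.CUDA, parameters=parameters
    )
    benchmark = BenchmarkRegistry.launch_benchmark(context)
    if benchmark:
        logger.info('benchmark: {}, return code: {}'.format(benchmark.name, benchmark.return_code))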


@@ -7,7 +7,9 @@ from superbench.benchmarks.micro_benchmarks.micro_base import MicroBenchmark, MicroBenchmarkWithInvoke
from superbench.benchmarks.micro_benchmarks.sharding_matmul import ShardingMatmul
from superbench.benchmarks.micro_benchmarks.computation_communication_overlap import ComputationCommunicationOverlap
from superbench.benchmarks.micro_benchmarks.kernel_launch_overhead import KernelLaunch
+from superbench.benchmarks.micro_benchmarks.cublas_function import CublasBenchmark

__all__ = [
-    'MicroBenchmark', 'MicroBenchmarkWithInvoke', 'ShardingMatmul', 'ComputationCommunicationOverlap', 'KernelLaunch'
+    'MicroBenchmark', 'MicroBenchmarkWithInvoke', 'ShardingMatmul', 'ComputationCommunicationOverlap', 'KernelLaunch',
+    'CublasBenchmark'
]


@@ -0,0 +1,307 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""Module of the cublas functions benchmarks."""
import os
import json
import yaml
from superbench.common.utils import logger
from superbench.benchmarks import Platform, BenchmarkRegistry
from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke


class CublasBenchmark(MicroBenchmarkWithInvoke):
    """The Cublas performance benchmark class."""
    def __init__(self, name, parameters=''):
        """Constructor.

        Args:
            name (str): benchmark name.
            parameters (str): benchmark parameters.
        """
        super().__init__(name, parameters)

        self.__default_params_dict_list = [
            {'name': 'cublasCgemm', 'm': 512, 'n': 512, 'k': 32, 'transa': 1, 'transb': 0},
            {'name': 'cublasCgemm', 'm': 2048, 'n': 512, 'k': 32, 'transa': 1, 'transb': 0},
            {'name': 'cublasCgemm', 'm': 512, 'n': 2048, 'k': 32, 'transa': 1, 'transb': 0},
            {'name': 'cublasCgemm', 'm': 640, 'n': 1280, 'k': 32, 'transa': 1, 'transb': 0},
            {'name': 'cublasCgemm', 'm': 896, 'n': 1792, 'k': 32, 'transa': 1, 'transb': 0},
            {'name': 'cublasCgemm3mStridedBatched', 'm': 64, 'n': 32, 'k': 3, 'transa': 0, 'transb': 1, 'batchCount': 544},
            {'name': 'cublasCgemm3mStridedBatched', 'm': 64, 'n': 32, 'k': 64, 'transa': 1, 'transb': 0, 'batchCount': 544},
            {'name': 'cublasCgemm3mStridedBatched', 'm': 128, 'n': 32, 'k': 128, 'transa': 0, 'transb': 1, 'batchCount': 544},
            {'name': 'cublasCgemm3mStridedBatched', 'm': 128, 'n': 32, 'k': 64, 'transa': 0, 'transb': 1, 'batchCount': 544},
            {'name': 'cublasCgemm3mStridedBatched', 'm': 64, 'n': 32, 'k': 128, 'transa': 0, 'transb': 1, 'batchCount': 544},
            {'name': 'cublasGemmStridedBatchedEx', 'm': 224, 'n': 224, 'k': 64, 'transa': 0, 'transb': 0, 'datatype': 'half', 'use_tensor_core': True, 'batchCount': 160},
            {'name': 'cublasGemmStridedBatchedEx', 'm': 64, 'n': 224, 'k': 224, 'transa': 0, 'transb': 0, 'datatype': 'half', 'use_tensor_core': True, 'batchCount': 160},
            {'name': 'cublasGemmEx', 'm': 4000, 'n': 224, 'k': 1000, 'transa': 0, 'transb': 0, 'datatype': 'float', 'use_tensor_core': False},
            {'name': 'cublasGemmEx', 'm': 4000, 'n': 224, 'k': 1000, 'transa': 1, 'transb': 0, 'datatype': 'half', 'use_tensor_core': True},
            {'name': 'cublasGemmEx', 'm': 1000, 'n': 224, 'k': 4000, 'transa': 0, 'transb': 0, 'datatype': 'half', 'use_tensor_core': False},
            {'name': 'cublasGemmEx', 'm': 1000, 'n': 224, 'k': 4000, 'transa': 0, 'transb': 0, 'datatype': 'float', 'use_tensor_core': False},
            {'name': 'cublasSgemm', 'm': 1024, 'n': 7168, 'k': 1024, 'transa': 1, 'transb': 0},
            {'name': 'cublasSgemmStridedBatched', 'm': 64, 'n': 224, 'k': 224, 'transa': 0, 'transb': 0, 'batchCount': 512},
            {'name': 'cublasSgemmStridedBatched', 'm': 64, 'n': 224, 'k': 224, 'transa': 0, 'transb': 0, 'batchCount': 160}
        ]

        self._bin_name = 'CublasBenchmark'

    def add_parser_arguments(self):
        """Add the specified arguments."""
        super().add_parser_arguments()

        self._parser.add_argument(
            '--num_warmup',
            type=int,
            default=8,
            required=False,
            help='The number of warmup steps.',
        )
        self._parser.add_argument(
            '--num_steps',
            type=int,
            default=100,
            required=False,
            help='The number of test steps.',
        )
        self._parser.add_argument(
            '--num_in_step',
            type=int,
            default=1000,
            required=False,
            help='The number of functions in one step.',
        )
        self._parser.add_argument(
            '--random_seed',
            type=int,
            default=33931,
            required=False,
            help='The random seed used to fill the data of the function.',
        )
        self._parser.add_argument(
            '--config_json_str',
            type=str,
            default=None,
            required=False,
            help='The custom JSON string defining the params of a cublas function.',
        )

    def _preprocess(self):
        """Preprocess/preparation operations before the benchmarking.

        Return:
            True if _preprocess() succeed.
        """
        if not super()._preprocess():
            return False

        command = os.path.join(self._args.bin_dir, self._bin_name)
        command += (' --num_test ' + str(self._args.num_steps))
        command += (' --warm_up ' + str(self._args.num_warmup))
        command += (' --num_in_step ' + str(self._args.num_in_step))
        command += (' --random_seed ' + str(self._args.random_seed))

        try:
            if not self._args.config_json_str:
                for config_dict in self.__default_params_dict_list:
                    config_json_str = "'" + json.dumps(config_dict).replace(' ', '') + "'"
                    complete_command = command + ' --config_json ' + config_json_str
                    self._commands.append(complete_command)
            else:
                custom_config_dict = yaml.safe_load(self._args.config_json_str)
                config_json_str = "'" + json.dumps(custom_config_dict).replace(' ', '') + "'"
                complete_command = command + ' --config_json ' + config_json_str
                self._commands.append(complete_command)
        except BaseException as e:
            logger.error('Invalid input params - benchmark: {}, message: {}'.format(self._name, str(e)))
            return False

        return True

    def _process_raw_result(self, cmd_idx, raw_output):
        """Function to process raw results and save the summarized results.

        self._result.add_raw_data() and self._result.add_result() need to be called to save the results.

        Args:
            cmd_idx (int): the index of command corresponding with the raw_output.
            raw_output (str): raw output string of the micro-benchmark.

        Return:
            True if the raw output string is valid and result can be extracted.
        """
        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)

        try:
            lines = raw_output.splitlines()
            metric = ''
            error = False
            raw_data = []
            for line in lines:
                if '[function config]' in line:
                    metric = line[line.index('[function config]: ') + len('[function config]: '):]
                if '[raw_data]' in line:
                    raw_data = line[line.index('[raw_data]: ') + len('[raw_data]: '):]
                    raw_data = raw_data.split(',')
                    raw_data.pop()
                    raw_data = [float(item) for item in raw_data]
                    self._result.add_result(metric, sum(raw_data) / len(raw_data))
                    self._result.add_raw_data(metric, raw_data)
                if 'Error' in line:
                    error = True
        except BaseException as e:
            logger.error(
                'Cannot extract results from cublas functions - round: {}, index of cmd: {}, '
                'benchmark: {}, raw data: {}, message: {}'.format(
                    self._curr_run_index, cmd_idx, self._name, raw_output, str(e)
                )
            )
            return False
        if error:
            logger.error(
                'Error in running cublas test - round: {}, index of cmd: {}, benchmark: {}, raw data: {}'.format(
                    self._curr_run_index, cmd_idx, self._name, raw_output
                )
            )
            return False
        return True


BenchmarkRegistry.register_benchmark('cublas-function', CublasBenchmark, platform=Platform.CUDA)
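
For readers skimming `_process_raw_result()`, here is a minimal, self-contained sketch of the same extraction applied to a hypothetical output fragment. The sample lines follow the '[function config]' / '[raw_data]' format the parser expects; they are illustrative, not captured from a real run:

# Sketch of the parsing done in _process_raw_result(), on a hypothetical output fragment.
sample_output = (
    '[function config]: {"name":"cublasCgemm","m":512,"n":512,"k":32,"transa":1,"transb":0}\n'
    '[raw_data]: 0.012,0.011,0.013,\n'
)

metric = ''
for line in sample_output.splitlines():
    if '[function config]' in line:
        # The metric name is the serialized function config following the prefix.
        metric = line[line.index('[function config]: ') + len('[function config]: '):]
    if '[raw_data]' in line:
        # Values are comma separated with a trailing comma, hence the pop().
        values = line[line.index('[raw_data]: ') + len('[raw_data]: '):].split(',')
        values.pop()
        values = [float(v) for v in values]
        print(metric, sum(values) / len(values))    # mean of the reported values for this config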


@@ -199,7 +199,6 @@ void run_benchmark(Options &options) {
function.set_random_seed(options.random_seed);
CublasFunction *p_function = get_cublas_function_pointer(function);
p_function->benchmark();
-std::cout << "~delete" << std::endl;
delete p_function;
} catch (std::exception &e) {
std::cout << "Error: " << e.what() << std::endl;


@@ -17,7 +17,8 @@
* num_test: test step nums
* warm_up: warm up step nums
* num_in_step: times each step will invoke the function
-* config path: the path of 'para_info.json'
+* random_seed: the random seed to generate data
+* config_json: the json string including the params of the function
* functions supported:
* cublasSgemm
* cublasGemmEx
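
Putting the parameter list above together with `_preprocess()` in `cublas_function.py`, each generated command for the benchmark binary looks roughly like the sketch below. The `bin_dir` path and the config values are illustrative assumptions, not taken from a real run; the parameter defaults match the argparse defaults shown earlier:

import json
import os

# Sketch: rebuild the per-config command line the way _preprocess() does.
# bin_dir and the config dict are illustrative; in SuperBench bin_dir is resolved at runtime.
bin_dir = '/opt/superbench/bin'
config = {'name': 'cublasCgemm', 'm': 512, 'n': 512, 'k': 32, 'transa': 1, 'transb': 0}

command = os.path.join(bin_dir, 'CublasBenchmark')
command += ' --num_test 100 --warm_up 8 --num_in_step 1000 --random_seed 33931'
command += " --config_json '" + json.dumps(config).replace(' ', '') + "'"
print(command)
# -> /opt/superbench/bin/CublasBenchmark --num_test 100 --warm_up 8 --num_in_step 1000 --random_seed 33931 --config_json '{"name":"cublasCgemm","m":512,"n":512,"k":32,"transa":1,"transb":0}'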


@@ -0,0 +1,80 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Tests for cublas-functions benchmark."""
import numbers
from tests.helper import decorator
from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform

@decorator.cuda_test
def test_cublas_functions():
    """Test cublas-function benchmark."""
    # Test for default configuration
    context = BenchmarkRegistry.create_benchmark_context(
        'cublas-function', platform=Platform.CUDA, parameters='--num_warmup 10 --num_steps 10 --num_in_step 100'
    )

    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (benchmark.name == 'cublas-function')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.num_warmup == 10)
    assert (benchmark._args.num_steps == 10)
    assert (benchmark._args.num_in_step == 100)

    # Check results and metrics.
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    assert ('raw_output_0' in benchmark.raw_data)
    assert (len(benchmark.raw_data['raw_output_0']) == 1)
    assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))
    assert (19 <= len(benchmark.result))
    for metric in list(benchmark.result.keys()):
        assert (len(benchmark.result[metric]) == 1)
        assert (isinstance(benchmark.result[metric][0], numbers.Number))
        assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)

    # Test for custom configuration
    custom_config_str = '{"name":"cublasCgemm","m":512,"n":512,"k":32,"transa":1,"transb":0}'
    context = BenchmarkRegistry.create_benchmark_context(
        'cublas-function',
        platform=Platform.CUDA,
        parameters='--num_warmup 10 --num_steps 10 --num_in_step 100 --config_json_str ' + custom_config_str
    )

    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (benchmark.name == 'cublas-function')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.num_warmup == 10)
    assert (benchmark._args.num_steps == 10)
    assert (benchmark._args.num_in_step == 100)

    # Check results and metrics.
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    assert ('raw_output_0' in benchmark.raw_data)
    assert (len(benchmark.raw_data['raw_output_0']) == 1)
    assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))
    assert (1 == len(benchmark.result))
    for metric in list(benchmark.result.keys()):
        assert (len(benchmark.result[metric]) == 1)
        assert (isinstance(benchmark.result[metric][0], numbers.Number))
        assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
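
The custom configuration string used in this test is parsed in `_preprocess()` with `yaml.safe_load()` and then re-serialized to canonical JSON before being passed to the binary. A minimal sketch of that round trip, with the values borrowed from the test above (note that `yaml.safe_load()` also accepts plain JSON, since JSON is valid YAML):

import json
import yaml

# Sketch: reproduce the config round trip performed in _preprocess().
custom_str = '{"name":"cublasCgemm","m":512,"n":512,"k":32,"transa":1,"transb":0}'
config = yaml.safe_load(custom_str)    # parsed into a built-in dict
# Re-serialize without spaces, as done before appending '--config_json' to the command.
print(json.dumps(config).replace(' ', ''))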