Benchmarks: Micro benchmarks - Add correctness check in cublas-function benchmark (#452)

**Description**
Add a correctness check to the cublas-function benchmark.

**Major Revision**
- Add Python code for the correctness check in the cublas-function benchmark, plus a corresponding unit test (a usage sketch follows below).
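
A minimal usage sketch of the new options, assuming a CUDA machine with SuperBench installed; the import path and parameters follow the test added in this PR, and the `--eps 1e-6` value is purely illustrative:

```python
# Minimal sketch: launch the cublas-function micro benchmark with the new correctness flags.
from superbench.benchmarks import BenchmarkRegistry, Platform

context = BenchmarkRegistry.create_benchmark_context(
    'cublas-function',
    platform=Platform.CUDA,
    parameters='--num_warmup 1 --num_steps 1 --num_in_step 1 --correctness --eps 1e-6'
)
benchmark = BenchmarkRegistry.launch_benchmark(context)

# With --correctness enabled, each cublas function reports '<function>_correctness'
# (1 = PASS, 0 = FAIL) and '<function>_error_rate' in benchmark.result.
print(benchmark.result)
```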
Yuting Jiang 2023-01-03 14:59:30 +08:00, committed by GitHub
Parent 0591da5f49
Commit 75573f59da
2 changed files with 56 additions and 1 deletion


@@ -223,6 +223,19 @@ class CublasBenchmark(MicroBenchmarkWithInvoke):
            required=False,
            help='The custom json string defining the params in a cublas function.',
        )
        self._parser.add_argument(
            '--correctness',
            action='store_true',
            default=False,
            help='Enable correctness check for cublas functions.',
        )
        self._parser.add_argument(
            '--eps',
            type=float,
            default=None,
            required=False,
            help='The acceptable error bound for correctness check.',
        )

    def _preprocess(self):
        """Preprocess/preparation operations before the benchmarking.
@@ -238,6 +251,8 @@ class CublasBenchmark(MicroBenchmarkWithInvoke):
        command += (' --warm_up ' + str(self._args.num_warmup))
        command += (' --num_in_step ' + str(self._args.num_in_step))
        command += (' --random_seed ' + str(self._args.random_seed))
        command += ' --correctness' if self._args.correctness else ''
        command += (' --eps ' + str(self._args.eps)) if self._args.eps is not None else ''

        try:
            if not self._args.config_json_str:
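
As a quick illustration of the two appended flags (the base command and argument values below are hypothetical; only the flag-appending expressions mirror the code above):

```python
# Hypothetical args and base command; only the flag-appending logic is taken from the diff above.
from argparse import Namespace

args = Namespace(correctness=True, eps=1e-6)
command = 'cublas_benchmark'    # placeholder for the real benchmark binary invocation
command += ' --correctness' if args.correctness else ''
command += (' --eps ' + str(args.eps)) if args.eps is not None else ''
print(command)    # cublas_benchmark --correctness --eps 1e-06
```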
@@ -260,7 +275,7 @@ class CublasBenchmark(MicroBenchmarkWithInvoke):
            return False

        return True

-    def _process_raw_result(self, cmd_idx, raw_output):
+    def _process_raw_result(self, cmd_idx, raw_output): # noqa: C901
        """Function to process raw results and save the summarized results.
        self._result.add_raw_data() and self._result.add_result() need to be called to save the results.
@@ -299,6 +314,14 @@ class CublasBenchmark(MicroBenchmarkWithInvoke):
                    self._result.add_raw_data(metric.lower() + '_time', raw_data, self._args.log_raw_data)
                if 'Error' in line:
                    error = True
                if '[correctness]' in line:
                    if 'PASS' in line:
                        self._result.add_result(metric.lower() + '_correctness', 1)
                    elif 'FAIL' in line:
                        self._result.add_result(metric.lower() + '_correctness', 0)
                    error_rate = float(line.split(' ')[-1])
                    self._result.add_result(metric.lower() + '_error_rate', error_rate)
        except BaseException as e:
            logger.error(
                'Cannot extract results from cublas functions - round: {}, index of cmd: {}, \
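
For reference, a small sketch of the parsing added above; the exact format of the `[correctness]` lines emitted by the cublas benchmark binary is an assumption here, only the PASS/FAIL check and last-token error rate mirror the diff:

```python
# Hypothetical raw-output lines; the parsing mirrors the new '[correctness]' branch above.
for line in ['[correctness] cublassgemm PASS 1e-07', '[correctness] cublasgemmex FAIL 0.012']:
    if '[correctness]' in line:
        correctness = 1 if 'PASS' in line else 0
        error_rate = float(line.split(' ')[-1])    # last whitespace-separated token
        print(correctness, error_rate)
```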


@@ -118,3 +118,35 @@ def test_cublas_functions():
        assert (isinstance(benchmark.result[metric][0], numbers.Number))
        if metric != 'return_code':
            assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)


@decorator.cuda_test
def test_cublas_functions_correctness():
    """Test cublas-function correctness check benchmark."""
    # Test for correctness check
    context = BenchmarkRegistry.create_benchmark_context(
        'cublas-function',
        platform=Platform.CUDA,
        parameters='--num_warmup 1 --num_steps 1 --num_in_step 1 --correctness'
    )

    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (benchmark.name == 'cublas-function')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.correctness)

    # Check results and metrics.
    assert (1 + 3 * (len(benchmark._CublasBenchmark__default_params_dict_list)) == len(benchmark.result))
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    for metric in list(benchmark.result.keys()):
        if 'correctness' in metric or 'error_rate' in metric:
            assert (len(benchmark.result[metric]) == 1)
            assert (isinstance(benchmark.result[metric][0], numbers.Number))