Benchmarks: Code Revision - Extract base class for memory bandwidth microbenchmark (#159)
**Description** Extract a base class for the memory bandwidth microbenchmark. **Major Revision** - revise and optimize cuda_memory_bandwidth_performance - extract a base class for the memory bandwidth microbenchmark - add tests for the base class
This commit is contained in:
Parent
0583862d2d
Commit
e5e84a2ece
|
@ -10,6 +10,7 @@ from superbench.benchmarks.micro_benchmarks.kernel_launch_overhead import Kernel
|
|||
from superbench.benchmarks.micro_benchmarks.cublas_function import CublasBenchmark
|
||||
from superbench.benchmarks.micro_benchmarks.cudnn_function import CudnnBenchmark
|
||||
from superbench.benchmarks.micro_benchmarks.gemm_flops_performance import GemmFlopsCuda
|
||||
from superbench.benchmarks.micro_benchmarks.memory_bw_performance_base import MemBwBenchmark
|
||||
from superbench.benchmarks.micro_benchmarks.cuda_memory_bw_performance import CudaMemBwBenchmark
|
||||
from superbench.benchmarks.micro_benchmarks.disk_performance import DiskBenchmark
|
||||
from superbench.benchmarks.micro_benchmarks.ib_loopback_performance import IBLoopbackBenchmark
|
||||
|
@ -17,6 +18,6 @@ from superbench.benchmarks.micro_benchmarks.cuda_nccl_bw_performance import Cuda
|
|||
|
||||
__all__ = [
|
||||
'MicroBenchmark', 'MicroBenchmarkWithInvoke', 'ShardingMatmul', 'ComputationCommunicationOverlap', 'KernelLaunch',
|
||||
'CublasBenchmark', 'CudnnBenchmark', 'GemmFlopsCuda', 'CudaMemBwBenchmark', 'DiskBenchmark', 'IBLoopbackBenchmark',
|
||||
'CudaNcclBwBenchmark'
|
||||
'CublasBenchmark', 'CudnnBenchmark', 'GemmFlopsCuda', 'MemBwBenchmark', 'CudaMemBwBenchmark', 'DiskBenchmark',
|
||||
'IBLoopbackBenchmark', 'CudaNcclBwBenchmark'
|
||||
]
|
||||
|
|
|
@ -7,11 +7,11 @@ import os
|
|||
import re
|
||||
|
||||
from superbench.common.utils import logger
|
||||
from superbench.benchmarks import BenchmarkRegistry, Platform, ReturnCode
|
||||
from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke
|
||||
from superbench.benchmarks import BenchmarkRegistry, Platform
|
||||
from superbench.benchmarks.micro_benchmarks import MemBwBenchmark
|
||||
|
||||
|
||||
class CudaMemBwBenchmark(MicroBenchmarkWithInvoke):
|
||||
class CudaMemBwBenchmark(MemBwBenchmark):
|
||||
"""The Cuda memory performance benchmark class."""
|
||||
def __init__(self, name, parameters=''):
|
||||
"""Constructor.
|
||||
|
@ -23,31 +23,16 @@ class CudaMemBwBenchmark(MicroBenchmarkWithInvoke):
|
|||
super().__init__(name, parameters)
|
||||
|
||||
self._bin_name = 'bandwidthTest'
|
||||
self.__mem_types = ['htod', 'dtoh', 'dtod']
|
||||
self.__memory = ['pageable', 'pinned']
|
||||
|
||||
def add_parser_arguments(self):
|
||||
"""Add the specified arguments."""
|
||||
super().add_parser_arguments()
|
||||
self._parser.add_argument(
|
||||
'--mem_type',
|
||||
type=str,
|
||||
nargs='+',
|
||||
default=self.__mem_types,
|
||||
help='Memory types to benchmark. E.g. {}.'.format(' '.join(self.__mem_types)),
|
||||
)
|
||||
self._parser.add_argument(
|
||||
'--shmoo_mode',
|
||||
action='store_true',
|
||||
default=False,
|
||||
help='Enable shmoo mode for bandwidthtest.',
|
||||
)
|
||||
self._parser.add_argument(
|
||||
'--memory',
|
||||
type=str,
|
||||
default=None,
|
||||
help='Memory argument for bandwidthtest. E.g. {}.'.format(' '.join(self.__memory)),
|
||||
)
|
||||
|
||||
def _preprocess(self):
|
||||
"""Preprocess/preparation operations before the benchmarking.
|
||||
|
@ -58,38 +43,16 @@ class CudaMemBwBenchmark(MicroBenchmarkWithInvoke):
|
|||
if not super()._preprocess():
|
||||
return False
|
||||
|
||||
# Format the arguments
|
||||
if not isinstance(self._args.mem_type, list):
|
||||
self._args.mem_type = [self._args.mem_type]
|
||||
self._args.mem_type = [p.lower() for p in self._args.mem_type]
|
||||
|
||||
# Check the arguments and generate the commands
|
||||
for mem_type in self._args.mem_type:
|
||||
if mem_type not in self.__mem_types:
|
||||
self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
|
||||
logger.error(
|
||||
'Unsupported mem_type of bandwidth test - benchmark: {}, mem_type: {}, expected: {}.'.format(
|
||||
self._name, mem_type, ' '.join(self.__mem_types)
|
||||
)
|
||||
)
|
||||
return False
|
||||
else:
|
||||
command = os.path.join(self._args.bin_dir, self._bin_name)
|
||||
command += ' --' + mem_type
|
||||
if self._args.shmoo_mode:
|
||||
command += ' mode=shmoo'
|
||||
if self._args.memory:
|
||||
if self._args.memory in self.__memory:
|
||||
command += ' memory=' + self._args.memory
|
||||
else:
|
||||
self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
|
||||
logger.error(
|
||||
'Unsupported memory argument of bandwidth test - benchmark: {}, memory: {}, expected: {}.'.
|
||||
format(self._name, self._args.memory, ' '.join(self.__memory))
|
||||
)
|
||||
return False
|
||||
command += ' --csv'
|
||||
self._commands.append(command)
|
||||
command = os.path.join(self._args.bin_dir, self._bin_name)
|
||||
command += ' --' + mem_type
|
||||
if self._args.shmoo_mode:
|
||||
command += ' mode=shmoo'
|
||||
if self._args.memory == 'pinned':
|
||||
command += ' memory=pinned'
|
||||
command += ' --csv'
|
||||
self._commands.append(command)
|
||||
|
||||
return True
|
||||
|
||||
|
@ -108,23 +71,17 @@ class CudaMemBwBenchmark(MicroBenchmarkWithInvoke):
|
|||
self._result.add_raw_data('raw_output_' + self._args.mem_type[cmd_idx], raw_output)
|
||||
|
||||
mem_bw = -1
|
||||
metric = ''
|
||||
valid = True
|
||||
content = raw_output.splitlines()
|
||||
try:
|
||||
for index, line in enumerate(content):
|
||||
if 'H2D' in line:
|
||||
metric = 'H2D_Mem_BW'
|
||||
elif 'D2H' in line:
|
||||
metric = 'D2H_Mem_BW'
|
||||
elif 'D2D' in line:
|
||||
metric = 'D2D_Mem_BW'
|
||||
else:
|
||||
continue
|
||||
line = line.split(',')[1]
|
||||
value = re.search(r'(\d+.\d+)', line)
|
||||
if value:
|
||||
mem_bw = max(mem_bw, float(value.group(0)))
|
||||
metric = self._metrics[self._mem_types.index(self._args.mem_type[cmd_idx])]
|
||||
parse_logline = self._parse_logline_map[self._args.mem_type[cmd_idx]]
|
||||
for line in content:
|
||||
if parse_logline in line:
|
||||
line = line.split(',')[1]
|
||||
value = re.search(r'(\d+.\d+)', line)
|
||||
if value:
|
||||
mem_bw = max(mem_bw, float(value.group(0)))
|
||||
|
||||
except BaseException:
|
||||
valid = False
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
"""Module of the memory performance benchmarks base class."""
|
||||
|
||||
from superbench.common.utils import logger
|
||||
from superbench.benchmarks import ReturnCode
|
||||
from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke
|
||||
|
||||
|
||||
class MemBwBenchmark(MicroBenchmarkWithInvoke):
    """The base class for memory bandwidth performance benchmarks.

    Subclasses (e.g. CUDA/ROCm variants) reuse the common argument parsing and
    validation here and supply the vendor-specific command generation/parsing.
    """
    def __init__(self, name, parameters=''):
        """Constructor.

        Args:
            name (str): benchmark name.
            parameters (str): benchmark parameters.
        """
        super().__init__(name, parameters)

        # Supported copy directions and the result metric name for each,
        # kept index-aligned so subclasses can map mem_type -> metric.
        self._mem_types = ['htod', 'dtoh', 'dtod']
        self._metrics = ['H2D_Mem_BW', 'D2H_Mem_BW', 'D2D_Mem_BW']
        # Supported host memory allocation modes.
        self._memory = ['pinned', 'unpinned']
        # Marker substring to search for in tool output, per mem_type.
        self._parse_logline_map = {'htod': 'H2D', 'dtoh': 'D2H', 'dtod': 'D2D'}

    def add_parser_arguments(self):
        """Add the specified arguments."""
        super().add_parser_arguments()
        self._parser.add_argument(
            '--mem_type',
            type=str,
            nargs='+',
            default=self._mem_types,
            help='Memory types to benchmark. E.g. {}.'.format(' '.join(self._mem_types)),
        )
        self._parser.add_argument(
            '--memory',
            type=str,
            default='pinned',
            help='Memory argument for bandwidthtest. E.g. {}.'.format(' '.join(self._memory)),
        )

    def _preprocess(self):
        """Preprocess/preparation operations before the benchmarking.

        Return:
            True if _preprocess() succeed.
        """
        if not super()._preprocess():
            return False

        # Format the arguments
        self._args.mem_type = [p.lower() for p in self._args.mem_type]

        # Check the arguments and generate the commands
        if self._args.memory not in self._memory:
            self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
            # Fixed message: this branch validates the --memory argument,
            # not --mem_type.
            logger.error(
                'Unsupported memory mode of bandwidth test - benchmark: {}, memory: {}, expected: {}.'.format(
                    self._name, self._args.memory, ' '.join(self._memory)
                )
            )
            return False
        for mem_type in self._args.mem_type:
            if mem_type not in self._mem_types:
                self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
                logger.error(
                    'Unsupported mem_type of bandwidth test - benchmark: {}, mem_type: {}, expected: {}.'.format(
                        self._name, mem_type, ' '.join(self._mem_types)
                    )
                )
                return False

        return True
|
|
@ -50,8 +50,8 @@ class CudaMemBwTest(unittest.TestCase):
|
|||
'bandwidthTest --dtoh mode=shmoo memory=pinned --csv', 'bandwidthTest --dtod mode=shmoo memory=pinned --csv'
|
||||
]
|
||||
for i in range(len(expected_command)):
|
||||
commnad = benchmark._bin_name + benchmark._commands[i].split(benchmark._bin_name)[1]
|
||||
assert (commnad == expected_command[i])
|
||||
command = benchmark._bin_name + benchmark._commands[i].split(benchmark._bin_name)[1]
|
||||
assert (command == expected_command[i])
|
||||
|
||||
# Check results and metrics.
|
||||
raw_output = {}
|
||||
|
|
|
@ -0,0 +1,113 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
"""Tests for MemBwBenchmark modules."""
|
||||
|
||||
import os
|
||||
|
||||
from superbench.benchmarks import BenchmarkType, ReturnCode
|
||||
from superbench.benchmarks.micro_benchmarks import MemBwBenchmark
|
||||
|
||||
|
||||
class FakeMemBwBenchmark(MemBwBenchmark):
    """Fake benchmark inheriting from MemBwBenchmark, used to exercise the base class."""
    def __init__(self, name, parameters=''):
        """Constructor.

        Args:
            name: benchmark name.
            parameters: benchmark parameters.
        """
        super().__init__(name, parameters)
        # Use 'echo' so the command simply prints its arguments back.
        self._bin_name = 'echo'

    def _preprocess(self):
        """Preprocess/preparation operations before the benchmarking.

        Return:
            True if _preprocess() succeed.
        """
        if not super()._preprocess():
            return False

        # Build one echo command per requested memory type.
        bin_path = os.path.join(self._args.bin_dir, self._bin_name)
        pinned_suffix = ' memory=pinned' if self._args.memory == 'pinned' else ''
        for mem_type in self._args.mem_type:
            self._commands.append('{} "--{}{}"'.format(bin_path, mem_type, pinned_suffix))

        return True

    def _process_raw_result(self, cmd_idx, raw_output):
        """Function to process raw results and save the summarized results.

        self._result.add_raw_data() and self._result.add_result() need to be called to save the results.

        Args:
            cmd_idx (int): the index of command corresponding with the raw_output.
            raw_output (str): raw output string of the micro-benchmark.

        Return:
            True if the raw output string is valid and result can be extracted.
        """
        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)

        try:
            # Echoed output looks like '--htod memory=pinned' (pinned) or '--htod'.
            mem_flag, _, memory_value = raw_output.strip('\n').partition(' memory=')
            if mem_flag[2:] not in self._mem_types:
                return False
            if self._args.memory == 'pinned' and memory_value not in self._memory:
                return False
            metric = self._metrics[self._mem_types.index(self._args.mem_type[cmd_idx])]
        except BaseException:
            return False

        self._result.add_result(metric, 0)

        return True
|
||||
|
||||
|
||||
def test_memory_bw_performance_base():
    """Test MemBwBenchmark."""
    metrics = ['H2D_Mem_BW', 'D2H_Mem_BW', 'D2D_Mem_BW']

    # Positive cases - pinned (default) and unpinned memory.
    positive_cases = [
        ('', ['echo "--htod memory=pinned"', 'echo "--dtoh memory=pinned"', 'echo "--dtod memory=pinned"']),
        ('--memory unpinned', ['echo "--htod"', 'echo "--dtoh"', 'echo "--dtod"']),
    ]
    for parameters, expected_commands in positive_cases:
        benchmark = FakeMemBwBenchmark('fake', parameters=parameters)
        assert (benchmark._benchmark_type == BenchmarkType.MICRO)
        assert (benchmark.run())
        assert (benchmark.return_code == ReturnCode.SUCCESS)
        # Check command list - strip the bin_dir prefix before comparing.
        for actual, expected in zip(benchmark._commands, expected_commands):
            command = benchmark._bin_name + actual.split(benchmark._bin_name)[1]
            assert (command == expected)
        # Each metric should be present with exactly one recorded value.
        for metric in metrics:
            assert (metric in benchmark.result)
            assert (len(benchmark.result[metric]) == 1)

    # Negative cases - INVALID_ARGUMENT.
    for parameters in ('--memory fake', '--mem_type fake'):
        benchmark = FakeMemBwBenchmark('fake', parameters=parameters)
        assert (benchmark._benchmark_type == BenchmarkType.MICRO)
        assert (benchmark.run() is False)
        assert (benchmark.return_code == ReturnCode.INVALID_ARGUMENT)
|
Loading…
Link in new issue