From af4cfd5bbfe989b212d5311656be0cbe7cd5ae35 Mon Sep 17 00:00:00 2001
From: Yuting Jiang
Date: Wed, 5 Jul 2023 22:07:13 +0800
Subject: [PATCH] Benchmarks: micro benchmarks - add python code for DirectXGPUMemBw (#547)

**Description**

Add Python code for DirectXGPUMemBw: register the new `directx-gpu-mem-bw`
micro-benchmark, add a `cwd` argument to `run_command` so the benchmark binary
is executed from its own directory, rename the executable's `--check` option
to `--check_data`, and add unit tests.
---
 .github/workflows/build-win.yml               |   1 +
 .../benchmarks/micro_benchmarks/__init__.py   |   2 +
 .../directx_mem_bw_performance.py             | 149 ++++++++++++++++++
 .../BenchmarkOptions.h                        |   2 +-
 .../GPUMemRwBw.vcxproj                        |   2 +
 .../benchmarks/micro_benchmarks/micro_base.py |   2 +-
 superbench/common/utils/process.py            |  17 +-
 .../test_directx_mem_bw_performance.py        |  52 ++++++
 8 files changed, 222 insertions(+), 5 deletions(-)
 create mode 100644 superbench/benchmarks/micro_benchmarks/directx_mem_bw_performance.py
 create mode 100644 tests/benchmarks/micro_benchmarks/test_directx_mem_bw_performance.py

diff --git a/.github/workflows/build-win.yml b/.github/workflows/build-win.yml
index 6283544b..d1b9a1c8 100644
--- a/.github/workflows/build-win.yml
+++ b/.github/workflows/build-win.yml
@@ -23,6 +23,7 @@ jobs:
         run: |
           docker system prune -a -f
           docker volume prune -a -f
+        shell: pwsh
       - name: Build Docker image
         working-directory: .
         shell: pwsh
diff --git a/superbench/benchmarks/micro_benchmarks/__init__.py b/superbench/benchmarks/micro_benchmarks/__init__.py
index 57304bc4..9fe14336 100644
--- a/superbench/benchmarks/micro_benchmarks/__init__.py
+++ b/superbench/benchmarks/micro_benchmarks/__init__.py
@@ -31,6 +31,7 @@ from superbench.benchmarks.micro_benchmarks.rocm_memory_bw_performance import Ro
 from superbench.benchmarks.micro_benchmarks.sharding_matmul import ShardingMatmul
 from superbench.benchmarks.micro_benchmarks.tcp_connectivity import TCPConnectivityBenchmark
 from superbench.benchmarks.micro_benchmarks.tensorrt_inference_performance import TensorRTInferenceBenchmark
+from superbench.benchmarks.micro_benchmarks.directx_mem_bw_performance import DirectXGPUMemBw
 from superbench.benchmarks.micro_benchmarks.directx_gemm_flops_performance import DirectXGPUCoreFlops
 
 __all__ = [
@@ -62,5 +63,6 @@ __all__ = [
     'ShardingMatmul',
     'TCPConnectivityBenchmark',
     'TensorRTInferenceBenchmark',
+    'DirectXGPUMemBw',
     'DirectXGPUCoreFlops',
 ]
diff --git a/superbench/benchmarks/micro_benchmarks/directx_mem_bw_performance.py b/superbench/benchmarks/micro_benchmarks/directx_mem_bw_performance.py
new file mode 100644
index 00000000..ff9d9d23
--- /dev/null
+++ b/superbench/benchmarks/micro_benchmarks/directx_mem_bw_performance.py
@@ -0,0 +1,149 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""Module of the DirectXGPUMemBw performance benchmarks."""
+
+import os
+
+from superbench.common.utils import logger
+from superbench.benchmarks import BenchmarkRegistry, Platform
+from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke
+
+
+class DirectXGPUMemBw(MicroBenchmarkWithInvoke):
+    """The DirectXGPUMemBw benchmark class."""
+    def __init__(self, name, parameters=''):
+        """Constructor.
+
+        Args:
+            name (str): benchmark name.
+            parameters (str): benchmark parameters.
+        """
+        super().__init__(name, parameters)
+        self._bin_name = 'DirectXGPUMemRwBw.exe'
+        self._modes = ['read', 'write', 'readwrite']
+
+    def add_parser_arguments(self):
+        """Add the specified arguments."""
+        super().add_parser_arguments()
+        self._parser.add_argument(
+            '--num_warm_up',
+            type=int,
+            default=0,
+            required=False,
+            help='Number of warm up rounds.',
+        )
+        self._parser.add_argument(
+            '--num_loop',
+            type=int,
+            default=100,
+            required=False,
+            help='Number of loops to measure the performance.',
+        )
+        self._parser.add_argument(
+            '--size',
+            type=int,
+            default=None,
+            required=False,
+            help='Size of data for GPU copy.',
+        )
+        self._parser.add_argument(
+            '--minbytes',
+            type=int,
+            default=4096,
+            required=False,
+            help='Lower data size bound to test.',
+        )
+        self._parser.add_argument(
+            '--maxbytes',
+            type=int,
+            default=1024 * 1024 * 1024,
+            required=False,
+            help='Upper data size bound to test.',
+        )
+        self._parser.add_argument(
+            '--check_data',
+            action='store_true',
+            required=False,
+            help='Whether to check data correctness.',
+        )
+        self._parser.add_argument(
+            '--mode',
+            type=str,
+            nargs='+',
+            default=list(),
+            help='Memory operation mode. E.g. {}.'.format(' '.join(self._modes)),
+        )
+
+    def _preprocess(self):
+        """Preprocess/preparation operations before the benchmarking."""
+        if not super()._preprocess():
+            return False
+
+        self._args.mode = [m.lower() for m in self._args.mode]
+        for mode in list(self._args.mode):
+            if mode not in self._modes:
+                logger.warning(
+                    'Unsupported mode - benchmark: {}, mode: {}, expected: {}.'.format(self._name, mode, self._modes)
+                )
+                self._args.mode.remove(mode)
+
+        if len(self._args.mode) == 0:
+            logger.error('No valid operation modes are provided.')
+            return False
+
+        for mode in self._args.mode:
+            command = os.path.join(self._args.bin_dir, self._bin_name)
+            command += (' --num_warm_up ' + str(self._args.num_warm_up))
+            command += (' --num_loop ' + str(self._args.num_loop))
+            if self._args.size is not None:
+                command += (' --size ' + str(self._args.size))
+            else:
+                command += (' --minbytes ' + str(self._args.minbytes))
+                command += (' --maxbytes ' + str(self._args.maxbytes))
+            if self._args.check_data:
+                command += (' --check_data')
+            command += (' --' + mode)
+            self._commands.append(command)
+
+        return True
+
+    def _process_raw_result(self, cmd_idx, raw_output):
+        """Function to process raw results and save the summarized results.
+
+        self._result.add_raw_data() and self._result.add_result() need to be called to save the results.
+
+        Args:
+            cmd_idx (int): the index of command corresponding with the raw_output.
+            raw_output (str): raw output string of the micro-benchmark.
+
+        Return:
+            True if the raw output string is valid and result can be extracted.
+        """
+        mode = self._args.mode[cmd_idx]
+        self._result.add_raw_data('raw_output_' + mode, raw_output, self._args.log_raw_data)
+
+        valid = True
+
+        content = raw_output.splitlines()
+        try:
+            for line in content:
+                if 'GPUMemBw:' in line:
+                    size = int(line.split()[-3])
+                    bw = float(line.split()[-2])
+                    self._result.add_result(f'{mode}_{size}_bw', bw)
+                if 'error' in line.lower():
+                    valid = False
+        except BaseException:
+            valid = False
+        finally:
+            if not valid:
+                logger.error(
+                    'The result format is invalid - round: {}, benchmark: {}, raw output: {}.'.format(
+                        self._curr_run_index, self._name, raw_output
+                    )
+                )
+                return False
+        return True
+
+
+BenchmarkRegistry.register_benchmark('directx-gpu-mem-bw', DirectXGPUMemBw, platform=Platform.DIRECTX)
diff --git a/superbench/benchmarks/micro_benchmarks/directx_mem_bw_performance/BenchmarkOptions.h b/superbench/benchmarks/micro_benchmarks/directx_mem_bw_performance/BenchmarkOptions.h
index 7893fe8a..c9d7507a 100644
--- a/superbench/benchmarks/micro_benchmarks/directx_mem_bw_performance/BenchmarkOptions.h
+++ b/superbench/benchmarks/micro_benchmarks/directx_mem_bw_performance/BenchmarkOptions.h
@@ -68,7 +68,7 @@ class BenchmarkOptions : public Options {
         min_size = get_cmd_line_argument_int("--minbytes", 4 * 1024);
         max_size =
             get_cmd_line_argument_ulonglong("--maxbytes", static_cast<unsigned long long>(1LL * 1024 * 1024 * 1024));
-        check_data = get_cmd_line_argument_bool("--check");
+        check_data = get_cmd_line_argument_bool("--check_data");
         if (get_cmd_line_argument_bool("--read")) {
             mem_type = Memtype::Read;
         }
diff --git a/superbench/benchmarks/micro_benchmarks/directx_mem_bw_performance/GPUMemRwBw.vcxproj b/superbench/benchmarks/micro_benchmarks/directx_mem_bw_performance/GPUMemRwBw.vcxproj
index 80ab02e3..b575f804 100644
--- a/superbench/benchmarks/micro_benchmarks/directx_mem_bw_performance/GPUMemRwBw.vcxproj
+++ b/superbench/benchmarks/micro_benchmarks/directx_mem_bw_performance/GPUMemRwBw.vcxproj
@@ -19,12 +19,14 @@
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <TargetName>DirectXGPUMemRwBw</TargetName>
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
     <PlatformToolset>v143</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <TargetName>DirectXGPUMemRwBw</TargetName>
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
     <PlatformToolset>v143</PlatformToolset>
diff --git a/superbench/benchmarks/micro_benchmarks/micro_base.py b/superbench/benchmarks/micro_benchmarks/micro_base.py
index 7a2d3602..e1e85405 100644
--- a/superbench/benchmarks/micro_benchmarks/micro_base.py
+++ b/superbench/benchmarks/micro_benchmarks/micro_base.py
@@ -180,7 +180,7 @@ class MicroBenchmarkWithInvoke(MicroBenchmark):
                 )
             )
 
-            output = run_command(self._commands[cmd_idx], flush_output=self._args.log_flushing)
+            output = run_command(self._commands[cmd_idx], flush_output=self._args.log_flushing, cwd=self._args.bin_dir)
             if output.returncode != 0:
                 self._result.set_return_code(ReturnCode.MICROBENCHMARK_EXECUTION_FAILURE)
                 logger.error(
diff --git a/superbench/common/utils/process.py b/superbench/common/utils/process.py
index 334bf766..75767ead 100644
--- a/superbench/common/utils/process.py
+++ b/superbench/common/utils/process.py
@@ -10,13 +10,14 @@ import shlex
 from superbench.common.utils import stdout_logger
 
 
-def run_command(command, quiet=False, flush_output=False):
+def run_command(command, quiet=False, flush_output=False, cwd=None):
     """Run command in string format, return the result with stdout and stderr.
 
     Args:
         command (str): command to run.
         quiet (bool): no stdout display of the command if quiet is True.
         flush_output (bool): enable real-time output flush or not when running the command.
+        cwd (str): working directory to run the command.
 
     Return:
         result (subprocess.CompletedProcess): The return value from subprocess.run().
@@ -26,7 +27,11 @@
         try:
             args = shlex.split(command)
             process = subprocess.Popen(
-                args, cwd=os.getcwd(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True
+                args,
+                cwd=os.getcwd() if cwd is None else cwd,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                universal_newlines=True
             )
             output = ''
             for line in process.stdout:
@@ -43,7 +48,13 @@
             return subprocess.CompletedProcess(args=args, returncode=-1, stdout=str(e))
     else:
         result = subprocess.run(
-            command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, check=False, universal_newlines=True
+            command,
+            cwd=os.getcwd() if cwd is None else cwd,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            shell=True,
+            check=False,
+            universal_newlines=True
         )
         if not quiet:
             stdout_logger.log(result.stdout)
diff --git a/tests/benchmarks/micro_benchmarks/test_directx_mem_bw_performance.py b/tests/benchmarks/micro_benchmarks/test_directx_mem_bw_performance.py
new file mode 100644
index 00000000..baeed54a
--- /dev/null
+++ b/tests/benchmarks/micro_benchmarks/test_directx_mem_bw_performance.py
@@ -0,0 +1,52 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""Tests for DirectXGPUMemBw benchmark."""
+
+import numbers
+
+from tests.helper import decorator
+from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform
+
+
+@decorator.directx_test
+def test_directx_gpu_mem_bw():
+    """Test DirectXGPUMemBw benchmark."""
+    # Test for default configuration
+    context = BenchmarkRegistry.create_benchmark_context(
+        'directx-gpu-mem-bw',
+        platform=Platform.DIRECTX,
+        parameters=r'--num_warm_up 0 --num_loop 100 --size 1073741824 --mode read write'
+    )
+
+    assert (BenchmarkRegistry.is_benchmark_context_valid(context))
+
+    benchmark = BenchmarkRegistry.launch_benchmark(context)
+
+    # Check basic information.
+    assert (benchmark)
+    assert (benchmark.name == 'directx-gpu-mem-bw')
+    assert (benchmark.type == BenchmarkType.MICRO)
+
+    # Check parameters specified in BenchmarkContext.
+    assert (benchmark._args.num_warm_up == 0)
+    assert (benchmark._args.num_loop == 100)
+    assert (benchmark._args.size == 1073741824)
+    assert (sorted(benchmark._args.mode) == ['read', 'write'])
+
+    # Check results and metrics.
+    assert (benchmark.run_count == 1)
+    assert (benchmark.return_code == ReturnCode.SUCCESS)
+    assert ('raw_output_read' in benchmark.raw_data)
+    assert ('raw_output_write' in benchmark.raw_data)
+    assert (len(benchmark.raw_data['raw_output_read']) == 1)
+    assert (len(benchmark.raw_data['raw_output_write']) == 1)
+    assert (isinstance(benchmark.raw_data['raw_output_read'][0], str))
+    assert (isinstance(benchmark.raw_data['raw_output_write'][0], str))
+
+    assert ('read_1073741824_bw' in benchmark.result)
+    assert ('write_1073741824_bw' in benchmark.result)
+    assert (len(benchmark.result['read_1073741824_bw']) == 1)
+    assert (len(benchmark.result['write_1073741824_bw']) == 1)
+    assert (isinstance(benchmark.result['read_1073741824_bw'][0], numbers.Number))
+    assert (isinstance(benchmark.result['write_1073741824_bw'][0], numbers.Number))
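
Usage sketch (illustrative, not part of the patch): the newly registered 'directx-gpu-mem-bw' benchmark is launched through BenchmarkRegistry the same way the unit test above does; the parameter values below are examples only, and running it requires the DirectXGPUMemRwBw.exe binary on a DirectX-capable machine.

    from superbench.benchmarks import BenchmarkRegistry, Platform

    # The benchmark name matches the BenchmarkRegistry.register_benchmark call in this patch.
    context = BenchmarkRegistry.create_benchmark_context(
        'directx-gpu-mem-bw',
        platform=Platform.DIRECTX,
        parameters='--num_warm_up 0 --num_loop 100 --size 1073741824 --mode read write'
    )
    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Metrics are reported per mode and size, e.g. 'read_1073741824_bw' and 'write_1073741824_bw'.
    print(benchmark.result)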