Benchmarks: micro benchmarks - add python code for DirectXGPUMemBw (#547)

**Description**
Add Python code for DirectXGPUMemBw.
Yuting Jiang committed 2023-07-05 22:07:13 +08:00, committed by GitHub
Parent: f1d608aef7
Commit: af4cfd5bbf
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
8 changed files: 222 additions and 5 deletions
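
A minimal launch sketch for the new benchmark, mirroring the unit test added at the bottom of this PR (assumes a DirectX-capable Windows GPU environment):

    from superbench.benchmarks import BenchmarkRegistry, Platform

    # Build a context for the newly registered 'directx-gpu-mem-bw' benchmark.
    context = BenchmarkRegistry.create_benchmark_context(
        'directx-gpu-mem-bw',
        platform=Platform.DIRECTX,
        parameters='--num_warm_up 0 --num_loop 100 --size 1073741824 --mode read write'
    )

    # Launch it; results are keyed like 'read_1073741824_bw' (GPU copy bandwidth).
    benchmark = BenchmarkRegistry.launch_benchmark(context)
    print(benchmark.result)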

.github/workflows/build-win.yml

@@ -23,6 +23,7 @@ jobs:
        run: |
          docker system prune -a -f
          docker volume prune -a -f
        shell: pwsh
      - name: Build Docker image
        working-directory: .
        shell: pwsh

@@ -31,6 +31,7 @@ from superbench.benchmarks.micro_benchmarks.rocm_memory_bw_performance import Ro
 from superbench.benchmarks.micro_benchmarks.sharding_matmul import ShardingMatmul
 from superbench.benchmarks.micro_benchmarks.tcp_connectivity import TCPConnectivityBenchmark
 from superbench.benchmarks.micro_benchmarks.tensorrt_inference_performance import TensorRTInferenceBenchmark
+from superbench.benchmarks.micro_benchmarks.directx_mem_bw_performance import DirectXGPUMemBw
 from superbench.benchmarks.micro_benchmarks.directx_gemm_flops_performance import DirectXGPUCoreFlops

 __all__ = [
@@ -62,5 +63,6 @@ __all__ = [
     'ShardingMatmul',
     'TCPConnectivityBenchmark',
     'TensorRTInferenceBenchmark',
+    'DirectXGPUMemBw',
     'DirectXGPUCoreFlops',
 ]

@@ -0,0 +1,149 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""Module of the DirectXGPUMemBw performance benchmarks."""

import os

from superbench.common.utils import logger
from superbench.benchmarks import BenchmarkRegistry, Platform
from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke


class DirectXGPUMemBw(MicroBenchmarkWithInvoke):
    """The DirectXGPUMemBw benchmark class."""
    def __init__(self, name, parameters=''):
        """Constructor.

        Args:
            name (str): benchmark name.
            parameters (str): benchmark parameters.
        """
        super().__init__(name, parameters)

        self._bin_name = 'DirectXGPUMemRwBw.exe'
        self._modes = ['read', 'write', 'readwrite']

    def add_parser_arguments(self):
        """Add the specified arguments."""
        super().add_parser_arguments()

        self._parser.add_argument(
            '--num_warm_up',
            type=int,
            default=0,
            required=False,
            help='Number of warm-up rounds.',
        )
        self._parser.add_argument(
            '--num_loop',
            type=int,
            default=100,
            required=False,
            help='Number of loops to run when measuring the performance.',
        )
        self._parser.add_argument(
            '--size',
            type=int,
            default=None,
            required=False,
            help='Size of data for GPU copy.',
        )
        self._parser.add_argument(
            '--minbytes',
            type=int,
            default=4096,
            required=False,
            help='Lower data size bound to test.',
        )
        self._parser.add_argument(
            '--maxbytes',
            type=int,
            default=1024 * 1024 * 1024,
            required=False,
            help='Upper data size bound to test.',
        )
        self._parser.add_argument(
            '--check_data',
            action='store_true',
            required=False,
            help='Whether to check data correctness.',
        )
        self._parser.add_argument(
            '--mode',
            type=str,
            nargs='+',
            default=list(),
            help='Memory operation mode. E.g. {}.'.format(' '.join(self._modes)),
        )

    def _preprocess(self):
        """Preprocess/preparation operations before the benchmarking."""
        if not super()._preprocess():
            return False

        self._args.mode = [m.lower() for m in self._args.mode]
        # Iterate over a copy so that removing unsupported modes does not skip elements.
        for mode in list(self._args.mode):
            if mode not in self._modes:
                logger.warning(
                    'Unsupported mode - benchmark: {}, mode: {}, expected: {}.'.format(self._name, mode, self._modes)
                )
                self._args.mode.remove(mode)

        if len(self._args.mode) == 0:
            logger.error('No valid operation modes are provided.')
            return False

        for mode in self._args.mode:
            command = os.path.join(self._args.bin_dir, self._bin_name)
            command += (' --num_warm_up ' + str(self._args.num_warm_up))
            command += (' --num_loop ' + str(self._args.num_loop))
            if self._args.size is not None:
                command += (' --size ' + str(self._args.size))
            else:
                command += (' --minbytes ' + str(self._args.minbytes))
                command += (' --maxbytes ' + str(self._args.maxbytes))
            if self._args.check_data:
                command += (' --check_data')
            command += (' --' + mode)
            self._commands.append(command)

        return True

    def _process_raw_result(self, cmd_idx, raw_output):
        """Function to process raw results and save the summarized results.

        self._result.add_raw_data() and self._result.add_result() need to be called to save the results.

        Args:
            cmd_idx (int): the index of command corresponding with the raw_output.
            raw_output (str): raw output string of the micro-benchmark.

        Return:
            True if the raw output string is valid and result can be extracted.
        """
        mode = self._args.mode[cmd_idx]
        self._result.add_raw_data('raw_output_' + mode, raw_output, self._args.log_raw_data)

        valid = True
        content = raw_output.splitlines()
        try:
            for line in content:
                if 'GPUMemBw:' in line:
                    size = int(line.split()[-3])
                    bw = float(line.split()[-2])
                    self._result.add_result(f'{mode}_{size}_bw', bw)
                if 'error' in line.lower():
                    valid = False
        except BaseException:
            valid = False
        finally:
            if not valid:
                logger.error(
                    'The result format is invalid - round: {}, benchmark: {}, raw output: {}.'.format(
                        self._curr_run_index, self._name, raw_output
                    )
                )
                return False
        return True


BenchmarkRegistry.register_benchmark('directx-gpu-mem-bw', DirectXGPUMemBw, platform=Platform.DIRECTX)
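
The parser above keys each metric as '<mode>_<size>_bw', reading the size and bandwidth from the third- and second-to-last whitespace-separated tokens of any line containing 'GPUMemBw:'. A minimal sketch of that extraction; the sample line is hypothetical, since the executable's exact output format is not shown in this diff:

    # Hypothetical sample line; only the token positions matter to the parser.
    line = 'GPUMemBw: mode read size 1073741824 385.2 GB/s'
    size = int(line.split()[-3])    # -> 1073741824
    bw = float(line.split()[-2])    # -> 385.2
    metric = '{}_{}_bw'.format('read', size)    # -> 'read_1073741824_bw'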

@@ -68,7 +68,7 @@ class BenchmarkOptions : public Options {
        min_size = get_cmd_line_argument_int("--minbytes", 4 * 1024);
        max_size =
            get_cmd_line_argument_ulonglong("--maxbytes", static_cast<unsigned long long>(1LL * 1024 * 1024 * 1024));
-       check_data = get_cmd_line_argument_bool("--check");
+       check_data = get_cmd_line_argument_bool("--check_data");
        if (get_cmd_line_argument_bool("--read")) {
            mem_type = Memtype::Read;
        }
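
This brings the executable's flag in line with the '--check_data' switch the Python wrapper emits. Pieced together from _preprocess above, a representative command line would be (values illustrative):

    DirectXGPUMemRwBw.exe --num_warm_up 0 --num_loop 100 --minbytes 4096 --maxbytes 1073741824 --check_data --read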

@@ -19,12 +19,14 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <TargetName>DirectXGPUMemRwBw</TargetName>
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
     <PlatformToolset>v143</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <TargetName>DirectXGPUMemRwBw</TargetName>
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
     <PlatformToolset>v143</PlatformToolset>

@@ -180,7 +180,7 @@ class MicroBenchmarkWithInvoke(MicroBenchmark):
                )
            )
-           output = run_command(self._commands[cmd_idx], flush_output=self._args.log_flushing)
+           output = run_command(self._commands[cmd_idx], flush_output=self._args.log_flushing, cwd=self._args.bin_dir)
            if output.returncode != 0:
                self._result.set_return_code(ReturnCode.MICROBENCHMARK_EXECUTION_FAILURE)
                logger.error(

@@ -10,13 +10,14 @@ import shlex

from superbench.common.utils import stdout_logger


-def run_command(command, quiet=False, flush_output=False):
+def run_command(command, quiet=False, flush_output=False, cwd=None):
    """Run command in string format, return the result with stdout and stderr.

    Args:
        command (str): command to run.
        quiet (bool): no stdout display of the command if quiet is True.
        flush_output (bool): enable real-time output flush or not when running the command.
+       cwd (str): working directory to run the command.

    Return:
        result (subprocess.CompletedProcess): The return value from subprocess.run().
@@ -26,7 +27,11 @@ def run_command(command, quiet=False, flush_output=False):
    try:
        args = shlex.split(command)
        process = subprocess.Popen(
-           args, cwd=os.getcwd(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True
+           args,
+           cwd=os.getcwd() if cwd is None else cwd,
+           stdout=subprocess.PIPE,
+           stderr=subprocess.STDOUT,
+           universal_newlines=True
        )
        output = ''
        for line in process.stdout:
@@ -43,7 +48,13 @@
        return subprocess.CompletedProcess(args=args, returncode=-1, stdout=str(e))
    else:
        result = subprocess.run(
-           command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, check=False, universal_newlines=True
+           command,
+           cwd=os.getcwd() if cwd is None else cwd,
+           stdout=subprocess.PIPE,
+           stderr=subprocess.STDOUT,
+           shell=True,
+           check=False,
+           universal_newlines=True
        )
        if not quiet:
            stdout_logger.log(result.stdout)
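
The new cwd parameter is backward compatible: existing callers still run in the current working directory, while MicroBenchmarkWithInvoke (above) now runs each command inside its bin_dir, presumably so the executable can find files relative to its own location. A usage sketch, assuming run_command is exported from superbench.common.utils as micro_base.py's usage suggests (path illustrative):

    from superbench.common.utils import run_command

    # Default: run in the current working directory, as before.
    result = run_command('DirectXGPUMemRwBw.exe --num_loop 100 --read')

    # New: run inside the binary's own directory.
    result = run_command('DirectXGPUMemRwBw.exe --num_loop 100 --read', cwd='C:/superbench/bin')
    print(result.returncode, result.stdout)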

@@ -0,0 +1,52 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Tests for DirectXGPUMemBw benchmark."""

import numbers

from tests.helper import decorator
from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform


@decorator.directx_test
def test_directx_gpu_mem_bw():
    """Test DirectXGPUMemBw benchmark."""
    # Test for default configuration
    context = BenchmarkRegistry.create_benchmark_context(
        'directx-gpu-mem-bw',
        platform=Platform.DIRECTX,
        parameters=r'--num_warm_up 0 --num_loop 100 --size 1073741824 --mode read write'
    )

    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (benchmark.name == 'directx-gpu-mem-bw')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.num_warm_up == 0)
    assert (benchmark._args.num_loop == 100)
    assert (benchmark._args.size == 1073741824)
    assert (sorted(benchmark._args.mode) == ['read', 'write'])

    # Check results and metrics.
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    assert ('raw_output_read' in benchmark.raw_data)
    assert ('raw_output_write' in benchmark.raw_data)
    assert (len(benchmark.raw_data['raw_output_read']) == 1)
    assert (len(benchmark.raw_data['raw_output_write']) == 1)
    assert (isinstance(benchmark.raw_data['raw_output_read'][0], str))
    assert (isinstance(benchmark.raw_data['raw_output_write'][0], str))
    assert ('read_1073741824_bw' in benchmark.result)
    assert ('write_1073741824_bw' in benchmark.result)
    assert (len(benchmark.result['read_1073741824_bw']) == 1)
    assert (len(benchmark.result['write_1073741824_bw']) == 1)
    assert (isinstance(benchmark.result['read_1073741824_bw'][0], numbers.Number))
    assert (isinstance(benchmark.result['write_1073741824_bw'][0], numbers.Number))
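
To exercise just this test (a sketch; the decorator.directx_test marker presumably skips it on hosts without a DirectX GPU):

    python -m pytest -k test_directx_gpu_mem_bw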