Benchmarks: micro benchmarks - add python code for DirectXGPUMemBw (#547)
**Description** add python code for DirectXGPUMemBw.
This commit is contained in:
Parent
f1d608aef7
Commit
af4cfd5bbf
|
@ -23,6 +23,7 @@ jobs:
|
|||
run: |
|
||||
docker system prune -a -f
|
||||
docker volume prune -a -f
|
||||
shell: pwsh
|
||||
- name: Build Docker image
|
||||
working-directory: .
|
||||
shell: pwsh
|
||||
|
|
|
@ -31,6 +31,7 @@ from superbench.benchmarks.micro_benchmarks.rocm_memory_bw_performance import Ro
|
|||
from superbench.benchmarks.micro_benchmarks.sharding_matmul import ShardingMatmul
|
||||
from superbench.benchmarks.micro_benchmarks.tcp_connectivity import TCPConnectivityBenchmark
|
||||
from superbench.benchmarks.micro_benchmarks.tensorrt_inference_performance import TensorRTInferenceBenchmark
|
||||
from superbench.benchmarks.micro_benchmarks.directx_mem_bw_performance import DirectXGPUMemBw
|
||||
from superbench.benchmarks.micro_benchmarks.directx_gemm_flops_performance import DirectXGPUCoreFlops
|
||||
|
||||
__all__ = [
|
||||
|
@ -62,5 +63,6 @@ __all__ = [
|
|||
'ShardingMatmul',
|
||||
'TCPConnectivityBenchmark',
|
||||
'TensorRTInferenceBenchmark',
|
||||
'DirectXGPUMemBw',
|
||||
'DirectXGPUCoreFlops',
|
||||
]
|
||||
|
|
|
@ -0,0 +1,149 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
"""Module of the DirectXGPUMemBw performance benchmarks."""
|
||||
|
||||
import os
|
||||
|
||||
from superbench.common.utils import logger
|
||||
from superbench.benchmarks import BenchmarkRegistry, Platform
|
||||
from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke
|
||||
|
||||
|
||||
class DirectXGPUMemBw(MicroBenchmarkWithInvoke):
    """The DirectXGPUMemBw benchmark class.

    Wraps the DirectXGPUMemRwBw.exe binary to measure GPU memory bandwidth
    for read, write, and readwrite operations; one command is generated and
    executed per requested mode.
    """
    def __init__(self, name, parameters=''):
        """Constructor.

        Args:
            name (str): benchmark name.
            parameters (str): benchmark parameters.
        """
        super().__init__(name, parameters)
        # Executable invoked by MicroBenchmarkWithInvoke from bin_dir.
        self._bin_name = 'DirectXGPUMemRwBw.exe'
        # The set of memory operation modes the binary supports.
        self._modes = ['read', 'write', 'readwrite']

    def add_parser_arguments(self):
        """Add the specified arguments."""
        super().add_parser_arguments()
        self._parser.add_argument(
            '--num_warm_up',
            type=int,
            default=0,
            required=False,
            help='Number of warm up rounds.',
        )
        self._parser.add_argument(
            '--num_loop',
            type=int,
            default=100,
            required=False,
            help='Number of loop times to measure the performance.',
        )
        self._parser.add_argument(
            '--size',
            type=int,
            default=None,
            required=False,
            help='Size of data for GPU copy.',
        )
        self._parser.add_argument(
            '--minbytes',
            type=int,
            default=4096,
            required=False,
            help='Lower data size bound to test.',
        )
        self._parser.add_argument(
            '--maxbytes',
            type=int,
            default=1024 * 1024 * 1024,
            required=False,
            help='Upper data size bound to test.',
        )
        self._parser.add_argument(
            '--check_data',
            action='store_true',
            required=False,
            help='Whether check data correctness.',
        )
        self._parser.add_argument(
            '--mode',
            type=str,
            nargs='+',
            default=list(),
            help='Memory operation mode. E.g. {}.'.format(' '.join(self._modes)),
        )

    def _preprocess(self):
        """Preprocess/preparation operations before the benchmarking.

        Validates the requested modes and builds one command line per valid
        mode. A fixed --size takes precedence over the min/max byte range.

        Return:
            True if _preprocess succeeds; False if the parent preprocess
            fails or no valid mode remains.
        """
        if not super()._preprocess():
            return False

        # Filter into a new list instead of calling remove() while iterating
        # the same list, which would skip the element following each removal
        # (e.g. two consecutive invalid modes would leave the second in place).
        valid_modes = []
        for mode in (m.lower() for m in self._args.mode):
            if mode in self._modes:
                valid_modes.append(mode)
            else:
                logger.warning(
                    'Unsupported mode - benchmark: {}, mode: {}, expected: {}.'.format(self._name, mode, self._modes)
                )
        self._args.mode = valid_modes

        if len(self._args.mode) == 0:
            logger.error('No valid operation modes are provided.')
            return False

        for mode in self._args.mode:
            command = os.path.join(self._args.bin_dir, self._bin_name)
            command += (' --num_warm_up ' + str(self._args.num_warm_up))
            command += (' --num_loop ' + str(self._args.num_loop))
            if self._args.size is not None:
                command += (' --size ' + str(self._args.size))
            else:
                command += (' --minbytes ' + str(self._args.minbytes))
                command += (' --maxbytes ' + str(self._args.maxbytes))
            if self._args.check_data:
                command += (' --check_data')
            command += (' --' + mode)
            self._commands.append(command)
        return True

    def _process_raw_result(self, cmd_idx, raw_output):
        """Function to process raw results and save the summarized results.

        self._result.add_raw_data() and self._result.add_result() need to be called to save the results.

        Args:
            cmd_idx (int): the index of command corresponding with the raw_output.
            raw_output (str): raw output string of the micro-benchmark.

        Return:
            True if the raw output string is valid and result can be extracted.
        """
        # cmd_idx maps 1:1 to the mode list built in _preprocess.
        mode = self._args.mode[cmd_idx]
        self._result.add_raw_data('raw_output_' + mode, raw_output, self._args.log_raw_data)

        valid = True

        content = raw_output.splitlines()
        try:
            for line in content:
                if 'GPUMemBw:' in line:
                    # Expected line tail: "... <size> <bandwidth> <unit>".
                    size = int(line.split()[-3])
                    bw = float(line.split()[-2])
                    self._result.add_result(f'{mode}_{size}_bw', bw)
                if 'error' in line.lower():
                    valid = False
        except (ValueError, IndexError):
            # Malformed result line: missing fields or non-numeric size/bw.
            # Narrowed from BaseException so real faults (e.g. KeyboardInterrupt)
            # are not silently swallowed.
            valid = False

        if not valid:
            logger.error(
                'The result format is invalid - round: {}, benchmark: {}, raw output: {}.'.format(
                    self._curr_run_index, self._name, raw_output
                )
            )
            return False
        return True
|
||||
|
||||
|
||||
# Register under id 'directx-gpu-mem-bw'; only selectable on the DirectX platform.
BenchmarkRegistry.register_benchmark('directx-gpu-mem-bw', DirectXGPUMemBw, platform=Platform.DIRECTX)
|
|
@ -68,7 +68,7 @@ class BenchmarkOptions : public Options {
|
|||
min_size = get_cmd_line_argument_int("--minbytes", 4 * 1024);
|
||||
max_size =
|
||||
get_cmd_line_argument_ulonglong("--maxbytes", static_cast<unsigned long long>(1LL * 1024 * 1024 * 1024));
|
||||
check_data = get_cmd_line_argument_bool("--check");
|
||||
check_data = get_cmd_line_argument_bool("--check_data");
|
||||
if (get_cmd_line_argument_bool("--read")) {
|
||||
mem_type = Memtype::Read;
|
||||
}
|
||||
|
|
|
@ -19,12 +19,14 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<TargetName>DirectXGPUMemRwBw</TargetName>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v143</PlatformToolset>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<TargetName>DirectXGPUMemRwBw</TargetName>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v143</PlatformToolset>
|
||||
|
|
|
@ -180,7 +180,7 @@ class MicroBenchmarkWithInvoke(MicroBenchmark):
|
|||
)
|
||||
)
|
||||
|
||||
output = run_command(self._commands[cmd_idx], flush_output=self._args.log_flushing)
|
||||
output = run_command(self._commands[cmd_idx], flush_output=self._args.log_flushing, cwd=self._args.bin_dir)
|
||||
if output.returncode != 0:
|
||||
self._result.set_return_code(ReturnCode.MICROBENCHMARK_EXECUTION_FAILURE)
|
||||
logger.error(
|
||||
|
|
|
@ -10,13 +10,14 @@ import shlex
|
|||
from superbench.common.utils import stdout_logger
|
||||
|
||||
|
||||
def run_command(command, quiet=False, flush_output=False):
|
||||
def run_command(command, quiet=False, flush_output=False, cwd=None):
|
||||
"""Run command in string format, return the result with stdout and stderr.
|
||||
|
||||
Args:
|
||||
command (str): command to run.
|
||||
quiet (bool): no stdout display of the command if quiet is True.
|
||||
flush_output (bool): enable real-time output flush or not when running the command.
|
||||
cwd (str): working directory to run the command.
|
||||
|
||||
Return:
|
||||
result (subprocess.CompletedProcess): The return value from subprocess.run().
|
||||
|
@ -26,7 +27,11 @@ def run_command(command, quiet=False, flush_output=False):
|
|||
try:
|
||||
args = shlex.split(command)
|
||||
process = subprocess.Popen(
|
||||
args, cwd=os.getcwd(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True
|
||||
args,
|
||||
cwd=os.getcwd() if cwd is None else cwd,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
universal_newlines=True
|
||||
)
|
||||
output = ''
|
||||
for line in process.stdout:
|
||||
|
@ -43,7 +48,13 @@ def run_command(command, quiet=False, flush_output=False):
|
|||
return subprocess.CompletedProcess(args=args, returncode=-1, stdout=str(e))
|
||||
else:
|
||||
result = subprocess.run(
|
||||
command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True, check=False, universal_newlines=True
|
||||
command,
|
||||
cwd=os.getcwd() if cwd is None else cwd,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
shell=True,
|
||||
check=False,
|
||||
universal_newlines=True
|
||||
)
|
||||
if not quiet:
|
||||
stdout_logger.log(result.stdout)
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
"""Tests for DirectXGPUMemBw benchmark."""
|
||||
|
||||
import numbers
|
||||
|
||||
from tests.helper import decorator
|
||||
from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform
|
||||
|
||||
|
||||
@decorator.directx_test
def test_directx_gpu_mem_bw():
    """Test DirectXGPUMemBw benchmark."""
    # Launch the benchmark with a fixed data size and two operation modes.
    context = BenchmarkRegistry.create_benchmark_context(
        'directx-gpu-mem-bw',
        platform=Platform.DIRECTX,
        parameters=r'--num_warm_up 0 --num_loop 100 --size 1073741824 --mode read write'
    )

    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Basic information.
    assert (benchmark)
    assert (benchmark.name == 'directx-gpu-mem-bw')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Parameters propagated from the BenchmarkContext.
    assert (benchmark._args.num_warm_up == 0)
    assert (benchmark._args.num_loop == 100)
    assert (benchmark._args.size == 1073741824)
    assert (sorted(benchmark._args.mode) == ['read', 'write'])

    # Run status and per-mode results/metrics.
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    for op in ('read', 'write'):
        raw_key = 'raw_output_' + op
        assert (raw_key in benchmark.raw_data)
        assert (len(benchmark.raw_data[raw_key]) == 1)
        assert (isinstance(benchmark.raw_data[raw_key][0], str))

        metric = op + '_1073741824_bw'
        assert (metric in benchmark.result)
        assert (len(benchmark.result[metric]) == 1)
        assert (isinstance(benchmark.result[metric][0], numbers.Number))
|
Loading…
Reference in new issue