Benchmarks: Code Revision - Extract base class for memory bandwidth microbenchmark (#159)
**Description** Extract a base class for the memory bandwidth microbenchmark. **Major Revision** - revise and optimize cuda_memory_bandwidth_performance - extract a base class for the memory bandwidth microbenchmark - add tests for the base class
This commit is contained in:
Parent
0583862d2d
Commit
e5e84a2ece
|
@ -10,6 +10,7 @@ from superbench.benchmarks.micro_benchmarks.kernel_launch_overhead import Kernel
|
|||
from superbench.benchmarks.micro_benchmarks.cublas_function import CublasBenchmark
|
||||
from superbench.benchmarks.micro_benchmarks.cudnn_function import CudnnBenchmark
|
||||
from superbench.benchmarks.micro_benchmarks.gemm_flops_performance import GemmFlopsCuda
|
||||
from superbench.benchmarks.micro_benchmarks.memory_bw_performance_base import MemBwBenchmark
|
||||
from superbench.benchmarks.micro_benchmarks.cuda_memory_bw_performance import CudaMemBwBenchmark
|
||||
from superbench.benchmarks.micro_benchmarks.disk_performance import DiskBenchmark
|
||||
from superbench.benchmarks.micro_benchmarks.ib_loopback_performance import IBLoopbackBenchmark
|
||||
|
@ -17,6 +18,6 @@ from superbench.benchmarks.micro_benchmarks.cuda_nccl_bw_performance import Cuda
|
|||
|
||||
__all__ = [
|
||||
'MicroBenchmark', 'MicroBenchmarkWithInvoke', 'ShardingMatmul', 'ComputationCommunicationOverlap', 'KernelLaunch',
|
||||
'CublasBenchmark', 'CudnnBenchmark', 'GemmFlopsCuda', 'CudaMemBwBenchmark', 'DiskBenchmark', 'IBLoopbackBenchmark',
|
||||
'CudaNcclBwBenchmark'
|
||||
'CublasBenchmark', 'CudnnBenchmark', 'GemmFlopsCuda', 'MemBwBenchmark', 'CudaMemBwBenchmark', 'DiskBenchmark',
|
||||
'IBLoopbackBenchmark', 'CudaNcclBwBenchmark'
|
||||
]
|
||||
|
|
|
@ -7,11 +7,11 @@ import os
|
|||
import re
|
||||
|
||||
from superbench.common.utils import logger
|
||||
from superbench.benchmarks import BenchmarkRegistry, Platform, ReturnCode
|
||||
from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke
|
||||
from superbench.benchmarks import BenchmarkRegistry, Platform
|
||||
from superbench.benchmarks.micro_benchmarks import MemBwBenchmark
|
||||
|
||||
|
||||
class CudaMemBwBenchmark(MicroBenchmarkWithInvoke):
|
||||
class CudaMemBwBenchmark(MemBwBenchmark):
|
||||
"""The Cuda memory performance benchmark class."""
|
||||
def __init__(self, name, parameters=''):
|
||||
"""Constructor.
|
||||
|
@ -23,31 +23,16 @@ class CudaMemBwBenchmark(MicroBenchmarkWithInvoke):
|
|||
super().__init__(name, parameters)
|
||||
|
||||
self._bin_name = 'bandwidthTest'
|
||||
self.__mem_types = ['htod', 'dtoh', 'dtod']
|
||||
self.__memory = ['pageable', 'pinned']
|
||||
|
||||
def add_parser_arguments(self):
|
||||
"""Add the specified arguments."""
|
||||
super().add_parser_arguments()
|
||||
self._parser.add_argument(
|
||||
'--mem_type',
|
||||
type=str,
|
||||
nargs='+',
|
||||
default=self.__mem_types,
|
||||
help='Memory types to benchmark. E.g. {}.'.format(' '.join(self.__mem_types)),
|
||||
)
|
||||
self._parser.add_argument(
|
||||
'--shmoo_mode',
|
||||
action='store_true',
|
||||
default=False,
|
||||
help='Enable shmoo mode for bandwidthtest.',
|
||||
)
|
||||
self._parser.add_argument(
|
||||
'--memory',
|
||||
type=str,
|
||||
default=None,
|
||||
help='Memory argument for bandwidthtest. E.g. {}.'.format(' '.join(self.__memory)),
|
||||
)
|
||||
|
||||
def _preprocess(self):
|
||||
"""Preprocess/preparation operations before the benchmarking.
|
||||
|
@ -58,38 +43,16 @@ class CudaMemBwBenchmark(MicroBenchmarkWithInvoke):
|
|||
if not super()._preprocess():
|
||||
return False
|
||||
|
||||
# Format the arguments
|
||||
if not isinstance(self._args.mem_type, list):
|
||||
self._args.mem_type = [self._args.mem_type]
|
||||
self._args.mem_type = [p.lower() for p in self._args.mem_type]
|
||||
|
||||
# Check the arguments and generate the commands
|
||||
for mem_type in self._args.mem_type:
|
||||
if mem_type not in self.__mem_types:
|
||||
self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
|
||||
logger.error(
|
||||
'Unsupported mem_type of bandwidth test - benchmark: {}, mem_type: {}, expected: {}.'.format(
|
||||
self._name, mem_type, ' '.join(self.__mem_types)
|
||||
)
|
||||
)
|
||||
return False
|
||||
else:
|
||||
command = os.path.join(self._args.bin_dir, self._bin_name)
|
||||
command += ' --' + mem_type
|
||||
if self._args.shmoo_mode:
|
||||
command += ' mode=shmoo'
|
||||
if self._args.memory:
|
||||
if self._args.memory in self.__memory:
|
||||
command += ' memory=' + self._args.memory
|
||||
else:
|
||||
self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
|
||||
logger.error(
|
||||
'Unsupported memory argument of bandwidth test - benchmark: {}, memory: {}, expected: {}.'.
|
||||
format(self._name, self._args.memory, ' '.join(self.__memory))
|
||||
)
|
||||
return False
|
||||
command += ' --csv'
|
||||
self._commands.append(command)
|
||||
command = os.path.join(self._args.bin_dir, self._bin_name)
|
||||
command += ' --' + mem_type
|
||||
if self._args.shmoo_mode:
|
||||
command += ' mode=shmoo'
|
||||
if self._args.memory == 'pinned':
|
||||
command += ' memory=pinned'
|
||||
command += ' --csv'
|
||||
self._commands.append(command)
|
||||
|
||||
return True
|
||||
|
||||
|
@ -108,23 +71,17 @@ class CudaMemBwBenchmark(MicroBenchmarkWithInvoke):
|
|||
self._result.add_raw_data('raw_output_' + self._args.mem_type[cmd_idx], raw_output)
|
||||
|
||||
mem_bw = -1
|
||||
metric = ''
|
||||
valid = True
|
||||
content = raw_output.splitlines()
|
||||
try:
|
||||
for index, line in enumerate(content):
|
||||
if 'H2D' in line:
|
||||
metric = 'H2D_Mem_BW'
|
||||
elif 'D2H' in line:
|
||||
metric = 'D2H_Mem_BW'
|
||||
elif 'D2D' in line:
|
||||
metric = 'D2D_Mem_BW'
|
||||
else:
|
||||
continue
|
||||
line = line.split(',')[1]
|
||||
value = re.search(r'(\d+.\d+)', line)
|
||||
if value:
|
||||
mem_bw = max(mem_bw, float(value.group(0)))
|
||||
metric = self._metrics[self._mem_types.index(self._args.mem_type[cmd_idx])]
|
||||
parse_logline = self._parse_logline_map[self._args.mem_type[cmd_idx]]
|
||||
for line in content:
|
||||
if parse_logline in line:
|
||||
line = line.split(',')[1]
|
||||
value = re.search(r'(\d+.\d+)', line)
|
||||
if value:
|
||||
mem_bw = max(mem_bw, float(value.group(0)))
|
||||
|
||||
except BaseException:
|
||||
valid = False
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
"""Module of the memory performance benchmarks base class."""
|
||||
|
||||
from superbench.common.utils import logger
|
||||
from superbench.benchmarks import ReturnCode
|
||||
from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke
|
||||
|
||||
|
||||
class MemBwBenchmark(MicroBenchmarkWithInvoke):
    """The base class for memory bandwidth performance benchmarks.

    Subclasses (e.g. CUDA/ROCm variants) reuse the common argument parsing and
    validation here and supply the vendor-specific command generation/parsing.
    """
    def __init__(self, name, parameters=''):
        """Constructor.

        Args:
            name (str): benchmark name.
            parameters (str): benchmark parameters.
        """
        super().__init__(name, parameters)

        # Supported copy directions and the result metric name for each,
        # kept index-aligned so subclasses can map mem_type -> metric.
        self._mem_types = ['htod', 'dtoh', 'dtod']
        self._metrics = ['H2D_Mem_BW', 'D2H_Mem_BW', 'D2D_Mem_BW']
        # Supported host memory allocation modes.
        self._memory = ['pinned', 'unpinned']
        # Marker substring to search for in tool output, per mem_type.
        self._parse_logline_map = {'htod': 'H2D', 'dtoh': 'D2H', 'dtod': 'D2D'}

    def add_parser_arguments(self):
        """Add the specified arguments."""
        super().add_parser_arguments()
        self._parser.add_argument(
            '--mem_type',
            type=str,
            nargs='+',
            default=self._mem_types,
            help='Memory types to benchmark. E.g. {}.'.format(' '.join(self._mem_types)),
        )
        self._parser.add_argument(
            '--memory',
            type=str,
            default='pinned',
            help='Memory argument for bandwidthtest. E.g. {}.'.format(' '.join(self._memory)),
        )

    def _preprocess(self):
        """Preprocess/preparation operations before the benchmarking.

        Return:
            True if _preprocess() succeed.
        """
        if not super()._preprocess():
            return False

        # Format the arguments
        self._args.mem_type = [p.lower() for p in self._args.mem_type]

        # Check the arguments and generate the commands
        if self._args.memory not in self._memory:
            self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
            # Fixed message: this branch validates the --memory argument,
            # not --mem_type.
            logger.error(
                'Unsupported memory mode of bandwidth test - benchmark: {}, memory: {}, expected: {}.'.format(
                    self._name, self._args.memory, ' '.join(self._memory)
                )
            )
            return False
        for mem_type in self._args.mem_type:
            if mem_type not in self._mem_types:
                self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
                logger.error(
                    'Unsupported mem_type of bandwidth test - benchmark: {}, mem_type: {}, expected: {}.'.format(
                        self._name, mem_type, ' '.join(self._mem_types)
                    )
                )
                return False

        return True
|
|
@ -50,8 +50,8 @@ class CudaMemBwTest(unittest.TestCase):
|
|||
'bandwidthTest --dtoh mode=shmoo memory=pinned --csv', 'bandwidthTest --dtod mode=shmoo memory=pinned --csv'
|
||||
]
|
||||
for i in range(len(expected_command)):
|
||||
commnad = benchmark._bin_name + benchmark._commands[i].split(benchmark._bin_name)[1]
|
||||
assert (commnad == expected_command[i])
|
||||
command = benchmark._bin_name + benchmark._commands[i].split(benchmark._bin_name)[1]
|
||||
assert (command == expected_command[i])
|
||||
|
||||
# Check results and metrics.
|
||||
raw_output = {}
|
||||
|
|
|
@ -0,0 +1,113 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
"""Tests for MemBwBenchmark modules."""
|
||||
|
||||
import os
|
||||
|
||||
from superbench.benchmarks import BenchmarkType, ReturnCode
|
||||
from superbench.benchmarks.micro_benchmarks import MemBwBenchmark
|
||||
|
||||
|
||||
class FakeMemBwBenchmark(MemBwBenchmark):
    """Fake benchmark inheriting from MemBwBenchmark, used to exercise the base class."""
    def __init__(self, name, parameters=''):
        """Constructor.

        Args:
            name: benchmark name.
            parameters: benchmark parameters.
        """
        super().__init__(name, parameters)
        # Use 'echo' so the command simply prints its arguments back.
        self._bin_name = 'echo'

    def _preprocess(self):
        """Preprocess/preparation operations before the benchmarking.

        Return:
            True if _preprocess() succeed.
        """
        if not super()._preprocess():
            return False

        # Build one echo command per requested memory type.
        bin_path = os.path.join(self._args.bin_dir, self._bin_name)
        pinned_suffix = ' memory=pinned' if self._args.memory == 'pinned' else ''
        for mem_type in self._args.mem_type:
            self._commands.append('{} "--{}{}"'.format(bin_path, mem_type, pinned_suffix))

        return True

    def _process_raw_result(self, cmd_idx, raw_output):
        """Function to process raw results and save the summarized results.

        self._result.add_raw_data() and self._result.add_result() need to be called to save the results.

        Args:
            cmd_idx (int): the index of command corresponding with the raw_output.
            raw_output (str): raw output string of the micro-benchmark.

        Return:
            True if the raw output string is valid and result can be extracted.
        """
        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)

        try:
            # Echoed output looks like '--htod memory=pinned' (pinned) or '--htod'.
            mem_flag, _, memory_value = raw_output.strip('\n').partition(' memory=')
            if mem_flag[2:] not in self._mem_types:
                return False
            if self._args.memory == 'pinned' and memory_value not in self._memory:
                return False
            metric = self._metrics[self._mem_types.index(self._args.mem_type[cmd_idx])]
        except BaseException:
            return False

        self._result.add_result(metric, 0)

        return True
|
||||
|
||||
|
||||
def test_memory_bw_performance_base():
    """Test MemBwBenchmark."""
    metrics = ['H2D_Mem_BW', 'D2H_Mem_BW', 'D2D_Mem_BW']

    # Positive cases - pinned (default) and unpinned memory.
    positive_cases = [
        ('', ['echo "--htod memory=pinned"', 'echo "--dtoh memory=pinned"', 'echo "--dtod memory=pinned"']),
        ('--memory unpinned', ['echo "--htod"', 'echo "--dtoh"', 'echo "--dtod"']),
    ]
    for parameters, expected_commands in positive_cases:
        benchmark = FakeMemBwBenchmark('fake', parameters=parameters)
        assert (benchmark._benchmark_type == BenchmarkType.MICRO)
        assert (benchmark.run())
        assert (benchmark.return_code == ReturnCode.SUCCESS)
        # Check command list - strip the bin_dir prefix before comparing.
        for actual, expected in zip(benchmark._commands, expected_commands):
            command = benchmark._bin_name + actual.split(benchmark._bin_name)[1]
            assert (command == expected)
        # Each metric should be present with exactly one recorded value.
        for metric in metrics:
            assert (metric in benchmark.result)
            assert (len(benchmark.result[metric]) == 1)

    # Negative cases - INVALID_ARGUMENT.
    for parameters in ('--memory fake', '--mem_type fake'):
        benchmark = FakeMemBwBenchmark('fake', parameters=parameters)
        assert (benchmark._benchmark_type == BenchmarkType.MICRO)
        assert (benchmark.run() is False)
        assert (benchmark.return_code == ReturnCode.INVALID_ARGUMENT)
|
Loading…
Link in new issue