Benchmarks: Code Revision - Extract base class for memory bandwidth microbenchmark (#159)

**Description**
Extract a base class for the memory bandwidth microbenchmark.

**Major Revision**
- Revise and optimize cuda_memory_bandwidth_performance.
- Extract a base class for the memory bandwidth microbenchmark (see the sketch below).
- Add a test for the base class.
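To make the refactor concrete, here is a minimal sketch of how a benchmark can build on the new `MemBwBenchmark` base class, which now owns the `--mem_type`/`--memory` argument parsing and validation. The `EchoMemBwBenchmark` name and the `echo`-based command are illustrative only (not part of this PR); the structure mirrors the `FakeMemBwBenchmark` added in the new unit test.

```python
import os

from superbench.benchmarks.micro_benchmarks import MemBwBenchmark


class EchoMemBwBenchmark(MemBwBenchmark):
    """Illustrative subclass that reuses the shared --mem_type/--memory handling from the base."""
    def __init__(self, name, parameters=''):
        super().__init__(name, parameters)
        self._bin_name = 'echo'  # stand-in binary for this sketch

    def _preprocess(self):
        # The base class validates --mem_type and --memory (setting INVALID_ARGUMENT on bad input),
        # so the subclass only assembles one command per requested memory type.
        if not super()._preprocess():
            return False
        for mem_type in self._args.mem_type:
            command = os.path.join(self._args.bin_dir, self._bin_name) + ' --' + mem_type
            self._commands.append(command)
        return True

    def _process_raw_result(self, cmd_idx, raw_output):
        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)
        # The shared tables map 'htod'/'dtoh'/'dtod' to 'H2D_Mem_BW'/'D2H_Mem_BW'/'D2D_Mem_BW'.
        metric = self._metrics[self._mem_types.index(self._args.mem_type[cmd_idx])]
        self._result.add_result(metric, 0.0)  # a real benchmark would parse raw_output here
        return True
```

Subclasses such as CudaMemBwBenchmark keep only the command construction and output parsing that is specific to their tool, while argument definitions and validation live in the base class below.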
This commit is contained in:
Yuting Jiang 2021-08-26 07:48:07 +08:00 committed by GitHub
Parent 0583862d2d
Commit e5e84a2ece
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
5 changed files: 212 additions and 66 deletions

View file

@@ -10,6 +10,7 @@ from superbench.benchmarks.micro_benchmarks.kernel_launch_overhead import Kernel
from superbench.benchmarks.micro_benchmarks.cublas_function import CublasBenchmark
from superbench.benchmarks.micro_benchmarks.cudnn_function import CudnnBenchmark
from superbench.benchmarks.micro_benchmarks.gemm_flops_performance import GemmFlopsCuda
from superbench.benchmarks.micro_benchmarks.memory_bw_performance_base import MemBwBenchmark
from superbench.benchmarks.micro_benchmarks.cuda_memory_bw_performance import CudaMemBwBenchmark
from superbench.benchmarks.micro_benchmarks.disk_performance import DiskBenchmark
from superbench.benchmarks.micro_benchmarks.ib_loopback_performance import IBLoopbackBenchmark
@@ -17,6 +18,6 @@ from superbench.benchmarks.micro_benchmarks.cuda_nccl_bw_performance import Cuda
__all__ = [
'MicroBenchmark', 'MicroBenchmarkWithInvoke', 'ShardingMatmul', 'ComputationCommunicationOverlap', 'KernelLaunch',
'CublasBenchmark', 'CudnnBenchmark', 'GemmFlopsCuda', 'CudaMemBwBenchmark', 'DiskBenchmark', 'IBLoopbackBenchmark',
'CudaNcclBwBenchmark'
'CublasBenchmark', 'CudnnBenchmark', 'GemmFlopsCuda', 'MemBwBenchmark', 'CudaMemBwBenchmark', 'DiskBenchmark',
'IBLoopbackBenchmark', 'CudaNcclBwBenchmark'
]

View file

@@ -7,11 +7,11 @@ import os
import re
from superbench.common.utils import logger
from superbench.benchmarks import BenchmarkRegistry, Platform, ReturnCode
from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke
from superbench.benchmarks import BenchmarkRegistry, Platform
from superbench.benchmarks.micro_benchmarks import MemBwBenchmark
class CudaMemBwBenchmark(MicroBenchmarkWithInvoke):
class CudaMemBwBenchmark(MemBwBenchmark):
"""The Cuda memory performance benchmark class."""
def __init__(self, name, parameters=''):
"""Constructor.
@@ -23,31 +23,16 @@ class CudaMemBwBenchmark(MicroBenchmarkWithInvoke):
super().__init__(name, parameters)
self._bin_name = 'bandwidthTest'
self.__mem_types = ['htod', 'dtoh', 'dtod']
self.__memory = ['pageable', 'pinned']
def add_parser_arguments(self):
"""Add the specified arguments."""
super().add_parser_arguments()
self._parser.add_argument(
'--mem_type',
type=str,
nargs='+',
default=self.__mem_types,
help='Memory types to benchmark. E.g. {}.'.format(' '.join(self.__mem_types)),
)
self._parser.add_argument(
'--shmoo_mode',
action='store_true',
default=False,
help='Enable shmoo mode for bandwidthtest.',
)
self._parser.add_argument(
'--memory',
type=str,
default=None,
help='Memory argument for bandwidthtest. E.g. {}.'.format(' '.join(self.__memory)),
)
def _preprocess(self):
"""Preprocess/preparation operations before the benchmarking.
@@ -58,38 +43,16 @@ class CudaMemBwBenchmark(MicroBenchmarkWithInvoke):
if not super()._preprocess():
return False
# Format the arguments
if not isinstance(self._args.mem_type, list):
self._args.mem_type = [self._args.mem_type]
self._args.mem_type = [p.lower() for p in self._args.mem_type]
# Check the arguments and generate the commands
for mem_type in self._args.mem_type:
if mem_type not in self.__mem_types:
self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
logger.error(
'Unsupported mem_type of bandwidth test - benchmark: {}, mem_type: {}, expected: {}.'.format(
self._name, mem_type, ' '.join(self.__mem_types)
)
)
return False
else:
command = os.path.join(self._args.bin_dir, self._bin_name)
command += ' --' + mem_type
if self._args.shmoo_mode:
command += ' mode=shmoo'
if self._args.memory:
if self._args.memory in self.__memory:
command += ' memory=' + self._args.memory
else:
self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
logger.error(
'Unsupported memory argument of bandwidth test - benchmark: {}, memory: {}, expected: {}.'.
format(self._name, self._args.memory, ' '.join(self.__memory))
)
return False
command += ' --csv'
self._commands.append(command)
command = os.path.join(self._args.bin_dir, self._bin_name)
command += ' --' + mem_type
if self._args.shmoo_mode:
command += ' mode=shmoo'
if self._args.memory == 'pinned':
command += ' memory=pinned'
command += ' --csv'
self._commands.append(command)
return True
@@ -108,23 +71,17 @@ class CudaMemBwBenchmark(MicroBenchmarkWithInvoke):
self._result.add_raw_data('raw_output_' + self._args.mem_type[cmd_idx], raw_output)
mem_bw = -1
metric = ''
valid = True
content = raw_output.splitlines()
try:
for index, line in enumerate(content):
if 'H2D' in line:
metric = 'H2D_Mem_BW'
elif 'D2H' in line:
metric = 'D2H_Mem_BW'
elif 'D2D' in line:
metric = 'D2D_Mem_BW'
else:
continue
line = line.split(',')[1]
value = re.search(r'(\d+.\d+)', line)
if value:
mem_bw = max(mem_bw, float(value.group(0)))
metric = self._metrics[self._mem_types.index(self._args.mem_type[cmd_idx])]
parse_logline = self._parse_logline_map[self._args.mem_type[cmd_idx]]
for line in content:
if parse_logline in line:
line = line.split(',')[1]
value = re.search(r'(\d+.\d+)', line)
if value:
mem_bw = max(mem_bw, float(value.group(0)))
except BaseException:
valid = False

View file

@@ -0,0 +1,75 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""Module of the memory performance benchmarks base class."""
from superbench.common.utils import logger
from superbench.benchmarks import ReturnCode
from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke
class MemBwBenchmark(MicroBenchmarkWithInvoke):
"""The Cuda memory performance benchmark class."""
def __init__(self, name, parameters=''):
"""Constructor.
Args:
name (str): benchmark name.
parameters (str): benchmark parameters.
"""
super().__init__(name, parameters)
self._mem_types = ['htod', 'dtoh', 'dtod']
self._metrics = ['H2D_Mem_BW', 'D2H_Mem_BW', 'D2D_Mem_BW']
self._memory = ['pinned', 'unpinned']
self._parse_logline_map = {'htod': 'H2D', 'dtoh': 'D2H', 'dtod': 'D2D'}
def add_parser_arguments(self):
"""Add the specified arguments."""
super().add_parser_arguments()
self._parser.add_argument(
'--mem_type',
type=str,
nargs='+',
default=self._mem_types,
help='Memory types to benchmark. E.g. {}.'.format(' '.join(self._mem_types)),
)
self._parser.add_argument(
'--memory',
type=str,
default='pinned',
help='Memory argument for bandwidthtest. E.g. {}.'.format(' '.join(self._memory)),
)
def _preprocess(self):
"""Preprocess/preparation operations before the benchmarking.
Return:
True if _preprocess() succeed.
"""
if not super()._preprocess():
return False
# Format the arguments
self._args.mem_type = [p.lower() for p in self._args.mem_type]
# Check the arguments and generate the commands
if self._args.memory not in self._memory:
self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
logger.error(
'Unsupported mem_type of bandwidth test - benchmark: {}, memory: {}, expected: {}.'.format(
self._name, self._args.memory, ' '.join(self._memory)
)
)
return False
for mem_type in self._args.mem_type:
if mem_type not in self._mem_types:
self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
logger.error(
'Unsupported mem_type of bandwidth test - benchmark: {}, mem_type: {}, expected: {}.'.format(
self._name, mem_type, ' '.join(self._mem_types)
)
)
return False
return True

View file

@@ -50,8 +50,8 @@ class CudaMemBwTest(unittest.TestCase):
'bandwidthTest --dtoh mode=shmoo memory=pinned --csv', 'bandwidthTest --dtod mode=shmoo memory=pinned --csv'
]
for i in range(len(expected_command)):
commnad = benchmark._bin_name + benchmark._commands[i].split(benchmark._bin_name)[1]
assert (commnad == expected_command[i])
command = benchmark._bin_name + benchmark._commands[i].split(benchmark._bin_name)[1]
assert (command == expected_command[i])
# Check results and metrics.
raw_output = {}

View file

@@ -0,0 +1,113 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Tests for MemBwBenchmark modules."""
import os
from superbench.benchmarks import BenchmarkType, ReturnCode
from superbench.benchmarks.micro_benchmarks import MemBwBenchmark
class FakeMemBwBenchmark(MemBwBenchmark):
"""Fake benchmark inherit from MemBwBenchmark."""
def __init__(self, name, parameters=''):
"""Constructor.
Args:
name: benchmark name.
parameters: benchmark parameters.
"""
super().__init__(name, parameters)
self._bin_name = 'echo'
def _preprocess(self):
"""Preprocess/preparation operations before the benchmarking.
Return:
True if _preprocess() succeed.
"""
if not super()._preprocess():
return False
# Check the arguments and generate the commands
for mem_type in self._args.mem_type:
command = os.path.join(self._args.bin_dir, self._bin_name)
command += ' "--' + mem_type
if self._args.memory == 'pinned':
command += ' memory=pinned'
command += '"'
self._commands.append(command)
return True
def _process_raw_result(self, cmd_idx, raw_output):
"""Function to process raw results and save the summarized results.
self._result.add_raw_data() and self._result.add_result() need to be called to save the results.
Args:
cmd_idx (int): the index of command corresponding with the raw_output.
raw_output (str): raw output string of the micro-benchmark.
Return:
True if the raw output string is valid and result can be extracted.
"""
self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)
try:
params = raw_output.strip('\n').split(' memory=')
if params[0][2:] not in self._mem_types:
return False
if self._args.memory == 'pinned':
if params[1] not in self._memory:
return False
metric = self._metrics[self._mem_types.index(self._args.mem_type[cmd_idx])]
except BaseException:
return False
self._result.add_result(metric, 0)
return True
def test_memory_bw_performance_base():
"""Test MemBwBenchmark."""
# Positive case - memory=pinned.
benchmark = FakeMemBwBenchmark('fake')
assert (benchmark._benchmark_type == BenchmarkType.MICRO)
assert (benchmark.run())
assert (benchmark.return_code == ReturnCode.SUCCESS)
# Check command list
expected_command = ['echo "--htod memory=pinned"', 'echo "--dtoh memory=pinned"', 'echo "--dtod memory=pinned"']
for i in range(len(expected_command)):
command = benchmark._bin_name + benchmark._commands[i].split(benchmark._bin_name)[1]
assert (command == expected_command[i])
for i, metric in enumerate(['H2D_Mem_BW', 'D2H_Mem_BW', 'D2D_Mem_BW']):
assert (metric in benchmark.result)
assert (len(benchmark.result[metric]) == 1)
# Positive case - memory=unpinned.
benchmark = FakeMemBwBenchmark('fake', parameters='--memory unpinned')
assert (benchmark._benchmark_type == BenchmarkType.MICRO)
assert (benchmark.run())
assert (benchmark.return_code == ReturnCode.SUCCESS)
# Check command list
expected_command = ['echo "--htod"', 'echo "--dtoh"', 'echo "--dtod"']
for i in range(len(expected_command)):
command = benchmark._bin_name + benchmark._commands[i].split(benchmark._bin_name)[1]
assert (command == expected_command[i])
for i, metric in enumerate(['H2D_Mem_BW', 'D2H_Mem_BW', 'D2D_Mem_BW']):
assert (metric in benchmark.result)
assert (len(benchmark.result[metric]) == 1)
# Negative case - INVALID_ARGUMENT.
benchmark = FakeMemBwBenchmark('fake', parameters='--memory fake')
assert (benchmark._benchmark_type == BenchmarkType.MICRO)
assert (benchmark.run() is False)
assert (benchmark.return_code == ReturnCode.INVALID_ARGUMENT)
benchmark = FakeMemBwBenchmark('fake', parameters='--mem_type fake')
assert (benchmark._benchmark_type == BenchmarkType.MICRO)
assert (benchmark.run() is False)
assert (benchmark.return_code == ReturnCode.INVALID_ARGUMENT)