Benchmarks: Add benchmark - add micro benchmark for cudnn test (#89)

* Add Python-related cuDNN micro-benchmark
This commit is contained in:
Yuting Jiang 2021-06-01 22:24:35 +08:00 коммит произвёл GitHub
Родитель 0831748167
Коммит 83235433b2
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
4 изменённых файлов: 552 добавлений и 1 удалений

Просмотреть файл

@ -0,0 +1,25 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""Micro benchmark example for cudnn performance benchmark.
Commands to run:
python3 examples/benchmarks/cudnn_function.py
"""
from superbench.benchmarks import BenchmarkRegistry, Platform
from superbench.common.utils import logger
if __name__ == '__main__':
    # Command-line style arguments forwarded to the cudnn-function benchmark.
    benchmark_args = '--num_warmup 8 --num_steps 100 --num_in_step 1000'

    context = BenchmarkRegistry.create_benchmark_context(
        'cudnn-function',
        platform=Platform.CUDA,
        parameters=benchmark_args,
    )

    benchmark = BenchmarkRegistry.launch_benchmark(context)
    if benchmark:
        # Build the summary first, then log it in a single call.
        summary = 'benchmark: {}, return code: {}, result: {}'.format(
            benchmark.name, benchmark.return_code, benchmark.result
        )
        logger.info(summary)

Просмотреть файл

@ -8,8 +8,9 @@ from superbench.benchmarks.micro_benchmarks.sharding_matmul import ShardingMatmu
from superbench.benchmarks.micro_benchmarks.computation_communication_overlap import ComputationCommunicationOverlap
from superbench.benchmarks.micro_benchmarks.kernel_launch_overhead import KernelLaunch
from superbench.benchmarks.micro_benchmarks.cublas_function import CublasBenchmark
from superbench.benchmarks.micro_benchmarks.cudnn_function import CudnnBenchmark
__all__ = [
'MicroBenchmark', 'MicroBenchmarkWithInvoke', 'ShardingMatmul', 'ComputationCommunicationOverlap', 'KernelLaunch',
'CublasBenchmark'
'CublasBenchmark', 'CudnnBenchmark'
]

Просмотреть файл

@ -0,0 +1,441 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""Module of the cudnn functions benchmarks."""
import os
import json
import yaml
from superbench.common.utils import logger
from superbench.benchmarks import Platform, BenchmarkRegistry, ReturnCode
from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke
class CudnnBenchmark(MicroBenchmarkWithInvoke):
    """The cudnn performance benchmark class.

    Builds one command line per cudnn function configuration (either the
    built-in defaults or a single user-supplied JSON configuration) and parses
    the '[function config]' / '[raw_data]' lines found in the command output.
    """
    def __init__(self, name, parameters=''):
        """Constructor.

        Args:
            name (str): benchmark name.
            parameters (str): benchmark parameters.
        """
        super().__init__(name, parameters)

        # Default function configurations, used when --config_json_str is not
        # given. Each dict is serialized to JSON and passed via --config_json,
        # so the key order below is what ends up on the command line.
        self.__default_params_dict_list = [
            # cudnnConvolutionBackwardFilter configurations.
            {
                'name': 'cudnnConvolutionBackwardFilter',
                'algo': 0,
                'inputDims': [32, 128, 14, 14],
                'inputStride': [25088, 196, 14, 1],
                'inputType': 0,
                'outputDims': [32, 32, 14, 14],
                'outputStride': [6272, 196, 14, 1],
                'convType': 0,
                'tensorOp': False,
                'arrayLength': 2,
                'padA': [1, 1],
                'filterStrideA': [1, 1],
                'dilationA': [1, 1],
                'filterDims': [32, 128, 3, 3],
                'mode': 1
            },
            {
                'name': 'cudnnConvolutionBackwardFilter',
                'algo': 1,
                'inputDims': [32, 128, 14, 14],
                'inputStride': [25088, 196, 14, 1],
                'inputType': 2,
                'outputDims': [32, 32, 14, 14],
                'outputStride': [6272, 196, 14, 1],
                'convType': 0,
                'tensorOp': True,
                'arrayLength': 2,
                'padA': [1, 1],
                'filterStrideA': [1, 1],
                'dilationA': [1, 1],
                'filterDims': [32, 128, 3, 3],
                'mode': 1
            },
            {
                'name': 'cudnnConvolutionBackwardFilter',
                'algo': 1,
                'inputDims': [32, 256, 14, 14],
                'inputStride': [50176, 196, 14, 1],
                'inputType': 0,
                'outputDims': [32, 1024, 14, 14],
                'outputStride': [200704, 196, 14, 1],
                'convType': 0,
                'tensorOp': False,
                'arrayLength': 2,
                'padA': [0, 0],
                'filterStrideA': [1, 1],
                'dilationA': [1, 1],
                'filterDims': [1024, 256, 1, 1],
                'mode': 1
            },
            {
                'name': 'cudnnConvolutionBackwardFilter',
                'algo': 1,
                'inputDims': [32, 256, 14, 14],
                'inputStride': [50176, 196, 14, 1],
                'inputType': 2,
                'outputDims': [32, 1024, 14, 14],
                'outputStride': [200704, 196, 14, 1],
                'convType': 0,
                'tensorOp': True,
                'arrayLength': 2,
                'padA': [0, 0],
                'filterStrideA': [1, 1],
                'dilationA': [1, 1],
                'filterDims': [1024, 256, 1, 1],
                'mode': 1
            },
            {
                'name': 'cudnnConvolutionBackwardFilter',
                'algo': 1,
                'inputDims': [32, 512, 14, 14],
                'inputStride': [100352, 196, 14, 1],
                'inputType': 0,
                'outputDims': [32, 512, 14, 14],
                'outputStride': [100352, 196, 14, 1],
                'convType': 0,
                'tensorOp': False,
                'arrayLength': 2,
                'padA': [1, 1],
                'filterStrideA': [1, 1],
                'dilationA': [1, 1],
                'filterDims': [512, 512, 3, 3],
                'mode': 1
            },
            {
                'name': 'cudnnConvolutionBackwardFilter',
                'algo': 1,
                'inputDims': [32, 512, 14, 14],
                'inputStride': [100352, 196, 14, 1],
                'inputType': 2,
                'outputDims': [32, 512, 14, 14],
                'outputStride': [100352, 196, 14, 1],
                'convType': 0,
                'tensorOp': True,
                'arrayLength': 2,
                'padA': [1, 1],
                'filterStrideA': [1, 1],
                'dilationA': [1, 1],
                'filterDims': [512, 512, 3, 3],
                'mode': 1
            },
            # cudnnConvolutionBackwardData configurations.
            {
                'name': 'cudnnConvolutionBackwardData',
                'algo': 1,
                'filterDims': [32, 128, 3, 3],
                'inputType': 2,
                'inputDims': [32, 32, 14, 14],
                'inputStride': [6272, 196, 14, 1],
                'outputDims': [32, 128, 14, 14],
                'outputStride': [25088, 196, 14, 1],
                'convType': 0,
                'tensorOp': True,
                'arrayLength': 2,
                'padA': [1, 1],
                'dilationA': [1, 1],
                'filterStrideA': [1, 1],
                'mode': 1
            },
            {
                'name': 'cudnnConvolutionBackwardData',
                'algo': 4,
                'filterDims': [32, 128, 3, 3],
                'inputType': 0,
                'inputDims': [32, 32, 14, 14],
                'inputStride': [6272, 196, 14, 1],
                'outputDims': [32, 128, 14, 14],
                'outputStride': [25088, 196, 14, 1],
                'convType': 0,
                'tensorOp': False,
                'arrayLength': 2,
                'padA': [1, 1],
                'dilationA': [1, 1],
                'filterStrideA': [1, 1],
                'mode': 1
            },
            {
                'name': 'cudnnConvolutionBackwardData',
                'algo': 1,
                'filterDims': [1024, 256, 1, 1],
                'inputType': 0,
                'inputDims': [32, 1024, 14, 14],
                'inputStride': [200704, 196, 14, 1],
                'outputDims': [32, 256, 14, 14],
                'outputStride': [50176, 196, 14, 1],
                'convType': 0,
                'tensorOp': False,
                'arrayLength': 2,
                'padA': [0, 0],
                'dilationA': [1, 1],
                'filterStrideA': [1, 1],
                'mode': 1
            },
            {
                'name': 'cudnnConvolutionBackwardData',
                'algo': 1,
                'filterDims': [1024, 256, 1, 1],
                'inputType': 2,
                'inputDims': [32, 1024, 14, 14],
                'inputStride': [200704, 196, 14, 1],
                'outputDims': [32, 256, 14, 14],
                'outputStride': [50176, 196, 14, 1],
                'convType': 0,
                'tensorOp': True,
                'arrayLength': 2,
                'padA': [0, 0],
                'dilationA': [1, 1],
                'filterStrideA': [1, 1],
                'mode': 1
            },
            {
                'name': 'cudnnConvolutionBackwardData',
                'algo': 1,
                'filterDims': [512, 512, 3, 3],
                'inputType': 0,
                'inputDims': [32, 512, 14, 14],
                'inputStride': [100352, 196, 14, 1],
                'outputDims': [32, 512, 14, 14],
                'outputStride': [100352, 196, 14, 1],
                'convType': 0,
                'tensorOp': False,
                'arrayLength': 2,
                'padA': [1, 1],
                'dilationA': [1, 1],
                'filterStrideA': [1, 1],
                'mode': 1
            },
            {
                'name': 'cudnnConvolutionBackwardData',
                'algo': 1,
                'filterDims': [512, 512, 3, 3],
                'inputType': 2,
                'inputDims': [32, 512, 14, 14],
                'inputStride': [100352, 196, 14, 1],
                'outputDims': [32, 512, 14, 14],
                'outputStride': [100352, 196, 14, 1],
                'convType': 0,
                'tensorOp': True,
                'arrayLength': 2,
                'padA': [1, 1],
                'dilationA': [1, 1],
                'filterStrideA': [1, 1],
                'mode': 1
            },
            # cudnnConvolutionForward configurations.
            {
                'name': 'cudnnConvolutionForward',
                'inputDims': [32, 128, 14, 14],
                'inputStride': [25088, 196, 14, 1],
                'filterDims': [32, 128, 3, 3],
                'outputDims': [32, 32, 14, 14],
                'outputStride': [6272, 196, 14, 1],
                'arrayLength': 2,
                'padA': [1, 1],
                'filterStrideA': [1, 1],
                'dilationA': [1, 1],
                'mode': 1,
                'tensorOp': False,
                'inputType': 0,
                'convType': 0,
                'algo': 6
            },
            {
                'name': 'cudnnConvolutionForward',
                'inputDims': [32, 128, 14, 14],
                'inputStride': [25088, 196, 14, 1],
                'filterDims': [32, 128, 3, 3],
                'outputDims': [32, 32, 14, 14],
                'outputStride': [6272, 196, 14, 1],
                'arrayLength': 2,
                'padA': [1, 1],
                'filterStrideA': [1, 1],
                'dilationA': [1, 1],
                'mode': 1,
                'tensorOp': True,
                'inputType': 2,
                'convType': 0,
                'algo': 1
            },
            {
                'name': 'cudnnConvolutionForward',
                'inputDims': [32, 256, 14, 14],
                'inputStride': [50176, 196, 14, 1],
                'filterDims': [1024, 256, 1, 1],
                'outputDims': [32, 1024, 14, 14],
                'outputStride': [200704, 196, 14, 1],
                'arrayLength': 2,
                'padA': [0, 0],
                'filterStrideA': [1, 1],
                'dilationA': [1, 1],
                'mode': 1,
                'tensorOp': False,
                'inputType': 0,
                'convType': 0,
                'algo': 1
            },
            {
                'name': 'cudnnConvolutionForward',
                'inputDims': [32, 256, 14, 14],
                'inputStride': [50176, 196, 14, 1],
                'filterDims': [1024, 256, 1, 1],
                'outputDims': [32, 1024, 14, 14],
                'outputStride': [200704, 196, 14, 1],
                'arrayLength': 2,
                'padA': [0, 0],
                'filterStrideA': [1, 1],
                'dilationA': [1, 1],
                'mode': 1,
                'tensorOp': True,
                'inputType': 2,
                'convType': 0,
                'algo': 1
            },
            {
                'name': 'cudnnConvolutionForward',
                'inputDims': [32, 512, 14, 14],
                'inputStride': [100352, 196, 14, 1],
                'filterDims': [512, 512, 3, 3],
                'outputDims': [32, 512, 14, 14],
                'outputStride': [100352, 196, 14, 1],
                'arrayLength': 2,
                'padA': [1, 1],
                'filterStrideA': [1, 1],
                'dilationA': [1, 1],
                'mode': 1,
                'tensorOp': False,
                'inputType': 0,
                'convType': 0,
                'algo': 1
            },
            {
                'name': 'cudnnConvolutionForward',
                'inputDims': [32, 512, 14, 14],
                'inputStride': [100352, 196, 14, 1],
                'filterDims': [512, 512, 3, 3],
                'outputDims': [32, 512, 14, 14],
                'outputStride': [100352, 196, 14, 1],
                'arrayLength': 2,
                'padA': [1, 1],
                'filterStrideA': [1, 1],
                'dilationA': [1, 1],
                'mode': 1,
                'tensorOp': True,
                'inputType': 2,
                'convType': 0,
                'algo': 1
            },
        ]

        # File name joined with the bin_dir argument to form the command.
        self._bin_name = 'CudnnBenchmark'

    def add_parser_arguments(self):
        """Add the specified arguments."""
        super().add_parser_arguments()

        self._parser.add_argument(
            '--num_warmup',
            type=int,
            default=8,
            required=False,
            help='The number of warmup step.',
        )
        self._parser.add_argument(
            '--num_steps',
            type=int,
            default=100,
            required=False,
            help='The number of test step.',
        )
        self._parser.add_argument(
            '--num_in_step',
            type=int,
            default=1000,
            required=False,
            help='The number of functions in one step.',
        )
        self._parser.add_argument(
            '--random_seed',
            type=int,
            default=33931,
            required=False,
            help='The random seed to fill in the data of the function.',
        )
        self._parser.add_argument(
            '--config_json_str',
            type=str,
            default=None,
            required=False,
            help='The custom json string defining the params in a cudnn function.',
        )

    def _preprocess(self):
        """Preprocess/preparation operations before the benchmarking.

        Appends one command to self._commands per function configuration: all
        default configurations when --config_json_str is not given, otherwise
        the single custom configuration.

        Return:
            True if _preprocess() succeed.
        """
        if not super()._preprocess():
            return False

        # Imported locally so this class does not depend on a new file-level import.
        import shlex

        command = os.path.join(self._args.bin_dir, self._bin_name)
        command += ' --num_test ' + str(self._args.num_steps)
        command += ' --warm_up ' + str(self._args.num_warmup)
        command += ' --num_in_step ' + str(self._args.num_in_step)
        command += ' --random_seed ' + str(self._args.random_seed)

        try:
            if not self._args.config_json_str:
                config_list = self.__default_params_dict_list
            else:
                # The custom string is parsed with the YAML loader, as before.
                config_list = [yaml.safe_load(self._args.config_json_str)]

            for config in config_list:
                # Compact separators drop the whitespace json.dumps inserts
                # without touching spaces inside string values.
                compact_json = json.dumps(config, separators=(',', ':'))
                # shlex.quote yields the same '...' wrapping for ordinary JSON
                # and additionally escapes embedded single quotes.
                # NOTE(review): assumes the command string is later interpreted
                # by a shell - confirm against the base class.
                self._commands.append(command + ' --config_json ' + shlex.quote(compact_json))
        except Exception as e:
            logger.error('Invalid input params - benchmark: {}, message: {}'.format(self._name, str(e)))
            self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
            return False

        return True

    def _process_raw_result(self, cmd_idx, raw_output):
        """Function to process raw results and save the summarized results.

        self._result.add_raw_data() and self._result.add_result() need to be called to save the results.

        Args:
            cmd_idx (int): the index of command corresponding with the raw_output.
            raw_output (str): raw output string of the micro-benchmark.

        Return:
            True if the raw output string is valid and result can be extracted.
        """
        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)

        config_tag = '[function config]: '
        data_tag = '[raw_data]: '
        metric = ''
        error = False

        try:
            for line in raw_output.splitlines():
                if '[function config]' in line:
                    # The rest of the line after the tag is used as the metric name.
                    metric = line[line.index(config_tag) + len(config_tag):]
                if '[raw_data]' in line:
                    fields = line[line.index(data_tag) + len(data_tag):].split(',')
                    # Drop only the empty token left by a trailing comma, so a
                    # line without one does not lose its last sample.
                    if fields and not fields[-1].strip():
                        fields.pop()
                    raw_data = [float(item) for item in fields]
                    # An empty sample list raises ZeroDivisionError here and is
                    # reported through the except branch below.
                    self._result.add_result(metric, sum(raw_data) / len(raw_data))
                    self._result.add_raw_data(metric, raw_data)
                if 'Error' in line:
                    error = True
        except Exception as e:
            logger.error(
                'Cannot extract results from cudnn functions - round: {}, index of cmd: {}, '
                'benchmark: {}, raw data: {}, message: {}'.format(
                    self._curr_run_index, cmd_idx, self._name, raw_output, str(e)
                )
            )
            return False

        if error:
            logger.error(
                'Error in running cudnn test - round: {}, index of cmd: {}, benchmark: {}, raw data: {}'.format(
                    self._curr_run_index, cmd_idx, self._name, raw_output
                )
            )
            return False

        return True


BenchmarkRegistry.register_benchmark('cudnn-function', CudnnBenchmark, platform=Platform.CUDA)

Просмотреть файл

@ -0,0 +1,84 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Tests for cudnn-functions benchmark."""
import numbers
from tests.helper import decorator
from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform
@decorator.cuda_test
def test_cudnn_functions():
    """Test cudnn-function benchmark."""
    common_parameters = '--num_warmup 10 --num_steps 10 --num_in_step 100'

    # A single custom function configuration, passed as compact JSON.
    custom_config_str = (
        '{"algo":0,"arrayLength":2,"convType":0,"dilationA":[1,1],"filterStrideA":[1,1],'
        '"filterDims":[32,128,3,3],"inputDims":[32,128,14,14],"inputStride":[25088,196,14,1],"inputType":0,'
        '"mode":1,"name":"cudnnConvolutionBackwardFilter","outputDims":[32,32,14,14],'
        '"outputStride":[6272,196,14,1],"padA":[1,1],"tensorOp":false}'
    )

    # Each case is (benchmark parameters, predicate on the number of result metrics).
    cases = [
        # Default configuration.
        (common_parameters, lambda count: 18 <= count),
        # Custom configuration.
        (common_parameters + ' --config_json_str ' + custom_config_str, lambda count: 1 == count),
    ]

    for case_parameters, result_count_ok in cases:
        context = BenchmarkRegistry.create_benchmark_context(
            'cudnn-function', platform=Platform.CUDA, parameters=case_parameters
        )
        assert (BenchmarkRegistry.is_benchmark_context_valid(context))

        benchmark = BenchmarkRegistry.launch_benchmark(context)

        # Check basic information.
        assert (benchmark)
        assert (benchmark.name == 'cudnn-function')
        assert (benchmark.type == BenchmarkType.MICRO)

        # Check parameters specified in BenchmarkContext.
        assert (benchmark._args.num_warmup == 10)
        assert (benchmark._args.num_steps == 10)
        assert (benchmark._args.num_in_step == 100)

        # Check results and metrics.
        assert (benchmark.run_count == 1)
        assert (benchmark.return_code == ReturnCode.SUCCESS)
        assert ('raw_output_0' in benchmark.raw_data)
        first_output = benchmark.raw_data['raw_output_0']
        assert (len(first_output) == 1)
        assert (isinstance(first_output[0], str))

        assert (result_count_ok(len(benchmark.result)))
        for metric in list(benchmark.result.keys()):
            values = benchmark.result[metric]
            assert (len(values) == 1)
            assert (isinstance(values[0], numbers.Number))
            assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)