Benchmarks: Add benchmark - add micro-benchmark for cuBLAS functions (#80)

* add benchmark for cublas test

* format

* revise error handling and test

* add interface to read json file, revise json file path and include .json in packaging

* add random_seed in arguments

* revise preprocess of cublas benchmark

* fix lint error and note error in source code

* update according to comments

* revise input argument from a json file to a custom string and convert the json file contents to a built-in dict list

* restore package config

* fix lint issue

* update platform and comments

* rename files to match the source code dir and fix comment errors

Co-authored-by: root <root@sb-validation-000001.51z1chmys5fuzfqyo4niepozre.bx.internal.cloudapp.net>
Yuting Jiang 2021-05-31 10:31:53 +08:00 committed by GitHub
Parent 8b4f613a76
Commit 18398fbaa2
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
6 changed files: 417 additions and 3 deletions


@@ -0,0 +1,25 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""Micro benchmark example for cublas performance benchmark.
Commands to run:
python3 examples/benchmarks/cublas_function.py
"""
from superbench.benchmarks import BenchmarkRegistry, Platform
from superbench.common.utils import logger
if __name__ == '__main__':
parameters = '--num_warmup 8 --num_steps 100 --num_in_step 1000'
context = BenchmarkRegistry.create_benchmark_context(
'cublas-function', platform=Platform.CUDA, parameters=parameters
)
benchmark = BenchmarkRegistry.launch_benchmark(context)
if benchmark:
logger.info(
'benchmark: {}, return code: {}, result: {}'.format(
benchmark.name, benchmark.return_code, benchmark.result
)
)
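
The example above runs the full built-in list of function configurations. The benchmark also accepts a `--config_json_str` argument (defined in `cublas_function.py` below); the following is a minimal sketch of launching a single custom cuBLAS GEMM configuration through the same registry API, with the config string borrowed from the unit test in this commit:

from superbench.benchmarks import BenchmarkRegistry, Platform
from superbench.common.utils import logger

if __name__ == '__main__':
    # Sketch: benchmark only one cuBLAS GEMM shape instead of the default config list.
    # The JSON string follows the same schema as the default config dicts in CublasBenchmark.
    custom_config = '{"name":"cublasCgemm","m":512,"n":512,"k":32,"transa":1,"transb":0}'
    parameters = '--num_warmup 8 --num_steps 100 --num_in_step 1000 --config_json_str ' + custom_config
    context = BenchmarkRegistry.create_benchmark_context(
        'cublas-function', platform=Platform.CUDA, parameters=parameters
    )
    benchmark = BenchmarkRegistry.launch_benchmark(context)
    if benchmark:
        logger.info('benchmark: {}, return code: {}'.format(benchmark.name, benchmark.return_code))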


@@ -7,7 +7,9 @@ from superbench.benchmarks.micro_benchmarks.micro_base import MicroBenchmark, MicroBenchmarkWithInvoke
from superbench.benchmarks.micro_benchmarks.sharding_matmul import ShardingMatmul
from superbench.benchmarks.micro_benchmarks.computation_communication_overlap import ComputationCommunicationOverlap
from superbench.benchmarks.micro_benchmarks.kernel_launch_overhead import KernelLaunch
+from superbench.benchmarks.micro_benchmarks.cublas_function import CublasBenchmark

__all__ = [
-    'MicroBenchmark', 'MicroBenchmarkWithInvoke', 'ShardingMatmul', 'ComputationCommunicationOverlap', 'KernelLaunch'
+    'MicroBenchmark', 'MicroBenchmarkWithInvoke', 'ShardingMatmul', 'ComputationCommunicationOverlap', 'KernelLaunch',
+    'CublasBenchmark'
]


@@ -0,0 +1,307 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""Module of the cublas functions benchmarks."""
import os
import json
import yaml
from superbench.common.utils import logger
from superbench.benchmarks import Platform, BenchmarkRegistry
from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke


class CublasBenchmark(MicroBenchmarkWithInvoke):
    """The Cublas performance benchmark class."""
    def __init__(self, name, parameters=''):
        """Constructor.

        Args:
            name (str): benchmark name.
            parameters (str): benchmark parameters.
        """
        super().__init__(name, parameters)

        self.__default_params_dict_list = [
            {'name': 'cublasCgemm', 'm': 512, 'n': 512, 'k': 32, 'transa': 1, 'transb': 0},
            {'name': 'cublasCgemm', 'm': 2048, 'n': 512, 'k': 32, 'transa': 1, 'transb': 0},
            {'name': 'cublasCgemm', 'm': 512, 'n': 2048, 'k': 32, 'transa': 1, 'transb': 0},
            {'name': 'cublasCgemm', 'm': 640, 'n': 1280, 'k': 32, 'transa': 1, 'transb': 0},
            {'name': 'cublasCgemm', 'm': 896, 'n': 1792, 'k': 32, 'transa': 1, 'transb': 0},
            {'name': 'cublasCgemm3mStridedBatched', 'm': 64, 'n': 32, 'k': 3, 'transa': 0, 'transb': 1, 'batchCount': 544},
            {'name': 'cublasCgemm3mStridedBatched', 'm': 64, 'n': 32, 'k': 64, 'transa': 1, 'transb': 0, 'batchCount': 544},
            {'name': 'cublasCgemm3mStridedBatched', 'm': 128, 'n': 32, 'k': 128, 'transa': 0, 'transb': 1, 'batchCount': 544},
            {'name': 'cublasCgemm3mStridedBatched', 'm': 128, 'n': 32, 'k': 64, 'transa': 0, 'transb': 1, 'batchCount': 544},
            {'name': 'cublasCgemm3mStridedBatched', 'm': 64, 'n': 32, 'k': 128, 'transa': 0, 'transb': 1, 'batchCount': 544},
            {'name': 'cublasGemmStridedBatchedEx', 'm': 224, 'n': 224, 'k': 64, 'transa': 0, 'transb': 0, 'datatype': 'half', 'use_tensor_core': True, 'batchCount': 160},
            {'name': 'cublasGemmStridedBatchedEx', 'm': 64, 'n': 224, 'k': 224, 'transa': 0, 'transb': 0, 'datatype': 'half', 'use_tensor_core': True, 'batchCount': 160},
            {'name': 'cublasGemmEx', 'm': 4000, 'n': 224, 'k': 1000, 'transa': 0, 'transb': 0, 'datatype': 'float', 'use_tensor_core': False},
            {'name': 'cublasGemmEx', 'm': 4000, 'n': 224, 'k': 1000, 'transa': 1, 'transb': 0, 'datatype': 'half', 'use_tensor_core': True},
            {'name': 'cublasGemmEx', 'm': 1000, 'n': 224, 'k': 4000, 'transa': 0, 'transb': 0, 'datatype': 'half', 'use_tensor_core': False},
            {'name': 'cublasGemmEx', 'm': 1000, 'n': 224, 'k': 4000, 'transa': 0, 'transb': 0, 'datatype': 'float', 'use_tensor_core': False},
            {'name': 'cublasSgemm', 'm': 1024, 'n': 7168, 'k': 1024, 'transa': 1, 'transb': 0},
            {'name': 'cublasSgemmStridedBatched', 'm': 64, 'n': 224, 'k': 224, 'transa': 0, 'transb': 0, 'batchCount': 512},
            {'name': 'cublasSgemmStridedBatched', 'm': 64, 'n': 224, 'k': 224, 'transa': 0, 'transb': 0, 'batchCount': 160}
        ]

        self._bin_name = 'CublasBenchmark'

    def add_parser_arguments(self):
        """Add the specified arguments."""
        super().add_parser_arguments()

        self._parser.add_argument(
            '--num_warmup',
            type=int,
            default=8,
            required=False,
            help='The number of warmup steps.',
        )
        self._parser.add_argument(
            '--num_steps',
            type=int,
            default=100,
            required=False,
            help='The number of test steps.',
        )
        self._parser.add_argument(
            '--num_in_step',
            type=int,
            default=1000,
            required=False,
            help='The number of functions in one step.',
        )
        self._parser.add_argument(
            '--random_seed',
            type=int,
            default=33931,
            required=False,
            help='The random seed used to fill the data of the function.',
        )
        self._parser.add_argument(
            '--config_json_str',
            type=str,
            default=None,
            required=False,
            help='The custom JSON string defining the params of a cublas function.',
        )

    def _preprocess(self):
        """Preprocess/preparation operations before the benchmarking.

        Return:
            True if _preprocess() succeed.
        """
        if not super()._preprocess():
            return False

        command = os.path.join(self._args.bin_dir, self._bin_name)
        command += (' --num_test ' + str(self._args.num_steps))
        command += (' --warm_up ' + str(self._args.num_warmup))
        command += (' --num_in_step ' + str(self._args.num_in_step))
        command += (' --random_seed ' + str(self._args.random_seed))

        try:
            if not self._args.config_json_str:
                for config_dict in self.__default_params_dict_list:
                    config_json_str = "'" + json.dumps(config_dict).replace(' ', '') + "'"
                    complete_command = command + ' --config_json ' + config_json_str
                    self._commands.append(complete_command)
            else:
                custom_config_dict = yaml.safe_load(self._args.config_json_str)
                config_json_str = "'" + json.dumps(custom_config_dict).replace(' ', '') + "'"
                complete_command = command + ' --config_json ' + config_json_str
                self._commands.append(complete_command)
        except BaseException as e:
            logger.error('Invalid input params - benchmark: {}, message: {}'.format(self._name, str(e)))
            return False

        return True

    def _process_raw_result(self, cmd_idx, raw_output):
        """Function to process raw results and save the summarized results.

        self._result.add_raw_data() and self._result.add_result() need to be called to save the results.

        Args:
            cmd_idx (int): the index of command corresponding with the raw_output.
            raw_output (str): raw output string of the micro-benchmark.

        Return:
            True if the raw output string is valid and result can be extracted.
        """
        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)

        try:
            lines = raw_output.splitlines()
            metric = ''
            error = False
            raw_data = []
            for line in lines:
                if '[function config]' in line:
                    metric = line[line.index('[function config]: ') + len('[function config]: '):]
                if '[raw_data]' in line:
                    raw_data = line[line.index('[raw_data]: ') + len('[raw_data]: '):]
                    raw_data = raw_data.split(',')
                    raw_data.pop()
                    raw_data = [float(item) for item in raw_data]
                    self._result.add_result(metric, sum(raw_data) / len(raw_data))
                    self._result.add_raw_data(metric, raw_data)
                if 'Error' in line:
                    error = True
        except BaseException as e:
            logger.error(
                'Cannot extract results from cublas functions - round: {}, index of cmd: {}, '
                'benchmark: {}, raw data: {}, message: {}'.format(
                    self._curr_run_index, cmd_idx, self._name, raw_output, str(e)
                )
            )
            return False
        if error:
            logger.error(
                'Error in running cublas test - round: {}, index of cmd: {}, benchmark: {}, raw data: {}'.format(
                    self._curr_run_index, cmd_idx, self._name, raw_output
                )
            )
            return False
        return True


BenchmarkRegistry.register_benchmark('cublas-function', CublasBenchmark, platform=Platform.CUDA)
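
For readers skimming `_process_raw_result()`, here is a minimal, self-contained sketch of the same extraction applied to a hypothetical output fragment. The sample lines follow the '[function config]' / '[raw_data]' format the parser expects; they are illustrative, not captured from a real run:

# Sketch of the parsing done in _process_raw_result(), on a hypothetical output fragment.
sample_output = (
    '[function config]: {"name":"cublasCgemm","m":512,"n":512,"k":32,"transa":1,"transb":0}\n'
    '[raw_data]: 0.012,0.011,0.013,\n'
)

metric = ''
for line in sample_output.splitlines():
    if '[function config]' in line:
        # The metric name is the serialized function config following the prefix.
        metric = line[line.index('[function config]: ') + len('[function config]: '):]
    if '[raw_data]' in line:
        # Values are comma separated with a trailing comma, hence the pop().
        values = line[line.index('[raw_data]: ') + len('[raw_data]: '):].split(',')
        values.pop()
        values = [float(v) for v in values]
        print(metric, sum(values) / len(values))    # mean of the reported values for this config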


@@ -199,7 +199,6 @@ void run_benchmark(Options &options) {
function.set_random_seed(options.random_seed);
CublasFunction *p_function = get_cublas_function_pointer(function);
p_function->benchmark();
-std::cout << "~delete" << std::endl;
delete p_function;
} catch (std::exception &e) {
std::cout << "Error: " << e.what() << std::endl;


@@ -17,7 +17,8 @@
* num_test: test step nums
* warm_up: warm up step nums
* num_in_step: times each step will invoke the function
-* config path: the path of 'para_info.json'
+* random_seed: the random seed to generate data
+* config_json: the json string including the params of the function
* functions supported:
* cublasSgemm
* cublasGemmEx
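
Putting the parameter list above together with `_preprocess()` in `cublas_function.py`, each generated command for the benchmark binary looks roughly like the sketch below. The `bin_dir` path and the config values are illustrative assumptions, not taken from a real run; the parameter defaults match the argparse defaults shown earlier:

import json
import os

# Sketch: rebuild the per-config command line the way _preprocess() does.
# bin_dir and the config dict are illustrative; in SuperBench bin_dir is resolved at runtime.
bin_dir = '/opt/superbench/bin'
config = {'name': 'cublasCgemm', 'm': 512, 'n': 512, 'k': 32, 'transa': 1, 'transb': 0}

command = os.path.join(bin_dir, 'CublasBenchmark')
command += ' --num_test 100 --warm_up 8 --num_in_step 1000 --random_seed 33931'
command += " --config_json '" + json.dumps(config).replace(' ', '') + "'"
print(command)
# -> /opt/superbench/bin/CublasBenchmark --num_test 100 --warm_up 8 --num_in_step 1000 --random_seed 33931 --config_json '{"name":"cublasCgemm","m":512,"n":512,"k":32,"transa":1,"transb":0}'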


@@ -0,0 +1,80 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Tests for cublas-functions benchmark."""
import numbers
from tests.helper import decorator
from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform

@decorator.cuda_test
def test_cublas_functions():
    """Test cublas-function benchmark."""
    # Test for default configuration
    context = BenchmarkRegistry.create_benchmark_context(
        'cublas-function', platform=Platform.CUDA, parameters='--num_warmup 10 --num_steps 10 --num_in_step 100'
    )

    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (benchmark.name == 'cublas-function')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.num_warmup == 10)
    assert (benchmark._args.num_steps == 10)
    assert (benchmark._args.num_in_step == 100)

    # Check results and metrics.
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    assert ('raw_output_0' in benchmark.raw_data)
    assert (len(benchmark.raw_data['raw_output_0']) == 1)
    assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))
    assert (19 <= len(benchmark.result))
    for metric in list(benchmark.result.keys()):
        assert (len(benchmark.result[metric]) == 1)
        assert (isinstance(benchmark.result[metric][0], numbers.Number))
        assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)

    # Test for custom configuration
    custom_config_str = '{"name":"cublasCgemm","m":512,"n":512,"k":32,"transa":1,"transb":0}'
    context = BenchmarkRegistry.create_benchmark_context(
        'cublas-function',
        platform=Platform.CUDA,
        parameters='--num_warmup 10 --num_steps 10 --num_in_step 100 --config_json_str ' + custom_config_str
    )

    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (benchmark.name == 'cublas-function')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.num_warmup == 10)
    assert (benchmark._args.num_steps == 10)
    assert (benchmark._args.num_in_step == 100)

    # Check results and metrics.
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    assert ('raw_output_0' in benchmark.raw_data)
    assert (len(benchmark.raw_data['raw_output_0']) == 1)
    assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))
    assert (1 == len(benchmark.result))
    for metric in list(benchmark.result.keys()):
        assert (len(benchmark.result[metric]) == 1)
        assert (isinstance(benchmark.result[metric][0], numbers.Number))
        assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
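
The custom configuration string used in this test is parsed in `_preprocess()` with `yaml.safe_load()` and then re-serialized to canonical JSON before being passed to the binary. A minimal sketch of that round trip, with the values borrowed from the test above (note that `yaml.safe_load()` also accepts plain JSON, since JSON is valid YAML):

import json
import yaml

# Sketch: reproduce the config round trip performed in _preprocess().
custom_str = '{"name":"cublasCgemm","m":512,"n":512,"k":32,"transa":1,"transb":0}'
config = yaml.safe_load(custom_str)    # parsed into a built-in dict
# Re-serialize without spaces, as done before appending '--config_json' to the command.
print(json.dumps(config).replace(' ', ''))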