Benchmarks: Add benchmark - add micro benchmark for cublas test (#80)
* add benchmark for cublas test
* format
* revise error handling and test
* add interface to read json file, revise json file path and include .json in packaging
* add random_seed in arguments
* revise preprocess of cublas benchmark
* fix lint error and note error in source code
* update according to comments
* revise input arguments from json file to custom str and convert json file to built-in dict list
* restore package config
* fix lint issue
* update platform and comments
* rename files to match source code dir and fix comment errors

Co-authored-by: root <root@sb-validation-000001.51z1chmys5fuzfqyo4niepozre.bx.internal.cloudapp.net>
Parent: 8b4f613a76
Commit: 18398fbaa2
examples/benchmarks/cublas_function.py
@@ -0,0 +1,25 @@

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""Micro benchmark example for cublas performance benchmark.

Commands to run:
  python3 examples/benchmarks/cublas_function.py
"""

from superbench.benchmarks import BenchmarkRegistry, Platform
from superbench.common.utils import logger

if __name__ == '__main__':
    parameters = '--num_warmup 8 --num_steps 100 --num_in_step 1000'
    context = BenchmarkRegistry.create_benchmark_context(
        'cublas-function', platform=Platform.CUDA, parameters=parameters
    )

    benchmark = BenchmarkRegistry.launch_benchmark(context)
    if benchmark:
        logger.info(
            'benchmark: {}, return code: {}, result: {}'.format(
                benchmark.name, benchmark.return_code, benchmark.result
            )
        )

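As a hedged sketch of the custom-configuration path this commit also adds (reusing the imports from the example above), the new --config_json_str argument can be appended to the parameters string; the JSON value here is only a sample config, the same one used by the unit test at the end of this commit:

# Sketch only: run a single user-specified cuBLAS config instead of the built-in list.
custom_config_str = '{"name":"cublasCgemm","m":512,"n":512,"k":32,"transa":1,"transb":0}'
parameters = '--num_warmup 8 --num_steps 100 --num_in_step 1000 --config_json_str ' + custom_config_str
context = BenchmarkRegistry.create_benchmark_context(
    'cublas-function', platform=Platform.CUDA, parameters=parameters
)
benchmark = BenchmarkRegistry.launch_benchmark(context)
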
superbench/benchmarks/micro_benchmarks/__init__.py
@@ -7,7 +7,9 @@ from superbench.benchmarks.micro_benchmarks.micro_base import MicroBenchmark, Mi

 from superbench.benchmarks.micro_benchmarks.sharding_matmul import ShardingMatmul
 from superbench.benchmarks.micro_benchmarks.computation_communication_overlap import ComputationCommunicationOverlap
 from superbench.benchmarks.micro_benchmarks.kernel_launch_overhead import KernelLaunch
+from superbench.benchmarks.micro_benchmarks.cublas_function import CublasBenchmark

 __all__ = [
-    'MicroBenchmark', 'MicroBenchmarkWithInvoke', 'ShardingMatmul', 'ComputationCommunicationOverlap', 'KernelLaunch'
+    'MicroBenchmark', 'MicroBenchmarkWithInvoke', 'ShardingMatmul', 'ComputationCommunicationOverlap', 'KernelLaunch',
+    'CublasBenchmark'
 ]

superbench/benchmarks/micro_benchmarks/cublas_function.py
@@ -0,0 +1,307 @@

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""Module of the cublas functions benchmarks."""

import os
import json
import yaml

from superbench.common.utils import logger
from superbench.benchmarks import Platform, BenchmarkRegistry
from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke


class CublasBenchmark(MicroBenchmarkWithInvoke):
    """The Cublas performance benchmark class."""
    def __init__(self, name, parameters=''):
        """Constructor.

        Args:
            name (str): benchmark name.
            parameters (str): benchmark parameters.
        """
        super().__init__(name, parameters)

        self.__default_params_dict_list = [
            {
                'name': 'cublasCgemm',
                'm': 512,
                'n': 512,
                'k': 32,
                'transa': 1,
                'transb': 0
            }, {
                'name': 'cublasCgemm',
                'm': 2048,
                'n': 512,
                'k': 32,
                'transa': 1,
                'transb': 0
            }, {
                'name': 'cublasCgemm',
                'm': 512,
                'n': 2048,
                'k': 32,
                'transa': 1,
                'transb': 0
            }, {
                'name': 'cublasCgemm',
                'm': 640,
                'n': 1280,
                'k': 32,
                'transa': 1,
                'transb': 0
            }, {
                'name': 'cublasCgemm',
                'm': 896,
                'n': 1792,
                'k': 32,
                'transa': 1,
                'transb': 0
            }, {
                'name': 'cublasCgemm3mStridedBatched',
                'm': 64,
                'n': 32,
                'k': 3,
                'transa': 0,
                'transb': 1,
                'batchCount': 544
            }, {
                'name': 'cublasCgemm3mStridedBatched',
                'm': 64,
                'n': 32,
                'k': 64,
                'transa': 1,
                'transb': 0,
                'batchCount': 544
            }, {
                'name': 'cublasCgemm3mStridedBatched',
                'm': 128,
                'n': 32,
                'k': 128,
                'transa': 0,
                'transb': 1,
                'batchCount': 544
            }, {
                'name': 'cublasCgemm3mStridedBatched',
                'm': 128,
                'n': 32,
                'k': 64,
                'transa': 0,
                'transb': 1,
                'batchCount': 544
            }, {
                'name': 'cublasCgemm3mStridedBatched',
                'm': 64,
                'n': 32,
                'k': 128,
                'transa': 0,
                'transb': 1,
                'batchCount': 544
            }, {
                'name': 'cublasGemmStridedBatchedEx',
                'm': 224,
                'n': 224,
                'k': 64,
                'transa': 0,
                'transb': 0,
                'datatype': 'half',
                'use_tensor_core': True,
                'batchCount': 160
            }, {
                'name': 'cublasGemmStridedBatchedEx',
                'm': 64,
                'n': 224,
                'k': 224,
                'transa': 0,
                'transb': 0,
                'datatype': 'half',
                'use_tensor_core': True,
                'batchCount': 160
            }, {
                'name': 'cublasGemmEx',
                'm': 4000,
                'n': 224,
                'k': 1000,
                'transa': 0,
                'transb': 0,
                'datatype': 'float',
                'use_tensor_core': False
            }, {
                'name': 'cublasGemmEx',
                'm': 4000,
                'n': 224,
                'k': 1000,
                'transa': 1,
                'transb': 0,
                'datatype': 'half',
                'use_tensor_core': True
            }, {
                'name': 'cublasGemmEx',
                'm': 1000,
                'n': 224,
                'k': 4000,
                'transa': 0,
                'transb': 0,
                'datatype': 'half',
                'use_tensor_core': False
            }, {
                'name': 'cublasGemmEx',
                'm': 1000,
                'n': 224,
                'k': 4000,
                'transa': 0,
                'transb': 0,
                'datatype': 'float',
                'use_tensor_core': False
            }, {
                'name': 'cublasSgemm',
                'm': 1024,
                'n': 7168,
                'k': 1024,
                'transa': 1,
                'transb': 0
            }, {
                'name': 'cublasSgemmStridedBatched',
                'm': 64,
                'n': 224,
                'k': 224,
                'transa': 0,
                'transb': 0,
                'batchCount': 512
            }, {
                'name': 'cublasSgemmStridedBatched',
                'm': 64,
                'n': 224,
                'k': 224,
                'transa': 0,
                'transb': 0,
                'batchCount': 160
            }
        ]

        self._bin_name = 'CublasBenchmark'

    def add_parser_arguments(self):
        """Add the specified arguments."""
        super().add_parser_arguments()
        self._parser.add_argument(
            '--num_warmup',
            type=int,
            default=8,
            required=False,
            help='The number of warmup steps.',
        )
        self._parser.add_argument(
            '--num_steps',
            type=int,
            default=100,
            required=False,
            help='The number of test steps.',
        )
        self._parser.add_argument(
            '--num_in_step',
            type=int,
            default=1000,
            required=False,
            help='The number of functions in one step.',
        )
        self._parser.add_argument(
            '--random_seed',
            type=int,
            default=33931,
            required=False,
            help='The random seed to fill in the data of the function.',
        )
        self._parser.add_argument(
            '--config_json_str',
            type=str,
            default=None,
            required=False,
            help='The custom json string defining the params in a cublas function.',
        )

    def _preprocess(self):
        """Preprocess/preparation operations before the benchmarking.

        Return:
            True if _preprocess() succeeds.
        """
        if not super()._preprocess():
            return False

        command = os.path.join(self._args.bin_dir, self._bin_name)
        command += (' --num_test ' + str(self._args.num_steps))
        command += (' --warm_up ' + str(self._args.num_warmup))
        command += (' --num_in_step ' + str(self._args.num_in_step))
        command += (' --random_seed ' + str(self._args.random_seed))

        try:
            if not self._args.config_json_str:
                for config_dict in self.__default_params_dict_list:
                    config_json_str = "'" + json.dumps(config_dict).replace(' ', '') + "'"
                    print(config_json_str)
                    complete_command = command + ' --config_json ' + config_json_str
                    self._commands.append(complete_command)
            else:
                custom_config_str = yaml.safe_load(self._args.config_json_str)
                config_json_str = "'" + json.dumps(custom_config_str).replace(' ', '') + "'"
                complete_command = command + ' --config_json ' + config_json_str
                self._commands.append(complete_command)
        except BaseException as e:
            logger.error('Invalid input params - benchmark: {}, message: {}'.format(self._name, str(e)))
            return False
        return True

    def _process_raw_result(self, cmd_idx, raw_output):
        """Function to process raw results and save the summarized results.

        self._result.add_raw_data() and self._result.add_result() need to be called to save the results.

        Args:
            cmd_idx (int): the index of command corresponding with the raw_output.
            raw_output (str): raw output string of the micro-benchmark.

        Return:
            True if the raw output string is valid and result can be extracted.
        """
        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)

        try:
            lines = raw_output.splitlines()
            metric = ''
            error = False
            raw_data = []
            for line in lines:
                if '[function config]' in line:
                    metric = line[line.index('[function config]: ') + len('[function config]: '):]
                if '[raw_data]' in line:
                    raw_data = line[line.index('[raw_data]: ') + len('[raw_data]: '):]
                    raw_data = raw_data.split(',')
                    raw_data.pop()
                    raw_data = [float(item) for item in raw_data]
                    self._result.add_result(metric, sum(raw_data) / len(raw_data))
                    self._result.add_raw_data(metric, raw_data)
                if 'Error' in line:
                    error = True
        except BaseException as e:
            logger.error(
                'Cannot extract results from cublas functions - round: {}, index of cmd: {}, '
                'benchmark: {}, raw data: {}, message: {}'.format(
                    self._curr_run_index, cmd_idx, self._name, raw_output, str(e)
                )
            )
            return False
        if error:
            logger.error(
                'Error in running cublas test - round: {}, index of cmd: {}, benchmark: {}, raw data: {}'.format(
                    self._curr_run_index, cmd_idx, self._name, raw_output
                )
            )
            return False
        return True


BenchmarkRegistry.register_benchmark('cublas-function', CublasBenchmark, platform=Platform.CUDA)

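For context, a hedged illustration of the output format _process_raw_result expects from the binary: the '[function config]: ' and '[raw_data]: ' prefixes are taken from the parser above, but the sample lines and timing values below are hypothetical, not captured from a real run.

# Hypothetical sample output; only the line prefixes come from the parser above.
sample_output = (
    '[function config]: {"name":"cublasSgemm","m":1024,"n":7168,"k":1024,"transa":1,"transb":0}\n'
    '[raw_data]: 0.012,0.011,0.013,\n'
)

metric = ''
for line in sample_output.splitlines():
    if '[function config]' in line:
        # The config json string becomes the metric name.
        metric = line[line.index('[function config]: ') + len('[function config]: '):]
    if '[raw_data]' in line:
        # Comma-separated per-step times; the trailing comma leaves one empty field to drop.
        fields = line[line.index('[raw_data]: ') + len('[raw_data]: '):].split(',')
        fields.pop()
        values = [float(item) for item in fields]
        print(metric, sum(values) / len(values))    # the reported result is the mean over steps
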
@@ -199,7 +199,6 @@ void run_benchmark(Options &options) {
         function.set_random_seed(options.random_seed);
         CublasFunction *p_function = get_cublas_function_pointer(function);
         p_function->benchmark();
-        std::cout << "~delete" << std::endl;
         delete p_function;
     } catch (std::exception &e) {
         std::cout << "Error: " << e.what() << std::endl;

@@ -17,7 +17,8 @@
 * num_test: test step nums
 * warm_up: warm up step nums
 * num_in_step: times each step will invoke the function
-* config path: the path of 'para_info.json'
+* random_seed: the random seed to generate data
+* config_json: the json string including the params of the function
 * functions supported:
 * cublasSgemm
 * cublasGemmEx

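For illustration, the Python wrapper's _preprocess method shown earlier assembles invocations of roughly the following shape; the flag names and their order come from that code, and the values are simply its defaults paired with one of the built-in configs:

CublasBenchmark --num_test 100 --warm_up 8 --num_in_step 1000 --random_seed 33931 --config_json '{"name":"cublasSgemm","m":1024,"n":7168,"k":1024,"transa":1,"transb":0}'
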
@@ -0,0 +1,80 @@

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Tests for cublas-functions benchmark."""

import numbers

from tests.helper import decorator
from superbench.benchmarks import BenchmarkRegistry, BenchmarkType, ReturnCode, Platform


@decorator.cuda_test
def test_cublas_functions():
    """Test cublas-function benchmark."""
    # Test for default configuration
    context = BenchmarkRegistry.create_benchmark_context(
        'cublas-function', platform=Platform.CUDA, parameters='--num_warmup 10 --num_steps 10 --num_in_step 100'
    )

    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (benchmark.name == 'cublas-function')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.num_warmup == 10)
    assert (benchmark._args.num_steps == 10)
    assert (benchmark._args.num_in_step == 100)

    # Check results and metrics.
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    assert ('raw_output_0' in benchmark.raw_data)
    assert (len(benchmark.raw_data['raw_output_0']) == 1)
    assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))

    assert (19 <= len(benchmark.result))
    for metric in list(benchmark.result.keys()):
        assert (len(benchmark.result[metric]) == 1)
        assert (isinstance(benchmark.result[metric][0], numbers.Number))
        assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)

    # Test for custom configuration
    custom_config_str = '{"name":"cublasCgemm","m":512,"n":512,"k":32,"transa":1,"transb":0}'
    context = BenchmarkRegistry.create_benchmark_context(
        'cublas-function',
        platform=Platform.CUDA,
        parameters='--num_warmup 10 --num_steps 10 --num_in_step 100 --config_json_str ' + custom_config_str
    )

    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (benchmark.name == 'cublas-function')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.num_warmup == 10)
    assert (benchmark._args.num_steps == 10)
    assert (benchmark._args.num_in_step == 100)

    # Check results and metrics.
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    assert ('raw_output_0' in benchmark.raw_data)
    assert (len(benchmark.raw_data['raw_output_0']) == 1)
    assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))

    assert (1 == len(benchmark.result))
    for metric in list(benchmark.result.keys()):
        assert (len(benchmark.result[metric]) == 1)
        assert (isinstance(benchmark.result[metric][0], numbers.Number))
        assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)