update
This commit is contained in:
Родитель
ca5a5b6f94
Коммит
93eaae32a2
|
@ -66,6 +66,21 @@ class HipBlasLtBenchmark(BlasLtBaseBenchmark):
|
|||
required=False,
|
||||
help='Transpose matrix B.',
|
||||
)
|
||||
self._parser.add_argument(
|
||||
'--algo_method',
|
||||
type=str,
|
||||
default='heuristic',
|
||||
choices=['heuristic', 'all', 'index'],
|
||||
required=False,
|
||||
help='Use different algorithm search API. Options: heuristic, all, index.',
|
||||
)
|
||||
self._parser.add_argument(
|
||||
'--solution_index',
|
||||
type=int,
|
||||
default=None,
|
||||
required=False,
|
||||
help='Used with --algo_method index. Specify solution index to use in benchmark. '
|
||||
)
|
||||
|
||||
def _preprocess(self):
|
||||
"""Preprocess/preparation operations before the benchmarking.
|
||||
|
@ -85,7 +100,14 @@ class HipBlasLtBenchmark(BlasLtBaseBenchmark):
|
|||
f' -i {self._args.num_steps} {self._in_type_map[_in_type]}' + \
|
||||
f' --transA {self._args.transA} --transB {self._args.transB}' + \
|
||||
f' --initialization {self._args.initialization}'
|
||||
command = command + f' -b {str(_b)}' if _b > 0 else command
|
||||
command = command + f' --batch_count {str(_b)}' if _b > 0 else command
|
||||
if self._args.algo_method != 'heuristic':
|
||||
command += f' --algo_method {self._args.algo_method}'
|
||||
if self._args.algo_method == 'index':
|
||||
if not self._args.solution_index:
|
||||
logger.error('Solution index must be specified when algo_method is "index".')
|
||||
return False
|
||||
command += f' --solution_index {self._args.solution_index}'
|
||||
logger.info(command)
|
||||
self._commands.append(command)
|
||||
self._precision_in_commands.append(_in_type)
|
||||
|
@ -109,27 +131,24 @@ class HipBlasLtBenchmark(BlasLtBaseBenchmark):
|
|||
try:
|
||||
lines = raw_output.splitlines()
|
||||
index = None
|
||||
tflops = -1
|
||||
metric = None
|
||||
|
||||
# Find the line containing 'hipblaslt-Gflops'
|
||||
for i, line in enumerate(lines):
|
||||
if 'hipblaslt-Gflops' in line:
|
||||
index = i
|
||||
break
|
||||
|
||||
if index is None:
|
||||
raise ValueError('Line with "hipblaslt-Gflops" not found in the log.')
|
||||
|
||||
# Split the line into fields using a comma as the delimiter
|
||||
fields = lines[index + 1].strip().split(',')
|
||||
|
||||
# Check the number of fields and the format of the first two fields
|
||||
if len(fields) != 23:
|
||||
if len(fields) < 23:
|
||||
raise ValueError('Invalid result')
|
||||
metric = f'{self._precision_in_commands[cmd_idx]}_{fields[3]}_{"_".join(fields[4:7])}'
|
||||
tflops = max(tflops, float(fields[21])/1000)
|
||||
if index is None:
|
||||
raise ValueError('Line with "hipblaslt-Gflops" not found in the log.')
|
||||
self._result.add_result(f'{metric}_tflops', tflops)
|
||||
|
||||
self._result.add_result(
|
||||
f'{self._precision_in_commands[cmd_idx]}_{fields[3]}_{"_".join(fields[4:7])}_flops',
|
||||
float(fields[-2]) / 1000
|
||||
)
|
||||
except BaseException as e:
|
||||
self._result.set_return_code(ReturnCode.MICROBENCHMARK_RESULT_PARSING_FAILURE)
|
||||
logger.error(
|
||||
|
|
|
@ -125,7 +125,7 @@ class RocmComposableKernelBenchmark(BlasLtBaseBenchmark):
|
|||
command = f'{self.__bin_path} gemm {params} {self._args.num_warmup} {self._args.num_steps}'
|
||||
self._commands.append(command)
|
||||
logger.info(command)
|
||||
if self._args.splitk:
|
||||
if self._args.splitk and _in_type not in ['fp8']:
|
||||
if not isinstance(self._args.splitk, list):
|
||||
self._args.splitk = [self._args.splitk]
|
||||
for splitk in self._args.splitk:
|
||||
|
@ -133,7 +133,7 @@ class RocmComposableKernelBenchmark(BlasLtBaseBenchmark):
|
|||
f' {self._args.num_warmup} {self._args.num_steps}'
|
||||
self._commands.append(command)
|
||||
logger.info(command)
|
||||
if self._args.streamk:
|
||||
if self._args.streamk and _in_type not in ['fp8']:
|
||||
if not isinstance(self._args.streamk, list):
|
||||
self._args.streamk = [self._args.streamk]
|
||||
for streamk in self._args.streamk:
|
||||
|
@ -203,8 +203,8 @@ class RocmComposableKernelBenchmark(BlasLtBaseBenchmark):
|
|||
return False
|
||||
finally:
|
||||
if cmd_idx == len(self._commands) - 1:
|
||||
for metric in self.results:
|
||||
self.results[metric] = [max(self.results[metric])]
|
||||
for metric in self._result.result:
|
||||
self._result.result[metric] = [max(self._result.result[metric])]
|
||||
return True
|
||||
|
||||
|
||||
|
|
|
@ -150,6 +150,39 @@ superbench:
|
|||
<<: *default_pytorch_mode
|
||||
computation-communication-overlap:
|
||||
<<: *default_pytorch_mode
|
||||
composable-kernel-gemm:
|
||||
<<: *default_local_mode
|
||||
parameters:
|
||||
in_types:
|
||||
- fp16
|
||||
- bf16
|
||||
- fp32
|
||||
- fp8
|
||||
shapes:
|
||||
- 8192,8192,8192
|
||||
- 4096,4096,4096
|
||||
splitk: [2, 4]
|
||||
streamk: -1
|
||||
tolerant_fail: yes
|
||||
num_warmup: 10
|
||||
num_steps: 100
|
||||
hipblaslt-gemm:
|
||||
modes:
|
||||
- name: local
|
||||
proc_num: 8
|
||||
prefix: CUDA_VISIBLE_DEVICES={proc_rank}
|
||||
parallel: yes
|
||||
env:
|
||||
HIP_FORCE_DEV_KERNARG: '1'
|
||||
parameters:
|
||||
algo_method: all
|
||||
shapes:
|
||||
- 8192,8192,8192
|
||||
- 4096,4096,4096
|
||||
in_types: ["fp32", "fp16", "bf16", 'fp8']
|
||||
tolerant_fail: yes
|
||||
num_warmup: 10
|
||||
num_steps: 100
|
||||
ib-traffic:
|
||||
enable: false
|
||||
modes:
|
||||
|
|
|
@ -114,6 +114,7 @@ class SuperBenchExecutor():
|
|||
elif isinstance(val, (str, int, float)):
|
||||
argv.append('--{} {}'.format(name, val))
|
||||
elif isinstance(val, (list, ListConfig)):
|
||||
val = [str(v) for v in val]
|
||||
argv.append('--{} {}'.format(name, ' '.join(val)))
|
||||
return ' '.join(argv)
|
||||
|
||||
|
|
|
@ -128,8 +128,9 @@ rocm_composable_kernel: sb_micro_path
|
|||
if [ -d composable_kernel ]; then rm -rf composable_kernel; fi; \
|
||||
git clone -b ${COMPOSABLEKERNEL_BRANCH} https://github.com/ROCm/composable_kernel; \
|
||||
cd composable_kernel && mkdir build && cd build; \
|
||||
cmake -D CMAKE_PREFIX_PATH=/opt/rocm -D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc -D CMAKE_BUILD_TYPE=Release -D DTYPES="fp64;fp32;fp16;fp8;bf16;int8" ..; \
|
||||
make -j ckProfiler install; \
|
||||
cmake -D CMAKE_PREFIX_PATH=/opt/rocm -D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc -D CMAKE_BUILD_TYPE=Release -D DTYPES="fp64;fp32;fp16;fp8;bf16;int8" -D GPU_TARGETS="gfx941;gfx942;gfx90a;gfx908" ..; \
|
||||
make -j ckProfiler; \
|
||||
cp -v ./bin/ckProfiler $(SB_MICRO_PATH)/bin/; \
|
||||
fi
|
||||
|
||||
# Build hipBusBandwidth.
|
||||
|
|
Загрузка…
Ссылка в новой задаче