Benchmarks: Micro benchmarks - add support for NVIDIA L4/L40/L40s GPUs in gemm-flops (#634)
**Description** Add support for GPU architecture 8.9 (NVIDIA L4/L40/L40s GPUs) in gemm-flops.
This commit is contained in:
Родитель
4e27142a59
Коммит
e304cf1572
|
@ -33,6 +33,6 @@ if(NOT DEFINED NVCC_ARCHS_SUPPORTED)
|
|||
list(APPEND NVCC_ARCHS_SUPPORTED 86)
|
||||
endif()
|
||||
if (NOT CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.8)
|
||||
list(APPEND NVCC_ARCHS_SUPPORTED 90)
|
||||
list(APPEND NVCC_ARCHS_SUPPORTED 89 90)
|
||||
endif()
|
||||
endif()
|
||||
|
|
|
@ -47,6 +47,8 @@ class CudaGemmFlopsBenchmark(GemmFlopsBenchmark):
|
|||
# Skip FP64 for RTX Turing/Ampere and Tesla T4/GA10x due to very limited FP64 TFLOP rate
|
||||
self.__kernel_map[7.5] = {k: self.__kernel_map[7.0][k] for k in self.__kernel_map[7.0] if 'fp64' not in k}
|
||||
self.__kernel_map[8.6] = {k: self.__kernel_map[8.0][k] for k in self.__kernel_map[8.0] if 'fp64' not in k}
|
||||
# Skip FP64 for Ada Lovelace L4/L40 due to no native CUDA/Tensor Cores
|
||||
self.__kernel_map[8.9] = {k: self.__kernel_map[8.0][k] for k in self.__kernel_map[8.0] if 'fp64' not in k}
|
||||
# Skip INT4 for Hopper due to no native CUDA/Tensor Cores
|
||||
self.__kernel_map[9.0] = {k: self.__kernel_map[8.0][k] for k in self.__kernel_map[8.0] if 'int4_tc' not in k}
|
||||
self.__parse_logline = [
|
||||
|
|
|
@ -33,7 +33,7 @@ sb_micro_path:
|
|||
# Build cutlass.
|
||||
cuda_cutlass:
|
||||
ifeq ($(shell echo $(CUDA_VER)">=11.8" | bc -l), 1)
|
||||
$(eval ARCHS := "70;75;80;86;90")
|
||||
$(eval ARCHS := "70;75;80;86;89;90")
|
||||
else
|
||||
$(eval ARCHS := "70;75;80;86")
|
||||
endif
|
||||
|
|
Загрузка…
Ссылка в новой задаче