Benchmarks: Add Benchmark - Add the source code of rocm kernel launch overhead benchmark. (#136)
**Description** Add the source code of the ROCm kernel launch overhead benchmark. **Major Revision** - Revise CMake build logic to support both CUDA and ROCm.
This commit is contained in:
Родитель
fdc33f406c
Коммит
1ee8f7dcf5
|
@ -2,10 +2,39 @@
|
|||
# Licensed under the MIT License.
|
||||
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
project(kernel_launch_overhead LANGUAGES CUDA CXX)
|
||||
|
||||
project(kernel_launch_overhead LANGUAGES CXX)
|
||||
|
||||
include(../cuda_common.cmake)
|
||||
find_package(CUDAToolkit QUIET)
|
||||
include(../rocm_common.cmake)
|
||||
find_package(HIP QUIET)
|
||||
|
||||
# Cuda environment
|
||||
if(CUDAToolkit_FOUND)
|
||||
message(STATUS "Found CUDA: " ${CUDAToolkit_VERSION})
|
||||
enable_language(CUDA)
|
||||
|
||||
add_executable(kernel_launch_overhead kernel_launch.cu)
|
||||
set_property(TARGET kernel_launch_overhead PROPERTY CUDA_ARCHITECTURES ${NVCC_ARCHS_SUPPORTED})
|
||||
install(TARGETS kernel_launch_overhead RUNTIME DESTINATION bin)
|
||||
|
||||
# ROCm environment
|
||||
elseif(HIP_FOUND)
|
||||
message(STATUS "Found HIP: " ${HIP_VERSION})
|
||||
|
||||
# Convert CUDA code to HIP code in place
|
||||
execute_process(COMMAND hipify-perl -inplace -print-stats kernel_launch.cu
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/)
|
||||
|
||||
# Add HIP targets
|
||||
set_source_files_properties(kernel_launch.cu PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
|
||||
# Link with HIP
|
||||
hip_add_executable(kernel_launch_overhead kernel_launch.cu)
|
||||
# Install targets
|
||||
install(TARGETS kernel_launch_overhead RUNTIME DESTINATION bin)
|
||||
|
||||
else()
|
||||
message(FATAL_ERROR "No CUDA or ROCm environment found.")
|
||||
endif()
|
||||
|
||||
add_executable(kernel_launch_overhead cuda_kernel_launch.cu)
|
||||
set_property(TARGET kernel_launch_overhead PROPERTY CUDA_ARCHITECTURES ${NVCC_ARCHS_SUPPORTED})
|
||||
install(TARGETS kernel_launch_overhead RUNTIME DESTINATION bin)
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
// Licensed under the MIT License.
|
||||
|
||||
// Kernel launch benchmark which will launch one empty kernel and record the cost in event mode and wall mode.
|
||||
// event mode: using cuda event to record the elapsed time of kernel launch on device.
|
||||
// event mode: using cuda/hip event to record the elapsed time of kernel launch on device.
|
||||
// wall mode: using host timer to record the elapsed time of kernel launch on both host and device.
|
||||
|
||||
#include <algorithm>
|
Загрузка…
Ссылка в новой задаче