Benchmarks: Add Benchmark - Add the source code of rocm kernel launch overhead benchmark. (#136)

**Description**
Add the source code of rocm kernel launch overhead benchmark. 

**Major Revision**
- Revise cmake build logic to support both cuda and rocm
This commit is contained in:
Yuting Jiang 2021-07-27 22:22:31 +08:00 коммит произвёл GitHub
Родитель fdc33f406c
Коммит 1ee8f7dcf5
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
2 изменённых файлов: 34 добавлений и 5 удалений

Просмотреть файл

@ -2,10 +2,39 @@
# Licensed under the MIT License.
cmake_minimum_required(VERSION 3.18)

# Build script for the kernel launch overhead micro-benchmark.
# The same source file (kernel_launch.cu) is built either with CUDA or, after
# conversion by hipify-perl, with HIP, depending on which toolkit is found.
#
# Only CXX is enabled up front. CUDA is enabled further down, and only when a
# CUDA toolkit has been detected, so configuration can also proceed on machines
# that have ROCm but no CUDA compiler.
project(kernel_launch_overhead LANGUAGES CXX)

include(../cuda_common.cmake)
find_package(CUDAToolkit QUIET)

include(../rocm_common.cmake)
find_package(HIP QUIET)

if(CUDAToolkit_FOUND)
  # CUDA environment
  message(STATUS "Found CUDA: ${CUDAToolkit_VERSION}")
  enable_language(CUDA)
  add_executable(kernel_launch_overhead kernel_launch.cu)
  # NVCC_ARCHS_SUPPORTED is not set in this file; presumably it comes from
  # ../cuda_common.cmake -- TODO confirm.
  set_property(TARGET kernel_launch_overhead PROPERTY CUDA_ARCHITECTURES ${NVCC_ARCHS_SUPPORTED})
elseif(HIP_FOUND)
  # ROCm environment
  message(STATUS "Found HIP: ${HIP_VERSION}")

  # Convert the CUDA code to HIP code in place.
  # NOTE(review): this rewrites kernel_launch.cu inside the source tree at
  # configure time; consider emitting the converted file into the build tree.
  execute_process(
    COMMAND hipify-perl -inplace -print-stats kernel_launch.cu
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/
    RESULT_VARIABLE hipify_result
  )
  # Stop here if the conversion did not succeed (non-zero exit status, or the
  # tool could not be started) instead of failing later while compiling
  # unconverted CUDA code with the HIP toolchain.
  if(NOT "${hipify_result}" STREQUAL "0")
    message(FATAL_ERROR "hipify-perl failed to convert kernel_launch.cu: ${hipify_result}")
  endif()

  # Mark the .cu file as a HIP source, then build it with the HIP helper.
  # hip_add_executable is not defined in this file; presumably it is provided
  # by the HIP package found above -- TODO confirm.
  set_source_files_properties(kernel_launch.cu PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
  hip_add_executable(kernel_launch_overhead kernel_launch.cu)
else()
  message(FATAL_ERROR "No CUDA or ROCm environment found.")
endif()

# Install targets. Reached only when one of the branches above created the
# target, because the fallback branch aborts configuration.
install(TARGETS kernel_launch_overhead RUNTIME DESTINATION bin)

Просмотреть файл

@ -2,7 +2,7 @@
// Licensed under the MIT License.
// Kernel launch benchmark which will launch one empty kernel and record the cost in event mode and wall mode.
// event mode: using cuda event to record the elapsed time of kernel launch on device.
// event mode: using cuda/hip event to record the elapsed time of kernel launch on device.
// wall mode: using host timer to record the elapsed time of kernel launch on both host and device.
#include <algorithm>