зеркало из https://github.com/microsoft/ArchProbe.git
ArchProbe source and example output
This commit is contained in:
Родитель
daacaa9031
Коммит
fac4d6ba56
|
@ -348,3 +348,7 @@ MigrationBackup/
|
|||
|
||||
# Ionide (cross platform F# VS Code tools) working folder
|
||||
.ionide/
|
||||
|
||||
build*
|
||||
|
||||
!examples
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
[submodule "third/libopencl-stub"]
|
||||
path = third/libopencl-stub
|
||||
url = https://github.com/ShireFolk/libopencl-stub
|
|
@ -0,0 +1,50 @@
|
|||
# Top-level build script: builds the ArchProbeCore static library from `src/`
# and `include/`, then descends into `apps/` for the executables.
cmake_minimum_required (VERSION 3.12)

project ("ArchProbe" LANGUAGES C CXX)

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin")

set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake")

if (NOT CMAKE_BUILD_TYPE)
  message(STATUS "No build type selected, default to Release")
  # The build type is a plain string, not a filesystem path; `STRING` keeps
  # cache editors from presenting it as a directory chooser.
  set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build Type" FORCE)
endif()

message("-- Looking for libopencl-stub")
if (EXISTS "${PROJECT_SOURCE_DIR}/third/libopencl-stub")
  add_compile_definitions(CL_VERSION_2_0)
  add_subdirectory(third/libopencl-stub)
  set(OpenCL_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/third/libopencl-stub/include")
  message("-- OpenCL context enabled")
else()
  message("")
  message("-- OpenCL not found! OpenCL context is disabled")
  message("")
endif()

set(LINK_LIBS
  OpenCL
)
set(INC_DIRS
  "${PROJECT_SOURCE_DIR}/include"
  ${OpenCL_INCLUDE_DIR}
  ${CMAKE_SOURCE_DIR}/third/OpenCL-SDK/external/OpenCL-CLHPP/include
)

# The testbench library.
include_directories(${INC_DIRS})
file(GLOB SRCS "${PROJECT_SOURCE_DIR}/src/*")
file(GLOB INCS "${PROJECT_SOURCE_DIR}/include/*")
add_library(ArchProbeCore STATIC ${SRCS} ${INCS})
target_link_libraries(ArchProbeCore ${LINK_LIBS})
target_compile_definitions(ArchProbeCore PUBLIC CL_TARGET_OPENCL_VERSION=200)

# Testbench apps.
# `make_directory()` is deprecated; `file(MAKE_DIRECTORY)` is its replacement.
file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/assets/")
add_subdirectory("${PROJECT_SOURCE_DIR}/apps")
|
50
README.md
50
README.md
|
@ -1,14 +1,48 @@
|
|||
# Project
|
||||
# ArchProbe
|
||||
|
||||
> This repo has been populated by an initial template to help get you started. Please
|
||||
> make sure to update the content to build a great experience for community-building.
|
||||
ArchProbe is a profiling tool to demystify mobile GPU architectures in great detail. The mechanism of ArchProbe is introduced in our technical paper, which is still under review.
|
||||
|
||||
As the maintainer of this project, please make a few updates:
|
||||
![Adreno & Mali Architecture Overview](overview.png)
|
||||
*Architecture details collected with ArchProbe, presented in our technical paper.*
|
||||
|
||||
- Improving this README.MD file to provide a great experience
|
||||
- Updating SUPPORT.MD with content about this project's support experience
|
||||
- Understanding the security reporting process in SECURITY.MD
|
||||
- Remove this section from the README
|
||||
## How to Use
|
||||
|
||||
In a clone of ArchProbe code repository, the following commands build ArchProbe for most mobile devices with a 64-bit ARMv8 architecture.
|
||||
|
||||
```powershell
|
||||
git submodule update --init --recursive
|
||||
mkdir build-android-aarch64 && cd build-android-aarch64
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-28 -G "Ninja" ..
|
||||
cmake --build . -t ArchProbe
|
||||
```
|
||||
|
||||
To run ArchProbe in command line via `adb shell`, you need to copy the executables to `/data/local/tmp`.
|
||||
|
||||
If you are using Windows, the PowerShell scripts in `scripts` can be convenient too:
|
||||
|
||||
```powershell
|
||||
scripts/Run-Android.ps1 [-Verbose]
|
||||
```
|
||||
|
||||
### Prebuilt Binaries
|
||||
|
||||
Prebuilt binaries will be available [here](https://github.com/PENGUINLIONG/graphi-t/releases).
|
||||
|
||||
## How to Interpret Outputs
|
||||
|
||||
GPU hardware has many traits, such as GFLOPS and cache size. ArchProbe implements a bag of tricks to expose these traits, and each implementation is called an *aspect*. Each aspect has its own configuration in `ArchProbe.json`, its report in `ArchProbeReport.json`, and a data table of every run of the probing kernels in `[ASPECT_NAME].csv`. Currently ArchProbe implements the following aspects:
|
||||
|
||||
- `WarpSizeMethod{A|B}` Two methods to detect the warp size of a GPU core;
|
||||
- `GFLOPS` Peak computational throughput of the device;
|
||||
- `RegCount` Number of registers available to a thread and whether the register file is shared among warps;
|
||||
- `BufferVecWidth` Optimal vector width to read the most data in a single memory access;
|
||||
- `{Image|Buffer}CachelineSize` Top level cacheline size of image/buffer;
|
||||
- `{Image|Buffer}Bandwidth` Peak read-only bandwidth of image/buffer;
|
||||
- `{Image|Buffer}CacheHierarchyPChase` Size of each level of cache of image/buffer by the P-chase method.
|
||||
|
||||
If the `-v` flag is given, ArchProbe prints extra human-readable logs to `stdout` which is also a good source of information.
|
||||
|
||||
Experiment data gathered from Google Pixel 4 can be found [here](examples/adreno640/Google_Pixel_4).
|
||||
|
||||
## Contributing
|
||||
|
||||
|
|
16
SUPPORT.md
16
SUPPORT.md
|
@ -1,13 +1,3 @@
|
|||
# TODO: The maintainer of this repo has not yet edited this file
|
||||
|
||||
**REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project?
|
||||
|
||||
- **No CSS support:** Fill out this template with information about how to file issues and get help.
|
||||
- **Yes CSS support:** Fill out an intake form at [aka.ms/spot](https://aka.ms/spot). CSS will work with/help you to determine next steps. More details also available at [aka.ms/onboardsupport](https://aka.ms/onboardsupport).
|
||||
- **Not sure?** Fill out a SPOT intake as though the answer were "Yes". CSS will help you decide.
|
||||
|
||||
*Then remove this first heading from this SUPPORT.MD file before publishing your repo.*
|
||||
|
||||
# Support
|
||||
|
||||
## How to file issues and get help
|
||||
|
@ -16,10 +6,6 @@ This project uses GitHub Issues to track bugs and feature requests. Please searc
|
|||
issues before filing new issues to avoid duplicates. For new issues, file your bug or
|
||||
feature request as a new Issue.
|
||||
|
||||
For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE
|
||||
FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER
|
||||
CHANNEL. WHERE WILL YOU HELP PEOPLE?**.
|
||||
|
||||
## Microsoft Support Policy
|
||||
|
||||
Support for this **PROJECT or PRODUCT** is limited to the resources listed above.
|
||||
Support for this project is limited to the resources listed above.
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
add_subdirectory(archprobe)
|
|
@ -0,0 +1,4 @@
|
|||
# The ArchProbe executable, linked against the core library.
set(APP_NAME ArchProbe)

add_executable(${APP_NAME}
  app.cpp
  env.cpp
)
target_link_libraries(${APP_NAME} ArchProbeCore)
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,231 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
#include "env.hpp"
|
||||
|
||||
namespace archprobe {
|
||||
|
||||
std::string pretty_data_size(size_t size) {
|
||||
const size_t K = 1024;
|
||||
if (size < K) { return util::format(size, "B"); } size /= K;
|
||||
if (size < K) { return util::format(size, "KB"); } size /= K;
|
||||
if (size < K) { return util::format(size, "MB"); } size /= K;
|
||||
if (size < K) { return util::format(size, "GB"); } size /= K;
|
||||
if (size < K) { return util::format(size, "TB"); } size /= K;
|
||||
archprobe::panic("unsupported data size");
|
||||
return {};
|
||||
}
|
||||
|
||||
// Queries the properties of `dev` through OpenCL device info, logs a summary of
// what was found and returns the collected values.
DeviceReport collect_dev_report(const cl::Device& dev) {
  DeviceReport out {};
  log::info("set-up testing environment");

  // General memory detail. The page size comes from a Qualcomm-specific query,
  // so remember whether the query succeeded.
  const auto page_size_res =
    dev.getInfo(CL_DEVICE_PAGE_SIZE_QCOM, &out.page_size);
  out.has_page_size = CL_SUCCESS == page_size_res;

  // Global memory detail.
  out.buf_cacheline_size = dev.getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
  out.buf_size_max = dev.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>();
  out.buf_cache_size = dev.getInfo<CL_DEVICE_GLOBAL_MEM_CACHE_SIZE>();

  // Image memory detail; the limits are only queried when images are supported.
  out.support_img = dev.getInfo<CL_DEVICE_IMAGE_SUPPORT>();
  if (out.support_img) {
    out.img_width_max = dev.getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
    out.img_height_max = dev.getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
  }

  // Processor detail.
  out.nsm = dev.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
  out.nthread_logic = dev.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();

  // Summarize the findings, indented under the headline.
  log::info("fetched device report");
  log::push_indent();
  if (out.has_page_size) {
    log::info("(qualcomm extension) device page size is ",
      pretty_data_size(out.page_size));
  }
  log::info(pretty_data_size(out.buf_size_max),
    " global memory with ", pretty_data_size(out.buf_cache_size),
    " cache consists of ", pretty_data_size(out.buf_cacheline_size),
    " cachelines");
  if (!out.support_img) {
    log::info("image is not supported");
  } else {
    log::info("images up to [", out.img_width_max, ", ",
      out.img_height_max, "] texels are supported");
  }
  log::info(out.nsm, " SMs with ", out.nthread_logic,
    " logical threads in each");
  log::pop_indent();

  return out;
}
|
||||
|
||||
// Loads the environment configuration from the JSON file at `path`.
//
// Returns the parsed document when it is a JSON object. In every other case a
// warning is logged and an empty JSON object is returned:
// - the text does not parse, or parses to something that is not an object;
// - an `AssertionFailedException` is raised while loading the file.
json::JsonValue load_env_cfg(const char* path) {
  try {
    auto json_txt = util::load_text(path);
    log::debug("loaded configuration '", json_txt, "'");
    json::JsonValue out {};
    // A non-object document is handled here together with parse failures, so
    // that it is not reported below as a file that could not be opened.
    if (json::try_parse(json_txt, out) && out.is_obj()) {
      return out;
    }
    log::warn("failed to parse environment config from '", path,
      "', a default configuration will be created to overwrite it");
  } catch (const archprobe::AssertionFailedException&) {
    // Caught by const reference to avoid copying the exception object.
    log::warn("configuration file cannot be opened at '", path,
      "', a default configuration will be created");
  }
  return json::JsonObject {};
}
|
||||
|
||||
// Loads a previously saved report from the JSON file at `path`.
//
// Returns the parsed document when it is a JSON object. In every other case a
// warning is logged and an empty JSON object is returned:
// - the text does not parse, or parses to something that is not an object;
// - an `AssertionFailedException` is raised while loading the file.
json::JsonValue load_report(const char* path) {
  try {
    auto json_txt = util::load_text(path);
    log::debug("loaded report '", json_txt, "'");
    json::JsonValue out {};
    // A non-object document is handled here together with parse failures, so
    // that it is not reported below as a file that could not be opened.
    if (json::try_parse(json_txt, out) && out.is_obj()) {
      return out;
    }
    log::warn("failed to parse report from '", path, "', a new report "
      "will be created to overwrite it");
  } catch (const archprobe::AssertionFailedException&) {
    // Caught by const reference to avoid copying the exception object.
    log::warn("report file cannot be opened at '", path, "', a new "
      "report will be created");
  }
  return json::JsonObject {};
}
|
||||
|
||||
|
||||
// Records the collected device properties under the "Device" aspect of the
// report. Does nothing when that aspect is already marked as done.
void report_dev(Environment& env) {
  const bool already_done = env.report_started_lazy("Device");
  if (already_done) { return; }

  const DeviceReport& dev = env.dev_report;
  env.report_value("SmCount", dev.nsm);
  env.report_value("LogicThreadCount", dev.nthread_logic);
  env.report_value("MaxBufferSize", dev.buf_size_max);
  env.report_value("CacheSize", dev.buf_cache_size);
  env.report_value("CachelineSize", dev.buf_cacheline_size);
  // Image limits are only reported for devices with image support.
  if (dev.support_img) {
    env.report_value("MaxImageWidth", dev.img_width_max);
    env.report_value("MaxImageHeight", dev.img_height_max);
  }
  // The page size is only reported when its query succeeded.
  if (dev.has_page_size) {
    env.report_value("PageSize_QCOM", dev.page_size);
  }
  env.report_ready(true);
}
|
||||
|
||||
|
||||
// Selects device `idev`, creates its context and command queue, loads the
// configuration and any existing report from the given paths, collects the
// device report and finally records it under the "Device" aspect.
//
// Members left out of the initializer list are default-constructed, i.e. an
// empty set, an empty string and a null table pointer.
Environment::Environment(
  uint32_t idev,
  const char* cfg_path,
  const char* report_path
)
  : dev_(archprobe::select_dev(idev))
  , ctxt_(archprobe::create_ctxt(dev_))
  , cmd_queue_(archprobe::create_cmd_queue(ctxt_))
  , cfg_path_(cfg_path)
  , report_path_(report_path)
  , cfg_(load_env_cfg(cfg_path))
  , report_(load_report(report_path))
  , dev_report(collect_dev_report(dev_))
  , my_report()
{
  report_dev(*this);
}
|
||||
// Writes the configuration and then the report back to the paths they were
// loaded from, logging each save.
Environment::~Environment() {
  const auto cfg_txt = json::print(cfg_);
  util::save_text(cfg_path_.c_str(), cfg_txt);
  log::info("saved configuration to '", cfg_path_, "'");

  const auto report_txt = json::print(report_);
  util::save_text(report_path_.c_str(), report_txt);
  log::info("saved report to '", report_path_, "'");
}
|
||||
|
||||
|
||||
// Opens the report scope of `aspect_name`: registers the aspect as started,
// logs its name as a headline, increases the log indent and makes it the
// current aspect. The name must not be empty.
void Environment::report_started(const std::string& aspect_name) {
  const bool has_name = !aspect_name.empty();
  archprobe::assert(has_name, "aspect name cannot be empty");

  aspects_started_.insert(aspect_name);

  log::info("[", aspect_name, "]");
  log::push_indent();

  cur_aspect_ = aspect_name;
}
|
||||
bool Environment::report_started_lazy(const std::string& aspect_name) {
|
||||
auto aspect_it = report_.obj.find(aspect_name);
|
||||
if (aspect_it == report_.obj.end() || !aspect_it->second.is_obj()) {
|
||||
report_started(aspect_name);
|
||||
return false;
|
||||
}
|
||||
auto done_it = aspect_it->second.obj.find("Done");
|
||||
if (done_it == aspect_it->second.obj.end() || !done_it->second.is_bool()) {
|
||||
report_started(aspect_name);
|
||||
return false;
|
||||
}
|
||||
if (done_it->second.b) {
|
||||
log::info("ignored aspect '", aspect_name ,"' because it's done");
|
||||
return true;
|
||||
} else {
|
||||
report_started(aspect_name);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
void Environment::report_ready(bool done) {
|
||||
archprobe::assert(!cur_aspect_.empty(),
|
||||
"announcing ready for an not-yet-started report is not allowed");
|
||||
archprobe::assert(aspects_started_.find(cur_aspect_) != aspects_started_.end(),
|
||||
"aspect has not report to start yet");
|
||||
report_value("Done", done);
|
||||
if (cur_table_ != nullptr) {
|
||||
auto csv = cur_table_->to_csv();
|
||||
auto fname = util::format(cur_aspect_, ".csv");
|
||||
util::save_text(fname.c_str(), csv);
|
||||
log::info("saved data table to '", fname, "'");
|
||||
cur_table_ = nullptr;
|
||||
}
|
||||
cur_aspect_ = {};
|
||||
log::pop_indent();
|
||||
}
|
||||
// Asserts that the report of `aspect_name` holds a "Done" entry that is true.
void Environment::check_dep(const std::string& aspect_name) {
  bool done = false;
  const bool found = try_get_aspect_report(aspect_name, "Done", done);
  const bool ready = found && done;
  archprobe::assert(ready,
    "aspect '", aspect_name, "' is required but is not ready yet");
}
|
||||
|
||||
|
||||
// Gives access to the data table of the current aspect. Asserts that a table
// has been initialized.
table::Table& Environment::table() {
  const bool has_table = static_cast<bool>(cur_table_);
  archprobe::assert(has_table, "requested table is not initialized");
  return *cur_table_;
}
|
||||
|
||||
|
||||
// Find the minimal number of iterations that a kernel can run up to
|
||||
// `min_time_us` microseconds.
|
||||
void Environment::ensure_min_niter(
|
||||
double min_time_us,
|
||||
uint32_t& niter,
|
||||
std::function<double()> run
|
||||
) {
|
||||
const uint32_t DEFAULT_NITER = 100;
|
||||
niter = DEFAULT_NITER;
|
||||
for (uint32_t i = 0; i < 100; ++i) {
|
||||
double t = run();
|
||||
if (t > min_time_us * 0.99) {
|
||||
log::info("found minimal niter=", niter, " to take ", min_time_us,
|
||||
"us");
|
||||
return;
|
||||
}
|
||||
log::debug("niter=", niter, " doesn't run long enough (", t,
|
||||
"us <= ", min_time_us, "us)");
|
||||
niter = uint32_t(niter * min_time_us / t);
|
||||
}
|
||||
archprobe::panic("unable to find a minimal iteration number for ",
|
||||
cur_aspect_, "; is your code aggresively optimized by the compiler?");
|
||||
}
|
||||
|
||||
} // namespace archprobe
|
|
@ -0,0 +1,277 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
#pragma once
|
||||
#include <memory>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <optional>
|
||||
#include "assert.hpp"
|
||||
#include "log.hpp"
|
||||
#include "util.hpp"
|
||||
#include "args.hpp"
|
||||
#include "stats.hpp"
|
||||
#include "table.hpp"
|
||||
#include "json.hpp"
|
||||
#define CL_TARGET_OPENCL_VERSION 200
|
||||
#define CL_HPP_TARGET_OPENCL_VERSION CL_TARGET_OPENCL_VERSION
|
||||
#include "CL/opencl.hpp"
|
||||
#include "cl.hpp"
|
||||
|
||||
namespace archprobe {
|
||||
|
||||
std::string pretty_data_size(size_t size);
|
||||
|
||||
// Device properties collected by `collect_dev_report`. Every member has a
// default initializer so that a default-constructed report is well defined.
struct DeviceReport {
  // Whether the Qualcomm page size query succeeded; `page_size` is only
  // meaningful when this is true.
  bool has_page_size = false;
  size_t page_size = 0;

  // Global memory: cacheline size, total size and cache size as reported by
  // the device.
  size_t buf_cacheline_size = 0;
  size_t buf_size_max = 0;
  size_t buf_cache_size = 0;

  // Image support; the maximum 2D image extent is only filled in when images
  // are supported.
  bool support_img = false;
  uint32_t img_width_max = 0;
  uint32_t img_height_max = 0;

  // Number of compute units and maximum work-group size.
  uint32_t nsm = 0;
  uint32_t nthread_logic = 0;
};
|
||||
// Values measured by the probing aspects. Scalar members have default
// initializers so that a default-constructed report is well defined.
struct ProfiledReport {
  double timing_std = 0.0;

  std::map<uint32_t, uint32_t> nthread_logic_for_nreg;

  double gflops_fp16 = 0.0;
  double gflops_fp32 = 0.0;
  double gflops_int32 = 0.0;
  uint32_t nmin_warp = 0;
  uint32_t nwarp = 0;
  uint32_t nthread_phys = 0;
  uint32_t nthread_warp = 0;
  uint32_t nthread_min_warp = 0;

  uint32_t buf_vec_width = 0;
  std::string buf_vec_ty;
  uint32_t buf_cacheline_size = 0;
  std::vector<uint32_t> buf_cache_sizes;

  uint32_t img_cacheline_size = 0;
  std::vector<uint32_t> img_cache_sizes;
  double img_bandwidth = 0.0;
};
|
||||
class Environment {
|
||||
cl::Device dev_;
|
||||
cl::Context ctxt_;
|
||||
cl::CommandQueue cmd_queue_;
|
||||
std::set<std::string> aspects_started_;
|
||||
std::string cur_aspect_;
|
||||
std::unique_ptr<table::Table> cur_table_;
|
||||
std::string cfg_path_;
|
||||
std::string report_path_;
|
||||
json::JsonValue cfg_;
|
||||
json::JsonValue report_;
|
||||
|
||||
public:
|
||||
const DeviceReport dev_report;
|
||||
ProfiledReport my_report;
|
||||
|
||||
Environment(
|
||||
uint32_t idev,
|
||||
const char* cfg_path = "ArchProbe.json",
|
||||
const char* report_path = "ArchProbeReport.json");
|
||||
~Environment();
|
||||
|
||||
|
||||
void report_started(const std::string& aspect_name);
|
||||
// Returns false if there is no existing report about the aspect to be started
|
||||
// or such report is not yet marked with '"Done": true'. It means that when
|
||||
// this method returns true, the aspect can return right a way.
|
||||
bool report_started_lazy(const std::string& aspect_name);
|
||||
void report_ready(bool done = false);
|
||||
void check_dep(const std::string& aspect_name);
|
||||
|
||||
template<typename ... TArgs>
|
||||
void init_table(TArgs&& ... args) {
|
||||
archprobe::assert(!cur_aspect_.empty(),
|
||||
"table can only be initialized in scope of a report");
|
||||
log::info("initialized table for aspect '", cur_aspect_, "'");
|
||||
cur_table_ = std::make_unique<table::Table>(args ...);
|
||||
}
|
||||
table::Table& table();
|
||||
|
||||
inline json::JsonValue& get_aspect_cfg(const std::string& aspect) {
|
||||
auto it = cfg_.obj.find(aspect);
|
||||
if (it == cfg_.obj.end() ||!it->second.is_obj()) {
|
||||
log::warn("aspect configuration ('", cur_aspect_, "') is invalid, "
|
||||
"a new record is created");
|
||||
cfg_.obj[aspect] = json::JsonObject {};
|
||||
}
|
||||
return cfg_.obj[aspect];
|
||||
}
|
||||
inline json::JsonValue& get_cfg() {
|
||||
return get_aspect_cfg(cur_aspect_);
|
||||
}
|
||||
template<typename T>
|
||||
inline T cfg_num(const std::string& name, T default_value) {
|
||||
auto& cfg = get_cfg();
|
||||
if (cfg.obj.find(name) == cfg_.obj.end() || !cfg.obj[name].is_num()) {
|
||||
log::warn("record entry ('", name, "') is invalid, a new record "
|
||||
"is created");
|
||||
cfg.obj[name] = json::JsonValue(default_value);
|
||||
}
|
||||
return (T)cfg[name];
|
||||
}
|
||||
|
||||
inline json::JsonValue& get_report() {
|
||||
return get_aspect_report(cur_aspect_);
|
||||
}
|
||||
inline json::JsonValue& get_aspect_report(const std::string& aspect) {
|
||||
auto it = report_.obj.find(aspect);
|
||||
if (it == report_.obj.end() || !it->second.is_obj()) {
|
||||
log::warn("aspect report ('", aspect, "') is invalid, a new record is "
|
||||
"created");
|
||||
report_.obj[aspect] = json::JsonObject {};
|
||||
}
|
||||
return report_.obj[aspect];
|
||||
}
|
||||
template<typename T>
|
||||
inline bool try_get_report(const std::string& name, T& out) {
|
||||
return try_get_aspect_report(cur_aspect_, name, out);
|
||||
}
|
||||
template<typename T>
|
||||
inline bool try_get_aspect_report(
|
||||
const std::string& aspect,
|
||||
const std::string& name,
|
||||
T& out
|
||||
) {
|
||||
const auto& report = get_aspect_report(aspect);
|
||||
auto it = report.obj.find(name);
|
||||
if (it == report.obj.end()) {
|
||||
return false;
|
||||
} else {
|
||||
out = (T)it->second;
|
||||
log::info("already know that '", name, "' from aspect '", aspect, "' is ",
|
||||
out);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
template<typename T>
|
||||
inline T must_get_aspect_report(
|
||||
const std::string& aspect,
|
||||
const std::string& name
|
||||
) {
|
||||
T out;
|
||||
archprobe::assert(try_get_aspect_report(aspect, name, out),
|
||||
"cannot get report '", name, "' from aspect '", aspect, "'");
|
||||
return out;
|
||||
}
|
||||
template<typename T>
|
||||
inline void report_value(const std::string& name, T value) {
|
||||
auto& report = get_report();
|
||||
log::info("reported '", name, "' = '", value, "'");
|
||||
report.obj[name] = json::JsonValue(value);
|
||||
}
|
||||
|
||||
inline void clear_aspect_report(const std::string& aspect) {
|
||||
if (!aspect.empty()) {
|
||||
get_aspect_report(aspect) = json::JsonObject {};
|
||||
log::info("cleared report of aspect '", aspect, "'");
|
||||
}
|
||||
}
|
||||
|
||||
void ensure_min_niter(
|
||||
double min_time_us,
|
||||
uint32_t& niter,
|
||||
std::function<double()> run);
|
||||
|
||||
inline cl::Program create_program(
|
||||
const std::string& src,
|
||||
const std::string& build_opts
|
||||
) const {
|
||||
return archprobe::create_program(dev_, ctxt_, src.c_str(),
|
||||
build_opts.c_str());
|
||||
}
|
||||
inline cl::Program create_program(
|
||||
const std::vector<uint8_t>& src,
|
||||
const std::string& build_opts
|
||||
) const {
|
||||
return archprobe::create_program(dev_, ctxt_, (const char*)src.data(),
|
||||
build_opts.c_str());
|
||||
}
|
||||
inline cl::Kernel create_kernel(
|
||||
const cl::Program& program,
|
||||
const std::string& kernel_name
|
||||
) const {
|
||||
return archprobe::create_kernel(program, kernel_name);
|
||||
}
|
||||
|
||||
inline cl::Image2D create_img_2d(
|
||||
cl_mem_flags mem_flags,
|
||||
cl::ImageFormat img_fmt,
|
||||
uint32_t width,
|
||||
uint32_t height
|
||||
) const {
|
||||
return archprobe::create_img_2d(ctxt_, mem_flags, img_fmt, width, height);
|
||||
}
|
||||
inline cl::Image1D create_img_1d(
|
||||
cl_mem_flags mem_flags,
|
||||
cl::ImageFormat img_fmt,
|
||||
uint32_t width
|
||||
) const {
|
||||
return archprobe::create_img_1d(ctxt_, mem_flags, img_fmt, width);
|
||||
}
|
||||
inline cl::Buffer create_buf(
|
||||
cl_mem_flags mem_flags,
|
||||
size_t size
|
||||
) const {
|
||||
return archprobe::create_buf(ctxt_, mem_flags, size);
|
||||
}
|
||||
|
||||
inline archprobe::MapBuffer map_buf(const cl::Buffer& buf) const {
|
||||
return archprobe::map_buf(cmd_queue_, buf);
|
||||
}
|
||||
inline void unmap_buf(
|
||||
const cl::Buffer& buf,
|
||||
archprobe::MapBuffer& mapped
|
||||
) const {
|
||||
archprobe::unmap_buf(cmd_queue_, buf, mapped);
|
||||
}
|
||||
|
||||
inline archprobe::MapImage map_img_1d(
|
||||
const cl::Image1D& img
|
||||
) const {
|
||||
return archprobe::map_img_1d(cmd_queue_, img);
|
||||
}
|
||||
inline void unmap_img_1d(
|
||||
const cl::Image1D& img,
|
||||
archprobe::MapImage& mapped
|
||||
) const {
|
||||
archprobe::unmap_img_1d(cmd_queue_, img, mapped);
|
||||
}
|
||||
|
||||
inline archprobe::MapImage map_img_2d(const cl::Image2D& img) const {
|
||||
return archprobe::map_img_2d(cmd_queue_, img);
|
||||
}
|
||||
inline void unmap_img_2d(
|
||||
const cl::Image2D& img,
|
||||
archprobe::MapImage& mapped
|
||||
) const {
|
||||
archprobe::unmap_img_2d(cmd_queue_, img, mapped);
|
||||
}
|
||||
|
||||
// Returns kernel time in microseconds (us).
|
||||
inline double bench_kernel(
|
||||
const cl::Kernel& kernel,
|
||||
cl::NDRange local,
|
||||
cl::NDRange global,
|
||||
uint32_t niter
|
||||
) const {
|
||||
return archprobe::bench_kernel(cmd_queue_, kernel, local, global, niter);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
} // namespace archprobe
|
|
@ -0,0 +1 @@
|
|||
{"BufferCacheHierarchyPChase":{"Compensate":0.01,"DataSizeMax":8.38861e+06,"Threshold":10},"BufferCachelineSize":{"Compensate":0.01,"Threshold":10},"BufferVecWidth":{"Compensate":0.01,"Threshold":10},"Gflops":{"Compensate":0.01,"Threshold":10},"ImageCacheHierarchyPChase":{"Compensate":0.01,"DataSizeMax":262144,"Threshold":10},"ImageCachelineSize":{"Compensate":0.01,"Threshold":10},"RegCount":{"Compensate":0.01,"NGrpMax":64,"NGrpMin":1,"NGrpStep":1,"NRegMax":512,"NRegMin":1,"NRegStep":1,"Threshold":10},"WarpSizeMethodB":{"Compensate":0.01,"Threshold":10}}
|
|
@ -0,0 +1 @@
|
|||
{"BufferBandwidth":{"Done":true,"MaxBandwidth":96.3256,"MinBandwidth":30.0554},"BufferCacheHierarchyPChase":{"CacheVectorCountLv1":125744,"CacheVectorCountLv2":132608,"CacheVectorCountLv3":133728,"CacheVectorCountLv4":136080,"Done":true},"BufferCachelineSize":{"BufTopLevelCachelineSize":64,"Done":true},"BufferVecWidth":{"BufferVecSize":4,"Done":true},"Device":{"CacheSize":131072,"CachelineSize":64,"Done":true,"LogicThreadCount":1024,"MaxBufferSize":2.87688e+09,"MaxImageHeight":16384,"MaxImageWidth":16384,"PageSize_QCOM":4096,"SmCount":2},"Gflops":{"Done":true,"FloatArch":"SISD","FloatGflops":889.891,"FloatVecComponentCount":1,"HalfArch":"SISD","HalfGflops":890.087,"HalfVecComponentCount":1},"ImageBandwidth":{"Done":true,"MaxBandwidth":194.55,"MinBandwidth":68.174},"ImageCacheHierarchyPChase":{"CachePixelCountLv1":1024,"CachePixelCountLv2":139504,"Done":true},"ImageCachelineSize":{"Done":true,"ImgCachelineDim":"X","ImgCachelineSize":32,"ImgMinTimeConcurThreadCountX":64,"ImgMinTimeConcurThreadCountY":32},"RegCount":{"Done":true,"FullRegConcurWorkgroupCount":12,"HalfRegConcurWorkgroupCount":24,"RegCount":183,"RegType":"Pooled"},"WarpSizeMethodA":{"Done":true,"WarpThreadCount":128},"WarpSizeMethodB":{"Done":true,"WarpThreadCount":64}}
|
|
@ -0,0 +1,24 @@
|
|||
range (byte),t (us),bandwidth (gbps)
|
||||
16,14899.6,72.0652
|
||||
32,14884.5,72.1383
|
||||
64,28961.9,37.0743
|
||||
128,29880.1,35.9351
|
||||
256,16474,65.178
|
||||
512,15259.5,70.3654
|
||||
1024,18114.6,59.2751
|
||||
2048,11147,96.3256
|
||||
4096,11239,95.5368
|
||||
8192,11151,96.2913
|
||||
16384,11192.1,95.9378
|
||||
32768,11210.4,95.7811
|
||||
65536,11233.9,95.5803
|
||||
131072,12360.1,86.8719
|
||||
262144,18612,57.6909
|
||||
524288,26379.5,40.7036
|
||||
1.04858e+06,30822.9,34.8358
|
||||
2.09715e+06,33865.6,31.706
|
||||
4.1943e+06,27676.7,38.7959
|
||||
8.38861e+06,35214.5,30.4915
|
||||
1.67772e+07,35725.4,30.0554
|
||||
3.35544e+07,35447.9,30.2907
|
||||
6.71089e+07,35374,30.354
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,20 @@
|
|||
nthread,stride (byte),pitch (byte),niter,t (us)
|
||||
1024,4,256,100,248.576
|
||||
1024,4,256,402,980.992
|
||||
1024,4,256,409,998.016
|
||||
1024,4,256,409,997.12
|
||||
1024,8,256,409,997.504
|
||||
1024,12,256,409,995.968
|
||||
1024,16,256,409,992
|
||||
1024,20,256,409,993.024
|
||||
1024,24,256,409,993.536
|
||||
1024,28,256,409,992.512
|
||||
1024,32,256,409,994.048
|
||||
1024,36,256,409,993.536
|
||||
1024,40,256,409,995.84
|
||||
1024,44,256,409,994.432
|
||||
1024,48,256,409,994.56
|
||||
1024,52,256,409,993.536
|
||||
1024,56,256,409,994.944
|
||||
1024,60,256,409,992.384
|
||||
1024,64,256,409,1471.49
|
|
|
@ -0,0 +1,9 @@
|
|||
size (byte),niter,t (us)
|
||||
4,100,28.16
|
||||
4,3551,864
|
||||
4,4109,984.576
|
||||
4,4173,1067.9
|
||||
4,4173,1067.52
|
||||
8,4173,1029.5
|
||||
16,4173,1041.92
|
||||
32,4173,2793.98
|
|
|
@ -0,0 +1,13 @@
|
|||
float width (bit),ncomp,niter,t (us)
|
||||
16,16,100,15079
|
||||
16,1,100,15198
|
||||
16,2,100,15127
|
||||
16,4,100,15089
|
||||
16,8,100,15073
|
||||
16,16,100,15079.2
|
||||
32,16,100,15083
|
||||
32,1,100,15200
|
||||
32,2,100,15139.1
|
||||
32,4,100,15078.1
|
||||
32,8,100,15076.5
|
||||
32,16,100,15082.5
|
|
|
@ -0,0 +1,24 @@
|
|||
range (byte),t (us),bandwidth (gbps)
|
||||
16,5519.87,194.523
|
||||
32,5520.9,194.487
|
||||
64,5520.64,194.496
|
||||
128,5519.1,194.55
|
||||
256,5521.02,194.482
|
||||
512,5520.13,194.514
|
||||
1024,5520.9,194.487
|
||||
2048,10828,99.1632
|
||||
4096,10764.5,99.748
|
||||
8192,10718.6,100.176
|
||||
16384,10770.9,99.6887
|
||||
32768,10817,99.2641
|
||||
65536,10833.9,99.1093
|
||||
131072,12219,87.8747
|
||||
262144,15750,68.174
|
||||
524288,7897.09,135.967
|
||||
1.04858e+06,6376.58,168.388
|
||||
2.09715e+06,5889.54,182.313
|
||||
4.1943e+06,5684.99,188.873
|
||||
8.38861e+06,5598.98,191.775
|
||||
1.67772e+07,5561.09,193.081
|
||||
3.35544e+07,5539.07,193.849
|
||||
6.71089e+07,5529.47,194.185
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,105 @@
|
|||
nthread,dim (x/y),niter,t (us)
|
||||
1,0,100,15.872
|
||||
1,0,6300,790.016
|
||||
1,0,7974,998.912
|
||||
1,0,7974,998.912
|
||||
2,0,7974,998.912
|
||||
3,0,7974,998.912
|
||||
4,0,7974,998.912
|
||||
5,0,7974,998.912
|
||||
6,0,7974,998.912
|
||||
7,0,7974,998.912
|
||||
8,0,7974,998.912
|
||||
9,0,7974,1026.05
|
||||
10,0,7974,1026.05
|
||||
11,0,7974,1026.05
|
||||
12,0,7974,1026.05
|
||||
13,0,7974,1026.05
|
||||
14,0,7974,1026.05
|
||||
15,0,7974,1026.05
|
||||
16,0,7974,1026.05
|
||||
17,0,7974,1053.18
|
||||
18,0,7974,1052.93
|
||||
19,0,7974,1052.93
|
||||
20,0,7974,1052.93
|
||||
21,0,7974,1052.93
|
||||
22,0,7974,1053.95
|
||||
23,0,7974,1053.18
|
||||
24,0,7974,1053.95
|
||||
25,0,7974,1080.96
|
||||
26,0,7974,1081.09
|
||||
27,0,7974,1081.09
|
||||
28,0,7974,1080.83
|
||||
29,0,7974,1080.83
|
||||
30,0,7974,1080.83
|
||||
31,0,7974,1081.09
|
||||
32,0,7974,1080.96
|
||||
33,0,7974,1107.97
|
||||
34,0,7974,1107.97
|
||||
35,0,7974,1107.97
|
||||
36,0,7974,1107.97
|
||||
37,0,7974,1107.97
|
||||
38,0,7974,1107.97
|
||||
39,0,7974,1107.97
|
||||
40,0,7974,1107.97
|
||||
41,0,7974,1135.1
|
||||
42,0,7974,1135.1
|
||||
43,0,7974,1135.1
|
||||
44,0,7974,1135.1
|
||||
45,0,7974,1135.1
|
||||
46,0,7974,1135.1
|
||||
47,0,7974,1135.1
|
||||
48,0,7974,1135.1
|
||||
49,0,7974,1163.01
|
||||
50,0,7974,1163.01
|
||||
51,0,7974,1163.01
|
||||
52,0,7974,1163.01
|
||||
53,0,7974,1163.01
|
||||
54,0,7974,1162.5
|
||||
55,0,7974,1163.01
|
||||
56,0,7974,1163.01
|
||||
57,0,7974,1189.89
|
||||
58,0,7974,1189.89
|
||||
59,0,7974,1189.89
|
||||
60,0,7974,1189.89
|
||||
61,0,7974,1189.89
|
||||
62,0,7974,1189.89
|
||||
63,0,7974,1189.89
|
||||
64,0,7974,1189.89
|
||||
65,0,7974,1693.95
|
||||
1,1,100,16.128
|
||||
1,1,6200,798.976
|
||||
1,1,7759,998.912
|
||||
1,1,7759,998.912
|
||||
2,1,7759,998.912
|
||||
3,1,7759,998.912
|
||||
4,1,7759,998.912
|
||||
5,1,7759,998.912
|
||||
6,1,7759,998.912
|
||||
7,1,7759,998.912
|
||||
8,1,7759,998.912
|
||||
9,1,7759,1025.02
|
||||
10,1,7759,1025.02
|
||||
11,1,7759,1025.02
|
||||
12,1,7759,1025.02
|
||||
13,1,7759,1025.02
|
||||
14,1,7759,1025.02
|
||||
15,1,7759,1025.02
|
||||
16,1,7759,1025.02
|
||||
17,1,7759,1051.9
|
||||
18,1,7759,1051.9
|
||||
19,1,7759,1051.9
|
||||
20,1,7759,1051.9
|
||||
21,1,7759,1051.9
|
||||
22,1,7759,1051.9
|
||||
23,1,7759,1051.9
|
||||
24,1,7759,1051.9
|
||||
25,1,7759,1078.02
|
||||
26,1,7759,1078.02
|
||||
27,1,7759,1078.02
|
||||
28,1,7759,1078.02
|
||||
29,1,7759,1078.02
|
||||
30,1,7759,1078.02
|
||||
31,1,7759,1078.02
|
||||
32,1,7759,1078.02
|
||||
33,1,7759,1569.02
|
|
|
@ -0,0 +1,226 @@
|
|||
nthread,ngrp,nreg,niter,t (us)
|
||||
1,1,1,100,7.936
|
||||
1,1,1,12600,584.96
|
||||
1,1,1,21539,997.12
|
||||
1,1,1,21539,998.016
|
||||
1,1,2,21539,1071.1
|
||||
1,1,3,21539,1107.97
|
||||
1,1,4,21539,1181.18
|
||||
1,1,5,21539,1255.94
|
||||
1,1,6,21539,1402.88
|
||||
1,1,7,21539,1660.54
|
||||
1,1,8,21539,1698.05
|
||||
1,1,9,21539,2029.06
|
||||
1,1,10,21539,1992.06
|
||||
1,1,11,21539,2249.98
|
||||
1,1,12,21539,2359.94
|
||||
1,1,13,21539,2618.11
|
||||
1,1,14,21539,2582.02
|
||||
1,1,15,21539,2839.04
|
||||
1,1,16,21539,2950.14
|
||||
1,1,17,21539,3207.17
|
||||
1,1,18,21539,3170.94
|
||||
1,1,19,21539,3428.1
|
||||
1,1,20,21539,3538.94
|
||||
1,1,21,21539,3796.99
|
||||
1,1,22,21539,3760.13
|
||||
1,1,23,21539,4018.05
|
||||
1,1,24,21539,4128.51
|
||||
1,1,25,21539,4386.05
|
||||
1,1,26,21539,4349.95
|
||||
1,1,27,21539,4607.1
|
||||
1,1,28,21539,4717.95
|
||||
1,1,29,21539,4975.49
|
||||
1,1,30,21539,4938.5
|
||||
1,1,31,21539,5196.03
|
||||
1,1,32,21539,5307.01
|
||||
1,1,33,21539,5566.59
|
||||
1,1,34,21539,5529.09
|
||||
1,1,35,21539,5787.01
|
||||
1,1,36,21539,5897.98
|
||||
1,1,37,21539,6155.52
|
||||
1,1,38,21539,6119.17
|
||||
1,1,39,21539,6376.58
|
||||
1,1,40,21539,6487.04
|
||||
1,1,41,21539,6745.47
|
||||
1,1,42,21539,6708.99
|
||||
1,1,43,21539,6966.02
|
||||
1,1,44,21539,7076.99
|
||||
1,1,45,21539,7334.91
|
||||
1,1,46,21539,7298.05
|
||||
1,1,47,21539,7556.1
|
||||
1,1,48,21539,7666.43
|
||||
1,1,49,21539,7924.48
|
||||
1,1,50,21539,7887.49
|
||||
1,1,51,21539,8145.54
|
||||
1,1,52,21539,8256
|
||||
1,1,53,21539,8514.05
|
||||
1,1,54,21539,8477.18
|
||||
1,1,55,21539,8734.98
|
||||
1,1,56,21539,8845.95
|
||||
1,1,57,21539,9103.87
|
||||
1,1,58,21539,9067.01
|
||||
1,1,59,21539,9324.03
|
||||
1,1,60,21539,9435.14
|
||||
1,1,61,21539,9693.06
|
||||
1,1,62,21539,9656.06
|
||||
1,1,63,21539,9914.11
|
||||
1,1,64,21539,10025
|
||||
1,1,65,21539,10283
|
||||
1,1,66,21539,10245.9
|
||||
1,1,67,21539,10502.9
|
||||
1,1,68,21539,10614.9
|
||||
1,1,69,21539,10872.1
|
||||
1,1,70,21539,10835.6
|
||||
1,1,71,21539,11093
|
||||
1,1,72,21539,11203.8
|
||||
1,1,73,21539,11461.5
|
||||
1,1,74,21539,11424.9
|
||||
1,1,75,21539,11682.6
|
||||
1,1,76,21539,11793.5
|
||||
1,1,77,21539,12050.9
|
||||
1,1,78,21539,12014.1
|
||||
1,1,79,21539,12271.9
|
||||
1,1,80,21539,12383
|
||||
1,1,81,21539,12640
|
||||
1,1,82,21539,12604
|
||||
1,1,83,21539,12861.4
|
||||
1,1,84,21539,12972
|
||||
1,1,85,21539,13230.1
|
||||
1,1,86,21539,13193.6
|
||||
1,1,87,21539,13451
|
||||
1,1,88,21539,13561.9
|
||||
1,1,89,21539,13819.9
|
||||
1,1,90,21539,13783
|
||||
1,1,91,21539,14041
|
||||
1,1,92,21539,14151.9
|
||||
1,1,93,21539,14409
|
||||
1,1,94,21539,14372.1
|
||||
1,1,95,21539,14630
|
||||
1,1,96,21539,14741
|
||||
1,1,97,21539,14998.5
|
||||
1,1,98,21539,14961.9
|
||||
1,1,99,21539,15220
|
||||
1,1,100,21539,15330
|
||||
1,1,101,21539,15588.1
|
||||
1,1,102,21539,15551
|
||||
1,1,103,21539,15809
|
||||
1,1,104,21539,15920
|
||||
1,1,105,21539,16177.9
|
||||
1,1,106,21539,16141.1
|
||||
1,1,107,21539,16399.1
|
||||
1,1,108,21539,16509.6
|
||||
1,1,109,21539,16767
|
||||
1,1,110,21539,16730.6
|
||||
1,1,111,21539,16988.5
|
||||
1,1,112,21539,17099
|
||||
1,1,113,21539,17356.4
|
||||
1,1,114,21539,17319.9
|
||||
1,1,115,21539,17578
|
||||
1,1,116,21539,17688.1
|
||||
1,1,117,21539,17946.1
|
||||
1,1,118,21539,17909
|
||||
1,1,119,21539,18167
|
||||
1,1,120,21539,18277.9
|
||||
1,1,121,21539,18535.9
|
||||
1,1,122,21539,18499.1
|
||||
1,1,123,21539,18757
|
||||
1,1,124,21539,18867.5
|
||||
1,1,125,21539,19125
|
||||
1,1,126,21539,19089.2
|
||||
1,1,127,21539,19346.2
|
||||
1,1,128,21539,19457
|
||||
1,1,129,21539,19715.1
|
||||
1,1,130,21539,19678.1
|
||||
1,1,131,21539,19936
|
||||
1,1,132,21539,20046.8
|
||||
1,1,133,21539,20304.1
|
||||
1,1,134,21539,20267.5
|
||||
1,1,135,21539,20525.1
|
||||
1,1,136,21539,20636
|
||||
1,1,137,21539,20894
|
||||
1,1,138,21539,20857.1
|
||||
1,1,139,21539,21115
|
||||
1,1,140,21539,21226
|
||||
1,1,141,21539,21483
|
||||
1,1,142,21539,21446.5
|
||||
1,1,143,21539,21704.2
|
||||
1,1,144,21539,21814.9
|
||||
1,1,145,21539,22073
|
||||
1,1,146,21539,22035.1
|
||||
1,1,147,21539,22293
|
||||
1,1,148,21539,22404.1
|
||||
1,1,149,21539,22662.1
|
||||
1,1,150,21539,22625
|
||||
1,1,151,21539,22883.1
|
||||
1,1,152,21539,22994.2
|
||||
1,1,153,21539,23252
|
||||
1,1,154,21539,23215.1
|
||||
1,1,155,21539,23473
|
||||
1,1,156,21539,23583
|
||||
1,1,157,21539,23841
|
||||
1,1,158,21539,23804.2
|
||||
1,1,159,21539,24062.5
|
||||
1,1,160,21539,24173.1
|
||||
1,1,161,21539,24430.8
|
||||
1,1,162,21539,24394.2
|
||||
1,1,163,21539,24651
|
||||
1,1,164,21539,24762
|
||||
1,1,165,21539,25019.9
|
||||
1,1,166,21539,24983
|
||||
1,1,167,21539,25241.5
|
||||
1,1,168,21539,25352.1
|
||||
1,1,169,21539,25609.6
|
||||
1,1,170,21539,25573
|
||||
1,1,171,21539,25830.9
|
||||
1,1,172,21539,25941
|
||||
1,1,173,21539,26199
|
||||
1,1,174,21539,26162.9
|
||||
1,1,175,21539,26420.6
|
||||
1,1,176,21539,26530.9
|
||||
1,1,177,21539,26788.9
|
||||
1,1,178,21539,26751.5
|
||||
1,1,179,21539,26899.1
|
||||
1,1,180,21539,27047.2
|
||||
1,1,181,21539,28998.5
|
||||
1,1,182,21539,29109
|
||||
1,1,183,21539,32718
|
||||
1,1,184,21539,44463.1
|
||||
1,1,183,21539,32717.6
|
||||
1,2,183,21539,32717.4
|
||||
1,3,183,21539,32737.4
|
||||
1,4,183,21539,32774.9
|
||||
1,5,183,21539,32777
|
||||
1,6,183,21539,32814.8
|
||||
1,7,183,21539,32804
|
||||
1,8,183,21539,32779.4
|
||||
1,9,183,21539,32785.9
|
||||
1,10,183,21539,32782.1
|
||||
1,11,183,21539,32785.5
|
||||
1,12,183,21539,32786.9
|
||||
1,13,183,21539,65516
|
||||
1,1,91,21539,14040.4
|
||||
1,2,91,21539,14041.1
|
||||
1,3,91,21539,14041
|
||||
1,4,91,21539,14040.8
|
||||
1,5,91,21539,14041.1
|
||||
1,6,91,21539,14041.1
|
||||
1,7,91,21539,14187
|
||||
1,8,91,21539,14187
|
||||
1,9,91,21539,14187
|
||||
1,10,91,21539,14187
|
||||
1,11,91,21539,14187
|
||||
1,12,91,21539,14187
|
||||
1,13,91,21539,14335
|
||||
1,14,91,21539,14334.6
|
||||
1,15,91,21539,14432.5
|
||||
1,16,91,21539,14580
|
||||
1,17,91,21539,14580
|
||||
1,18,91,21539,14579.6
|
||||
1,19,91,21539,15253
|
||||
1,20,91,21539,15253
|
||||
1,21,91,21539,15291.9
|
||||
1,22,91,21539,15292
|
||||
1,23,91,21539,15474.9
|
||||
1,24,91,21539,15473.9
|
||||
1,25,91,21539,28095
|
|
|
@ -0,0 +1,130 @@
|
|||
nthread,nascend
|
||||
1,1
|
||||
2,2
|
||||
3,3
|
||||
4,4
|
||||
5,5
|
||||
6,6
|
||||
7,7
|
||||
8,8
|
||||
9,9
|
||||
10,10
|
||||
11,11
|
||||
12,12
|
||||
13,13
|
||||
14,14
|
||||
15,15
|
||||
16,16
|
||||
17,17
|
||||
18,18
|
||||
19,19
|
||||
20,20
|
||||
21,21
|
||||
22,22
|
||||
23,23
|
||||
24,24
|
||||
25,25
|
||||
26,26
|
||||
27,27
|
||||
28,28
|
||||
29,29
|
||||
30,30
|
||||
31,31
|
||||
32,32
|
||||
33,33
|
||||
34,34
|
||||
35,35
|
||||
36,36
|
||||
37,37
|
||||
38,38
|
||||
39,39
|
||||
40,40
|
||||
41,41
|
||||
42,42
|
||||
43,43
|
||||
44,44
|
||||
45,45
|
||||
46,46
|
||||
47,47
|
||||
48,48
|
||||
49,49
|
||||
50,50
|
||||
51,51
|
||||
52,52
|
||||
53,53
|
||||
54,54
|
||||
55,55
|
||||
56,56
|
||||
57,57
|
||||
58,58
|
||||
59,59
|
||||
60,60
|
||||
61,61
|
||||
62,62
|
||||
63,63
|
||||
64,64
|
||||
65,65
|
||||
66,66
|
||||
67,67
|
||||
68,68
|
||||
69,69
|
||||
70,70
|
||||
71,71
|
||||
72,72
|
||||
73,73
|
||||
74,74
|
||||
75,75
|
||||
76,76
|
||||
77,77
|
||||
78,78
|
||||
79,79
|
||||
80,80
|
||||
81,81
|
||||
82,82
|
||||
83,83
|
||||
84,84
|
||||
85,85
|
||||
86,86
|
||||
87,87
|
||||
88,88
|
||||
89,89
|
||||
90,90
|
||||
91,91
|
||||
92,92
|
||||
93,93
|
||||
94,94
|
||||
95,95
|
||||
96,96
|
||||
97,97
|
||||
98,98
|
||||
99,99
|
||||
100,100
|
||||
101,101
|
||||
102,102
|
||||
103,103
|
||||
104,104
|
||||
105,105
|
||||
106,106
|
||||
107,107
|
||||
108,108
|
||||
109,109
|
||||
110,110
|
||||
111,111
|
||||
112,112
|
||||
113,113
|
||||
114,114
|
||||
115,115
|
||||
116,116
|
||||
117,117
|
||||
118,118
|
||||
119,119
|
||||
120,120
|
||||
121,121
|
||||
122,122
|
||||
123,123
|
||||
124,124
|
||||
125,125
|
||||
126,126
|
||||
127,127
|
||||
128,128
|
||||
129,128
|
|
|
@ -0,0 +1,67 @@
|
|||
nthread,time (us)
|
||||
1,1122.94
|
||||
1,1123.07
|
||||
2,1123.07
|
||||
3,1122.56
|
||||
4,1123.07
|
||||
5,1122.94
|
||||
6,1122.94
|
||||
7,1122.82
|
||||
8,1123.07
|
||||
9,1123.07
|
||||
10,1122.94
|
||||
11,1122.94
|
||||
12,1122.05
|
||||
13,1122.94
|
||||
14,1122.56
|
||||
15,1122.05
|
||||
16,1122.43
|
||||
17,1122.56
|
||||
18,1122.05
|
||||
19,1122.82
|
||||
20,1122.05
|
||||
21,1122.43
|
||||
22,1122.94
|
||||
23,1122.05
|
||||
24,1122.05
|
||||
25,1123.07
|
||||
26,1122.82
|
||||
27,1122.43
|
||||
28,1123.07
|
||||
29,1123.07
|
||||
30,1123.07
|
||||
31,1123.07
|
||||
32,1123.07
|
||||
33,1123.07
|
||||
34,1123.07
|
||||
35,1122.82
|
||||
36,1123.07
|
||||
37,1122.94
|
||||
38,1123.07
|
||||
39,1123.07
|
||||
40,1123.07
|
||||
41,1123.07
|
||||
42,1123.07
|
||||
43,1123.07
|
||||
44,1123.07
|
||||
45,1123.07
|
||||
46,1123.07
|
||||
47,1123.07
|
||||
48,1123.07
|
||||
49,1123.07
|
||||
50,1123.07
|
||||
51,1123.84
|
||||
52,1123.46
|
||||
53,1123.84
|
||||
54,1123.46
|
||||
55,1123.07
|
||||
56,1123.07
|
||||
57,1123.07
|
||||
58,1123.46
|
||||
59,1123.97
|
||||
60,1123.97
|
||||
61,1123.07
|
||||
62,1123.07
|
||||
63,1123.97
|
||||
64,1124.1
|
||||
65,1265.92
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,185 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
// NOTE: This is a modified excerpt of
|
||||
// https://github.com/PENGUINLIONG/graphi-t/blob/da31ec530df07c9899e056eeced08a64062dcfce/include/args.hpp;
|
||||
// MIT-licensed by Rendong Liang.
|
||||
|
||||
// Argument parsing utilities.
|
||||
// @PENGUINLIONG
|
||||
#pragma once
|
||||
#include <cerrno>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <string>
|
||||
|
||||
namespace archprobe {
|
||||
|
||||
namespace args {
|
||||
|
||||
// Type-erased description of how one registered argument is parsed. Every
// member has a default so that a default-constructed config never carries
// indeterminate pointers.
struct ArgumentParseConfig {
  // Expected number of argument segments.
  uint32_t narg = 0;
  // Returns true if the parsing is successful.
  bool (*parser)(const char*[], void*) = nullptr;
  // Returns the literal of default value.
  std::string (*lit)(const void*) = nullptr;
  // Destination to be written with parsed value.
  void* dst = nullptr;
};
|
||||
|
||||
// Optionally initialize argument parser with application name and usage
|
||||
// description.
|
||||
extern void init_arg_parse(const char* app_name, const char* desc);
|
||||
// Get the name of this app set by the user. Empty string is returned if this
|
||||
// function is called before `init_arg_parse`.
|
||||
extern const char* get_app_name();
|
||||
// Print help message to the standard output.
|
||||
extern void print_help();
|
||||
// Erase the type of argument parser and bind the type-erased parser to the
|
||||
// value destination. User code MUST ensure the `dst` buffer can contain the
|
||||
// parsing result.
|
||||
template<typename TTypedParser>
|
||||
ArgumentParseConfig make_parse_cfg(void* dst) {
|
||||
ArgumentParseConfig parse_cfg;
|
||||
parse_cfg.narg = TTypedParser::narg;
|
||||
parse_cfg.dst = dst;
|
||||
parse_cfg.parser = &TTypedParser::parse;
|
||||
parse_cfg.lit = &TTypedParser::lit;
|
||||
return parse_cfg;
|
||||
}
|
||||
// Register customized argument parsing.
|
||||
extern void reg_arg(
|
||||
const char* short_flag,
|
||||
const char* long_flag,
|
||||
const ArgumentParseConfig& parse_cfg,
|
||||
const char* help
|
||||
);
|
||||
// Register a structural argument parsing.
|
||||
template<typename TTypedParser>
|
||||
inline void reg_arg(
|
||||
const char* short_flag,
|
||||
const char* long_flag,
|
||||
typename TTypedParser::arg_ty& dst,
|
||||
const char* help
|
||||
) {
|
||||
reg_arg(short_flag, long_flag, make_parse_cfg<TTypedParser>(&dst), help);
|
||||
}
|
||||
// Parse arguments. Arguments will be matched against argument parsers
|
||||
// registered before.
|
||||
extern void parse_args(int argc, const char** argv);
|
||||
|
||||
|
||||
//
|
||||
// Parsers.
|
||||
//
|
||||
|
||||
|
||||
// Primary template: the fallback for types without a dedicated parser. Its
// `parse` always fails; specialize the template to support a new type.
template<typename T>
struct TypedArgumentParser {
  typedef struct {} arg_ty;
  // Number of argument entries needed for this argument.
  static const uint32_t narg = UINT32_MAX;
  // Parser function. Convert the literal in the first parameter into structured
  // native representation. Return `true` on success.
  static bool parse(const char* lit[], void* dst) {
    return false;
  }
  // Returns the literal of the value stored at `src`.
  static std::string lit(const void* src) {
    return {};
  }
};

// String arguments are stored verbatim.
template<>
struct TypedArgumentParser<std::string> {
  typedef std::string arg_ty;
  static const uint32_t narg = 1;
  static bool parse(const char* lit[], void* dst) {
    *static_cast<arg_ty*>(dst) = lit[0];
    return true;
  }
  static std::string lit(const void* src) {
    return *static_cast<const arg_ty*>(src);
  }
};

// Signed decimal integers. Parsing fails (returns `false`, `dst` untouched) if
// the literal is empty, has trailing characters, or does not fit in `arg_ty`.
template<>
struct TypedArgumentParser<int32_t> {
  typedef int arg_ty;
  static const uint32_t narg = 1;
  static bool parse(const char* lit[], void* dst) {
    const char* text = lit[0];
    char* end = nullptr;
    errno = 0;
    const long long value = std::strtoll(text, &end, 10);
    // `end == text` means no digits were consumed; `*end` catches "12abc".
    if (end == text || *end != '\0' || errno == ERANGE) { return false; }
    if (value < std::numeric_limits<arg_ty>::min() ||
        value > std::numeric_limits<arg_ty>::max()) {
      return false;
    }
    *static_cast<arg_ty*>(dst) = static_cast<arg_ty>(value);
    return true;
  }
  static std::string lit(const void* src) {
    return std::to_string(*static_cast<const arg_ty*>(src));
  }
};

// Unsigned decimal integers over the full `uint32_t` range. Negative literals
// are rejected instead of being wrapped around.
template<>
struct TypedArgumentParser<uint32_t> {
  typedef uint32_t arg_ty;
  static const uint32_t narg = 1;
  static bool parse(const char* lit[], void* dst) {
    const char* text = lit[0];
    // `strtoull` accepts a leading '-' and negates the result; refuse it here.
    if (std::strchr(text, '-') != nullptr) { return false; }
    char* end = nullptr;
    errno = 0;
    const unsigned long long value = std::strtoull(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE) { return false; }
    if (value > std::numeric_limits<arg_ty>::max()) { return false; }
    *static_cast<arg_ty*>(dst) = static_cast<arg_ty>(value);
    return true;
  }
  static std::string lit(const void* src) {
    return std::to_string(*static_cast<const arg_ty*>(src));
  }
};

// Floating-point numbers. Parsing fails if the literal is not entirely a
// number.
template<>
struct TypedArgumentParser<float> {
  typedef float arg_ty;
  static const uint32_t narg = 1;
  static bool parse(const char* lit[], void* dst) {
    const char* text = lit[0];
    char* end = nullptr;
    const float value = std::strtof(text, &end);
    if (end == text || *end != '\0') { return false; }
    *static_cast<arg_ty*>(dst) = value;
    return true;
  }
  static std::string lit(const void* src) {
    return std::to_string(*static_cast<const arg_ty*>(src));
  }
};

// NOTE: This is used for arguments like `-f true` and `-f false`. If you need a
// boolean argument that doesn't need to be set explicitly, use
// `SwitchArgumentParser` instead.
template<>
struct TypedArgumentParser<bool> {
  typedef bool arg_ty;
  static const uint32_t narg = 1;
  static bool parse(const char* lit[], void* dst) {
    const char* text = lit[0];
    bool value;
    if (std::strcmp(text, "true") == 0 || std::strcmp(text, "True") == 0) {
      value = true;
    } else if (std::strcmp(text, "false") == 0 ||
               std::strcmp(text, "False") == 0) {
      value = false;
    } else {
      // Anything else is not a recognized boolean literal.
      return false;
    }
    *static_cast<arg_ty*>(dst) = value;
    return true;
  }
  static std::string lit(const void* src) {
    return *static_cast<const arg_ty*>(src) ? "true" : "false";
  }
};
|
||||
// Boolean switch: takes no value entries (`narg` is 0) and stores `true` in
// the destination whenever it is parsed.
struct SwitchArgumentParser {
  using arg_ty = bool;
  static const uint32_t narg = 0;
  static bool parse(const char* lit[], void* dst) {
    arg_ty* const flag = static_cast<arg_ty*>(dst);
    *flag = true;
    return true;
  }
  // A switch has no default-value literal; an empty string is returned.
  static std::string lit(const void* src) {
    return std::string();
  }
};
|
||||
|
||||
|
||||
using IntParser = TypedArgumentParser<int32_t>;
|
||||
using UintParser = TypedArgumentParser<uint32_t>;
|
||||
using FloatParser = TypedArgumentParser<float>;
|
||||
using BoolParser = TypedArgumentParser<bool>;
|
||||
using StringParser = TypedArgumentParser<std::string>;
|
||||
using SwitchParser = SwitchArgumentParser;
|
||||
|
||||
} // namespace args
|
||||
|
||||
} // namespace archprobe
|
|
@ -0,0 +1,39 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
// NOTE: This is a modified excerpt of
|
||||
// https://github.com/PENGUINLIONG/graphi-t/blob/d291c3d1ce3795fe4b305e5efd76b4f586d23e3b/include/assert.hpp;
|
||||
// MIT-licensed by Rendong Liang.
|
||||
|
||||
// Assertion.
|
||||
// @PENGUINLIONG
|
||||
#pragma once
|
||||
#include "util.hpp"
|
||||
#undef assert
|
||||
|
||||
namespace archprobe {
|
||||
|
||||
// Exception thrown by the assertion helpers in this header. The constructor
// and `what()` are defined out of line.
class AssertionFailedException : public std::exception {
private:
  std::string msg;

public:
  AssertionFailedException(const std::string& msg);

  const char* what() const noexcept override;
};
|
||||
|
||||
template<typename ... TArgs>
|
||||
inline void assert(bool pred, const TArgs& ... args) {
|
||||
if (!pred) {
|
||||
throw AssertionFailedException(util::format(args ...));
|
||||
}
|
||||
}
|
||||
template<typename ... TArgs>
|
||||
inline void panic(const TArgs& ... args) {
|
||||
assert<TArgs ...>(false, args ...);
|
||||
}
|
||||
template<typename ... TArgs>
|
||||
inline void unreachable(const TArgs& ... args) {
|
||||
assert<const char*, TArgs ...>(false, "reached unreachable code: ", args ...);
|
||||
}
|
||||
|
||||
} // namespace archprobe
|
|
@ -0,0 +1,166 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
// OpenCL wrappings
|
||||
// @PENGUINLIONG
|
||||
#pragma once
|
||||
#define CL_TARGET_OPENCL_VERSION 200
|
||||
#define CL_HPP_TARGET_OPENCL_VERSION CL_TARGET_OPENCL_VERSION
|
||||
#include <CL/opencl.hpp>
|
||||
|
||||
namespace archprobe {
|
||||
|
||||
class CLException : public std::exception {
|
||||
const char* msg;
|
||||
public:
|
||||
CLException(cl_int code);
|
||||
|
||||
const char* what() const noexcept override;
|
||||
};
|
||||
struct CLAssert {
|
||||
inline const CLAssert& operator<<(cl_int code) const {
|
||||
if (code != CL_SUCCESS) { throw CLException(code); }
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
#define CL_ASSERT (::archprobe::CLAssert{})
|
||||
|
||||
struct DeviceStub {
|
||||
cl_platform_id platform_id;
|
||||
cl_device_id dev_id;
|
||||
std::string platform_exts;
|
||||
std::string dev_exts;
|
||||
std::string desc;
|
||||
};
|
||||
|
||||
extern std::vector<DeviceStub> dev_stubs;
|
||||
|
||||
void initialize();
|
||||
std::string desc_dev(uint32_t idx);
|
||||
cl::Device select_dev(uint32_t idev);
|
||||
cl::Context create_ctxt(const cl::Device& dev);
|
||||
cl::CommandQueue create_cmd_queue(const cl::Context& ctxt);
|
||||
cl::Program create_program(
|
||||
const cl::Device& dev,
|
||||
const cl::Context& ctxt,
|
||||
const char* src,
|
||||
const char* build_opts
|
||||
);
|
||||
// `std::string` convenience overload; forwards to the C-string overload.
inline cl::Program create_program(
  const cl::Device& dev,
  const cl::Context& ctxt,
  const std::string& src,
  const std::string& build_opts
) {
  const char* src_lit = src.c_str();
  const char* build_opts_lit = build_opts.c_str();
  return create_program(dev, ctxt, src_lit, build_opts_lit);
}
|
||||
cl::Kernel create_kernel(cl::Program program, const std::string& kernel_name);
|
||||
double bench_kernel(
|
||||
const cl::CommandQueue& cmd_queue,
|
||||
const cl::Kernel& kernel,
|
||||
const cl::NDRange& local_size,
|
||||
const cl::NDRange& global_size,
|
||||
uint32_t niter
|
||||
);
|
||||
|
||||
cl::Image2D create_img_2d(
|
||||
const cl::Context& ctxt,
|
||||
cl_mem_flags mem_flags,
|
||||
cl::ImageFormat img_fmt,
|
||||
uint32_t width,
|
||||
uint32_t height
|
||||
);
|
||||
// Convenience overload: builds the `cl::ImageFormat` from a channel order and
// channel type, then forwards to the overload that takes the format directly.
inline cl::Image2D create_img_2d(
  const cl::Context& ctxt,
  cl_mem_flags mem_flags,
  cl_channel_order channel_order,
  cl_channel_type channel_type,
  uint32_t width,
  uint32_t height
) {
  return create_img_2d(
    ctxt,
    mem_flags,
    cl::ImageFormat(channel_order, channel_type),
    width,
    height
  );
}
|
||||
cl::Image1D create_img_1d(
|
||||
const cl::Context& ctxt,
|
||||
cl_mem_flags mem_flags,
|
||||
cl::ImageFormat img_fmt,
|
||||
uint32_t width
|
||||
);
|
||||
// Convenience overload: builds the `cl::ImageFormat` from a channel order and
// channel type, then forwards to the overload that takes the format directly.
inline cl::Image1D create_img_1d(
  const cl::Context& ctxt,
  cl_mem_flags mem_flags,
  cl_channel_order channel_order,
  cl_channel_type channel_type,
  uint32_t width
) {
  return create_img_1d(
    ctxt,
    mem_flags,
    cl::ImageFormat(channel_order, channel_type),
    width
  );
}
|
||||
cl::Buffer create_buf(
|
||||
const cl::Context& ctxt,
|
||||
cl_mem_flags mem_flags,
|
||||
size_t size
|
||||
);
|
||||
|
||||
|
||||
|
||||
// Result of mapping an image: the mapped pointer together with the image
// extent and pitches. All members default to null/zero so that a
// default-constructed value is well defined.
struct MapImage {
  void* data = nullptr;
  size_t width = 0;
  size_t height = 0;
  size_t depth = 0;
  size_t row_pitch = 0;
  size_t slice_pitch = 0;

  // Implicitly yields the mapped pointer.
  operator void*() const {
    return data;
  }
};
|
||||
// Result of mapping a buffer: the mapped pointer and a size. Both members
// default to null/zero so that a default-constructed value is well defined.
struct MapBuffer {
  void* data = nullptr;
  size_t size = 0;

  // Implicitly yields the mapped pointer.
  operator void*() const {
    return data;
  }
};
|
||||
|
||||
MapImage map_img_2d(
|
||||
const cl::CommandQueue& cmd_queue,
|
||||
const cl::Image2D& img
|
||||
);
|
||||
void unmap_img_2d(
|
||||
const cl::CommandQueue& cmd_queue,
|
||||
const cl::Image2D& img,
|
||||
MapImage& mapped
|
||||
);
|
||||
|
||||
MapImage map_img_1d(
|
||||
const cl::CommandQueue& cmd_queue,
|
||||
const cl::Image1D& img
|
||||
);
|
||||
void unmap_img_1d(
|
||||
const cl::CommandQueue& cmd_queue,
|
||||
const cl::Image1D& img,
|
||||
MapImage& mapped
|
||||
);
|
||||
|
||||
MapBuffer map_buf(
|
||||
const cl::CommandQueue& cmd_queue,
|
||||
const cl::Buffer& buf,
|
||||
size_t offset,
|
||||
size_t size
|
||||
);
|
||||
MapBuffer map_buf(
|
||||
const cl::CommandQueue& cmd_queue,
|
||||
const cl::Buffer& buf
|
||||
);
|
||||
void unmap_buf(
|
||||
const cl::CommandQueue& cmd_queue,
|
||||
const cl::Buffer& buf,
|
||||
MapBuffer& mapped
|
||||
);
|
||||
|
||||
|
||||
} // namespace archprobe
|
|
@ -0,0 +1,203 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
// NOTE: This is a modified excerpt of
|
||||
// https://github.com/PENGUINLIONG/graphi-t/blob/da31ec530df07c9899e056eeced08a64062dcfce/include/json.hpp;
|
||||
// MIT-licensed by Rendong Liang.
|
||||
|
||||
// JSON serialization/deserialization.
|
||||
// @PENGUINLIONG
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
|
||||
namespace archprobe {
|
||||
namespace json {
|
||||
|
||||
// Any error that occurred during JSON serialization/deserialization.
|
||||
class JsonException : public std::exception {
  // Message text; the constructor and `what()` are defined out of line.
  std::string msg;

public:
  JsonException(const char* msg);

  const char* what() const noexcept override;
};
|
||||
|
||||
// Type of JSON value.
|
||||
enum JsonType {
|
||||
L_JSON_NULL,
|
||||
L_JSON_BOOLEAN,
|
||||
L_JSON_NUMBER,
|
||||
L_JSON_STRING,
|
||||
L_JSON_OBJECT,
|
||||
L_JSON_ARRAY,
|
||||
};
|
||||
|
||||
struct JsonValue;
|
||||
|
||||
class JsonElementEnumerator {
|
||||
std::vector<JsonValue>::const_iterator beg_, end_;
|
||||
public:
|
||||
JsonElementEnumerator(const std::vector<JsonValue>& arr) :
|
||||
beg_(arr.cbegin()), end_(arr.cend()) {}
|
||||
|
||||
std::vector<JsonValue>::const_iterator begin() const {
|
||||
return beg_;
|
||||
}
|
||||
std::vector<JsonValue>::const_iterator end() const {
|
||||
return end_;
|
||||
}
|
||||
};
|
||||
|
||||
class JsonFieldEnumerator {
|
||||
std::map<std::string, JsonValue>::const_iterator beg_, end_;
|
||||
public:
|
||||
JsonFieldEnumerator(const std::map<std::string, JsonValue>& obj) :
|
||||
beg_(obj.cbegin()), end_(obj.cend()) {}
|
||||
|
||||
std::map<std::string, JsonValue>::const_iterator begin() const {
|
||||
return beg_;
|
||||
}
|
||||
std::map<std::string, JsonValue>::const_iterator end() const {
|
||||
return end_;
|
||||
}
|
||||
};
|
||||
|
||||
// JSON array builder.
|
||||
struct JsonArray {
|
||||
std::vector<JsonValue> inner;
|
||||
|
||||
inline JsonArray() : inner() {}
|
||||
JsonArray(std::initializer_list<JsonValue>&& elems);
|
||||
};
|
||||
// JSON object builder.
|
||||
struct JsonObject {
|
||||
std::map<std::string, JsonValue> inner;
|
||||
|
||||
inline JsonObject() : inner() {}
|
||||
JsonObject(
|
||||
std::initializer_list<std::pair<const std::string, JsonValue>>&& entries
|
||||
);
|
||||
};
|
||||
|
||||
// Represents an abstract value in JSON representation.
|
||||
struct JsonValue {
|
||||
JsonType ty;
|
||||
bool b;
|
||||
double num;
|
||||
std::string str;
|
||||
std::map<std::string, JsonValue> obj;
|
||||
std::vector<JsonValue> arr;
|
||||
|
||||
inline JsonValue() : ty(L_JSON_NULL) {}
|
||||
inline JsonValue(nullptr_t) : ty(L_JSON_NULL) {}
|
||||
inline JsonValue(bool b) : ty(L_JSON_BOOLEAN), b(b) {}
|
||||
inline JsonValue(double num) : ty(L_JSON_NUMBER), num(num) {}
|
||||
inline JsonValue(float num) : ty(L_JSON_NUMBER), num(num) {}
|
||||
inline JsonValue(int num) : ty(L_JSON_NUMBER), num(num) {}
|
||||
inline JsonValue(unsigned int num) : ty(L_JSON_NUMBER), num(num) {}
|
||||
inline JsonValue(long num) : ty(L_JSON_NUMBER), num(num) {}
|
||||
inline JsonValue(unsigned long num) : ty(L_JSON_NUMBER), num(num) {}
|
||||
inline JsonValue(const char* str) : ty(L_JSON_STRING), str(str) {}
|
||||
inline JsonValue(const std::string& str) : ty(L_JSON_STRING), str(str) {}
|
||||
inline JsonValue(std::string&& str) :
|
||||
ty(L_JSON_STRING),
|
||||
str(std::forward<std::string>(str)) {}
|
||||
JsonValue(JsonObject&& obj);
|
||||
JsonValue(JsonArray&& arr);
|
||||
|
||||
inline JsonValue& operator[](const char* key) {
|
||||
if (!is_obj()) { throw JsonException("value is not an object"); }
|
||||
return obj.at(key);
|
||||
}
|
||||
inline const JsonValue& operator[](const char* key) const {
|
||||
if (!is_obj()) { throw JsonException("value is not an object"); }
|
||||
return obj.at(key);
|
||||
}
|
||||
inline JsonValue& operator[](const std::string& key) {
|
||||
if (!is_obj()) { throw JsonException("value is not an object"); }
|
||||
return obj.at(key);
|
||||
}
|
||||
inline const JsonValue& operator[](const std::string& key) const {
|
||||
if (!is_obj()) { throw JsonException("value is not an object"); }
|
||||
return obj.at(key);
|
||||
}
|
||||
inline JsonValue& operator[](size_t i) {
|
||||
if (!is_arr()) { throw JsonException("value is not an array"); }
|
||||
return arr.at(i);
|
||||
}
|
||||
inline const JsonValue& operator[](size_t i) const {
|
||||
if (!is_arr()) { throw JsonException("value is not an array"); }
|
||||
return arr.at(i);
|
||||
}
|
||||
inline operator bool() const {
|
||||
if (!is_bool()) { throw JsonException("value is not a bool"); }
|
||||
return b;
|
||||
}
|
||||
inline operator double() const {
|
||||
if (!is_num()) { throw JsonException("value is not a number"); }
|
||||
return num;
|
||||
}
|
||||
inline operator float() const {
|
||||
if (!is_num()) { throw JsonException("value is not a number"); }
|
||||
return (float)num;
|
||||
}
|
||||
inline operator int() const {
|
||||
if (!is_num()) { throw JsonException("value is not a number"); }
|
||||
return (int)num;
|
||||
}
|
||||
inline operator unsigned int() const {
|
||||
if (!is_num()) { throw JsonException("value is not a number"); }
|
||||
return (unsigned int)num;
|
||||
}
|
||||
inline operator long() const {
|
||||
if (!is_num()) { throw JsonException("value is not a number"); }
|
||||
return (long)num;
|
||||
}
|
||||
inline operator unsigned long() const {
|
||||
if (!is_num()) { throw JsonException("value is not a number"); }
|
||||
return (unsigned long)num;
|
||||
}
|
||||
inline operator std::string() const {
|
||||
if (!is_str()) { throw JsonException("value is not a string"); }
|
||||
return str;
|
||||
}
|
||||
|
||||
inline bool is_null() const { return ty == L_JSON_NULL; }
|
||||
inline bool is_bool() const { return ty == L_JSON_BOOLEAN; }
|
||||
inline bool is_num() const { return ty == L_JSON_NUMBER; }
|
||||
inline bool is_str() const { return ty == L_JSON_STRING; }
|
||||
inline bool is_obj() const { return ty == L_JSON_OBJECT; }
|
||||
inline bool is_arr() const { return ty == L_JSON_ARRAY; }
|
||||
|
||||
inline size_t size() const {
|
||||
if (is_obj()) {
|
||||
return obj.size();
|
||||
} else if (is_arr()) {
|
||||
return arr.size();
|
||||
} else {
|
||||
throw JsonException("only object and array can have size");
|
||||
}
|
||||
}
|
||||
inline JsonElementEnumerator elems() const {
|
||||
return JsonElementEnumerator(arr);
|
||||
}
|
||||
inline JsonFieldEnumerator fields() const {
|
||||
return JsonFieldEnumerator(obj);
|
||||
}
|
||||
};
|
||||
|
||||
// Parse JSON literal into a `JsonValue` object. If the JSON is invalid or
|
||||
// unsupported, `JsonException` will be raised.
|
||||
JsonValue parse(const std::string& json_lit);
|
||||
// Returns true when JSON parsing successfully finished and parsed value is
|
||||
// returned via `out`. Otherwise, false is returned and out contains incomplete
|
||||
// result.
|
||||
bool try_parse(const std::string& json_lit, JsonValue& out);
|
||||
|
||||
std::string print(const JsonValue& json);
|
||||
|
||||
} // namespace json
|
||||
} // namespace archprobe
|
|
@ -0,0 +1,70 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
// NOTE: This is a modified excerpt of
|
||||
// https://github.com/PENGUINLIONG/graphi-t/blob/0e3c1394b493db3e3d5b443c869545cac712827a/include/log.hpp;
|
||||
// MIT-licensed by Rendong Liang.
|
||||
|
||||
// Logging infrastructure.
|
||||
// @PENGUINLIONG
|
||||
#pragma once
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include "util.hpp"
|
||||
|
||||
|
||||
namespace archprobe {
|
||||
|
||||
namespace log {
|
||||
// Logging infrastructure.
|
||||
|
||||
enum class LogLevel {
|
||||
L_LOG_LEVEL_DEBUG,
|
||||
L_LOG_LEVEL_INFO,
|
||||
L_LOG_LEVEL_WARNING,
|
||||
L_LOG_LEVEL_ERROR,
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
extern void (*log_callback)(LogLevel lv, const std::string& msg);
|
||||
extern LogLevel filter_lv;
|
||||
extern uint32_t indent;
|
||||
|
||||
} // namespace detail
|
||||
|
||||
void set_log_callback(decltype(detail::log_callback) cb);
|
||||
void set_log_filter_level(LogLevel lv);
|
||||
template<typename ... TArgs>
|
||||
void log(LogLevel lv, const TArgs& ... msg) {
|
||||
if (detail::log_callback != nullptr && lv >= detail::filter_lv) {
|
||||
std::string indent(detail::indent, ' ');
|
||||
detail::log_callback(lv, util::format(indent, msg...));
|
||||
}
|
||||
}
|
||||
|
||||
void push_indent();
|
||||
void pop_indent();
|
||||
|
||||
template<typename ... TArgs>
|
||||
inline void debug(const TArgs& ... msg) {
|
||||
log(LogLevel::L_LOG_LEVEL_DEBUG, msg...);
|
||||
}
|
||||
template<typename ... TArgs>
|
||||
inline void info(const TArgs& ... msg) {
|
||||
log(LogLevel::L_LOG_LEVEL_INFO, msg...);
|
||||
}
|
||||
template<typename ... TArgs>
|
||||
inline void warn(const TArgs& ... msg) {
|
||||
log(LogLevel::L_LOG_LEVEL_WARNING, msg...);
|
||||
}
|
||||
template<typename ... TArgs>
|
||||
inline void error(const TArgs& ... msg) {
|
||||
log(LogLevel::L_LOG_LEVEL_ERROR, msg...);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace archprobe
|
|
@ -0,0 +1,269 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
// Tools for statistics.
|
||||
// @PENGUINLIONG
|
||||
#pragma once
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
|
||||
namespace archprobe {
|
||||
|
||||
namespace stats {
|
||||
|
||||
template<typename T>
|
||||
class MinStats {
|
||||
T mn_ = std::numeric_limits<T>::max();
|
||||
public:
|
||||
typedef T value_t;
|
||||
|
||||
// Returns true if the value has been updated.
|
||||
bool push(T value) {
|
||||
if (mn_ > value) {
|
||||
mn_ = value;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
inline bool has_value() const {
|
||||
return mn_ != std::numeric_limits<T>::max();
|
||||
}
|
||||
operator T() const {
|
||||
if (!has_value()) {
|
||||
archprobe::log::warn("`MinStats` has not collected any data yet");
|
||||
}
|
||||
return mn_;
|
||||
}
|
||||
friend std::ostream& operator <<(std::ostream& out, const MinStats<T>& x) {
|
||||
out << (T)(x);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<typename T>
|
||||
class MaxStats {
|
||||
T mx_ = -std::numeric_limits<T>::max();
|
||||
public:
|
||||
typedef T value_t;
|
||||
|
||||
// Returns true if the value has been updated.
|
||||
bool push(T value) {
|
||||
if (mx_ < value) {
|
||||
mx_ = value;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
inline bool has_value() const {
|
||||
return mx_ != -std::numeric_limits<T>::max();
|
||||
}
|
||||
operator T() const {
|
||||
if (!has_value()) {
|
||||
archprobe::log::warn("`MaxStats` has not collected any data yet");
|
||||
}
|
||||
return mx_;
|
||||
}
|
||||
friend std::ostream& operator <<(std::ostream& out, const MaxStats<T>& x) {
|
||||
out << (T)(x);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<typename T>
|
||||
class AvgStats {
|
||||
T sum_ = 0;
|
||||
uint64_t n_ = 0;
|
||||
public:
|
||||
typedef T value_t;
|
||||
|
||||
void push(T value) {
|
||||
sum_ += value;
|
||||
n_ += 1;
|
||||
}
|
||||
inline bool has_value() const {
|
||||
return n_ != 0;
|
||||
}
|
||||
operator T() const {
|
||||
if (!has_value()) {
|
||||
archprobe::log::warn("`AvgStats` has not collected any data yet");
|
||||
}
|
||||
return sum_ / n_;
|
||||
}
|
||||
friend std::ostream& operator <<(std::ostream& out, const AvgStats<T>& x) {
|
||||
out << (T)(x);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<typename T, size_t NTap>
|
||||
class NTapAvgStats {
|
||||
std::array<double, NTap> hist_;
|
||||
size_t cur_idx_;
|
||||
bool ready_;
|
||||
public:
|
||||
typedef T value_t;
|
||||
|
||||
void push(T value) {
|
||||
hist_[cur_idx_++] = value;
|
||||
if (cur_idx_ >= NTap) {
|
||||
cur_idx_ = 0;
|
||||
ready_ = true;
|
||||
}
|
||||
}
|
||||
inline bool has_value() const {
|
||||
return ready_;
|
||||
}
|
||||
operator T() const {
|
||||
if (!has_value()) {
|
||||
archprobe::log::warn("`NTapStats` has not collected any data yet");
|
||||
}
|
||||
|
||||
double out = 0.0;
|
||||
for (double x : hist_) {
|
||||
out += x;
|
||||
}
|
||||
out /= NTap;
|
||||
return out;
|
||||
}
|
||||
friend std::ostream& operator <<(
|
||||
std::ostream& out,
|
||||
const NTapAvgStats<T, NTap>& x
|
||||
) {
|
||||
out << (T)(x);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<typename T>
|
||||
class StdStats {
|
||||
AvgStats<T> avg_ {};
|
||||
std::vector<T> values_ {};
|
||||
public:
|
||||
typedef T value_t;
|
||||
|
||||
void push(T value) {
|
||||
avg_.push(value);
|
||||
values_.push_back(value);
|
||||
}
|
||||
inline bool has_value() const {
|
||||
return avg_.has_value();
|
||||
}
|
||||
operator T() const {
|
||||
if (!has_value()) {
|
||||
archprobe::log::warn("`StdStats` has not collected any data yet");
|
||||
}
|
||||
T avg = avg_;
|
||||
T sqr_sum = 0;
|
||||
for (auto value : values_) {
|
||||
auto temp = value - avg;
|
||||
sqr_sum += temp * temp;
|
||||
}
|
||||
return std::sqrt(sqr_sum / values_.size());
|
||||
}
|
||||
friend std::ostream& operator <<(std::ostream& out, const StdStats<T>& x) {
|
||||
out << (T)(x);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<typename T>
|
||||
class MedianStats {
|
||||
std::vector<T> values_ {};
|
||||
public:
|
||||
typedef T value_t;
|
||||
|
||||
void push(T value) {
|
||||
values_.push_back(value);
|
||||
}
|
||||
inline bool has_value() const {
|
||||
return !values_.empty();
|
||||
}
|
||||
operator T() {
|
||||
if (!has_value()) {
|
||||
archprobe::log::warn("`MedianStats` has not collected any data yet");
|
||||
}
|
||||
std::sort(values_.begin(), values_.end());
|
||||
size_t imid = values_.size() / 2;
|
||||
if (values_.size() & 1) {
|
||||
return values_[imid];
|
||||
} else {
|
||||
return (values_[imid] + values_[imid + 1]) / 2;
|
||||
}
|
||||
}
|
||||
friend std::ostream& operator <<(std::ostream& out, const MedianStats<T>& x) {
|
||||
out << (T)(x);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<typename TStats>
|
||||
class GeomDeltaStats {
|
||||
TStats stats_ {};
|
||||
bool has_ratio_ = false;
|
||||
typename TStats::value_t ratio_ {};
|
||||
public:
|
||||
typedef typename TStats::value_t value_t;
|
||||
|
||||
void push(value_t value) {
|
||||
if (stats_.has_value()) {
|
||||
ratio_ = value / (value_t)stats_;
|
||||
has_ratio_ = true;
|
||||
}
|
||||
stats_.push(value);
|
||||
}
|
||||
inline bool has_value() const {
|
||||
return has_ratio_;
|
||||
}
|
||||
operator value_t() const {
|
||||
if (!has_value()) {
|
||||
archprobe::log::warn("`GeomDeltaStats` has not collected enough data yet");
|
||||
}
|
||||
return ratio_;
|
||||
}
|
||||
friend std::ostream& operator <<(
|
||||
std::ostream& out,
|
||||
const GeomDeltaStats<TStats>& x
|
||||
) {
|
||||
if (x.has_value()) {
|
||||
out << (typename TStats::value_t)(x.ratio_);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
};
|
||||
template<typename TStats>
|
||||
class ArithDeltaStats {
|
||||
TStats stats_ {};
|
||||
bool has_delta_ = false;
|
||||
typename TStats::value_t delta_ {};
|
||||
public:
|
||||
typedef typename TStats::value_t value_t;
|
||||
|
||||
void push(value_t value) {
|
||||
if (stats_.has_value()) {
|
||||
delta_ = value - (value_t)stats_;
|
||||
has_delta_ = true;
|
||||
}
|
||||
stats_.push(value);
|
||||
}
|
||||
inline bool has_value() const {
|
||||
return has_delta_;
|
||||
}
|
||||
operator value_t() const {
|
||||
if (!has_value()) {
|
||||
archprobe::log::warn("`ArithDeltaStats` has not collected enough data yet");
|
||||
}
|
||||
return delta_;
|
||||
}
|
||||
friend std::ostream& operator <<(
|
||||
std::ostream& out,
|
||||
const ArithDeltaStats<TStats>& x
|
||||
) {
|
||||
if (x.has_value()) {
|
||||
out << (typename TStats::value_t)(x.delta_);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace stats
|
||||
|
||||
} // namespace archprobe
|
|
@ -0,0 +1,40 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
// Numeric data table.
|
||||
// @PENGUINLIONG
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <iomanip>
|
||||
#include "assert.hpp"
|
||||
|
||||
namespace archprobe {
|
||||
namespace table {
|
||||
|
||||
struct Table {
|
||||
std::vector<std::string> headers;
|
||||
std::vector<std::vector<double>> rows;
|
||||
|
||||
template<typename ... THeaders>
|
||||
Table(THeaders&& ... headers) :
|
||||
Table(std::vector<std::string> { std::string(headers) ... }) {}
|
||||
Table(std::vector<std::string>&& headers);
|
||||
Table(
|
||||
std::vector<std::string>&& headers,
|
||||
std::vector<std::vector<double>>&& rows);
|
||||
|
||||
template<typename ... TArgs>
|
||||
void push(TArgs&& ... values) {
|
||||
std::vector<double> row { (double)values ... };
|
||||
archprobe::assert(row.size() == headers.size(),
|
||||
"row length mismatches header length");
|
||||
rows.emplace_back(std::move(row));
|
||||
}
|
||||
|
||||
std::string to_csv(uint32_t nsig_digit = 6) const;
|
||||
static Table from_csv(std::string csv);
|
||||
};
|
||||
|
||||
} // namespace table
|
||||
} // namespace archprobe
|
|
@ -0,0 +1,61 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
// NOTE: This is a modified excerpt of
|
||||
// https://github.com/PENGUINLIONG/graphi-t/blob/da31ec530df07c9899e056eeced08a64062dcfce/include/util.hpp;
|
||||
// MIT-licensed by Rendong Liang.
|
||||
|
||||
// HAL independent utilities.
|
||||
// @PENGUINLIONG
|
||||
#pragma once
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
|
||||
namespace archprobe {
|
||||
|
||||
namespace util {
|
||||
|
||||
namespace {

// Recursive helper behind `format`: streams every argument, in order, into
// the given string stream.
template<typename ... TArgs>
struct format_impl_t;

// No arguments left: nothing to write.
template<>
struct format_impl_t<> {
  static inline void format_impl(std::stringstream&) {}
};

// Exactly one argument: write it with `operator <<`.
template<typename T>
struct format_impl_t<T> {
  static inline void format_impl(std::stringstream& ss, const T& x) {
    ss << x;
  }
};

// Two or more arguments: write the first, then recurse on the remainder.
template<typename T, typename ... TArgs>
struct format_impl_t<T, TArgs ...> {
  static inline void format_impl(
    std::stringstream& ss,
    const T& x,
    const TArgs& ... others
  ) {
    ss << x;
    format_impl_t<TArgs...>::format_impl(ss, others...);
  }
};

} // namespace

// Concatenates the stream representation of every argument into one string.
// With no arguments the result is empty.
template<typename ... TArgs>
inline std::string format(const TArgs& ... args) {
  std::stringstream buffer;
  format_impl_t<TArgs...>::format_impl(buffer, args...);
  return buffer.str();
}
|
||||
|
||||
|
||||
|
||||
extern std::vector<uint8_t> load_file(const char* path);
|
||||
extern std::string load_text(const char* path);
|
||||
extern void save_file(const char* path, const void* data, size_t size);
|
||||
extern void save_text(const char* path, const std::string& txt);
|
||||
|
||||
} // namespace util
|
||||
|
||||
} // namespace archprobe
|
Двоичный файл не отображается.
После Ширина: | Высота: | Размер: 75 KiB |
|
@ -0,0 +1,5 @@
|
|||
# Make sure the local `tmp` directory exists before pulling into it.
if (!(Test-Path tmp)) {
    New-Item -Path tmp -ItemType Directory
}

# Copy the on-device working directory into ./tmp/.
adb pull /data/local/tmp/gpu-testbench ./tmp/
|
|
@ -0,0 +1,5 @@
|
|||
# Drop the local copy of the working directory, if there is one.
$LocalCopyExists = Test-Path tmp/gpu-testbench
if ($LocalCopyExists) {
    Remove-Item -Path tmp/gpu-testbench -Recurse -Force
}

# Remove the working directory on the device as well.
adb shell rm -r /data/local/tmp/gpu-testbench
|
|
@ -0,0 +1,45 @@
|
|||
# Builds ArchProbe for Android (arm64-v8a) with the NDK CMake toolchain and,
# unless -BuildOnly is given, pushes the build output to the connected device
# over adb and runs it there.
param(
    # Passes `-v` to ArchProbe.
    [switch] $Verbose,
    # Stops after the build; the device is not touched.
    [switch] $BuildOnly,
    # Passes `-c <value>` to ArchProbe.
    [string] $ClearAspect
)

if (-not(Test-Path "build-android-aarch64")) {
    New-Item "build-android-aarch64" -ItemType Directory
}

# Locate the NDK; ANDROID_NDK_HOME takes precedence over ANDROID_NDK.
# `$null` is kept on the left-hand side so the comparison is always scalar.
$NdkHome = $null
if ($null -ne $env:ANDROID_NDK) {
    $NdkHome = $env:ANDROID_NDK
}
if ($null -ne $env:ANDROID_NDK_HOME) {
    $NdkHome = $env:ANDROID_NDK_HOME
}

if ($null -eq $NdkHome) {
    # Backticks are doubled because a single backtick is the escape character
    # inside a double-quoted string and would be swallowed.
    Write-Host "Couldn't find ``ANDROID_NDK`` in environment variables. Is NDK installed?"
    return -1
}

Push-Location "build-android-aarch64"
try {
    cmake -DCMAKE_TOOLCHAIN_FILE="$NdkHome/build/cmake/android.toolchain.cmake" -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-28 -G "Ninja" ..
    if ($LASTEXITCODE -ne 0) {
        Write-Host "CMake configuration failed."
        return -1
    }
    cmake --build . -t ArchProbe
    if ($LASTEXITCODE -ne 0) {
        Write-Host "Build failed."
        return -1
    }
} finally {
    # Always restore the caller's working directory, even on failure.
    Pop-Location
}

if ($BuildOnly) {
    return
}

# Command line handed to ArchProbe on the device. A dedicated variable is used
# because `$Args` is a PowerShell automatic variable and must not be assigned.
$ProbeArgs = ""
if ($Verbose) {
    $ProbeArgs += "-v "
}
if ($ClearAspect) {
    $ProbeArgs += "-c $ClearAspect "
}

adb reconnect offline
adb push ./build-android-aarch64/assets/ /data/local/tmp/gpu-testbench/
adb push ./build-android-aarch64/bin/ /data/local/tmp/gpu-testbench/
adb shell chmod 777 /data/local/tmp/gpu-testbench/bin/ArchProbe
adb shell "cd /data/local/tmp/gpu-testbench/bin && ./ArchProbe $ProbeArgs"
|
|
@ -0,0 +1,131 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
// NOTE: This is a modified excerpt of
|
||||
// https://github.com/PENGUINLIONG/graphi-t/blob/da31ec530df07c9899e056eeced08a64062dcfce/src/args.cpp;
|
||||
// MIT-licensed by Rendong Liang.
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include "args.hpp"
|
||||
#include "assert.hpp"
|
||||
|
||||
namespace archprobe {
|
||||
|
||||
namespace args {
|
||||
|
||||
// One row of the help listing printed by `print_help`.
struct ArgumentHelp {
  // Short flag text as registered, printed in the first column.
  std::string short_flag;
  // Long flag text as registered, printed in the second column.
  std::string long_flag;
  // Help text printed in the last column.
  std::string help;
};
|
||||
struct ArgumentConfig {
|
||||
std::string app_name = "[APPNAME]";
|
||||
std::string desc;
|
||||
// Short flag name -> ID.
|
||||
std::map<char, size_t> short_map;
|
||||
// Long flag name -> ID.
|
||||
std::map<std::string, size_t> long_map;
|
||||
// Argument parsing info.
|
||||
std::vector<ArgumentParseConfig> parse_cfgs;
|
||||
// Argument help info.
|
||||
std::vector<ArgumentHelp> helps;
|
||||
} arg_cfg;
|
||||
|
||||
|
||||
|
||||
// Stores the application name and description used by the help text.
void init_arg_parse(const char* app_name, const char* desc) {
  auto& cfg = arg_cfg;
  cfg.app_name = app_name;
  cfg.desc = desc;
}
|
||||
const char* get_app_name() {
|
||||
return arg_cfg.app_name.c_str();
|
||||
}
|
||||
void print_help() {
|
||||
std::cout << "usage: " << arg_cfg.app_name << " [OPTIONS]" << std::endl;
|
||||
if (!arg_cfg.desc.empty()) {
|
||||
std::cout << arg_cfg.desc << std::endl;
|
||||
}
|
||||
for (const auto& help : arg_cfg.helps) {
|
||||
std::cout << help.short_flag << "\t"
|
||||
<< help.long_flag << "\t\t"
|
||||
<< help.help << std::endl;
|
||||
}
|
||||
std::cout << "-h\t--help\t\tPrint this message." << std::endl;
|
||||
std::exit(0);
|
||||
}
|
||||
// Names the offending argument on standard output and then shows the help
// text (`print_help` terminates the process).
void report_unknown_arg(const char* arg) {
  std::cout << "unknown argument: ";
  std::cout << arg << std::endl;
  print_help();
}
|
||||
|
||||
void reg_arg(
|
||||
const char* short_flag,
|
||||
const char* long_flag,
|
||||
const ArgumentParseConfig& parse_cfg,
|
||||
const char* help
|
||||
) {
|
||||
using std::strlen;
|
||||
size_t i = arg_cfg.parse_cfgs.size();
|
||||
if (strlen(short_flag) == 2 && short_flag[0] == '-') {
|
||||
arg_cfg.short_map[short_flag[1]] = i;
|
||||
}
|
||||
if (strlen(long_flag) > 3 && long_flag[1] == '-' && long_flag[0] == '-') {
|
||||
arg_cfg.long_map[long_flag + 2] = i;
|
||||
}
|
||||
arg_cfg.parse_cfgs.emplace_back(parse_cfg);
|
||||
std::string help_str = help;
|
||||
auto lit = parse_cfg.lit(parse_cfg.dst);
|
||||
if (!lit.empty()) {
|
||||
help_str += " (default=" + lit + ")";
|
||||
}
|
||||
ArgumentHelp arg_help { short_flag, long_flag, help_str };
|
||||
arg_cfg.helps.emplace_back(std::move(arg_help));
|
||||
}
|
||||
|
||||
void parse_args(int argc, const char** argv) {
|
||||
auto i = 1;
|
||||
int iarg_entry = -1;
|
||||
while (i < argc || iarg_entry >= 0) {
|
||||
if (iarg_entry >= 0) {
|
||||
auto& parse_cfg = arg_cfg.parse_cfgs[iarg_entry];
|
||||
archprobe::assert(parse_cfg.parser(argv + i, parse_cfg.dst),
|
||||
"unable to parse argument");
|
||||
archprobe::assert((argc - i >= parse_cfg.narg),
|
||||
"no enough argument segments");
|
||||
i += parse_cfg.narg;
|
||||
iarg_entry = -1;
|
||||
} else {
|
||||
const char* arg = argv[i];
|
||||
if (arg[0] != '-') {
|
||||
// Free argument.
|
||||
archprobe::panic("free argument is currently unsupported");
|
||||
} else if (arg[1] != '-') {
|
||||
if (arg[1] == 'h') { print_help(); }
|
||||
// Short flag argument.
|
||||
auto it = arg_cfg.short_map.find(arg[1]);
|
||||
if (it != arg_cfg.short_map.end()) {
|
||||
iarg_entry = it->second;
|
||||
} else {
|
||||
report_unknown_arg(arg);
|
||||
}
|
||||
++i;
|
||||
} else {
|
||||
if (std::strcmp(arg + 2, "help") == 0) { print_help(); }
|
||||
// Long flag argument.
|
||||
auto it = (arg_cfg.long_map.find(arg + 2));
|
||||
if (it != arg_cfg.long_map.end()) {
|
||||
iarg_entry = it->second;
|
||||
} else {
|
||||
report_unknown_arg(arg);
|
||||
}
|
||||
++i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace args
|
||||
|
||||
} // namespace archprobe
|
|
@ -0,0 +1,17 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
// NOTE: This is a modified excerpt of
|
||||
// https://github.com/PENGUINLIONG/graphi-t/blob/d291c3d1ce3795fe4b305e5efd76b4f586d23e3b/src/assert.cpp;
|
||||
// MIT-licensed by Rendong Liang.
|
||||
#include "assert.hpp"
|
||||
|
||||
namespace archprobe {
|
||||
|
||||
// Keeps a copy of the failure message for `what()`.
AssertionFailedException::AssertionFailedException(const std::string& msg)
  : msg(msg)
{
}
|
||||
const char* AssertionFailedException::what() const noexcept {
|
||||
return msg.c_str();
|
||||
}
|
||||
|
||||
} // namespace archprobe
|
|
@ -0,0 +1,391 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
#include "cl.hpp"
|
||||
#include "util.hpp"
|
||||
#include "log.hpp"
|
||||
#include "stats.hpp"
|
||||
|
||||
namespace archprobe {
|
||||
|
||||
// Translates an OpenCL status code into a human-readable message.
//
// Codes at or below -69 are reported with a generic "invalid something"
// message; codes without a case below yield "unknown opencl error". Two
// misspelled messages were corrected ("profiling info not available",
// "invalid work item size").
CLException::CLException(cl_int code) {
  if (code <= -69) {
    msg = "invalid something";
    return;
  }
  switch (code) {
  // Invalid-argument style errors.
  case CL_INVALID_VALUE: msg = "invalid value"; break;
  case CL_INVALID_DEVICE_TYPE: msg = "invalid device type"; break;
  case CL_INVALID_PLATFORM: msg = "invalid platform"; break;
  case CL_INVALID_DEVICE: msg = "invalid device"; break;
  case CL_INVALID_CONTEXT: msg = "invalid context"; break;
  case CL_INVALID_QUEUE_PROPERTIES: msg = "invalid queue properties"; break;
  case CL_INVALID_COMMAND_QUEUE: msg = "invalid command queue"; break;
  case CL_INVALID_HOST_PTR: msg = "invalid host pointer"; break;
  case CL_INVALID_MEM_OBJECT: msg = "invalid memory object"; break;
  case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: msg = "invalid image format descriptor"; break;
  case CL_INVALID_IMAGE_SIZE: msg = "invalid image size"; break;
  case CL_INVALID_SAMPLER: msg = "invalid sampler"; break;
  case CL_INVALID_BINARY: msg = "invalid binary"; break;
  case CL_INVALID_BUILD_OPTIONS: msg = "invalid build options"; break;
  case CL_INVALID_PROGRAM: msg = "invalid program"; break;
  case CL_INVALID_PROGRAM_EXECUTABLE: msg = "invalid program executable"; break;
  case CL_INVALID_KERNEL_NAME: msg = "invalid kernel name"; break;
  case CL_INVALID_KERNEL_DEFINITION: msg = "invalid kernel definition"; break;
  case CL_INVALID_KERNEL: msg = "invalid kernel"; break;
  case CL_INVALID_ARG_INDEX: msg = "invalid arg index"; break;
  case CL_INVALID_ARG_VALUE: msg = "invalid arg value"; break;
  case CL_INVALID_ARG_SIZE: msg = "invalid arg size"; break;
  case CL_INVALID_KERNEL_ARGS: msg = "invalid kernel args"; break;
  case CL_INVALID_WORK_DIMENSION: msg = "invalid work dimension"; break;
  case CL_INVALID_WORK_GROUP_SIZE: msg = "invalid work group size"; break;
  case CL_INVALID_WORK_ITEM_SIZE: msg = "invalid work item size"; break;
  case CL_INVALID_GLOBAL_OFFSET: msg = "invalid global offset"; break;
  case CL_INVALID_EVENT_WAIT_LIST: msg = "invalid event wait list"; break;
  case CL_INVALID_EVENT: msg = "invalid event"; break;
  case CL_INVALID_OPERATION: msg = "invalid operation"; break;
  case CL_INVALID_GL_OBJECT: msg = "invalid gl object"; break;
  case CL_INVALID_BUFFER_SIZE: msg = "invalid buffer size"; break;
  case CL_INVALID_MIP_LEVEL: msg = "invalid mip level"; break;
  case CL_INVALID_GLOBAL_WORK_SIZE: msg = "invalid global work size"; break;
  case CL_INVALID_PROPERTY: msg = "invalid property"; break;
  case CL_INVALID_IMAGE_DESCRIPTOR: msg = "invalid image descriptor"; break;
  case CL_INVALID_COMPILER_OPTIONS: msg = "invalid compiler options"; break;
  case CL_INVALID_LINKER_OPTIONS: msg = "invalid linker options"; break;
  case CL_INVALID_DEVICE_PARTITION_COUNT: msg = "invalid device partition count"; break;

  // Runtime / resource errors.
  case CL_DEVICE_NOT_FOUND: msg = "device not found"; break;
  case CL_DEVICE_NOT_AVAILABLE: msg = "device not available"; break;
  case CL_COMPILER_NOT_AVAILABLE: msg = "compiler not available"; break;
  case CL_MEM_OBJECT_ALLOCATION_FAILURE: msg = "memory object allocation failure"; break;
  case CL_OUT_OF_RESOURCES: msg = "out of resources"; break;
  case CL_OUT_OF_HOST_MEMORY: msg = "out of host memory"; break;
  case CL_PROFILING_INFO_NOT_AVAILABLE: msg = "profiling info not available"; break;
  case CL_MEM_COPY_OVERLAP: msg = "memory copy overlap"; break;
  case CL_IMAGE_FORMAT_MISMATCH: msg = "image format mismatch"; break;
  case CL_IMAGE_FORMAT_NOT_SUPPORTED: msg = "image format not supported"; break;
  case CL_BUILD_PROGRAM_FAILURE: msg = "build program failure"; break;
  case CL_MISALIGNED_SUB_BUFFER_OFFSET: msg = "misaligned sub-buffer offset"; break;
  case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: msg = "execution status error for events in wait list"; break;
  case CL_COMPILE_PROGRAM_FAILURE: msg = "compile program failure"; break;
  case CL_LINKER_NOT_AVAILABLE: msg = "linker not available"; break;
  case CL_LINK_PROGRAM_FAILURE: msg = "link program failure"; break;
  case CL_DEVICE_PARTITION_FAILED: msg = "device partition failed"; break;
  case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: msg = "kernel argument information not available"; break;
  default: msg = "unknown opencl error"; break;
  }
}
|
||||
|
||||
// Returns the message chosen by the constructor.
const char* CLException::what() const noexcept {
  return msg;
}
|
||||
|
||||
|
||||
|
||||
// Global variables.
|
||||
|
||||
std::vector<DeviceStub> dev_stubs;
|
||||
|
||||
|
||||
std::vector<cl_platform_id> _enum_platform_ids() {
|
||||
cl_uint nplatform_id;
|
||||
CL_ASSERT << clGetPlatformIDs(0, nullptr, &nplatform_id);
|
||||
std::vector<cl_platform_id> platform_ids;
|
||||
platform_ids.resize(nplatform_id);
|
||||
CL_ASSERT << clGetPlatformIDs(nplatform_id, platform_ids.data(), nullptr);
|
||||
return platform_ids;
|
||||
}
|
||||
// Queries a string-valued platform property.
//
// The first call obtains the byte length of the value (which includes the
// terminating NUL), the second fetches it. A reported length of zero now
// yields an empty string instead of underflowing `len - 1`, and the value is
// written into the string's own mutable storage rather than through a
// cast-away-const `data()` pointer.
std::string _get_platform_info_str(
  cl_platform_id platform_id,
  cl_platform_info platform_info
) {
  size_t len = 0;
  CL_ASSERT << clGetPlatformInfo(platform_id, platform_info, 0, nullptr, &len);

  std::string rv;
  if (len == 0) {
    return rv;
  }
  rv.resize(len);
  CL_ASSERT << clGetPlatformInfo(platform_id, platform_info,
    len, &rv[0], nullptr);
  // Drop the trailing NUL that is part of the reported length.
  rv.resize(len - 1);
  return rv;
}
|
||||
// Returns the IDs of every device (all device types) on `platform_id`: the
// first call asks for the count, the second fills the list.
std::vector<cl_device_id> _enum_dev_ids(cl_platform_id platform_id) {
  cl_uint count;
  CL_ASSERT << clGetDeviceIDs(
    platform_id, CL_DEVICE_TYPE_ALL, 0, nullptr, &count);

  std::vector<cl_device_id> ids(count);
  CL_ASSERT << clGetDeviceIDs(
    platform_id, CL_DEVICE_TYPE_ALL, count, ids.data(), nullptr);
  return ids;
}
|
||||
// Queries a string-valued device property.
//
// The first call obtains the byte length of the value (which includes the
// terminating NUL), the second fetches it. A reported length of zero now
// yields an empty string instead of underflowing `len - 1`, and the value is
// written into the string's own mutable storage rather than through a
// cast-away-const `data()` pointer.
std::string _get_dev_info_str(
  cl_device_id device_id,
  cl_device_info device_info
) {
  size_t len = 0;
  CL_ASSERT << clGetDeviceInfo(device_id, device_info, 0, nullptr, &len);

  std::string rv;
  if (len == 0) {
    return rv;
  }
  rv.resize(len);
  CL_ASSERT << clGetDeviceInfo(device_id, device_info,
    len, &rv[0], nullptr);
  // Drop the trailing NUL that is part of the reported length.
  rv.resize(len - 1);
  return rv;
}
|
||||
void initialize() {
|
||||
auto platform_ids = _enum_platform_ids();
|
||||
for (auto platform_id : platform_ids) {
|
||||
auto platform_name =
|
||||
_get_platform_info_str(platform_id, CL_PLATFORM_NAME);
|
||||
auto platform_prof =
|
||||
_get_platform_info_str(platform_id, CL_PLATFORM_PROFILE);
|
||||
auto platform_exts =
|
||||
_get_platform_info_str(platform_id, CL_PLATFORM_EXTENSIONS);
|
||||
|
||||
auto platform_desc =
|
||||
util::format(platform_name, " (", platform_prof, ") - ");
|
||||
|
||||
auto dev_ids = _enum_dev_ids(platform_id);
|
||||
for (auto dev_id : dev_ids) {
|
||||
auto dev_name = _get_dev_info_str(dev_id, CL_DEVICE_NAME);
|
||||
auto dev_ver = _get_dev_info_str(dev_id, CL_DEVICE_VERSION);
|
||||
auto dev_exts = _get_dev_info_str(dev_id, CL_DEVICE_EXTENSIONS);
|
||||
cl_device_type dev_ty;
|
||||
CL_ASSERT << clGetDeviceInfo(dev_id, CL_DEVICE_TYPE,
|
||||
sizeof(dev_ty), &dev_ty, nullptr);
|
||||
const char* dev_ty_lit;
|
||||
switch (dev_ty) {
|
||||
case CL_DEVICE_TYPE_CPU: dev_ty_lit = "CPU"; break;
|
||||
case CL_DEVICE_TYPE_GPU: dev_ty_lit = "GPU"; break;
|
||||
case CL_DEVICE_TYPE_ACCELERATOR: dev_ty_lit = "Accelerator"; break;
|
||||
default: dev_ty_lit = "Unknown"; break;
|
||||
}
|
||||
|
||||
auto desc = platform_desc +
|
||||
util::format(dev_name, " (", dev_ty, ", ", dev_ver, ")");
|
||||
|
||||
DeviceStub stub { platform_id, dev_id, platform_exts, dev_exts, desc };
|
||||
dev_stubs.emplace_back(std::move(stub));
|
||||
}
|
||||
}
|
||||
archprobe::log::info("initialized opencl environment");
|
||||
}
|
||||
std::string desc_dev(uint32_t idx) {
|
||||
return idx < dev_stubs.size() ? dev_stubs[idx].desc : std::string {};
|
||||
}
|
||||
|
||||
// Wraps device #`idev` from `dev_stubs` in a `cl::Device` and logs the choice.
//
// The index is bounds-checked through `at()`, so an out-of-range `idev` raises
// `std::out_of_range` instead of reading past the end of the list.
cl::Device select_dev(uint32_t idev) {
  const auto& dev_stub = archprobe::dev_stubs.at(idev);
  archprobe::log::info("selected device #", idev, ": ", dev_stub.desc);
  cl::Device dev(dev_stub.dev_id);

  return dev;
}
|
||||
|
||||
// Creates a context for `dev` on the platform that owns the device.
cl::Context create_ctxt(const cl::Device& dev) {
  // Zero-terminated property list naming the device's platform.
  cl_context_properties props[] = {
    CL_CONTEXT_PLATFORM,
    (cl_context_properties)dev.getInfo<CL_DEVICE_PLATFORM>(),
    0,
  };

  cl_int status;
  cl::Context context(dev, props, nullptr, nullptr, &status);
  CL_ASSERT << status;
  return context;
}
|
||||
|
||||
// Creates a command queue on `ctxt` with profiling enabled.
cl::CommandQueue create_cmd_queue(const cl::Context& ctxt) {
  cl_int status;
  cl::CommandQueue queue(ctxt, CL_QUEUE_PROFILING_ENABLE, &status);
  CL_ASSERT << status;
  return queue;
}
|
||||
|
||||
// Creates a program from the source text `src` and builds it for `dev` with
// `build_opts`. When the build status is an error, the build log is written
// to the error log before the build result is checked.
cl::Program create_program(
  const cl::Device& dev,
  const cl::Context& ctxt,
  const char* src,
  const char* build_opts
) {
  cl::Program::Sources sources;
  sources.push_back(src);

  cl_int status;
  cl::Program program(ctxt, sources, &status);
  CL_ASSERT << status;

  status = program.build({dev}, build_opts);
  const bool build_failed =
    program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(dev) == CL_BUILD_ERROR;
  if (build_failed) {
    const std::string log_text =
      program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(dev);
    archprobe::log::error(log_text);
  }
  CL_ASSERT << status;

  return program;
}
|
||||
|
||||
// Looks up the kernel called `kernel_name` in `program`.
cl::Kernel create_kernel(cl::Program program, const std::string& kernel_name) {
  cl_int status;
  const char* name = kernel_name.c_str();
  cl::Kernel kernel(program, name, &status);
  CL_ASSERT << status;
  return kernel;
}
|
||||
|
||||
|
||||
double bench_kernel(
|
||||
const cl::CommandQueue& cmd_queue,
|
||||
const cl::Kernel& kernel,
|
||||
const cl::NDRange& local_size,
|
||||
const cl::NDRange& global_size,
|
||||
uint32_t niter
|
||||
) {
|
||||
std::vector<cl::Event> events;
|
||||
|
||||
auto run_kernel = [&]() {
|
||||
cl::Event event;
|
||||
CL_ASSERT << cmd_queue.enqueueNDRangeKernel(kernel, cl::NDRange(0, 0, 0),
|
||||
global_size, local_size, nullptr, &event);
|
||||
events.push_back(event);
|
||||
};
|
||||
|
||||
run_kernel();
|
||||
run_kernel();
|
||||
cmd_queue.finish();
|
||||
events.clear();
|
||||
|
||||
for (auto i = 0; i < niter; ++i) {
|
||||
run_kernel();
|
||||
}
|
||||
cmd_queue.finish();
|
||||
|
||||
archprobe::stats::MedianStats<double> time_avg;
|
||||
for (const auto& event : events) {
|
||||
uint64_t start = event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
|
||||
uint64_t end = event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
|
||||
time_avg.push(end - start);
|
||||
}
|
||||
double time = (double)time_avg / 1000;
|
||||
events.clear();
|
||||
return time;
|
||||
}
|
||||
|
||||
// Creates a `width` x `height` 2D image with a row pitch argument of 0 and no
// host pointer.
cl::Image2D create_img_2d(
  const cl::Context& ctxt,
  cl_mem_flags mem_flags,
  cl::ImageFormat img_fmt,
  uint32_t width,
  uint32_t height
) {
  cl_int status;
  cl::Image2D image(
    ctxt, mem_flags, img_fmt, width, height, 0, nullptr, &status);
  CL_ASSERT << status;
  return image;
}
|
||||
// Creates a 1D image `width` texels wide without a host pointer.
cl::Image1D create_img_1d(
  const cl::Context& ctxt,
  cl_mem_flags mem_flags,
  cl::ImageFormat img_fmt,
  uint32_t width
) {
  cl_int status;
  cl::Image1D image(ctxt, mem_flags, img_fmt, width, nullptr, &status);
  CL_ASSERT << status;
  return image;
}
|
||||
// Creates a buffer of `size` bytes without a host pointer.
cl::Buffer create_buf(
  const cl::Context& ctxt,
  cl_mem_flags mem_flags,
  size_t size
) {
  cl_int status;
  cl::Buffer buffer(ctxt, mem_flags, size, nullptr, &status);
  CL_ASSERT << status;
  return buffer;
}
|
||||
|
||||
// Maps the whole 2D image for reading and writing with a blocking map call
// and returns the mapped pointer together with the image extent and pitches.
MapImage map_img_2d(
  const cl::CommandQueue& cmd_queue,
  const cl::Image2D& img
) {
  const size_t width = img.getImageInfo<CL_IMAGE_WIDTH>();
  const size_t height = img.getImageInfo<CL_IMAGE_HEIGHT>();

  // Map everything, starting at the image origin.
  cl::array<size_t, 3> origin {};
  cl::array<size_t, 3> region { width, height, 1 };

  size_t row_pitch;
  size_t slice_pitch;
  cl_int status;
  void* mapped = cmd_queue.enqueueMapImage(img, true,
    CL_MAP_READ | CL_MAP_WRITE, origin, region, &row_pitch, &slice_pitch,
    nullptr, nullptr, &status);
  float* data = (float*)mapped;
  CL_ASSERT << status;
  return MapImage { data, width, height, 1, row_pitch, slice_pitch };
}
|
||||
void unmap_img_2d(
|
||||
const cl::CommandQueue& cmd_queue,
|
||||
const cl::Image2D& img,
|
||||
MapImage& mapped
|
||||
) {
|
||||
CL_ASSERT << cmd_queue.enqueueUnmapMemObject(img, mapped);
|
||||
mapped = {};
|
||||
}
|
||||
|
||||
// Maps the whole 1D image for reading and writing with a blocking map call
// and returns the mapped pointer together with the image width and pitches.
MapImage map_img_1d(
  const cl::CommandQueue& cmd_queue,
  const cl::Image1D& img
) {
  const size_t width = img.getImageInfo<CL_IMAGE_WIDTH>();

  // Map everything, starting at the image origin.
  cl::array<size_t, 3> origin {};
  cl::array<size_t, 3> region { width, 1, 1 };

  size_t row_pitch;
  size_t slice_pitch;
  cl_int status;
  void* mapped = cmd_queue.enqueueMapImage(img, true,
    CL_MAP_READ | CL_MAP_WRITE, origin, region, &row_pitch, &slice_pitch,
    nullptr, nullptr, &status);
  float* data = (float*)mapped;
  CL_ASSERT << status;
  return MapImage { data, width, 1, 1, row_pitch, slice_pitch };
}
|
||||
void unmap_img_1d(
|
||||
const cl::CommandQueue& cmd_queue,
|
||||
const cl::Image1D& img,
|
||||
MapImage& mapped
|
||||
) {
|
||||
CL_ASSERT << cmd_queue.enqueueUnmapMemObject(img, mapped);
|
||||
mapped = {};
|
||||
}
|
||||
|
||||
// Maps `size` bytes of `buf`, starting at `offset`, for reading and writing
// with a blocking map call.
MapBuffer map_buf(
  const cl::CommandQueue& cmd_queue,
  const cl::Buffer& buf,
  size_t offset,
  size_t size
) {
  cl_int status;
  void* mapped = cmd_queue.enqueueMapBuffer(buf, true,
    CL_MAP_READ | CL_MAP_WRITE, offset, size, nullptr, nullptr, &status);
  float* data = (float*)mapped;
  CL_ASSERT << status;
  return MapBuffer { data, size };
}
|
||||
// Maps the entire buffer: the size is queried from the buffer itself and the
// offset is zero.
MapBuffer map_buf(
  const cl::CommandQueue& cmd_queue,
  const cl::Buffer& buf
) {
  const size_t whole_size = buf.getInfo<CL_MEM_SIZE>();
  return map_buf(cmd_queue, buf, 0, whole_size);
}
|
||||
void unmap_buf(
|
||||
const cl::CommandQueue& cmd_queue,
|
||||
const cl::Buffer& buf,
|
||||
MapBuffer& mapped
|
||||
) {
|
||||
CL_ASSERT << cmd_queue.enqueueUnmapMemObject(buf, mapped);
|
||||
mapped = {};
|
||||
}
|
||||
|
||||
} // namespace archprobe
|
|
@ -0,0 +1,387 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
// NOTE: This is a modified excerpt of
|
||||
// https://github.com/PENGUINLIONG/graphi-t/blob/da31ec530df07c9899e056eeced08a64062dcfce/src/json.cpp;
|
||||
// MIT-licensed by Rendong Liang.
|
||||
|
||||
// JSON serialization/deserialization.
|
||||
// @PENGUINLIONG
|
||||
#include <sstream>
|
||||
#include "log.hpp"
|
||||
#include "json.hpp"
|
||||
|
||||
namespace archprobe {
|
||||
namespace json {
|
||||
|
||||
// Stores the given message for `what()`.
JsonException::JsonException(const char* msg)
  : msg(msg)
{
}
|
||||
const char* JsonException::what() const noexcept {
|
||||
return msg.c_str();
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Builds the array from a braced list of values.
JsonArray::JsonArray(std::initializer_list<JsonValue>&& elems)
  : inner(elems)
{
}
|
||||
// Builds the object from a braced list of (key, value) pairs.
JsonObject::JsonObject(
  std::initializer_list<std::pair<const std::string, JsonValue>>&& entries
)
  : inner(entries)
{
}
|
||||
// Object-typed value; takes over the entries held by `obj`.
JsonValue::JsonValue(JsonObject&& obj)
  : ty(L_JSON_OBJECT)
  , obj(std::move(obj.inner))
{
}
|
||||
// Array-typed value; takes over the elements held by `arr`.
JsonValue::JsonValue(JsonArray&& arr)
  : ty(L_JSON_ARRAY)
  , arr(std::move(arr.inner))
{
}
|
||||
|
||||
|
||||
|
||||
// Kinds of lexical tokens produced by the tokenizer. The numeric values are
// spelled out but match the implicit numbering.
enum JsonTokenType {
  // No token; reported when the input is exhausted.
  L_JSON_TOKEN_UNDEFINED = 0,
  // Literals.
  L_JSON_TOKEN_NULL = 1,
  L_JSON_TOKEN_TRUE = 2,
  L_JSON_TOKEN_FALSE = 3,
  // Tokens that carry a payload.
  L_JSON_TOKEN_STRING = 4,
  L_JSON_TOKEN_NUMBER = 5,
  // Punctuation.
  L_JSON_TOKEN_COLON = 6,
  L_JSON_TOKEN_COMMA = 7,
  L_JSON_TOKEN_OPEN_BRACE = 8,
  L_JSON_TOKEN_CLOSE_BRACE = 9,
  L_JSON_TOKEN_OPEN_BRACKET = 10,
  L_JSON_TOKEN_CLOSE_BRACKET = 11,
};
|
||||
struct JsonToken {
|
||||
JsonTokenType ty;
|
||||
double num;
|
||||
std::string str;
|
||||
};
|
||||
|
||||
struct Tokenizer {
|
||||
std::string lit;
|
||||
std::string::const_iterator pos;
|
||||
std::string::const_iterator end;
|
||||
|
||||
Tokenizer(const std::string& json) :
|
||||
lit(json),
|
||||
pos(lit.cbegin()),
|
||||
end(lit.cend()) {}
|
||||
|
||||
// Check the range first before calling this method.
|
||||
bool unsafe_starts_with(const char* head) {
|
||||
auto i = 0;
|
||||
while (*head != '\0') {
|
||||
if (pos[i++] != *(head++)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
bool next_token(JsonToken& out) {
|
||||
std::stringstream ss;
|
||||
while (pos != end) {
|
||||
char c = *pos;
|
||||
|
||||
// Ignore whitespaces.
|
||||
if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
|
||||
pos += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Try parse scope punctuations.
|
||||
switch (c) {
|
||||
case ':': out.ty = L_JSON_TOKEN_COLON; pos += 1; return true;
|
||||
case ',': out.ty = L_JSON_TOKEN_COMMA; pos += 1; return true;
|
||||
case '{': out.ty = L_JSON_TOKEN_OPEN_BRACE; pos += 1; return true;
|
||||
case '}': out.ty = L_JSON_TOKEN_CLOSE_BRACE; pos += 1; return true;
|
||||
case '[': out.ty = L_JSON_TOKEN_OPEN_BRACKET; pos += 1; return true;
|
||||
case ']': out.ty = L_JSON_TOKEN_CLOSE_BRACKET; pos += 1; return true;
|
||||
}
|
||||
|
||||
// Try parse numbers.
|
||||
if (c == '+' || c == '-' || (c >= '0' && c <= '9')) {
|
||||
out.ty = L_JSON_TOKEN_NUMBER;
|
||||
const int STATE_INTEGRAL = 0;
|
||||
const int STATE_FRACTION = 1;
|
||||
const int STATE_EXPONENT = 2;
|
||||
int state = STATE_INTEGRAL;
|
||||
do {
|
||||
c = *pos;
|
||||
if (state == STATE_INTEGRAL) {
|
||||
if (c == '.') {
|
||||
state = STATE_FRACTION;
|
||||
ss.put(c);
|
||||
continue;
|
||||
}
|
||||
if (c == 'e') {
|
||||
state = STATE_EXPONENT;
|
||||
ss.put(c);
|
||||
continue;
|
||||
}
|
||||
if (c != '+' && c != '-' && (c < '0' || c > '9')) {
|
||||
break;
|
||||
}
|
||||
} else if (state == STATE_FRACTION) {
|
||||
if (c == 'e') {
|
||||
state = STATE_EXPONENT;
|
||||
ss.put(c);
|
||||
continue;
|
||||
}
|
||||
if (c < '0' || c > '9') {
|
||||
break;
|
||||
}
|
||||
} else if (state == STATE_EXPONENT) {
|
||||
if (c != '+' && c != '-' && (c < '0' || c > '9')) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
ss.put(c);
|
||||
} while (++pos != end);
|
||||
out.num = std::atof(ss.str().c_str());
|
||||
return true;
|
||||
}
|
||||
|
||||
// Try parse strings.
|
||||
if (c == '"') {
|
||||
out.ty = L_JSON_TOKEN_STRING;
|
||||
bool escape = false;
|
||||
while (++pos != end) {
|
||||
c = *pos;
|
||||
if (escape) {
|
||||
switch (c) {
|
||||
case '"':
|
||||
case '/':
|
||||
break;
|
||||
case 'b': c = '\b'; break;
|
||||
case 'f': c = '\f'; break;
|
||||
case 'n': c = '\n'; break;
|
||||
case 'r': c = '\r'; break;
|
||||
case 't': c = '\t'; break;
|
||||
case 'u':
|
||||
throw JsonException("unicode escape is not supported");
|
||||
default:
|
||||
throw JsonException("invalid escape charater");
|
||||
}
|
||||
escape = false;
|
||||
} else {
|
||||
if (c == '\\') {
|
||||
escape = true;
|
||||
continue;
|
||||
} else if (c == '"') {
|
||||
if (escape != false) {
|
||||
throw JsonException("incomplete escape sequence");
|
||||
}
|
||||
out.str = ss.str();
|
||||
pos += 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
ss.put(c);
|
||||
}
|
||||
throw JsonException("unexpected end of string");
|
||||
}
|
||||
|
||||
// Try parse literals.
|
||||
if (pos + 4 <= end) {
|
||||
if (unsafe_starts_with("null")) {
|
||||
out.ty = L_JSON_TOKEN_NULL;
|
||||
pos += 4;
|
||||
return true;
|
||||
}
|
||||
if (unsafe_starts_with("true")) {
|
||||
out.ty = L_JSON_TOKEN_TRUE;
|
||||
pos += 4;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (pos + 5 <= end) {
|
||||
if (unsafe_starts_with("false")) {
|
||||
out.ty = L_JSON_TOKEN_FALSE;
|
||||
pos += 5;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
out.ty = L_JSON_TOKEN_UNDEFINED;
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
// Parses one JSON value from `tokenizer` into `out`.
//
// Returns true when a value was parsed. Returns false when the next token is
// a closing brace or bracket, which is how an enclosing array detects its end.
// Throws `JsonException` on any other unexpected token and when the input
// runs out.
//
// Every array element and object field is parsed into a fresh `JsonValue`, so
// nothing depends on the state of a value that has already been moved from.
//
// NOTE: the grammar is lenient; a trailing comma before the closing token is
// accepted.
bool try_parse_impl(
  Tokenizer& tokenizer,
  JsonValue& out
) {
  JsonToken token;
  if (!tokenizer.next_token(token)) {
    throw JsonException("unexpected program state");
  }

  switch (token.ty) {
  case L_JSON_TOKEN_TRUE:
    out.ty = L_JSON_BOOLEAN;
    out.b = true;
    return true;
  case L_JSON_TOKEN_FALSE:
    out.ty = L_JSON_BOOLEAN;
    out.b = false;
    return true;
  case L_JSON_TOKEN_NULL:
    out.ty = L_JSON_NULL;
    return true;
  case L_JSON_TOKEN_STRING:
    out.ty = L_JSON_STRING;
    out.str = std::move(token.str);
    return true;
  case L_JSON_TOKEN_NUMBER:
    out.ty = L_JSON_NUMBER;
    out.num = token.num;
    return true;
  case L_JSON_TOKEN_OPEN_BRACKET:
    out.ty = L_JSON_ARRAY;
    for (;;) {
      JsonValue elem;
      if (!try_parse_impl(tokenizer, elem)) {
        // A closing token was met where an element was expected, e.g. when
        // the array has no element.
        break;
      }
      out.arr.emplace_back(std::move(elem));
      if (!tokenizer.next_token(token)) {
        throw JsonException("unexpected end of array");
      }
      if (token.ty == L_JSON_TOKEN_CLOSE_BRACKET) {
        break;
      }
      if (token.ty != L_JSON_TOKEN_COMMA) {
        throw JsonException("unexpected token in array");
      }
    }
    return true;
  case L_JSON_TOKEN_OPEN_BRACE:
    out.ty = L_JSON_OBJECT;
    for (;;) {
      // Match the key.
      if (!tokenizer.next_token(token)) {
        throw JsonException("unexpected end of object");
      }
      if (token.ty == L_JSON_TOKEN_CLOSE_BRACE) {
        // The object has no (further) field.
        break;
      }
      if (token.ty != L_JSON_TOKEN_STRING) {
        throw JsonException("unexpected object field key type");
      }
      std::string key = std::move(token.str);

      // Match the colon.
      if (!tokenizer.next_token(token)) {
        throw JsonException("unexpected end of object");
      }
      if (token.ty != L_JSON_TOKEN_COLON) {
        throw JsonException("unexpected token in object");
      }

      // Match the value. A repeated key overwrites the earlier value.
      JsonValue field;
      if (!try_parse_impl(tokenizer, field)) {
        throw JsonException("unexpected end of object");
      }
      out.obj[key] = std::move(field);

      // Should we head for another round?
      if (!tokenizer.next_token(token)) {
        throw JsonException("unexpected end of object");
      }
      if (token.ty == L_JSON_TOKEN_CLOSE_BRACE) {
        break;
      }
      if (token.ty != L_JSON_TOKEN_COMMA) {
        throw JsonException("unexpected token in object");
      }
    }
    return true;
  case L_JSON_TOKEN_CLOSE_BRACE:
  case L_JSON_TOKEN_CLOSE_BRACKET:
    return false;
  default:
    throw JsonException("unexpected token");
  }
}
|
||||
|
||||
|
||||
|
||||
// Parses `json_lit` and returns the first JSON value it contains. Throws
// `JsonException` when the text is empty, when it starts with a closing
// token, and on any error raised while parsing.
JsonValue parse(const std::string& json_lit) {
  if (json_lit.size() == 0) {
    throw JsonException("json text is empty");
  }

  JsonValue root;
  Tokenizer tokenizer(json_lit);
  const bool has_value = try_parse_impl(tokenizer, root);
  if (has_value) {
    return root;
  }
  throw JsonException("unexpected close token");
}
|
||||
bool try_parse(const std::string& json_lit, JsonValue& out) {
|
||||
try {
|
||||
out = parse(json_lit);
|
||||
} catch (JsonException e) {
|
||||
log::error("failed to parse json: ", e.what());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void print_impl(const JsonValue& json, std::stringstream& out) {
|
||||
switch (json.ty) {
|
||||
case L_JSON_NULL:
|
||||
out << "null";
|
||||
return;
|
||||
case L_JSON_BOOLEAN:
|
||||
out << (json.b ? "true" : "false");
|
||||
return;
|
||||
case L_JSON_NUMBER:
|
||||
out << json.num;
|
||||
return;
|
||||
case L_JSON_STRING:
|
||||
out << "\"" << json.str << "\"";
|
||||
return;
|
||||
case L_JSON_OBJECT:
|
||||
out << "{";
|
||||
{
|
||||
bool is_first_iter = true;
|
||||
for (const auto& pair : json.obj) {
|
||||
if (is_first_iter) {
|
||||
is_first_iter = false;
|
||||
} else {
|
||||
out << ",";
|
||||
}
|
||||
out << "\"" << pair.first << "\":";
|
||||
print_impl(pair.second, out);
|
||||
}
|
||||
}
|
||||
out << "}";
|
||||
return;
|
||||
case L_JSON_ARRAY:
|
||||
out << "[";
|
||||
{
|
||||
bool is_first_iter = true;
|
||||
for (const auto& elem : json.arr) {
|
||||
if (is_first_iter) {
|
||||
is_first_iter = false;
|
||||
} else {
|
||||
out << ",";
|
||||
}
|
||||
print_impl(elem, out);
|
||||
}
|
||||
}
|
||||
out << "]";
|
||||
return;
|
||||
}
|
||||
}
|
||||
// Serializes `json` to a compact JSON string by running `print_impl` on a
// scratch stream.
std::string print(const JsonValue& json) {
  std::stringstream text;
  print_impl(json, text);
  return text.str();
}
|
||||
|
||||
} // namespace json
|
||||
} // namespace archprobe
|
|
@ -0,0 +1,40 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
// NOTE: This is a modified excerpt of
|
||||
// https://github.com/PENGUINLIONG/graphi-t/blob/0e3c1394b493db3e3d5b443c869545cac712827a/src/log.cpp;
|
||||
// MIT-licensed by Rendong Liang.
|
||||
#include "log.hpp"
|
||||
|
||||
namespace archprobe {
|
||||
|
||||
namespace log {
|
||||
|
||||
namespace detail {
|
||||
|
||||
decltype(log_callback) log_callback = nullptr;
|
||||
LogLevel filter_lv;
|
||||
uint32_t indent;
|
||||
|
||||
} // namespace detail
|
||||
|
||||
|
||||
|
||||
// Installs `cb` as the callback kept in `detail::log_callback`, replacing
// any callback set before.
void set_log_callback(decltype(detail::log_callback) cb) {
  detail::log_callback = cb;
}
|
||||
// Stores `lv` in `detail::filter_lv` as the current log filter level.
void set_log_filter_level(LogLevel lv) {
  detail::filter_lv = lv;
}
|
||||
|
||||
// Deepens the log indentation by one level (4 units of `detail::indent`).
void push_indent() {
  detail::indent = detail::indent + 4;
}
|
||||
// Reduces the log indentation by one level (4 units of `detail::indent`).
// The indent is unsigned, so an unbalanced pop is clamped at zero instead of
// wrapping around to a huge value.
void pop_indent() {
  detail::indent = detail::indent >= 4 ? detail::indent - 4 : 0;
}
|
||||
|
||||
} // namespace log
|
||||
|
||||
|
||||
} // namespace archprobe
|
|
@ -0,0 +1,101 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
#include "table.hpp"
|
||||
|
||||
namespace archprobe {
|
||||
namespace table {
|
||||
|
||||
// Builds a table with the given column headers and no data rows. `headers`
// is a named rvalue reference and therefore an lvalue here; it has to be
// moved explicitly, otherwise the vector is copied.
Table::Table(std::vector<std::string>&& headers) :
  headers(std::move(headers)), rows() {}
|
||||
// Builds a table from column headers and data rows. Both parameters are
// named rvalue references and therefore lvalues here; they have to be moved
// explicitly, otherwise both vectors are copied.
Table::Table(
  std::vector<std::string>&& headers,
  std::vector<std::vector<double>>&& rows
) : headers(std::move(headers)), rows(std::move(rows)) {}
|
||||
|
||||
std::string Table::to_csv(uint32_t nsig_digit) const {
|
||||
std::stringstream ss;
|
||||
ss << std::setprecision(nsig_digit);
|
||||
|
||||
{
|
||||
bool first_col = true;
|
||||
for (const auto& header : headers) {
|
||||
if (first_col) {
|
||||
first_col = false;
|
||||
} else {
|
||||
ss << ",";
|
||||
}
|
||||
ss << header;
|
||||
}
|
||||
// Enforce newline character to be `\n`.
|
||||
ss << '\n';
|
||||
}
|
||||
|
||||
{
|
||||
for (const auto& row : rows) {
|
||||
bool first_col = true;
|
||||
for (const auto& cell : row) {
|
||||
if (first_col) {
|
||||
first_col = false;
|
||||
} else {
|
||||
ss << ",";
|
||||
}
|
||||
ss << cell;
|
||||
}
|
||||
ss << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
// Splits the rest of `ss` on commas and returns the pieces in order. An empty
// piece between two commas is kept; nothing is added after a trailing comma.
std::vector<std::string> parse_header_row(std::istringstream& ss) {
  std::vector<std::string> columns;
  while (ss.peek() != EOF) {
    std::string name;
    std::getline(ss, name, ',');
    columns.push_back(std::move(name));
  }
  return columns;
}
|
||||
// Splits the rest of `ss` on commas and converts each piece with `std::atof`,
// so a piece that is not a number becomes 0.
std::vector<double> parse_data_row(std::istringstream& ss) {
  std::vector<double> cells;
  while (ss.peek() != EOF) {
    std::string field;
    std::getline(ss, field, ',');
    cells.push_back(std::atof(field.c_str()));
  }
  return cells;
}
|
||||
// Parses CSV text into a table. The first line holds the headers and every
// following line is one row of numbers. An empty line yields an empty row.
// A `\r` left at the end of a line by CRLF line endings is dropped so that it
// does not end up in the last header name.
Table Table::from_csv(std::string csv) {
  std::istringstream ss;
  ss.str(csv);
  std::string line;

  // Reads the next line into `line`; returns false once the input is used up.
  const auto next_line = [&ss, &line]() -> bool {
    if (ss.peek() == EOF) {
      return false;
    }
    line.clear();
    std::getline(ss, line, '\n');
    if (!line.empty() && line.back() == '\r') {
      line.pop_back();
    }
    return true;
  };

  // Capture the header.
  std::vector<std::string> headers;
  if (next_line()) {
    std::istringstream sss;
    sss.str(line);
    headers = parse_header_row(sss);
  }

  std::vector<std::vector<double>> data_rows;
  while (next_line()) {
    std::istringstream sss;
    sss.str(line);
    data_rows.emplace_back(parse_data_row(sss));
  }

  return Table(std::move(headers), std::move(data_rows));
}
|
||||
|
||||
} // namespace table
|
||||
} // namespace archprobe
|
|
@ -0,0 +1,53 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
// NOTE: This is a modified excerpt of
|
||||
// https://github.com/PENGUINLIONG/graphi-t/blob/da31ec530df07c9899e056eeced08a64062dcfce/src/util.cpp;
|
||||
// MIT-licensed by Rendong Liang.
|
||||
#include "util.hpp"
|
||||
#include "assert.hpp"
|
||||
|
||||
namespace archprobe {
|
||||
|
||||
namespace util {
|
||||
|
||||
// Reads the whole file at `path` as raw bytes. Asserts (with the project
// `assert` helper) that the file opens, that its size can be queried and that
// every byte was read.
std::vector<uint8_t> load_file(const char* path) {
  std::ifstream f(path, std::ios::ate | std::ios::binary | std::ios::in);
  assert(f.is_open(), "unable to open file: ", path);
  // The stream was opened at the end (`ate`), so the position is the size.
  // `tellg()` reports -1 on failure, which must not be used as a size.
  const std::streamoff size = f.tellg();
  assert(size >= 0, "unable to query size of file: ", path);
  // Rewind with an explicit offset and direction; `seekg(std::ios::beg)`
  // would pass the direction constant as an absolute position.
  f.seekg(0, std::ios::beg);
  std::vector<uint8_t> buf(static_cast<size_t>(size));
  f.read(reinterpret_cast<char*>(buf.data()), size);
  assert(f.gcount() == size, "unable to read file: ", path);
  f.close();
  return buf;
}
|
||||
// Reads the whole file at `path` into a string, byte for byte (the file is
// opened in binary mode). Asserts (with the project `assert` helper) that the
// file opens, that its size can be queried and that every byte was read.
std::string load_text(const char* path) {
  std::ifstream f(path, std::ios::ate | std::ios::binary | std::ios::in);
  assert(f.is_open(), "unable to open file: ", path);
  // The stream was opened at the end (`ate`), so the position is the size.
  // `tellg()` reports -1 on failure, which must not be used as a size.
  const std::streamoff size = f.tellg();
  assert(size >= 0, "unable to query size of file: ", path);
  // Rewind with an explicit offset and direction; `seekg(std::ios::beg)`
  // would pass the direction constant as an absolute position.
  f.seekg(0, std::ios::beg);
  std::string buf(static_cast<size_t>(size), '\0');
  // `&buf[0]` is a writable pointer to the string storage; before C++17
  // `data()` only gives a pointer to const.
  f.read(&buf[0], size);
  assert(f.gcount() == size, "unable to read file: ", path);
  f.close();
  return buf;
}
|
||||
// Writes `size` bytes starting at `data` to the file at `path`. The file is
// opened in binary mode and any previous content is discarded. Asserts that
// the file could be opened.
void save_file(const char* path, const void* data, size_t size) {
  const auto mode = std::ios::trunc | std::ios::out | std::ios::binary;
  std::ofstream f(path, mode);
  assert(f.is_open(), "unable to open file: ", path);
  const char* bytes = static_cast<const char*>(data);
  f.write(bytes, size);
  f.close();
}
|
||||
// Writes `txt` to the file at `path`. The file is opened in binary mode, so
// newlines are written as they are, and any previous content is discarded.
// Asserts that the file could be opened.
void save_text(const char* path, const std::string& txt) {
  const auto mode = std::ios::trunc | std::ios::out | std::ios::binary;
  std::ofstream f(path, mode);
  assert(f.is_open(), "unable to open file: ", path);
  f << txt;
  f.close();
}
|
||||
|
||||
|
||||
} // namespace util
|
||||
|
||||
} // namespace archprobe
|
|
@ -0,0 +1 @@
|
|||
Subproject commit 00c3a657d134447315c22d2cc344e2e2e9c4c026
|
Загрузка…
Ссылка в новой задаче