ArchProbe source and example output

This commit is contained in:
PENGUINLIONG 2022-01-08 11:15:45 +08:00
Родитель daacaa9031
Коммит fac4d6ba56
44 изменённых файлов: 139209 добавлений и 23 удалений

4
.gitignore поставляемый
Просмотреть файл

@ -348,3 +348,7 @@ MigrationBackup/
# Ionide (cross platform F# VS Code tools) working folder
.ionide/
build*
!examples

3
.gitmodules поставляемый Normal file
Просмотреть файл

@ -0,0 +1,3 @@
[submodule "third/libopencl-stub"]
path = third/libopencl-stub
url = https://github.com/ShireFolk/libopencl-stub

50
CMakeLists.txt Normal file
Просмотреть файл

@ -0,0 +1,50 @@
# Top-level build script. Builds the `ArchProbeCore` static library from the
# sources under `src/` and `include/`, then descends into `apps/`.
cmake_minimum_required (VERSION 3.12)
project ("ArchProbe" LANGUAGES C CXX)

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin")
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake")

if (NOT CMAKE_BUILD_TYPE)
  message(STATUS "No build type selected, default to Release")
  # The build type is a plain string, not a filesystem path; a PATH cache
  # entry would be presented as a directory chooser in cmake-gui.
  set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build Type" FORCE)
endif()

message("-- Looking for libopencl-stub")
if (EXISTS "${PROJECT_SOURCE_DIR}/third/libopencl-stub")
  add_compile_definitions(CL_VERSION_2_0)
  add_subdirectory(third/libopencl-stub)
  set(OpenCL_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/third/libopencl-stub/include")
  message("-- OpenCL context enabled")
else()
  message("")
  message("-- OpenCL not found! OpenCL context is disabled")
  message("")
endif()

set(LINK_LIBS
  OpenCL
)
set(INC_DIRS
  "${PROJECT_SOURCE_DIR}/include"
  ${OpenCL_INCLUDE_DIR}
  ${CMAKE_SOURCE_DIR}/third/OpenCL-SDK/external/OpenCL-CLHPP/include
)

# The testbench library.
include_directories(${INC_DIRS})
file(GLOB SRCS "${PROJECT_SOURCE_DIR}/src/*")
file(GLOB INCS "${PROJECT_SOURCE_DIR}/include/*")
add_library(ArchProbeCore STATIC ${SRCS} ${INCS})
target_link_libraries(ArchProbeCore ${LINK_LIBS})
target_compile_definitions(ArchProbeCore PUBLIC CL_TARGET_OPENCL_VERSION=200)

# Testbench apps.
# `make_directory` is deprecated; `file(MAKE_DIRECTORY)` is its replacement.
file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/assets/")
add_subdirectory("${PROJECT_SOURCE_DIR}/apps")

Просмотреть файл

@ -1,14 +1,48 @@
# Project
# ArchProbe
> This repo has been populated by an initial template to help get you started. Please
> make sure to update the content to build a great experience for community-building.
ArchProbe is a profiling tool to demystify mobile GPU architectures in great detail. The mechanism of ArchProbe is introduced in our technical paper, which is still under review.
As the maintainer of this project, please make a few updates:
![Adreno & Mali Architecture Overview](overview.png)
*Architecture details collected with ArchProbe, presented in our technical paper.*
- Improving this README.MD file to provide a great experience
- Updating SUPPORT.MD with content about this project's support experience
- Understanding the security reporting process in SECURITY.MD
- Remove this section from the README
## How to Use
In a clone of ArchProbe code repository, the following commands build ArchProbe for most mobile devices with a 64-bit ARMv8 architecture.
```powershell
git submodule update --init --recursive
mkdir build-android-aarch64 && cd build-android-aarch64
cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-28 -G "Ninja" ..
cmake --build . -t ArchProbe
```
To run ArchProbe from the command line via `adb shell`, you need to copy the executables to `/data/local/tmp`.
If you are using Windows, the PowerShell scripts in `scripts` can be convenient too:
```powershell
scripts/Run-Android.ps1 [-Verbose]
```
### Prebuilt Binaries
Prebuilt binaries will be available [here](https://github.com/PENGUINLIONG/graphi-t/releases).
## How to Interpret Outputs
GPU hardware has many traits, such as GFLOPS and cache size. ArchProbe implements a bag of tricks to expose these traits, and each implementation is called an *aspect*. Each aspect has its own configuration in `ArchProbe.json`, a report in `ArchProbeReport.json`, and a data table covering every run of its probing kernels in `[ASPECT_NAME].csv`. Currently ArchProbe implements the following aspects:
- `WarpSizeMethod{A|B}` Two methods to detect the warp size of a GPU core;
- `GFLOPS` Peak computational throughput of the device;
- `RegCount` Number of registers available to a thread and whether the register file is shared among warps;
- `BufferVecWidth` Optimal vector width to read the most data in a single memory access;
- `{Image|Buffer}CachelineSize` Top level cacheline size of image/buffer;
- `{Image|Buffer}Bandwidth` Peak read-only bandwidth of image/buffer;
- `{Image|Buffer}CacheHierarchyPChase` Size of each level of cache of image/buffer by the P-chase method.
If the `-v` flag is given, ArchProbe prints extra human-readable logs to `stdout`, which are also a good source of information.
Experiment data gathered from Google Pixel 4 can be found [here](examples/adreno640/Google_Pixel_4).
## Contributing

Просмотреть файл

@ -1,13 +1,3 @@
# TODO: The maintainer of this repo has not yet edited this file
**REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project?
- **No CSS support:** Fill out this template with information about how to file issues and get help.
- **Yes CSS support:** Fill out an intake form at [aka.ms/spot](https://aka.ms/spot). CSS will work with/help you to determine next steps. More details also available at [aka.ms/onboardsupport](https://aka.ms/onboardsupport).
- **Not sure?** Fill out a SPOT intake as though the answer were "Yes". CSS will help you decide.
*Then remove this first heading from this SUPPORT.MD file before publishing your repo.*
# Support
## How to file issues and get help
@ -16,10 +6,6 @@ This project uses GitHub Issues to track bugs and feature requests. Please searc
issues before filing new issues to avoid duplicates. For new issues, file your bug or
feature request as a new Issue.
For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE
FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER
CHANNEL. WHERE WILL YOU HELP PEOPLE?**.
## Microsoft Support Policy
Support for this **PROJECT or PRODUCT** is limited to the resources listed above.
Support for this project is limited to the resources listed above.

1
apps/CMakeLists.txt Normal file
Просмотреть файл

@ -0,0 +1 @@
# Descend into the `archprobe` subdirectory and process its CMakeLists.txt.
add_subdirectory(archprobe)

Просмотреть файл

@ -0,0 +1,4 @@
# Builds the `ArchProbe` executable from `app.cpp` and `env.cpp` and links it
# against the `ArchProbeCore` library target.
set(APP_NAME ArchProbe)
add_executable(${APP_NAME} "app.cpp" "env.cpp")
target_link_libraries(${APP_NAME} ArchProbeCore)

1590
apps/archprobe/app.cpp Normal file

Разница между файлами не показана из-за своего большого размера Загрузить разницу

231
apps/archprobe/env.cpp Normal file
Просмотреть файл

@ -0,0 +1,231 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "env.hpp"
namespace archprobe {
// Renders a byte count using the first unit among B, KB, MB, GB and TB for
// which the (integer-truncated) value is below 1024. Panics when the value is
// still 1024 or more after being scaled down to TB.
std::string pretty_data_size(size_t size) {
  const size_t STEP = 1024;
  const char* const UNITS[] = { "B", "KB", "MB", "GB", "TB" };
  for (const char* unit : UNITS) {
    if (size < STEP) {
      return util::format(size, unit);
    }
    // Move on to the next larger unit; the remainder is discarded.
    size /= STEP;
  }
  archprobe::panic("unsupported data size");
  return {};
}
// Queries `dev` for its memory, image and processor properties, logs a
// summary of what was found and returns the values as a `DeviceReport`.
DeviceReport collect_dev_report(const cl::Device& dev) {
  DeviceReport out {};

  log::info("set-up testing environment");

  // General memory detail. The page size query is only marked as available
  // when the call reports `CL_SUCCESS`.
  const auto page_size_status =
    dev.getInfo(CL_DEVICE_PAGE_SIZE_QCOM, &out.page_size);
  out.has_page_size = (CL_SUCCESS == page_size_status);

  // Global memory detail.
  out.buf_cacheline_size = dev.getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
  out.buf_size_max = dev.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>();
  out.buf_cache_size = dev.getInfo<CL_DEVICE_GLOBAL_MEM_CACHE_SIZE>();

  // Image memory detail; the extents are only queried when images are
  // supported at all.
  out.support_img = dev.getInfo<CL_DEVICE_IMAGE_SUPPORT>();
  if (out.support_img) {
    out.img_width_max = dev.getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
    out.img_height_max = dev.getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
  }

  // Processor detail.
  out.nsm = dev.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
  out.nthread_logic = dev.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();

  log::info("fetched device report");
  log::push_indent();

  if (out.has_page_size) {
    log::info("(qualcomm extension) device page size is ",
      pretty_data_size(out.page_size));
  }

  log::info(pretty_data_size(out.buf_size_max),
    " global memory with ", pretty_data_size(out.buf_cache_size),
    " cache consists of ", pretty_data_size(out.buf_cacheline_size),
    " cachelines");

  if (!out.support_img) {
    log::info("image is not supported");
  } else {
    log::info("images up to [", out.img_width_max, ", ",
      out.img_height_max, "] texels are supported");
  }

  log::info(out.nsm, " SMs with ", out.nthread_logic,
    " logical threads in each");

  log::pop_indent();
  return out;
}
// Loads the environment configuration stored as JSON text at `path`.
//
// Returns the parsed JSON object. When the text cannot be parsed, or parses
// to something other than an object, a warning is logged and an empty object
// is returned so that a default configuration can be written over the file.
// The same fallback applies when loading the file raises
// `AssertionFailedException`.
json::JsonValue load_env_cfg(const char* path) {
  try {
    auto json_txt = util::load_text(path);
    log::debug("loaded configuration '", json_txt, "'");
    json::JsonValue out {};
    // A document that parses but is not an object is unusable as a
    // configuration. It is handled here together with parse failures instead
    // of being asserted on inside this `try`, where the failure would be
    // handled by the `catch` below and reported as an unreadable file.
    if (json::try_parse(json_txt, out) && out.is_obj()) {
      return out;
    }
    log::warn("failed to parse environment config from '", path,
      "', a default configuration will be created to overwrite it");
    return json::JsonObject {};
  } catch (const archprobe::AssertionFailedException&) {
    // Caught by const reference to avoid copying the exception object.
    log::warn("configuration file cannot be opened at '", path,
      "', a default configuration will be created");
    return json::JsonObject {};
  }
}
// Loads a previously saved report stored as JSON text at `path`.
//
// Returns the parsed JSON object. When the text cannot be parsed, or parses
// to something other than an object, a warning is logged and an empty object
// is returned so that a new report can be written over the file. The same
// fallback applies when loading the file raises `AssertionFailedException`.
json::JsonValue load_report(const char* path) {
  try {
    auto json_txt = util::load_text(path);
    log::debug("loaded report '", json_txt, "'");
    json::JsonValue out {};
    // A document that parses but is not an object is unusable as a report.
    // It is handled here together with parse failures instead of being
    // asserted on inside this `try`, where the failure would be handled by
    // the `catch` below and reported as an unreadable file.
    if (json::try_parse(json_txt, out) && out.is_obj()) {
      return out;
    }
    log::warn("failed to parse report from '", path, "', a new report "
      "will be created to overwrite it");
    return json::JsonObject {};
  } catch (const archprobe::AssertionFailedException&) {
    // Caught by const reference to avoid copying the exception object.
    log::warn("report file cannot be opened at '", path, "', a new "
      "report will be created");
    return json::JsonObject {};
  }
}
// Records the contents of `env.dev_report` under the "Device" aspect and
// marks the aspect as done. Does nothing when `report_started_lazy` says the
// aspect is already done.
void report_dev(Environment& env) {
  const bool already_done = env.report_started_lazy("Device");
  if (already_done) {
    return;
  }

  const DeviceReport& dr = env.dev_report;

  env.report_value("SmCount", dr.nsm);
  env.report_value("LogicThreadCount", dr.nthread_logic);
  env.report_value("MaxBufferSize", dr.buf_size_max);
  env.report_value("CacheSize", dr.buf_cache_size);
  env.report_value("CachelineSize", dr.buf_cacheline_size);

  // Image extents are only reported when images are supported.
  if (dr.support_img) {
    env.report_value("MaxImageWidth", dr.img_width_max);
    env.report_value("MaxImageHeight", dr.img_height_max);
  }

  // The page size is only reported when its query succeeded.
  if (dr.has_page_size) {
    env.report_value("PageSize_QCOM", dr.page_size);
  }

  env.report_ready(true);
}
// Sets up the probing environment: selects device `idev`, creates its context
// and command queue, loads the configuration from `cfg_path` and the report
// from `report_path`, collects the device report, and finally records the
// "Device" aspect via `report_dev`.
//
// The initializers are listed in the same order as the members are declared
// in the class; later ones depend on earlier ones (`ctxt_` on `dev_`,
// `cmd_queue_` on `ctxt_`, `dev_report` on `dev_`).
Environment::Environment(
uint32_t idev,
const char* cfg_path,
const char* report_path
) :
dev_(archprobe::select_dev(idev)),
ctxt_(archprobe::create_ctxt(dev_)),
cmd_queue_(archprobe::create_cmd_queue(ctxt_)),
aspects_started_(),
cur_aspect_(),
cur_table_(nullptr),
cfg_path_(cfg_path),
report_path_(report_path),
cfg_(load_env_cfg(cfg_path)),
report_(load_report(report_path)),
dev_report(collect_dev_report(dev_)),
my_report()
{
// Runs after every member is initialized because it reports through `*this`.
report_dev(*this);
}
// Writes the configuration and the report back to the paths given at
// construction, each serialized with `json::print`, and logs both saves.
// NOTE(review): if `util::save_text` can throw, the exception would escape a
// destructor and terminate the program — confirm its failure behaviour.
Environment::~Environment() {
util::save_text(cfg_path_.c_str(), json::print(cfg_));
log::info("saved configuration to '", cfg_path_, "'");
util::save_text(report_path_.c_str(), json::print(report_));
log::info("saved report to '", report_path_, "'");
}
// Opens the report scope of `aspect_name`: remembers the aspect as started,
// logs its name as a header, pushes one log indent level and makes it the
// current aspect. The name must not be empty.
void Environment::report_started(const std::string& aspect_name) {
  const bool has_name = !aspect_name.empty();
  archprobe::assert(has_name, "aspect name cannot be empty");

  aspects_started_.emplace(aspect_name);

  log::info("[", aspect_name, "]");
  log::push_indent();

  cur_aspect_ = aspect_name;
}
// Starts the report of `aspect_name` unless the loaded report already holds
// an object for that aspect whose boolean "Done" entry is true.
//
// Returns true when the aspect is skipped because it is already done, and
// false after the report has been started via `report_started`.
bool Environment::report_started_lazy(const std::string& aspect_name) {
  bool already_done = false;

  auto aspect_it = report_.obj.find(aspect_name);
  if (aspect_it != report_.obj.end() && aspect_it->second.is_obj()) {
    auto& fields = aspect_it->second.obj;
    auto done_it = fields.find("Done");
    // A missing or non-boolean "Done" entry counts as not done.
    if (done_it != fields.end() && done_it->second.is_bool()) {
      already_done = done_it->second.b;
    }
  }

  if (already_done) {
    log::info("ignored aspect '", aspect_name ,"' because it's done");
    return true;
  }

  report_started(aspect_name);
  return false;
}
// Closes the report scope of the current aspect: records `done` as its "Done"
// entry, saves the data table (if one was initialized) to
// `<aspect name>.csv`, clears the current aspect and pops one log indent
// level. An aspect must have been started beforehand.
void Environment::report_ready(bool done) {
  archprobe::assert(!cur_aspect_.empty(),
    "announcing ready for an not-yet-started report is not allowed");
  archprobe::assert(aspects_started_.count(cur_aspect_) != 0,
    "aspect has not report to start yet");

  report_value("Done", done);

  if (cur_table_) {
    const auto csv_text = cur_table_->to_csv();
    const auto csv_path = util::format(cur_aspect_, ".csv");
    util::save_text(csv_path.c_str(), csv_text);
    log::info("saved data table to '", csv_path, "'");
    // The table belongs to the aspect that just finished.
    cur_table_.reset();
  }

  cur_aspect_.clear();
  log::pop_indent();
}
// Asserts that the report of `aspect_name` has a "Done" entry that converts
// to true, i.e. that the aspect this caller depends on has finished.
void Environment::check_dep(const std::string& aspect_name) {
  bool dep_done = false;
  const bool has_done_entry =
    try_get_aspect_report(aspect_name, "Done", dep_done);
  archprobe::assert(has_done_entry && dep_done,
    "aspect '", aspect_name, "' is required but is not ready yet");
}
// Returns the data table of the current aspect. Asserts that a table has
// been initialized.
table::Table& Environment::table() {
  const bool has_table = (cur_table_ != nullptr);
  archprobe::assert(has_table, "requested table is not initialized");
  return *cur_table_;
}
// Find the minimal number of iterations that a kernel can run up to
// `min_time_us` microseconds.
//
// `niter` is reset to a default and then rescaled after every call to `run`,
// which is expected to return the elapsed time in microseconds.
// NOTE(review): `run` presumably reads `niter` through the caller's variable
// — confirm against the call sites.
// The search stops as soon as a run takes longer than 99% of `min_time_us`.
// Panics when no suitable `niter` is found within the trial budget.
void Environment::ensure_min_niter(
  double min_time_us,
  uint32_t& niter,
  std::function<double()> run
) {
  const uint32_t DEFAULT_NITER = 100;
  const uint32_t MAX_TRIALS = 100;
  // Growth factor used when a run reports no measurable time at all.
  const double ZERO_TIME_GROWTH = 10.0;

  niter = DEFAULT_NITER;
  for (uint32_t i = 0; i < MAX_TRIALS; ++i) {
    double t = run();
    if (t > min_time_us * 0.99) {
      log::info("found minimal niter=", niter, " to take ", min_time_us,
        "us");
      return;
    }
    log::debug("niter=", niter, " doesn't run long enough (", t,
      "us <= ", min_time_us, "us)");

    // `niter` cannot grow any further; retrying would only repeat this run.
    if (niter == UINT32_MAX) { break; }

    // Scale proportionally to the measured time. A non-positive `t` would
    // divide by zero (or flip the sign), so fall back to a fixed growth
    // factor in that case.
    const double scaled = (t > 0.0) ?
      (niter * min_time_us / t) : (niter * ZERO_TIME_GROWTH);
    // Converting an out-of-range double to `uint32_t` is undefined, so
    // saturate instead.
    niter = (scaled < double(UINT32_MAX)) ? uint32_t(scaled) : UINT32_MAX;
  }
  archprobe::panic("unable to find a minimal iteration number for ",
    cur_aspect_, "; is your code aggresively optimized by the compiler?");
}
} // namespace archprobe

277
apps/archprobe/env.hpp Normal file
Просмотреть файл

@ -0,0 +1,277 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include <algorithm>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <utility>
#include "assert.hpp"
#include "log.hpp"
#include "util.hpp"
#include "args.hpp"
#include "stats.hpp"
#include "table.hpp"
#include "json.hpp"
#define CL_TARGET_OPENCL_VERSION 200
#define CL_HPP_TARGET_OPENCL_VERSION CL_TARGET_OPENCL_VERSION
#include "CL/opencl.hpp"
#include "cl.hpp"
namespace archprobe {
std::string pretty_data_size(size_t size);
// Device properties obtained from OpenCL device-info queries (filled in by
// `collect_dev_report`).
struct DeviceReport {
// Whether the `CL_DEVICE_PAGE_SIZE_QCOM` query succeeded; `page_size` is only
// logged and reported when this is true.
bool has_page_size;
// Result of the `CL_DEVICE_PAGE_SIZE_QCOM` query.
size_t page_size;
// `CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE`.
size_t buf_cacheline_size;
// `CL_DEVICE_GLOBAL_MEM_SIZE`.
size_t buf_size_max;
// `CL_DEVICE_GLOBAL_MEM_CACHE_SIZE`.
size_t buf_cache_size;
// `CL_DEVICE_IMAGE_SUPPORT`; the two image extents below are only queried
// when this is true.
bool support_img;
// `CL_DEVICE_IMAGE2D_MAX_WIDTH`.
uint32_t img_width_max;
// `CL_DEVICE_IMAGE2D_MAX_HEIGHT`.
uint32_t img_height_max;
// `CL_DEVICE_MAX_COMPUTE_UNITS`.
uint32_t nsm;
// `CL_DEVICE_MAX_WORK_GROUP_SIZE`.
uint32_t nthread_logic;
};
// Characteristics determined by profiling rather than by device queries.
// NOTE(review): nothing in this header or in env.cpp writes these fields;
// presumably they are filled in by the aspects in app.cpp — confirm there
// before relying on the meaning or units of any field.
struct ProfiledReport {
double timing_std;
// NOTE(review): looks like a map from register count to logical thread
// count — confirm against the code that populates it.
std::map<uint32_t, uint32_t> nthread_logic_for_nreg;
double gflops_fp16;
double gflops_fp32;
double gflops_int32;
uint32_t nmin_warp;
uint32_t nwarp;
uint32_t nthread_phys;
uint32_t nthread_warp;
uint32_t nthread_min_warp;
uint32_t buf_vec_width;
std::string buf_vec_ty;
uint32_t buf_cacheline_size;
std::vector<uint32_t> buf_cache_sizes;
uint32_t img_cacheline_size;
std::vector<uint32_t> img_cache_sizes;
double img_bandwidth;
};
class Environment {
cl::Device dev_;
cl::Context ctxt_;
cl::CommandQueue cmd_queue_;
std::set<std::string> aspects_started_;
std::string cur_aspect_;
std::unique_ptr<table::Table> cur_table_;
std::string cfg_path_;
std::string report_path_;
json::JsonValue cfg_;
json::JsonValue report_;
public:
const DeviceReport dev_report;
ProfiledReport my_report;
Environment(
uint32_t idev,
const char* cfg_path = "ArchProbe.json",
const char* report_path = "ArchProbeReport.json");
~Environment();
void report_started(const std::string& aspect_name);
// Returns false if there is no existing report about the aspect to be started
// or such report is not yet marked with '"Done": true'. It means that when
// this method returns true, the aspect can return right a way.
bool report_started_lazy(const std::string& aspect_name);
void report_ready(bool done = false);
void check_dep(const std::string& aspect_name);
template<typename ... TArgs>
void init_table(TArgs&& ... args) {
archprobe::assert(!cur_aspect_.empty(),
"table can only be initialized in scope of a report");
log::info("initialized table for aspect '", cur_aspect_, "'");
cur_table_ = std::make_unique<table::Table>(args ...);
}
table::Table& table();
inline json::JsonValue& get_aspect_cfg(const std::string& aspect) {
auto it = cfg_.obj.find(aspect);
if (it == cfg_.obj.end() ||!it->second.is_obj()) {
log::warn("aspect configuration ('", cur_aspect_, "') is invalid, "
"a new record is created");
cfg_.obj[aspect] = json::JsonObject {};
}
return cfg_.obj[aspect];
}
inline json::JsonValue& get_cfg() {
return get_aspect_cfg(cur_aspect_);
}
template<typename T>
inline T cfg_num(const std::string& name, T default_value) {
auto& cfg = get_cfg();
if (cfg.obj.find(name) == cfg_.obj.end() || !cfg.obj[name].is_num()) {
log::warn("record entry ('", name, "') is invalid, a new record "
"is created");
cfg.obj[name] = json::JsonValue(default_value);
}
return (T)cfg[name];
}
inline json::JsonValue& get_report() {
return get_aspect_report(cur_aspect_);
}
inline json::JsonValue& get_aspect_report(const std::string& aspect) {
auto it = report_.obj.find(aspect);
if (it == report_.obj.end() || !it->second.is_obj()) {
log::warn("aspect report ('", aspect, "') is invalid, a new record is "
"created");
report_.obj[aspect] = json::JsonObject {};
}
return report_.obj[aspect];
}
template<typename T>
inline bool try_get_report(const std::string& name, T& out) {
return try_get_aspect_report(cur_aspect_, name, out);
}
template<typename T>
inline bool try_get_aspect_report(
const std::string& aspect,
const std::string& name,
T& out
) {
const auto& report = get_aspect_report(aspect);
auto it = report.obj.find(name);
if (it == report.obj.end()) {
return false;
} else {
out = (T)it->second;
log::info("already know that '", name, "' from aspect '", aspect, "' is ",
out);
return true;
}
}
template<typename T>
inline T must_get_aspect_report(
const std::string& aspect,
const std::string& name
) {
T out;
archprobe::assert(try_get_aspect_report(aspect, name, out),
"cannot get report '", name, "' from aspect '", aspect, "'");
return out;
}
template<typename T>
inline void report_value(const std::string& name, T value) {
auto& report = get_report();
log::info("reported '", name, "' = '", value, "'");
report.obj[name] = json::JsonValue(value);
}
inline void clear_aspect_report(const std::string& aspect) {
if (!aspect.empty()) {
get_aspect_report(aspect) = json::JsonObject {};
log::info("cleared report of aspect '", aspect, "'");
}
}
void ensure_min_niter(
double min_time_us,
uint32_t& niter,
std::function<double()> run);
inline cl::Program create_program(
const std::string& src,
const std::string& build_opts
) const {
return archprobe::create_program(dev_, ctxt_, src.c_str(),
build_opts.c_str());
}
inline cl::Program create_program(
const std::vector<uint8_t>& src,
const std::string& build_opts
) const {
return archprobe::create_program(dev_, ctxt_, (const char*)src.data(),
build_opts.c_str());
}
inline cl::Kernel create_kernel(
const cl::Program& program,
const std::string& kernel_name
) const {
return archprobe::create_kernel(program, kernel_name);
}
inline cl::Image2D create_img_2d(
cl_mem_flags mem_flags,
cl::ImageFormat img_fmt,
uint32_t width,
uint32_t height
) const {
return archprobe::create_img_2d(ctxt_, mem_flags, img_fmt, width, height);
}
inline cl::Image1D create_img_1d(
cl_mem_flags mem_flags,
cl::ImageFormat img_fmt,
uint32_t width
) const {
return archprobe::create_img_1d(ctxt_, mem_flags, img_fmt, width);
}
inline cl::Buffer create_buf(
cl_mem_flags mem_flags,
size_t size
) const {
return archprobe::create_buf(ctxt_, mem_flags, size);
}
inline archprobe::MapBuffer map_buf(const cl::Buffer& buf) const {
return archprobe::map_buf(cmd_queue_, buf);
}
inline void unmap_buf(
const cl::Buffer& buf,
archprobe::MapBuffer& mapped
) const {
archprobe::unmap_buf(cmd_queue_, buf, mapped);
}
inline archprobe::MapImage map_img_1d(
const cl::Image1D& img
) const {
return archprobe::map_img_1d(cmd_queue_, img);
}
inline void unmap_img_1d(
const cl::Image1D& img,
archprobe::MapImage& mapped
) const {
archprobe::unmap_img_1d(cmd_queue_, img, mapped);
}
inline archprobe::MapImage map_img_2d(const cl::Image2D& img) const {
return archprobe::map_img_2d(cmd_queue_, img);
}
inline void unmap_img_2d(
const cl::Image2D& img,
archprobe::MapImage& mapped
) const {
archprobe::unmap_img_2d(cmd_queue_, img, mapped);
}
// Returns kernel time in microseconds (us).
inline double bench_kernel(
const cl::Kernel& kernel,
cl::NDRange local,
cl::NDRange global,
uint32_t niter
) const {
return archprobe::bench_kernel(cmd_queue_, kernel, local, global, niter);
}
};
} // namespace archprobe

Просмотреть файл

@ -0,0 +1 @@
{"BufferCacheHierarchyPChase":{"Compensate":0.01,"DataSizeMax":8.38861e+06,"Threshold":10},"BufferCachelineSize":{"Compensate":0.01,"Threshold":10},"BufferVecWidth":{"Compensate":0.01,"Threshold":10},"Gflops":{"Compensate":0.01,"Threshold":10},"ImageCacheHierarchyPChase":{"Compensate":0.01,"DataSizeMax":262144,"Threshold":10},"ImageCachelineSize":{"Compensate":0.01,"Threshold":10},"RegCount":{"Compensate":0.01,"NGrpMax":64,"NGrpMin":1,"NGrpStep":1,"NRegMax":512,"NRegMin":1,"NRegStep":1,"Threshold":10},"WarpSizeMethodB":{"Compensate":0.01,"Threshold":10}}

Просмотреть файл

@ -0,0 +1 @@
{"BufferBandwidth":{"Done":true,"MaxBandwidth":96.3256,"MinBandwidth":30.0554},"BufferCacheHierarchyPChase":{"CacheVectorCountLv1":125744,"CacheVectorCountLv2":132608,"CacheVectorCountLv3":133728,"CacheVectorCountLv4":136080,"Done":true},"BufferCachelineSize":{"BufTopLevelCachelineSize":64,"Done":true},"BufferVecWidth":{"BufferVecSize":4,"Done":true},"Device":{"CacheSize":131072,"CachelineSize":64,"Done":true,"LogicThreadCount":1024,"MaxBufferSize":2.87688e+09,"MaxImageHeight":16384,"MaxImageWidth":16384,"PageSize_QCOM":4096,"SmCount":2},"Gflops":{"Done":true,"FloatArch":"SISD","FloatGflops":889.891,"FloatVecComponentCount":1,"HalfArch":"SISD","HalfGflops":890.087,"HalfVecComponentCount":1},"ImageBandwidth":{"Done":true,"MaxBandwidth":194.55,"MinBandwidth":68.174},"ImageCacheHierarchyPChase":{"CachePixelCountLv1":1024,"CachePixelCountLv2":139504,"Done":true},"ImageCachelineSize":{"Done":true,"ImgCachelineDim":"X","ImgCachelineSize":32,"ImgMinTimeConcurThreadCountX":64,"ImgMinTimeConcurThreadCountY":32},"RegCount":{"Done":true,"FullRegConcurWorkgroupCount":12,"HalfRegConcurWorkgroupCount":24,"RegCount":183,"RegType":"Pooled"},"WarpSizeMethodA":{"Done":true,"WarpThreadCount":128},"WarpSizeMethodB":{"Done":true,"WarpThreadCount":64}}

Просмотреть файл

@ -0,0 +1,24 @@
range (byte),t (us),bandwidth (gbps)
16,14899.6,72.0652
32,14884.5,72.1383
64,28961.9,37.0743
128,29880.1,35.9351
256,16474,65.178
512,15259.5,70.3654
1024,18114.6,59.2751
2048,11147,96.3256
4096,11239,95.5368
8192,11151,96.2913
16384,11192.1,95.9378
32768,11210.4,95.7811
65536,11233.9,95.5803
131072,12360.1,86.8719
262144,18612,57.6909
524288,26379.5,40.7036
1.04858e+06,30822.9,34.8358
2.09715e+06,33865.6,31.706
4.1943e+06,27676.7,38.7959
8.38861e+06,35214.5,30.4915
1.67772e+07,35725.4,30.0554
3.35544e+07,35447.9,30.2907
6.71089e+07,35374,30.354
1 range (byte) t (us) bandwidth (gbps)
2 16 14899.6 72.0652
3 32 14884.5 72.1383
4 64 28961.9 37.0743
5 128 29880.1 35.9351
6 256 16474 65.178
7 512 15259.5 70.3654
8 1024 18114.6 59.2751
9 2048 11147 96.3256
10 4096 11239 95.5368
11 8192 11151 96.2913
12 16384 11192.1 95.9378
13 32768 11210.4 95.7811
14 65536 11233.9 95.5803
15 131072 12360.1 86.8719
16 262144 18612 57.6909
17 524288 26379.5 40.7036
18 1.04858e+06 30822.9 34.8358
19 2.09715e+06 33865.6 31.706
20 4.1943e+06 27676.7 38.7959
21 8.38861e+06 35214.5 30.4915
22 1.67772e+07 35725.4 30.0554
23 3.35544e+07 35447.9 30.2907
24 6.71089e+07 35374 30.354

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,20 @@
nthread,stride (byte),pitch (byte),niter,t (us)
1024,4,256,100,248.576
1024,4,256,402,980.992
1024,4,256,409,998.016
1024,4,256,409,997.12
1024,8,256,409,997.504
1024,12,256,409,995.968
1024,16,256,409,992
1024,20,256,409,993.024
1024,24,256,409,993.536
1024,28,256,409,992.512
1024,32,256,409,994.048
1024,36,256,409,993.536
1024,40,256,409,995.84
1024,44,256,409,994.432
1024,48,256,409,994.56
1024,52,256,409,993.536
1024,56,256,409,994.944
1024,60,256,409,992.384
1024,64,256,409,1471.49
1 nthread stride (byte) pitch (byte) niter t (us)
2 1024 4 256 100 248.576
3 1024 4 256 402 980.992
4 1024 4 256 409 998.016
5 1024 4 256 409 997.12
6 1024 8 256 409 997.504
7 1024 12 256 409 995.968
8 1024 16 256 409 992
9 1024 20 256 409 993.024
10 1024 24 256 409 993.536
11 1024 28 256 409 992.512
12 1024 32 256 409 994.048
13 1024 36 256 409 993.536
14 1024 40 256 409 995.84
15 1024 44 256 409 994.432
16 1024 48 256 409 994.56
17 1024 52 256 409 993.536
18 1024 56 256 409 994.944
19 1024 60 256 409 992.384
20 1024 64 256 409 1471.49

Просмотреть файл

@ -0,0 +1,9 @@
size (byte),niter,t (us)
4,100,28.16
4,3551,864
4,4109,984.576
4,4173,1067.9
4,4173,1067.52
8,4173,1029.5
16,4173,1041.92
32,4173,2793.98
1 size (byte) niter t (us)
2 4 100 28.16
3 4 3551 864
4 4 4109 984.576
5 4 4173 1067.9
6 4 4173 1067.52
7 8 4173 1029.5
8 16 4173 1041.92
9 32 4173 2793.98

Просмотреть файл

@ -0,0 +1,13 @@
float width (bit),ncomp,niter,t (us)
16,16,100,15079
16,1,100,15198
16,2,100,15127
16,4,100,15089
16,8,100,15073
16,16,100,15079.2
32,16,100,15083
32,1,100,15200
32,2,100,15139.1
32,4,100,15078.1
32,8,100,15076.5
32,16,100,15082.5
1 float width (bit) ncomp niter t (us)
2 16 16 100 15079
3 16 1 100 15198
4 16 2 100 15127
5 16 4 100 15089
6 16 8 100 15073
7 16 16 100 15079.2
8 32 16 100 15083
9 32 1 100 15200
10 32 2 100 15139.1
11 32 4 100 15078.1
12 32 8 100 15076.5
13 32 16 100 15082.5

Просмотреть файл

@ -0,0 +1,24 @@
range (byte),t (us),bandwidth (gbps)
16,5519.87,194.523
32,5520.9,194.487
64,5520.64,194.496
128,5519.1,194.55
256,5521.02,194.482
512,5520.13,194.514
1024,5520.9,194.487
2048,10828,99.1632
4096,10764.5,99.748
8192,10718.6,100.176
16384,10770.9,99.6887
32768,10817,99.2641
65536,10833.9,99.1093
131072,12219,87.8747
262144,15750,68.174
524288,7897.09,135.967
1.04858e+06,6376.58,168.388
2.09715e+06,5889.54,182.313
4.1943e+06,5684.99,188.873
8.38861e+06,5598.98,191.775
1.67772e+07,5561.09,193.081
3.35544e+07,5539.07,193.849
6.71089e+07,5529.47,194.185
1 range (byte) t (us) bandwidth (gbps)
2 16 5519.87 194.523
3 32 5520.9 194.487
4 64 5520.64 194.496
5 128 5519.1 194.55
6 256 5521.02 194.482
7 512 5520.13 194.514
8 1024 5520.9 194.487
9 2048 10828 99.1632
10 4096 10764.5 99.748
11 8192 10718.6 100.176
12 16384 10770.9 99.6887
13 32768 10817 99.2641
14 65536 10833.9 99.1093
15 131072 12219 87.8747
16 262144 15750 68.174
17 524288 7897.09 135.967
18 1.04858e+06 6376.58 168.388
19 2.09715e+06 5889.54 182.313
20 4.1943e+06 5684.99 188.873
21 8.38861e+06 5598.98 191.775
22 1.67772e+07 5561.09 193.081
23 3.35544e+07 5539.07 193.849
24 6.71089e+07 5529.47 194.185

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,105 @@
nthread,dim (x/y),niter,t (us)
1,0,100,15.872
1,0,6300,790.016
1,0,7974,998.912
1,0,7974,998.912
2,0,7974,998.912
3,0,7974,998.912
4,0,7974,998.912
5,0,7974,998.912
6,0,7974,998.912
7,0,7974,998.912
8,0,7974,998.912
9,0,7974,1026.05
10,0,7974,1026.05
11,0,7974,1026.05
12,0,7974,1026.05
13,0,7974,1026.05
14,0,7974,1026.05
15,0,7974,1026.05
16,0,7974,1026.05
17,0,7974,1053.18
18,0,7974,1052.93
19,0,7974,1052.93
20,0,7974,1052.93
21,0,7974,1052.93
22,0,7974,1053.95
23,0,7974,1053.18
24,0,7974,1053.95
25,0,7974,1080.96
26,0,7974,1081.09
27,0,7974,1081.09
28,0,7974,1080.83
29,0,7974,1080.83
30,0,7974,1080.83
31,0,7974,1081.09
32,0,7974,1080.96
33,0,7974,1107.97
34,0,7974,1107.97
35,0,7974,1107.97
36,0,7974,1107.97
37,0,7974,1107.97
38,0,7974,1107.97
39,0,7974,1107.97
40,0,7974,1107.97
41,0,7974,1135.1
42,0,7974,1135.1
43,0,7974,1135.1
44,0,7974,1135.1
45,0,7974,1135.1
46,0,7974,1135.1
47,0,7974,1135.1
48,0,7974,1135.1
49,0,7974,1163.01
50,0,7974,1163.01
51,0,7974,1163.01
52,0,7974,1163.01
53,0,7974,1163.01
54,0,7974,1162.5
55,0,7974,1163.01
56,0,7974,1163.01
57,0,7974,1189.89
58,0,7974,1189.89
59,0,7974,1189.89
60,0,7974,1189.89
61,0,7974,1189.89
62,0,7974,1189.89
63,0,7974,1189.89
64,0,7974,1189.89
65,0,7974,1693.95
1,1,100,16.128
1,1,6200,798.976
1,1,7759,998.912
1,1,7759,998.912
2,1,7759,998.912
3,1,7759,998.912
4,1,7759,998.912
5,1,7759,998.912
6,1,7759,998.912
7,1,7759,998.912
8,1,7759,998.912
9,1,7759,1025.02
10,1,7759,1025.02
11,1,7759,1025.02
12,1,7759,1025.02
13,1,7759,1025.02
14,1,7759,1025.02
15,1,7759,1025.02
16,1,7759,1025.02
17,1,7759,1051.9
18,1,7759,1051.9
19,1,7759,1051.9
20,1,7759,1051.9
21,1,7759,1051.9
22,1,7759,1051.9
23,1,7759,1051.9
24,1,7759,1051.9
25,1,7759,1078.02
26,1,7759,1078.02
27,1,7759,1078.02
28,1,7759,1078.02
29,1,7759,1078.02
30,1,7759,1078.02
31,1,7759,1078.02
32,1,7759,1078.02
33,1,7759,1569.02
1 nthread dim (x/y) niter t (us)
2 1 0 100 15.872
3 1 0 6300 790.016
4 1 0 7974 998.912
5 1 0 7974 998.912
6 2 0 7974 998.912
7 3 0 7974 998.912
8 4 0 7974 998.912
9 5 0 7974 998.912
10 6 0 7974 998.912
11 7 0 7974 998.912
12 8 0 7974 998.912
13 9 0 7974 1026.05
14 10 0 7974 1026.05
15 11 0 7974 1026.05
16 12 0 7974 1026.05
17 13 0 7974 1026.05
18 14 0 7974 1026.05
19 15 0 7974 1026.05
20 16 0 7974 1026.05
21 17 0 7974 1053.18
22 18 0 7974 1052.93
23 19 0 7974 1052.93
24 20 0 7974 1052.93
25 21 0 7974 1052.93
26 22 0 7974 1053.95
27 23 0 7974 1053.18
28 24 0 7974 1053.95
29 25 0 7974 1080.96
30 26 0 7974 1081.09
31 27 0 7974 1081.09
32 28 0 7974 1080.83
33 29 0 7974 1080.83
34 30 0 7974 1080.83
35 31 0 7974 1081.09
36 32 0 7974 1080.96
37 33 0 7974 1107.97
38 34 0 7974 1107.97
39 35 0 7974 1107.97
40 36 0 7974 1107.97
41 37 0 7974 1107.97
42 38 0 7974 1107.97
43 39 0 7974 1107.97
44 40 0 7974 1107.97
45 41 0 7974 1135.1
46 42 0 7974 1135.1
47 43 0 7974 1135.1
48 44 0 7974 1135.1
49 45 0 7974 1135.1
50 46 0 7974 1135.1
51 47 0 7974 1135.1
52 48 0 7974 1135.1
53 49 0 7974 1163.01
54 50 0 7974 1163.01
55 51 0 7974 1163.01
56 52 0 7974 1163.01
57 53 0 7974 1163.01
58 54 0 7974 1162.5
59 55 0 7974 1163.01
60 56 0 7974 1163.01
61 57 0 7974 1189.89
62 58 0 7974 1189.89
63 59 0 7974 1189.89
64 60 0 7974 1189.89
65 61 0 7974 1189.89
66 62 0 7974 1189.89
67 63 0 7974 1189.89
68 64 0 7974 1189.89
69 65 0 7974 1693.95
70 1 1 100 16.128
71 1 1 6200 798.976
72 1 1 7759 998.912
73 1 1 7759 998.912
74 2 1 7759 998.912
75 3 1 7759 998.912
76 4 1 7759 998.912
77 5 1 7759 998.912
78 6 1 7759 998.912
79 7 1 7759 998.912
80 8 1 7759 998.912
81 9 1 7759 1025.02
82 10 1 7759 1025.02
83 11 1 7759 1025.02
84 12 1 7759 1025.02
85 13 1 7759 1025.02
86 14 1 7759 1025.02
87 15 1 7759 1025.02
88 16 1 7759 1025.02
89 17 1 7759 1051.9
90 18 1 7759 1051.9
91 19 1 7759 1051.9
92 20 1 7759 1051.9
93 21 1 7759 1051.9
94 22 1 7759 1051.9
95 23 1 7759 1051.9
96 24 1 7759 1051.9
97 25 1 7759 1078.02
98 26 1 7759 1078.02
99 27 1 7759 1078.02
100 28 1 7759 1078.02
101 29 1 7759 1078.02
102 30 1 7759 1078.02
103 31 1 7759 1078.02
104 32 1 7759 1078.02
105 33 1 7759 1569.02

Просмотреть файл

@ -0,0 +1,226 @@
nthread,ngrp,nreg,niter,t (us)
1,1,1,100,7.936
1,1,1,12600,584.96
1,1,1,21539,997.12
1,1,1,21539,998.016
1,1,2,21539,1071.1
1,1,3,21539,1107.97
1,1,4,21539,1181.18
1,1,5,21539,1255.94
1,1,6,21539,1402.88
1,1,7,21539,1660.54
1,1,8,21539,1698.05
1,1,9,21539,2029.06
1,1,10,21539,1992.06
1,1,11,21539,2249.98
1,1,12,21539,2359.94
1,1,13,21539,2618.11
1,1,14,21539,2582.02
1,1,15,21539,2839.04
1,1,16,21539,2950.14
1,1,17,21539,3207.17
1,1,18,21539,3170.94
1,1,19,21539,3428.1
1,1,20,21539,3538.94
1,1,21,21539,3796.99
1,1,22,21539,3760.13
1,1,23,21539,4018.05
1,1,24,21539,4128.51
1,1,25,21539,4386.05
1,1,26,21539,4349.95
1,1,27,21539,4607.1
1,1,28,21539,4717.95
1,1,29,21539,4975.49
1,1,30,21539,4938.5
1,1,31,21539,5196.03
1,1,32,21539,5307.01
1,1,33,21539,5566.59
1,1,34,21539,5529.09
1,1,35,21539,5787.01
1,1,36,21539,5897.98
1,1,37,21539,6155.52
1,1,38,21539,6119.17
1,1,39,21539,6376.58
1,1,40,21539,6487.04
1,1,41,21539,6745.47
1,1,42,21539,6708.99
1,1,43,21539,6966.02
1,1,44,21539,7076.99
1,1,45,21539,7334.91
1,1,46,21539,7298.05
1,1,47,21539,7556.1
1,1,48,21539,7666.43
1,1,49,21539,7924.48
1,1,50,21539,7887.49
1,1,51,21539,8145.54
1,1,52,21539,8256
1,1,53,21539,8514.05
1,1,54,21539,8477.18
1,1,55,21539,8734.98
1,1,56,21539,8845.95
1,1,57,21539,9103.87
1,1,58,21539,9067.01
1,1,59,21539,9324.03
1,1,60,21539,9435.14
1,1,61,21539,9693.06
1,1,62,21539,9656.06
1,1,63,21539,9914.11
1,1,64,21539,10025
1,1,65,21539,10283
1,1,66,21539,10245.9
1,1,67,21539,10502.9
1,1,68,21539,10614.9
1,1,69,21539,10872.1
1,1,70,21539,10835.6
1,1,71,21539,11093
1,1,72,21539,11203.8
1,1,73,21539,11461.5
1,1,74,21539,11424.9
1,1,75,21539,11682.6
1,1,76,21539,11793.5
1,1,77,21539,12050.9
1,1,78,21539,12014.1
1,1,79,21539,12271.9
1,1,80,21539,12383
1,1,81,21539,12640
1,1,82,21539,12604
1,1,83,21539,12861.4
1,1,84,21539,12972
1,1,85,21539,13230.1
1,1,86,21539,13193.6
1,1,87,21539,13451
1,1,88,21539,13561.9
1,1,89,21539,13819.9
1,1,90,21539,13783
1,1,91,21539,14041
1,1,92,21539,14151.9
1,1,93,21539,14409
1,1,94,21539,14372.1
1,1,95,21539,14630
1,1,96,21539,14741
1,1,97,21539,14998.5
1,1,98,21539,14961.9
1,1,99,21539,15220
1,1,100,21539,15330
1,1,101,21539,15588.1
1,1,102,21539,15551
1,1,103,21539,15809
1,1,104,21539,15920
1,1,105,21539,16177.9
1,1,106,21539,16141.1
1,1,107,21539,16399.1
1,1,108,21539,16509.6
1,1,109,21539,16767
1,1,110,21539,16730.6
1,1,111,21539,16988.5
1,1,112,21539,17099
1,1,113,21539,17356.4
1,1,114,21539,17319.9
1,1,115,21539,17578
1,1,116,21539,17688.1
1,1,117,21539,17946.1
1,1,118,21539,17909
1,1,119,21539,18167
1,1,120,21539,18277.9
1,1,121,21539,18535.9
1,1,122,21539,18499.1
1,1,123,21539,18757
1,1,124,21539,18867.5
1,1,125,21539,19125
1,1,126,21539,19089.2
1,1,127,21539,19346.2
1,1,128,21539,19457
1,1,129,21539,19715.1
1,1,130,21539,19678.1
1,1,131,21539,19936
1,1,132,21539,20046.8
1,1,133,21539,20304.1
1,1,134,21539,20267.5
1,1,135,21539,20525.1
1,1,136,21539,20636
1,1,137,21539,20894
1,1,138,21539,20857.1
1,1,139,21539,21115
1,1,140,21539,21226
1,1,141,21539,21483
1,1,142,21539,21446.5
1,1,143,21539,21704.2
1,1,144,21539,21814.9
1,1,145,21539,22073
1,1,146,21539,22035.1
1,1,147,21539,22293
1,1,148,21539,22404.1
1,1,149,21539,22662.1
1,1,150,21539,22625
1,1,151,21539,22883.1
1,1,152,21539,22994.2
1,1,153,21539,23252
1,1,154,21539,23215.1
1,1,155,21539,23473
1,1,156,21539,23583
1,1,157,21539,23841
1,1,158,21539,23804.2
1,1,159,21539,24062.5
1,1,160,21539,24173.1
1,1,161,21539,24430.8
1,1,162,21539,24394.2
1,1,163,21539,24651
1,1,164,21539,24762
1,1,165,21539,25019.9
1,1,166,21539,24983
1,1,167,21539,25241.5
1,1,168,21539,25352.1
1,1,169,21539,25609.6
1,1,170,21539,25573
1,1,171,21539,25830.9
1,1,172,21539,25941
1,1,173,21539,26199
1,1,174,21539,26162.9
1,1,175,21539,26420.6
1,1,176,21539,26530.9
1,1,177,21539,26788.9
1,1,178,21539,26751.5
1,1,179,21539,26899.1
1,1,180,21539,27047.2
1,1,181,21539,28998.5
1,1,182,21539,29109
1,1,183,21539,32718
1,1,184,21539,44463.1
1,1,183,21539,32717.6
1,2,183,21539,32717.4
1,3,183,21539,32737.4
1,4,183,21539,32774.9
1,5,183,21539,32777
1,6,183,21539,32814.8
1,7,183,21539,32804
1,8,183,21539,32779.4
1,9,183,21539,32785.9
1,10,183,21539,32782.1
1,11,183,21539,32785.5
1,12,183,21539,32786.9
1,13,183,21539,65516
1,1,91,21539,14040.4
1,2,91,21539,14041.1
1,3,91,21539,14041
1,4,91,21539,14040.8
1,5,91,21539,14041.1
1,6,91,21539,14041.1
1,7,91,21539,14187
1,8,91,21539,14187
1,9,91,21539,14187
1,10,91,21539,14187
1,11,91,21539,14187
1,12,91,21539,14187
1,13,91,21539,14335
1,14,91,21539,14334.6
1,15,91,21539,14432.5
1,16,91,21539,14580
1,17,91,21539,14580
1,18,91,21539,14579.6
1,19,91,21539,15253
1,20,91,21539,15253
1,21,91,21539,15291.9
1,22,91,21539,15292
1,23,91,21539,15474.9
1,24,91,21539,15473.9
1,25,91,21539,28095
1 nthread ngrp nreg niter t (us)
2 1 1 1 100 7.936
3 1 1 1 12600 584.96
4 1 1 1 21539 997.12
5 1 1 1 21539 998.016
6 1 1 2 21539 1071.1
7 1 1 3 21539 1107.97
8 1 1 4 21539 1181.18
9 1 1 5 21539 1255.94
10 1 1 6 21539 1402.88
11 1 1 7 21539 1660.54
12 1 1 8 21539 1698.05
13 1 1 9 21539 2029.06
14 1 1 10 21539 1992.06
15 1 1 11 21539 2249.98
16 1 1 12 21539 2359.94
17 1 1 13 21539 2618.11
18 1 1 14 21539 2582.02
19 1 1 15 21539 2839.04
20 1 1 16 21539 2950.14
21 1 1 17 21539 3207.17
22 1 1 18 21539 3170.94
23 1 1 19 21539 3428.1
24 1 1 20 21539 3538.94
25 1 1 21 21539 3796.99
26 1 1 22 21539 3760.13
27 1 1 23 21539 4018.05
28 1 1 24 21539 4128.51
29 1 1 25 21539 4386.05
30 1 1 26 21539 4349.95
31 1 1 27 21539 4607.1
32 1 1 28 21539 4717.95
33 1 1 29 21539 4975.49
34 1 1 30 21539 4938.5
35 1 1 31 21539 5196.03
36 1 1 32 21539 5307.01
37 1 1 33 21539 5566.59
38 1 1 34 21539 5529.09
39 1 1 35 21539 5787.01
40 1 1 36 21539 5897.98
41 1 1 37 21539 6155.52
42 1 1 38 21539 6119.17
43 1 1 39 21539 6376.58
44 1 1 40 21539 6487.04
45 1 1 41 21539 6745.47
46 1 1 42 21539 6708.99
47 1 1 43 21539 6966.02
48 1 1 44 21539 7076.99
49 1 1 45 21539 7334.91
50 1 1 46 21539 7298.05
51 1 1 47 21539 7556.1
52 1 1 48 21539 7666.43
53 1 1 49 21539 7924.48
54 1 1 50 21539 7887.49
55 1 1 51 21539 8145.54
56 1 1 52 21539 8256
57 1 1 53 21539 8514.05
58 1 1 54 21539 8477.18
59 1 1 55 21539 8734.98
60 1 1 56 21539 8845.95
61 1 1 57 21539 9103.87
62 1 1 58 21539 9067.01
63 1 1 59 21539 9324.03
64 1 1 60 21539 9435.14
65 1 1 61 21539 9693.06
66 1 1 62 21539 9656.06
67 1 1 63 21539 9914.11
68 1 1 64 21539 10025
69 1 1 65 21539 10283
70 1 1 66 21539 10245.9
71 1 1 67 21539 10502.9
72 1 1 68 21539 10614.9
73 1 1 69 21539 10872.1
74 1 1 70 21539 10835.6
75 1 1 71 21539 11093
76 1 1 72 21539 11203.8
77 1 1 73 21539 11461.5
78 1 1 74 21539 11424.9
79 1 1 75 21539 11682.6
80 1 1 76 21539 11793.5
81 1 1 77 21539 12050.9
82 1 1 78 21539 12014.1
83 1 1 79 21539 12271.9
84 1 1 80 21539 12383
85 1 1 81 21539 12640
86 1 1 82 21539 12604
87 1 1 83 21539 12861.4
88 1 1 84 21539 12972
89 1 1 85 21539 13230.1
90 1 1 86 21539 13193.6
91 1 1 87 21539 13451
92 1 1 88 21539 13561.9
93 1 1 89 21539 13819.9
94 1 1 90 21539 13783
95 1 1 91 21539 14041
96 1 1 92 21539 14151.9
97 1 1 93 21539 14409
98 1 1 94 21539 14372.1
99 1 1 95 21539 14630
100 1 1 96 21539 14741
101 1 1 97 21539 14998.5
102 1 1 98 21539 14961.9
103 1 1 99 21539 15220
104 1 1 100 21539 15330
105 1 1 101 21539 15588.1
106 1 1 102 21539 15551
107 1 1 103 21539 15809
108 1 1 104 21539 15920
109 1 1 105 21539 16177.9
110 1 1 106 21539 16141.1
111 1 1 107 21539 16399.1
112 1 1 108 21539 16509.6
113 1 1 109 21539 16767
114 1 1 110 21539 16730.6
115 1 1 111 21539 16988.5
116 1 1 112 21539 17099
117 1 1 113 21539 17356.4
118 1 1 114 21539 17319.9
119 1 1 115 21539 17578
120 1 1 116 21539 17688.1
121 1 1 117 21539 17946.1
122 1 1 118 21539 17909
123 1 1 119 21539 18167
124 1 1 120 21539 18277.9
125 1 1 121 21539 18535.9
126 1 1 122 21539 18499.1
127 1 1 123 21539 18757
128 1 1 124 21539 18867.5
129 1 1 125 21539 19125
130 1 1 126 21539 19089.2
131 1 1 127 21539 19346.2
132 1 1 128 21539 19457
133 1 1 129 21539 19715.1
134 1 1 130 21539 19678.1
135 1 1 131 21539 19936
136 1 1 132 21539 20046.8
137 1 1 133 21539 20304.1
138 1 1 134 21539 20267.5
139 1 1 135 21539 20525.1
140 1 1 136 21539 20636
141 1 1 137 21539 20894
142 1 1 138 21539 20857.1
143 1 1 139 21539 21115
144 1 1 140 21539 21226
145 1 1 141 21539 21483
146 1 1 142 21539 21446.5
147 1 1 143 21539 21704.2
148 1 1 144 21539 21814.9
149 1 1 145 21539 22073
150 1 1 146 21539 22035.1
151 1 1 147 21539 22293
152 1 1 148 21539 22404.1
153 1 1 149 21539 22662.1
154 1 1 150 21539 22625
155 1 1 151 21539 22883.1
156 1 1 152 21539 22994.2
157 1 1 153 21539 23252
158 1 1 154 21539 23215.1
159 1 1 155 21539 23473
160 1 1 156 21539 23583
161 1 1 157 21539 23841
162 1 1 158 21539 23804.2
163 1 1 159 21539 24062.5
164 1 1 160 21539 24173.1
165 1 1 161 21539 24430.8
166 1 1 162 21539 24394.2
167 1 1 163 21539 24651
168 1 1 164 21539 24762
169 1 1 165 21539 25019.9
170 1 1 166 21539 24983
171 1 1 167 21539 25241.5
172 1 1 168 21539 25352.1
173 1 1 169 21539 25609.6
174 1 1 170 21539 25573
175 1 1 171 21539 25830.9
176 1 1 172 21539 25941
177 1 1 173 21539 26199
178 1 1 174 21539 26162.9
179 1 1 175 21539 26420.6
180 1 1 176 21539 26530.9
181 1 1 177 21539 26788.9
182 1 1 178 21539 26751.5
183 1 1 179 21539 26899.1
184 1 1 180 21539 27047.2
185 1 1 181 21539 28998.5
186 1 1 182 21539 29109
187 1 1 183 21539 32718
188 1 1 184 21539 44463.1
189 1 1 183 21539 32717.6
190 1 2 183 21539 32717.4
191 1 3 183 21539 32737.4
192 1 4 183 21539 32774.9
193 1 5 183 21539 32777
194 1 6 183 21539 32814.8
195 1 7 183 21539 32804
196 1 8 183 21539 32779.4
197 1 9 183 21539 32785.9
198 1 10 183 21539 32782.1
199 1 11 183 21539 32785.5
200 1 12 183 21539 32786.9
201 1 13 183 21539 65516
202 1 1 91 21539 14040.4
203 1 2 91 21539 14041.1
204 1 3 91 21539 14041
205 1 4 91 21539 14040.8
206 1 5 91 21539 14041.1
207 1 6 91 21539 14041.1
208 1 7 91 21539 14187
209 1 8 91 21539 14187
210 1 9 91 21539 14187
211 1 10 91 21539 14187
212 1 11 91 21539 14187
213 1 12 91 21539 14187
214 1 13 91 21539 14335
215 1 14 91 21539 14334.6
216 1 15 91 21539 14432.5
217 1 16 91 21539 14580
218 1 17 91 21539 14580
219 1 18 91 21539 14579.6
220 1 19 91 21539 15253
221 1 20 91 21539 15253
222 1 21 91 21539 15291.9
223 1 22 91 21539 15292
224 1 23 91 21539 15474.9
225 1 24 91 21539 15473.9
226 1 25 91 21539 28095

Просмотреть файл

@ -0,0 +1,130 @@
nthread,nascend
1,1
2,2
3,3
4,4
5,5
6,6
7,7
8,8
9,9
10,10
11,11
12,12
13,13
14,14
15,15
16,16
17,17
18,18
19,19
20,20
21,21
22,22
23,23
24,24
25,25
26,26
27,27
28,28
29,29
30,30
31,31
32,32
33,33
34,34
35,35
36,36
37,37
38,38
39,39
40,40
41,41
42,42
43,43
44,44
45,45
46,46
47,47
48,48
49,49
50,50
51,51
52,52
53,53
54,54
55,55
56,56
57,57
58,58
59,59
60,60
61,61
62,62
63,63
64,64
65,65
66,66
67,67
68,68
69,69
70,70
71,71
72,72
73,73
74,74
75,75
76,76
77,77
78,78
79,79
80,80
81,81
82,82
83,83
84,84
85,85
86,86
87,87
88,88
89,89
90,90
91,91
92,92
93,93
94,94
95,95
96,96
97,97
98,98
99,99
100,100
101,101
102,102
103,103
104,104
105,105
106,106
107,107
108,108
109,109
110,110
111,111
112,112
113,113
114,114
115,115
116,116
117,117
118,118
119,119
120,120
121,121
122,122
123,123
124,124
125,125
126,126
127,127
128,128
129,128
1 nthread nascend
2 1 1
3 2 2
4 3 3
5 4 4
6 5 5
7 6 6
8 7 7
9 8 8
10 9 9
11 10 10
12 11 11
13 12 12
14 13 13
15 14 14
16 15 15
17 16 16
18 17 17
19 18 18
20 19 19
21 20 20
22 21 21
23 22 22
24 23 23
25 24 24
26 25 25
27 26 26
28 27 27
29 28 28
30 29 29
31 30 30
32 31 31
33 32 32
34 33 33
35 34 34
36 35 35
37 36 36
38 37 37
39 38 38
40 39 39
41 40 40
42 41 41
43 42 42
44 43 43
45 44 44
46 45 45
47 46 46
48 47 47
49 48 48
50 49 49
51 50 50
52 51 51
53 52 52
54 53 53
55 54 54
56 55 55
57 56 56
58 57 57
59 58 58
60 59 59
61 60 60
62 61 61
63 62 62
64 63 63
65 64 64
66 65 65
67 66 66
68 67 67
69 68 68
70 69 69
71 70 70
72 71 71
73 72 72
74 73 73
75 74 74
76 75 75
77 76 76
78 77 77
79 78 78
80 79 79
81 80 80
82 81 81
83 82 82
84 83 83
85 84 84
86 85 85
87 86 86
88 87 87
89 88 88
90 89 89
91 90 90
92 91 91
93 92 92
94 93 93
95 94 94
96 95 95
97 96 96
98 97 97
99 98 98
100 99 99
101 100 100
102 101 101
103 102 102
104 103 103
105 104 104
106 105 105
107 106 106
108 107 107
109 108 108
110 109 109
111 110 110
112 111 111
113 112 112
114 113 113
115 114 114
116 115 115
117 116 116
118 117 117
119 118 118
120 119 119
121 120 120
122 121 121
123 122 122
124 123 123
125 124 124
126 125 125
127 126 126
128 127 127
129 128 128
130 129 128

Просмотреть файл

@ -0,0 +1,67 @@
nthread,time (us)
1,1122.94
1,1123.07
2,1123.07
3,1122.56
4,1123.07
5,1122.94
6,1122.94
7,1122.82
8,1123.07
9,1123.07
10,1122.94
11,1122.94
12,1122.05
13,1122.94
14,1122.56
15,1122.05
16,1122.43
17,1122.56
18,1122.05
19,1122.82
20,1122.05
21,1122.43
22,1122.94
23,1122.05
24,1122.05
25,1123.07
26,1122.82
27,1122.43
28,1123.07
29,1123.07
30,1123.07
31,1123.07
32,1123.07
33,1123.07
34,1123.07
35,1122.82
36,1123.07
37,1122.94
38,1123.07
39,1123.07
40,1123.07
41,1123.07
42,1123.07
43,1123.07
44,1123.07
45,1123.07
46,1123.07
47,1123.07
48,1123.07
49,1123.07
50,1123.07
51,1123.84
52,1123.46
53,1123.84
54,1123.46
55,1123.07
56,1123.07
57,1123.07
58,1123.46
59,1123.97
60,1123.97
61,1123.07
62,1123.07
63,1123.97
64,1124.1
65,1265.92
1 nthread time (us)
2 1 1122.94
3 1 1123.07
4 2 1123.07
5 3 1122.56
6 4 1123.07
7 5 1122.94
8 6 1122.94
9 7 1122.82
10 8 1123.07
11 9 1123.07
12 10 1122.94
13 11 1122.94
14 12 1122.05
15 13 1122.94
16 14 1122.56
17 15 1122.05
18 16 1122.43
19 17 1122.56
20 18 1122.05
21 19 1122.82
22 20 1122.05
23 21 1122.43
24 22 1122.94
25 23 1122.05
26 24 1122.05
27 25 1123.07
28 26 1122.82
29 27 1122.43
30 28 1123.07
31 29 1123.07
32 30 1123.07
33 31 1123.07
34 32 1123.07
35 33 1123.07
36 34 1123.07
37 35 1122.82
38 36 1123.07
39 37 1122.94
40 38 1123.07
41 39 1123.07
42 40 1123.07
43 41 1123.07
44 42 1123.07
45 43 1123.07
46 44 1123.07
47 45 1123.07
48 46 1123.07
49 47 1123.07
50 48 1123.07
51 49 1123.07
52 50 1123.07
53 51 1123.84
54 52 1123.46
55 53 1123.84
56 54 1123.46
57 55 1123.07
58 56 1123.07
59 57 1123.07
60 58 1123.46
61 59 1123.97
62 60 1123.97
63 61 1123.07
64 62 1123.07
65 63 1123.97
66 64 1124.1
67 65 1265.92

Разница между файлами не показана из-за своего большого размера Загрузить разницу

185
include/args.hpp Normal file
Просмотреть файл

@ -0,0 +1,185 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// NOTE: This is a modified excerpt of
// https://github.com/PENGUINLIONG/graphi-t/blob/da31ec530df07c9899e056eeced08a64062dcfce/include/args.hpp;
// MIT-licensed by Rendong Liang.
// Argument parsing utilities.
// @PENGUINLIONG
#pragma once
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <string>
namespace archprobe {
namespace args {
// Type-erased description of how a single command-line argument is parsed.
struct ArgumentParseConfig {
// Expected number of argument segments.
uint32_t narg;
// Parser callback; returns true if the parsing is successful.
bool (*parser)(const char*[], void*);
// Callback returning the literal of the default value.
std::string (*lit)(const void*);
// Destination to be written with the parsed value.
void* dst;
};
// Optionally initialize argument parser with application name and usage
// description.
extern void init_arg_parse(const char* app_name, const char* desc);
// Get the name of this app set by the user. Empty string is returned if this
// function is called before `init_arg_parse`.
extern const char* get_app_name();
// Print help message to the standard output.
extern void print_help();
// Build a type-erased `ArgumentParseConfig` from the typed parser
// `TTypedParser`: its segment count, parse function and literal printer are
// copied into the config and `dst` is recorded as the write destination.
// User code MUST ensure the `dst` buffer can contain the parsing result.
template<typename TTypedParser>
ArgumentParseConfig make_parse_cfg(void* dst) {
  ArgumentParseConfig cfg;
  cfg.dst = dst;
  cfg.lit = &TTypedParser::lit;
  cfg.parser = &TTypedParser::parse;
  cfg.narg = TTypedParser::narg;
  return cfg;
}
// Register customized argument parsing.
extern void reg_arg(
const char* short_flag,
const char* long_flag,
const ArgumentParseConfig& parse_cfg,
const char* help
);
// Register a structural argument parsing.
template<typename TTypedParser>
inline void reg_arg(
const char* short_flag,
const char* long_flag,
typename TTypedParser::arg_ty& dst,
const char* help
) {
reg_arg(short_flag, long_flag, make_parse_cfg<TTypedParser>(&dst), help);
}
// Parse arguments. Arguments will be matched against argument parsers
// registered before.
extern void parse_args(int argc, const char** argv);
//
// Parsers.
//
// Primary template: fallback for types that have no dedicated parser. Its
// `parse` always fails; the usable parsers are the specializations below.
template<typename T>
struct TypedArgumentParser {
  typedef struct {} arg_ty;
  // Number of argument entries needed for this argument. `-1` converts to the
  // largest `uint32_t` value here.
  static const uint32_t narg = static_cast<uint32_t>(-1);
  // Parser function. Convert the literal in the first parameter into structured
  // native representation. Return `true` on success.
  static bool parse(const char* lit[], void* dst) {
    return false;
  }
  // Literal of the value stored at `src`; empty for the fallback.
  static std::string lit(const void* src) {
    return {};
  }
};

// String arguments: the literal is copied verbatim into the destination.
template<>
struct TypedArgumentParser<std::string> {
  typedef std::string arg_ty;
  static const uint32_t narg = 1;
  static bool parse(const char* lit[], void* dst) {
    *static_cast<arg_ty*>(dst) = lit[0];
    return true;
  }
  static std::string lit(const void* src) {
    return *static_cast<const arg_ty*>(src);
  }
};

// Signed 32-bit integer arguments (decimal).
template<>
struct TypedArgumentParser<int32_t> {
  typedef int arg_ty;
  static const uint32_t narg = 1;
  // Returns false, leaving `dst` untouched, when the literal is not entirely
  // a decimal integer or does not fit in 32 bits.
  static bool parse(const char* lit[], void* dst) {
    const char* text = lit[0];
    char* end = nullptr;
    // `long long` is at least 64 bits wide, so the range check below is
    // meaningful on every platform (out-of-range input saturates and fails it).
    const long long value = std::strtoll(text, &end, 10);
    if (end == text || *end != '\0') { return false; }
    if (value < INT32_MIN || value > INT32_MAX) { return false; }
    *static_cast<int32_t*>(dst) = static_cast<int32_t>(value);
    return true;
  }
  static std::string lit(const void* src) {
    return std::to_string(*static_cast<const arg_ty*>(src));
  }
};

// Unsigned 32-bit integer arguments (decimal).
template<>
struct TypedArgumentParser<uint32_t> {
  typedef uint32_t arg_ty;
  static const uint32_t narg = 1;
  // Returns false, leaving `dst` untouched, when the literal is negative, is
  // not entirely a decimal integer, or exceeds the `uint32_t` range.
  static bool parse(const char* lit[], void* dst) {
    const char* text = lit[0];
    // `strtoull` accepts "-N" and silently negates it; an unsigned argument
    // must reject a minus sign instead of wrapping around.
    if (std::strchr(text, '-') != nullptr) { return false; }
    char* end = nullptr;
    const unsigned long long value = std::strtoull(text, &end, 10);
    if (end == text || *end != '\0') { return false; }
    if (value > UINT32_MAX) { return false; }
    *static_cast<arg_ty*>(dst) = static_cast<arg_ty>(value);
    return true;
  }
  static std::string lit(const void* src) {
    return std::to_string(*static_cast<const arg_ty*>(src));
  }
};

// Single-precision floating-point arguments.
template<>
struct TypedArgumentParser<float> {
  typedef float arg_ty;
  static const uint32_t narg = 1;
  // Returns false, leaving `dst` untouched, when the literal is not entirely
  // a floating-point number.
  static bool parse(const char* lit[], void* dst) {
    const char* text = lit[0];
    char* end = nullptr;
    const float value = std::strtof(text, &end);
    if (end == text || *end != '\0') { return false; }
    *static_cast<arg_ty*>(dst) = value;
    return true;
  }
  static std::string lit(const void* src) {
    return std::to_string(*static_cast<const arg_ty*>(src));
  }
};

// NOTE: This is used for arguments like `-f true` and `-f false`. If you need
// a boolean argument that does not have to be given a value explicitly, use
// `SwitchArgumentParser` instead.
template<>
struct TypedArgumentParser<bool> {
  typedef bool arg_ty;
  static const uint32_t narg = 1;
  // Accepts exactly "true"/"True" and "false"/"False"; anything else fails
  // and leaves `dst` untouched.
  static bool parse(const char* lit[], void* dst) {
    const char* text = lit[0];
    if (std::strcmp(text, "true") == 0 || std::strcmp(text, "True") == 0) {
      *static_cast<arg_ty*>(dst) = true;
      return true;
    }
    if (std::strcmp(text, "false") == 0 || std::strcmp(text, "False") == 0) {
      *static_cast<arg_ty*>(dst) = false;
      return true;
    }
    return false;
  }
  static std::string lit(const void* src) {
    return *static_cast<const arg_ty*>(src) ? "true" : "false";
  }
};
// Parser for presence-only flags: it needs no argument entries and sets the
// destination `bool` to true whenever it is invoked. It has no default-value
// literal, so `lit` returns an empty string.
struct SwitchArgumentParser {
  typedef bool arg_ty;
  static const uint32_t narg = 0;
  static bool parse(const char* lit[], void* dst) {
    arg_ty& flag = *static_cast<arg_ty*>(dst);
    flag = true;
    return true;
  }
  static std::string lit(const void* src) {
    return std::string();
  }
};
// Short aliases for the parsers defined in this header, intended to be used as
// the `TTypedParser` template argument of `reg_arg`.
using IntParser = TypedArgumentParser<int32_t>;
using UintParser = TypedArgumentParser<uint32_t>;
using FloatParser = TypedArgumentParser<float>;
using BoolParser = TypedArgumentParser<bool>;
using StringParser = TypedArgumentParser<std::string>;
using SwitchParser = SwitchArgumentParser;
} // namespace args
} // namespace archprobe

39
include/assert.hpp Normal file
Просмотреть файл

@ -0,0 +1,39 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// NOTE: This is a modified excerpt of
// https://github.com/PENGUINLIONG/graphi-t/blob/d291c3d1ce3795fe4b305e5efd76b4f586d23e3b/include/assert.hpp;
// MIT-licensed by Rendong Liang.
// Assertion.
// @PENGUINLIONG
#pragma once
#include "util.hpp"
#undef assert
namespace archprobe {
// Exception thrown by `archprobe::assert` when the checked predicate is false.
class AssertionFailedException : public std::exception {
// Message text held by the exception.
// NOTE(review): presumably what `what()` returns — both members are defined
// out of line, confirm there.
std::string msg;
public:
AssertionFailedException(const std::string& msg);
const char* what() const noexcept override;
};
// Throw `AssertionFailedException` unless `pred` holds. The exception message
// is whatever `util::format` produces from `args`.
template<typename ... TArgs>
inline void assert(bool pred, const TArgs& ... args) {
  if (pred) { return; }
  throw AssertionFailedException(util::format(args ...));
}
// Unconditional failure: forwards `args` to `assert` with a false predicate,
// so `AssertionFailedException` is always thrown.
template<typename ... TArgs>
inline void panic(const TArgs& ... args) {
assert<TArgs ...>(false, args ...);
}
// Marks code that must never execute: always fails through `assert`, with the
// message arguments prefixed by "reached unreachable code: ".
template<typename ... TArgs>
inline void unreachable(const TArgs& ... args) {
assert<const char*, TArgs ...>(false, "reached unreachable code: ", args ...);
}
} // namespace archprobe

166
include/cl.hpp Normal file
Просмотреть файл

@ -0,0 +1,166 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// OpenCL wrappings
// @PENGUINLIONG
#pragma once
#define CL_TARGET_OPENCL_VERSION 200
#define CL_HPP_TARGET_OPENCL_VERSION CL_TARGET_OPENCL_VERSION
#include <CL/opencl.hpp>
namespace archprobe {
// Exception constructed from an OpenCL status code; `CLAssert` throws it for
// any code other than `CL_SUCCESS`.
class CLException : public std::exception {
// NOTE(review): non-owning raw pointer, presumably to a static description of
// the status code — confirm in the constructor's definition that it cannot
// dangle.
const char* msg;
public:
CLException(cl_int code);
const char* what() const noexcept override;
};
// Stream-style checker for OpenCL status codes. `CL_ASSERT << code` throws
// `CLException` for any code other than `CL_SUCCESS`; the checker itself is
// returned so that several codes can be chained in one expression.
struct CLAssert {
  inline const CLAssert& operator<<(cl_int code) const {
    if (code == CL_SUCCESS) { return *this; }
    throw CLException(code);
  }
};
#define CL_ASSERT (::archprobe::CLAssert{})
// Record pairing an OpenCL platform id with a device id, plus descriptive
// strings. Instances are listed in the global `dev_stubs` vector.
// NOTE(review): the field meanings below are inferred from their names;
// confirm against the code that fills `dev_stubs`.
struct DeviceStub {
cl_platform_id platform_id;
cl_device_id dev_id;
// Presumably the extension strings reported by the platform and the device.
std::string platform_exts;
std::string dev_exts;
// Presumably a human-readable description of the device.
std::string desc;
};
extern std::vector<DeviceStub> dev_stubs;
void initialize();
std::string desc_dev(uint32_t idx);
cl::Device select_dev(uint32_t idev);
cl::Context create_ctxt(const cl::Device& dev);
cl::CommandQueue create_cmd_queue(const cl::Context& ctxt);
cl::Program create_program(
const cl::Device& dev,
const cl::Context& ctxt,
const char* src,
const char* build_opts
);
// Convenience overload taking the program source and build options as
// `std::string`; it forwards their C strings to the `const char*` overload.
inline cl::Program create_program(
  const cl::Device& dev,
  const cl::Context& ctxt,
  const std::string& src,
  const std::string& build_opts
) {
  const char* src_lit = src.c_str();
  const char* build_opts_lit = build_opts.c_str();
  return create_program(dev, ctxt, src_lit, build_opts_lit);
}
cl::Kernel create_kernel(cl::Program program, const std::string& kernel_name);
double bench_kernel(
const cl::CommandQueue& cmd_queue,
const cl::Kernel& kernel,
const cl::NDRange& local_size,
const cl::NDRange& global_size,
uint32_t niter
);
cl::Image2D create_img_2d(
const cl::Context& ctxt,
cl_mem_flags mem_flags,
cl::ImageFormat img_fmt,
uint32_t width,
uint32_t height
);
// Convenience overload: builds the `cl::ImageFormat` from a channel order and
// a channel type, then forwards to the overload taking the format directly.
inline cl::Image2D create_img_2d(
  const cl::Context& ctxt,
  cl_mem_flags mem_flags,
  cl_channel_order channel_order,
  cl_channel_type channel_type,
  uint32_t width,
  uint32_t height
) {
  return create_img_2d(
    ctxt,
    mem_flags,
    cl::ImageFormat(channel_order, channel_type),
    width,
    height
  );
}
cl::Image1D create_img_1d(
const cl::Context& ctxt,
cl_mem_flags mem_flags,
cl::ImageFormat img_fmt,
uint32_t width
);
// Convenience overload: builds the `cl::ImageFormat` from a channel order and
// a channel type, then forwards to the overload taking the format directly.
inline cl::Image1D create_img_1d(
  const cl::Context& ctxt,
  cl_mem_flags mem_flags,
  cl_channel_order channel_order,
  cl_channel_type channel_type,
  uint32_t width
) {
  return create_img_1d(
    ctxt,
    mem_flags,
    cl::ImageFormat(channel_order, channel_type),
    width
  );
}
cl::Buffer create_buf(
const cl::Context& ctxt,
cl_mem_flags mem_flags,
size_t size
);
// Result record of the `map_img_*` functions: a data pointer plus extent and
// pitch fields. Converts implicitly to `void*`, yielding `data`.
// NOTE(review): units of the extents/pitches are not visible here — confirm
// against the `map_img_*` definitions.
struct MapImage {
  void* data;
  size_t width;
  size_t height;
  size_t depth;
  size_t row_pitch;
  size_t slice_pitch;

  operator void*() const { return data; }
};
// Result record of `map_buf`: a data pointer and a size. Converts implicitly
// to `void*`, yielding `data`.
struct MapBuffer {
  void* data;
  size_t size;

  operator void*() const { return data; }
};
MapImage map_img_2d(
const cl::CommandQueue& cmd_queue,
const cl::Image2D& img
);
void unmap_img_2d(
const cl::CommandQueue& cmd_queue,
const cl::Image2D& img,
MapImage& mapped
);
MapImage map_img_1d(
const cl::CommandQueue& cmd_queue,
const cl::Image1D& img
);
void unmap_img_1d(
const cl::CommandQueue& cmd_queue,
const cl::Image1D& img,
MapImage& mapped
);
MapBuffer map_buf(
const cl::CommandQueue& cmd_queue,
const cl::Buffer& buf,
size_t offset,
size_t size
);
MapBuffer map_buf(
const cl::CommandQueue& cmd_queue,
const cl::Buffer& buf
);
void unmap_buf(
const cl::CommandQueue& cmd_queue,
const cl::Buffer& buf,
MapBuffer& mapped
);
} // namespace archprobe

203
include/json.hpp Normal file
Просмотреть файл

@ -0,0 +1,203 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// NOTE: This is a modified excerpt of
// https://github.com/PENGUINLIONG/graphi-t/blob/da31ec530df07c9899e056eeced08a64062dcfce/include/json.hpp;
// MIT-licensed by Rendong Liang.
// JSON serialization/deserialization.
// @PENGUINLIONG
#pragma once
#include <string>
#include <vector>
#include <map>
#include <sstream>
namespace archprobe {
namespace json {
// Any error that occurred during JSON serialization/deserialization. Also
// thrown by `JsonValue` accessors when the value has the wrong type.
class JsonException : public std::exception {
private:
// Message text held by the exception.
std::string msg;
public:
JsonException(const char* msg);
const char* what() const noexcept override;
};
// Type of JSON value. Stored in `JsonValue::ty` to tell which kind of value a
// `JsonValue` currently holds.
enum JsonType {
L_JSON_NULL,
L_JSON_BOOLEAN,
L_JSON_NUMBER,
L_JSON_STRING,
L_JSON_OBJECT,
L_JSON_ARRAY,
};
struct JsonValue;
class JsonElementEnumerator {
std::vector<JsonValue>::const_iterator beg_, end_;
public:
JsonElementEnumerator(const std::vector<JsonValue>& arr) :
beg_(arr.cbegin()), end_(arr.cend()) {}
std::vector<JsonValue>::const_iterator begin() const {
return beg_;
}
std::vector<JsonValue>::const_iterator end() const {
return end_;
}
};
class JsonFieldEnumerator {
std::map<std::string, JsonValue>::const_iterator beg_, end_;
public:
JsonFieldEnumerator(const std::map<std::string, JsonValue>& obj) :
beg_(obj.cbegin()), end_(obj.cend()) {}
std::map<std::string, JsonValue>::const_iterator begin() const {
return beg_;
}
std::map<std::string, JsonValue>::const_iterator end() const {
return end_;
}
};
// JSON array builder. A `JsonValue` can be constructed from it.
struct JsonArray {
// The wrapped list of elements.
std::vector<JsonValue> inner;
// Create an empty array.
inline JsonArray() : inner() {}
// Create an array from a list of elements (defined out of line).
JsonArray(std::initializer_list<JsonValue>&& elems);
};
// JSON object builder. A `JsonValue` can be constructed from it.
struct JsonObject {
// The wrapped key-to-value mapping.
std::map<std::string, JsonValue> inner;
// Create an empty object.
inline JsonObject() : inner() {}
// Create an object from a list of key/value entries (defined out of line).
JsonObject(
std::initializer_list<std::pair<const std::string, JsonValue>>&& entries
);
};
// Represent a abstract value in JSON representation.
struct JsonValue {
JsonType ty;
bool b;
double num;
std::string str;
std::map<std::string, JsonValue> obj;
std::vector<JsonValue> arr;
inline JsonValue() : ty(L_JSON_NULL) {}
inline JsonValue(nullptr_t) : ty(L_JSON_NULL) {}
inline JsonValue(bool b) : ty(L_JSON_BOOLEAN), b(b) {}
inline JsonValue(double num) : ty(L_JSON_NUMBER), num(num) {}
inline JsonValue(float num) : ty(L_JSON_NUMBER), num(num) {}
inline JsonValue(int num) : ty(L_JSON_NUMBER), num(num) {}
inline JsonValue(unsigned int num) : ty(L_JSON_NUMBER), num(num) {}
inline JsonValue(long num) : ty(L_JSON_NUMBER), num(num) {}
inline JsonValue(unsigned long num) : ty(L_JSON_NUMBER), num(num) {}
inline JsonValue(const char* str) : ty(L_JSON_STRING), str(str) {}
inline JsonValue(const std::string& str) : ty(L_JSON_STRING), str(str) {}
inline JsonValue(std::string&& str) :
ty(L_JSON_STRING),
str(std::forward<std::string>(str)) {}
JsonValue(JsonObject&& obj);
JsonValue(JsonArray&& arr);
inline JsonValue& operator[](const char* key) {
if (!is_obj()) { throw JsonException("value is not an object"); }
return obj.at(key);
}
inline const JsonValue& operator[](const char* key) const {
if (!is_obj()) { throw JsonException("value is not an object"); }
return obj.at(key);
}
inline JsonValue& operator[](const std::string& key) {
if (!is_obj()) { throw JsonException("value is not an object"); }
return obj.at(key);
}
inline const JsonValue& operator[](const std::string& key) const {
if (!is_obj()) { throw JsonException("value is not an object"); }
return obj.at(key);
}
inline JsonValue& operator[](size_t i) {
if (!is_arr()) { throw JsonException("value is not an array"); }
return arr.at(i);
}
inline const JsonValue& operator[](size_t i) const {
if (!is_arr()) { throw JsonException("value is not an array"); }
return arr.at(i);
}
inline operator bool() const {
if (!is_bool()) { throw JsonException("value is not a bool"); }
return b;
}
inline operator double() const {
if (!is_num()) { throw JsonException("value is not a number"); }
return num;
}
inline operator float() const {
if (!is_num()) { throw JsonException("value is not a number"); }
return (float)num;
}
inline operator int() const {
if (!is_num()) { throw JsonException("value is not a number"); }
return (int)num;
}
inline operator unsigned int() const {
if (!is_num()) { throw JsonException("value is not a number"); }
return (unsigned int)num;
}
inline operator long() const {
if (!is_num()) { throw JsonException("value is not a number"); }
return (long)num;
}
inline operator unsigned long() const {
if (!is_num()) { throw JsonException("value is not a number"); }
return (unsigned long)num;
}
inline operator std::string() const {
if (!is_str()) { throw JsonException("value is not a string"); }
return str;
}
inline bool is_null() const { return ty == L_JSON_NULL; }
inline bool is_bool() const { return ty == L_JSON_BOOLEAN; }
inline bool is_num() const { return ty == L_JSON_NUMBER; }
inline bool is_str() const { return ty == L_JSON_STRING; }
inline bool is_obj() const { return ty == L_JSON_OBJECT; }
inline bool is_arr() const { return ty == L_JSON_ARRAY; }
inline size_t size() const {
if (is_obj()) {
return obj.size();
} else if (is_arr()) {
return arr.size();
} else {
throw JsonException("only object and array can have size");
}
}
inline JsonElementEnumerator elems() const {
return JsonElementEnumerator(arr);
}
inline JsonFieldEnumerator fields() const {
return JsonFieldEnumerator(obj);
}
};
// Parse a JSON literal into a `JsonValue` object. If the JSON is invalid or
// unsupported, `JsonException` will be raised.
JsonValue parse(const std::string& json_lit);
// Returns true when JSON parsing successfully finished and parsed value is
// returned via `out`. Otherwise, false is returned and out contains incomplete
// result.
bool try_parse(const std::string& json_lit, JsonValue& out);
std::string print(const JsonValue& json);
} // namespace json
} // namespace archprobe

70
include/log.hpp Normal file
Просмотреть файл

@ -0,0 +1,70 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// NOTE: This is a modified excerpt of
// https://github.com/PENGUINLIONG/graphi-t/blob/0e3c1394b493db3e3d5b443c869545cac712827a/include/log.hpp;
// MIT-licensed by Rendong Liang.
// Logging infrastructure.
// @PENGUINLIONG
#pragma once
#include <cstdint>
#include <string>
#include <sstream>
#include <vector>
#include "util.hpp"
namespace archprobe {
namespace log {
// Logging infrastructure.
// Severity of a log message. `log` compares a message's level against the
// filter level with `>=`, so the declaration order of the enumerators (lowest
// first) is significant.
enum class LogLevel {
L_LOG_LEVEL_DEBUG,
L_LOG_LEVEL_INFO,
L_LOG_LEVEL_WARNING,
L_LOG_LEVEL_ERROR,
};
namespace detail {
extern void (*log_callback)(LogLevel lv, const std::string& msg);
extern LogLevel filter_lv;
extern uint32_t indent;
} // namespace detail
void set_log_callback(decltype(detail::log_callback) cb);
void set_log_filter_level(LogLevel lv);
// Emit a log message at level `lv`. Nothing happens when no callback is
// installed or when `lv` is below the filter level; otherwise the message
// parts are formatted with `util::format`, prefixed by `detail::indent`
// spaces, and handed to the callback.
template<typename ... TArgs>
void log(LogLevel lv, const TArgs& ... msg) {
  if (detail::log_callback == nullptr) { return; }
  if (!(lv >= detail::filter_lv)) { return; }
  std::string pad(detail::indent, ' ');
  detail::log_callback(lv, util::format(pad, msg...));
}
void push_indent();
void pop_indent();
template<typename ... TArgs>
inline void debug(const TArgs& ... msg) {
log(LogLevel::L_LOG_LEVEL_DEBUG, msg...);
}
template<typename ... TArgs>
inline void info(const TArgs& ... msg) {
log(LogLevel::L_LOG_LEVEL_INFO, msg...);
}
template<typename ... TArgs>
inline void warn(const TArgs& ... msg) {
log(LogLevel::L_LOG_LEVEL_WARNING, msg...);
}
template<typename ... TArgs>
inline void error(const TArgs& ... msg) {
log(LogLevel::L_LOG_LEVEL_ERROR, msg...);
}
}
} // namespace archprobe

269
include/stats.hpp Normal file
Просмотреть файл

@ -0,0 +1,269 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// Tools for statistics.
// @PENGUINLIONG
#pragma once
#include <cmath>
#include <limits>
#include <algorithm>
namespace archprobe {
namespace stats {
// Tracks the smallest value pushed so far. `numeric_limits<T>::max()` doubles
// as the "nothing collected yet" sentinel.
template<typename T>
class MinStats {
  T mn_ = std::numeric_limits<T>::max();
public:
  typedef T value_t;

  // Returns true if the value has been updated.
  bool push(T value) {
    const bool is_new_min = mn_ > value;
    if (is_new_min) {
      mn_ = value;
    }
    return is_new_min;
  }
  inline bool has_value() const {
    return !(mn_ == std::numeric_limits<T>::max());
  }

  // Current minimum; a warning is logged when nothing has been collected.
  operator T() const {
    if (has_value()) {
      return mn_;
    }
    archprobe::log::warn("`MinStats` has not collected any data yet");
    return mn_;
  }

  friend std::ostream& operator <<(std::ostream& out, const MinStats<T>& x) {
    out << static_cast<T>(x);
    return out;
  }
};
// Running maximum of every value pushed so far.
// `std::numeric_limits<T>::lowest()` is the "no data yet" marker. The previous
// marker, `-max()`, wraps around to 1 for unsigned `T` (so 0 and 1 could never
// be recorded) and is not the smallest value for signed integers. For
// floating-point `T`, `lowest()` equals `-max()`, so behaviour is unchanged.
template<typename T>
class MaxStats {
  T mx_ = std::numeric_limits<T>::lowest();
public:
  typedef T value_t;

  // Returns true if the value has been updated.
  bool push(T value) {
    if (mx_ < value) {
      mx_ = value;
      return true;
    } else {
      return false;
    }
  }

  // True once the stored value differs from the "no data yet" marker.
  inline bool has_value() const {
    return mx_ != std::numeric_limits<T>::lowest();
  }

  // Yields the current maximum; warns (and still returns the marker value)
  // when nothing has been collected.
  operator T() const {
    if (!has_value()) {
      archprobe::log::warn("`MaxStats` has not collected any data yet");
    }
    return mx_;
  }

  friend std::ostream& operator <<(std::ostream& out, const MaxStats<T>& x) {
    out << (T)(x);
    return out;
  }
};
// Arithmetic mean of every value pushed so far, kept as a running sum and a
// sample count.
template<typename T>
class AvgStats {
  T sum_ = 0;
  uint64_t n_ = 0;

public:
  using value_t = T;

  // Adds one sample.
  void push(T value) {
    sum_ += value;
    ++n_;
  }

  // True once at least one sample has been pushed.
  inline bool has_value() const {
    return n_ != 0;
  }

  // Yields `sum / count`. With no samples this only warns; the division by a
  // zero count is still carried out.
  operator T() const {
    if (!has_value()) {
      archprobe::log::warn("`AvgStats` has not collected any data yet");
    }
    return sum_ / n_;
  }

  friend std::ostream& operator <<(std::ostream& out, const AvgStats<T>& x) {
    return out << static_cast<T>(x);
  }
};
// Moving average over the most recent `NTap` pushed values, stored in a ring
// buffer. The average becomes meaningful (`has_value()`) once the buffer has
// been filled for the first time.
template<typename T, size_t NTap>
class NTapAvgStats {
  static_assert(NTap > 0, "`NTapAvgStats` needs at least one tap");

  // All members are explicitly initialised: they used to be left
  // indeterminate, so the first `push` indexed `hist_` with garbage and
  // `has_value()` read an uninitialised flag.
  std::array<double, NTap> hist_ {};
  size_t cur_idx_ = 0;
  bool ready_ = false;
public:
  typedef T value_t;

  // Stores `value` in the next ring slot, wrapping around after `NTap` pushes.
  void push(T value) {
    hist_[cur_idx_++] = value;
    if (cur_idx_ >= NTap) {
      cur_idx_ = 0;
      ready_ = true;
    }
  }

  // True once `NTap` values have been pushed.
  inline bool has_value() const {
    return ready_;
  }

  // Yields the mean of the ring buffer; warns when it has not been filled yet
  // (unfilled slots count as zero).
  operator T() const {
    if (!has_value()) {
      archprobe::log::warn("`NTapStats` has not collected any data yet");
    }
    double out = 0.0;
    for (double x : hist_) {
      out += x;
    }
    out /= NTap;
    return out;
  }

  friend std::ostream& operator <<(
    std::ostream& out,
    const NTapAvgStats<T, NTap>& x
  ) {
    out << (T)(x);
    return out;
  }
};
// Population standard deviation of every value pushed so far. All samples are
// retained so the deviation can be computed around the final mean.
template<typename T>
class StdStats {
  AvgStats<T> avg_ {};
  std::vector<T> values_ {};

public:
  using value_t = T;

  // Adds one sample to both the running mean and the sample list.
  void push(T value) {
    avg_.push(value);
    values_.push_back(value);
  }

  // True once at least one sample has been pushed.
  inline bool has_value() const {
    return avg_.has_value();
  }

  // Yields sqrt(sum((x - mean)^2) / count); only warns when empty.
  operator T() const {
    if (!has_value()) {
      archprobe::log::warn("`StdStats` has not collected any data yet");
    }
    const T mean = avg_;
    T acc = 0;
    for (const T& sample : values_) {
      const auto diff = sample - mean;
      acc += diff * diff;
    }
    return std::sqrt(acc / values_.size());
  }

  friend std::ostream& operator <<(std::ostream& out, const StdStats<T>& x) {
    return out << static_cast<T>(x);
  }
};
// Median of every value pushed so far.
template<typename T>
class MedianStats {
  std::vector<T> values_ {};
public:
  typedef T value_t;

  // Adds one sample.
  void push(T value) {
    values_.push_back(value);
  }

  // True once at least one sample has been pushed.
  inline bool has_value() const {
    return !values_.empty();
  }

  // Yields the median: the middle element for an odd sample count, the mean of
  // the two middle elements for an even count. With no samples it warns and
  // returns a value-initialised `T` instead of indexing an empty vector.
  //
  // The conversion is `const` (it sorts a copy) so that the `operator <<`
  // below, which takes a const reference, can use it.
  operator T() const {
    if (!has_value()) {
      archprobe::log::warn("`MedianStats` has not collected any data yet");
      return T {};
    }
    std::vector<T> sorted(values_);
    std::sort(sorted.begin(), sorted.end());
    const size_t imid = sorted.size() / 2;
    if (sorted.size() & 1) {
      return sorted[imid];
    }
    // Even count: the two middle elements are `imid - 1` and `imid`. The
    // previous `imid + 1` skipped the lower one and ran past the end for two
    // samples.
    return (sorted[imid - 1] + sorted[imid]) / 2;
  }

  friend std::ostream& operator <<(std::ostream& out, const MedianStats<T>& x) {
    out << (T)(x);
    return out;
  }
};
// Tracks the ratio between each newly pushed value and the wrapped statistic
// as it stood just before that push.
template<typename TStats>
class GeomDeltaStats {
  TStats stats_ {};
  bool has_ratio_ = false;
  typename TStats::value_t ratio_ {};

public:
  using value_t = typename TStats::value_t;

  // Records `value / previous statistic` (when the wrapped statistic already
  // has a value) and then feeds `value` into the wrapped statistic.
  void push(value_t value) {
    if (stats_.has_value()) {
      const value_t previous = static_cast<value_t>(stats_);
      ratio_ = value / previous;
      has_ratio_ = true;
    }
    stats_.push(value);
  }

  // True once a ratio has been recorded.
  inline bool has_value() const {
    return has_ratio_;
  }

  // Yields the most recent ratio; only warns when none was recorded yet.
  operator value_t() const {
    if (!has_value()) {
      archprobe::log::warn("`GeomDeltaStats` has not collected enough data yet");
    }
    return ratio_;
  }

  // Prints the ratio, or nothing at all when none has been recorded.
  friend std::ostream& operator <<(
    std::ostream& out,
    const GeomDeltaStats<TStats>& x
  ) {
    if (!x.has_value()) {
      return out;
    }
    out << static_cast<value_t>(x.ratio_);
    return out;
  }
};
// Tracks the difference between each newly pushed value and the wrapped
// statistic as it stood just before that push.
template<typename TStats>
class ArithDeltaStats {
  TStats stats_ {};
  bool has_delta_ = false;
  typename TStats::value_t delta_ {};

public:
  using value_t = typename TStats::value_t;

  // Records `value - previous statistic` (when the wrapped statistic already
  // has a value) and then feeds `value` into the wrapped statistic.
  void push(value_t value) {
    if (stats_.has_value()) {
      const value_t previous = static_cast<value_t>(stats_);
      delta_ = value - previous;
      has_delta_ = true;
    }
    stats_.push(value);
  }

  // True once a difference has been recorded.
  inline bool has_value() const {
    return has_delta_;
  }

  // Yields the most recent difference; only warns when none was recorded yet.
  operator value_t() const {
    if (!has_value()) {
      archprobe::log::warn("`ArithDeltaStats` has not collected enough data yet");
    }
    return delta_;
  }

  // Prints the difference, or nothing at all when none has been recorded.
  friend std::ostream& operator <<(
    std::ostream& out,
    const ArithDeltaStats<TStats>& x
  ) {
    if (!x.has_value()) {
      return out;
    }
    out << static_cast<value_t>(x.delta_);
    return out;
  }
};
} // namespace stats
} // namespace archprobe

40
include/table.hpp Normal file
Просмотреть файл

@ -0,0 +1,40 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// Numeric data table.
// @PENGUINLIONG
#include <string>
#include <sstream>
#include <vector>
#include <iomanip>
#include "assert.hpp"
namespace archprobe {
namespace table {
// A table of doubles with named columns.
struct Table {
  // Column names; every row must have exactly this many cells.
  std::vector<std::string> headers;
  // Row-major cell data.
  std::vector<std::vector<double>> rows;

  // Builds an empty table whose column names are the given string-like
  // arguments, each converted with `std::string(...)`.
  template<typename ... THeaders>
  Table(THeaders&& ... headers) :
    Table(std::vector<std::string> { std::string(headers) ... }) {}
  Table(std::vector<std::string>&& headers);
  Table(
    std::vector<std::string>&& headers,
    std::vector<std::vector<double>>&& rows);

  // Appends one row; each argument is converted to `double`. Asserts that the
  // number of arguments matches the number of headers.
  template<typename ... TArgs>
  void push(TArgs&& ... values) {
    std::vector<double> cells { static_cast<double>(values) ... };
    const bool width_matches = cells.size() == headers.size();
    archprobe::assert(width_matches,
      "row length mismatches header length");
    rows.emplace_back(std::move(cells));
  }

  std::string to_csv(uint32_t nsig_digit = 6) const;
  static Table from_csv(std::string csv);
};
} // namespace table
} // namespace archprobe

61
include/util.hpp Normal file
Просмотреть файл

@ -0,0 +1,61 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// NOTE: This is a modified excerpt of
// https://github.com/PENGUINLIONG/graphi-t/blob/da31ec530df07c9899e056eeced08a64062dcfce/include/util.hpp;
// MIT-licensed by Rendong Liang.
// HAL independent utilities.
// @PENGUINLIONG
#pragma once
#include <cstdint>
#include <vector>
#include <string>
#include <sstream>
#include <fstream>
namespace archprobe {
namespace util {
namespace {

// Streams a list of arguments, in order, into a string stream. Specialised on
// the number of arguments remaining.
template<typename ... TArgs>
struct format_impl_t;

// No arguments left: nothing to write.
template<>
struct format_impl_t<> {
  static inline void format_impl(std::stringstream&) {}
};

// Exactly one argument: write it with `operator <<`.
template<typename T>
struct format_impl_t<T> {
  static inline void format_impl(std::stringstream& out, const T& value) {
    out << value;
  }
};

// Two or more arguments: write the first, then recurse on the remainder.
template<typename THead, typename ... TTail>
struct format_impl_t<THead, TTail ...> {
  static inline void format_impl(
    std::stringstream& out,
    const THead& head,
    const TTail& ... tail
  ) {
    format_impl_t<THead>::format_impl(out, head);
    format_impl_t<TTail...>::format_impl(out, tail...);
  }
};

} // namespace

// Concatenates the stream representation (`operator <<`) of every argument
// into a single string. With no arguments the result is empty.
template<typename ... TArgs>
inline std::string format(const TArgs& ... args) {
  std::stringstream buffer;
  format_impl_t<TArgs...>::format_impl(buffer, args...);
  return buffer.str();
}
// File I/O helpers. Only declared here; the definitions live in a source file.
// NOTE(review): the per-function notes below are inferred from the names and
// signatures alone -- confirm against the implementation.
// Presumably reads the whole file at `path` as raw bytes.
extern std::vector<uint8_t> load_file(const char* path);
// Presumably reads the whole file at `path` as text.
extern std::string load_text(const char* path);
// Presumably writes `size` bytes starting at `data` to the file at `path`.
extern void save_file(const char* path, const void* data, size_t size);
// Presumably writes `txt` to the file at `path`.
extern void save_text(const char* path, const std::string& txt);
} // namespace util
} // namespace archprobe

Двоичные данные
overview.png Normal file

Двоичный файл не отображается.

После

Ширина:  |  Высота:  |  Размер: 75 KiB

Просмотреть файл

@ -0,0 +1,5 @@
# Create the local `tmp` directory on first use, then copy the on-device
# working directory into it.
if (!(Test-Path tmp)) {
    New-Item -ItemType Directory tmp
}

adb pull /data/local/tmp/gpu-testbench ./tmp/

Просмотреть файл

@ -0,0 +1,5 @@
# Delete the local copy of the results (if there is one), then delete the
# on-device working directory.
$LocalCopyExists = Test-Path tmp/gpu-testbench
if ($LocalCopyExists) {
    Remove-Item tmp/gpu-testbench -Recurse -Force
}

adb shell rm -r /data/local/tmp/gpu-testbench

45
scripts/Run-Android.ps1 Normal file
Просмотреть файл

@ -0,0 +1,45 @@
# Builds ArchProbe for Android (arm64-v8a) with the NDK CMake toolchain and,
# unless -BuildOnly is given, pushes the build output to the device with adb
# and runs it there.
param(
    # Pass `-v` to ArchProbe.
    [switch] $Verbose,
    # Stop after the build; do not touch the device.
    [switch] $BuildOnly,
    # Forwarded to ArchProbe as `-c <value>` when non-empty.
    [string] $ClearAspect
)

if (-not(Test-Path "build-android-aarch64")) {
    New-Item "build-android-aarch64" -ItemType Directory
}

# Locate the NDK. ANDROID_NDK_HOME wins over ANDROID_NDK when both are set.
# `$null` is kept on the left-hand side so the comparison stays a scalar test.
$NdkHome = $null
if ($null -ne $env:ANDROID_NDK) {
    $NdkHome = $env:ANDROID_NDK
}
if ($null -ne $env:ANDROID_NDK_HOME) {
    $NdkHome = $env:ANDROID_NDK_HOME
}

if ($null -eq $NdkHome) {
    # Single-quoted so the backticks are printed literally; inside double
    # quotes they are PowerShell's escape character and would be dropped.
    Write-Host 'Couldn''t find `ANDROID_NDK` in environment variables. Is NDK installed?'
    return -1
}

Push-Location "build-android-aarch64"
cmake -DCMAKE_TOOLCHAIN_FILE="$NdkHome/build/cmake/android.toolchain.cmake" -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-28 -G "Ninja" ..
cmake --build . -t ArchProbe
$BuildExitCode = $LASTEXITCODE
Pop-Location

# Do not push and run a stale binary when the build failed.
if ($BuildExitCode -ne 0) {
    Write-Host "Build failed with exit code $BuildExitCode."
    return $BuildExitCode
}

if ($BuildOnly) {
    return
}

# `$Args` is an automatic variable in PowerShell, so the command line for the
# app is collected under a different name.
$AppArgs = ""
if ($Verbose) {
    $AppArgs += "-v "
}
if ($ClearAspect) {
    $AppArgs += "-c $ClearAspect "
}

adb reconnect offline
adb push ./build-android-aarch64/assets/ /data/local/tmp/gpu-testbench/
adb push ./build-android-aarch64/bin/ /data/local/tmp/gpu-testbench/
adb shell chmod 777 /data/local/tmp/gpu-testbench/bin/ArchProbe
adb shell "cd /data/local/tmp/gpu-testbench/bin && ./ArchProbe $AppArgs"

131
src/args.cpp Normal file
Просмотреть файл

@ -0,0 +1,131 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// NOTE: This is a modified excerpt of
// https://github.com/PENGUINLIONG/graphi-t/blob/da31ec530df07c9899e056eeced08a64062dcfce/src/args.cpp;
// MIT-licensed by Rendong Liang.
#include <map>
#include <vector>
#include <iostream>
#include "args.hpp"
#include "assert.hpp"
namespace archprobe {
namespace args {
// One line of the help listing printed by `print_help()`.
struct ArgumentHelp {
// Short flag text as passed to `reg_arg()`.
std::string short_flag;
// Long flag text as passed to `reg_arg()`.
std::string long_flag;
// Help text; `reg_arg()` appends " (default=...)" when a default literal is
// available.
std::string help;
};
// Registry of everything known about the command line. `reg_arg()` fills it
// and `parse_args()` / `print_help()` read it. A single instance, `arg_cfg`,
// is defined together with the type.
struct ArgumentConfig {
// Name shown in the usage line until `init_arg_parse()` replaces it.
std::string app_name = "[APPNAME]";
// Optional description printed below the usage line.
std::string desc;
// Short flag name -> ID.
std::map<char, size_t> short_map;
// Long flag name -> ID.
std::map<std::string, size_t> long_map;
// Argument parsing info.
std::vector<ArgumentParseConfig> parse_cfgs;
// Argument help info.
std::vector<ArgumentHelp> helps;
} arg_cfg;
// Stores the application name and description shown by `print_help()`.
// Both pointers are copied into `std::string`s.
void init_arg_parse(const char* app_name, const char* desc) {
arg_cfg.app_name = app_name;
arg_cfg.desc = desc;
}
// Returns the stored application name. The pointer refers to the string held
// in `arg_cfg`.
const char* get_app_name() {
return arg_cfg.app_name.c_str();
}
void print_help() {
std::cout << "usage: " << arg_cfg.app_name << " [OPTIONS]" << std::endl;
if (!arg_cfg.desc.empty()) {
std::cout << arg_cfg.desc << std::endl;
}
for (const auto& help : arg_cfg.helps) {
std::cout << help.short_flag << "\t"
<< help.long_flag << "\t\t"
<< help.help << std::endl;
}
std::cout << "-h\t--help\t\tPrint this message." << std::endl;
std::exit(0);
}
// Reports an unrecognised argument and then shows the help text. Does not
// return, because `print_help()` exits the process.
void report_unknown_arg(const char* arg) {
  std::cout << "unknown argument: ";
  std::cout << arg << std::endl;
  print_help();
}
// Registers one command line argument.
//   short_flag: indexed only when it has the form "-x" (exactly two chars).
//   long_flag:  indexed only when it starts with "--" and is longer than three
//               characters; the key is the text after "--".
//   parse_cfg:  parsing info, stored by copy; `parse_cfg.lit(parse_cfg.dst)`
//               supplies the default-value text for the help line.
//   help:       help text for `print_help()`.
void reg_arg(
  const char* short_flag,
  const char* long_flag,
  const ArgumentParseConfig& parse_cfg,
  const char* help
) {
  // ID of the entry about to be appended.
  const size_t entry_id = arg_cfg.parse_cfgs.size();

  const bool short_ok = std::strlen(short_flag) == 2 && short_flag[0] == '-';
  if (short_ok) {
    arg_cfg.short_map[short_flag[1]] = entry_id;
  }

  const bool long_ok =
    std::strlen(long_flag) > 3 && long_flag[1] == '-' && long_flag[0] == '-';
  if (long_ok) {
    arg_cfg.long_map[long_flag + 2] = entry_id;
  }

  arg_cfg.parse_cfgs.emplace_back(parse_cfg);

  std::string help_text = help;
  auto default_lit = parse_cfg.lit(parse_cfg.dst);
  if (!default_lit.empty()) {
    help_text += " (default=" + default_lit + ")";
  }
  arg_cfg.helps.emplace_back(ArgumentHelp { short_flag, long_flag, help_text });
}
// Parses `argv[1..argc)` against the arguments registered with `reg_arg()`.
//
// Each flag is looked up by its short ("-x") or long ("--name") form. The
// matching entry's parser is then run on the following `narg` segments.
// "-h" / "--help" print the help text and exit; unknown flags are reported via
// `report_unknown_arg()`, which also exits. Free (non-flag) arguments are not
// supported and cause a panic.
void parse_args(int argc, const char** argv) {
  auto i = 1;
  // Index of the registered argument whose value segments are parsed next, or
  // -1 when the next segment is expected to be a flag.
  int iarg_entry = -1;
  while (i < argc || iarg_entry >= 0) {
    if (iarg_entry >= 0) {
      auto& parse_cfg = arg_cfg.parse_cfgs[iarg_entry];
      // Check that enough segments remain BEFORE handing `argv + i` to the
      // parser; otherwise a value-taking flag at the end of the command line
      // makes the parser read past the end of `argv`.
      archprobe::assert((argc - i >= parse_cfg.narg),
        "no enough argument segments");
      archprobe::assert(parse_cfg.parser(argv + i, parse_cfg.dst),
        "unable to parse argument");
      i += parse_cfg.narg;
      iarg_entry = -1;
    } else {
      const char* arg = argv[i];
      if (arg[0] != '-') {
        // Free argument.
        archprobe::panic("free argument is currently unsupported");
      } else if (arg[1] != '-') {
        if (arg[1] == 'h') { print_help(); }
        // Short flag argument.
        auto it = arg_cfg.short_map.find(arg[1]);
        if (it != arg_cfg.short_map.end()) {
          iarg_entry = it->second;
        } else {
          report_unknown_arg(arg);
        }
        ++i;
      } else {
        if (std::strcmp(arg + 2, "help") == 0) { print_help(); }
        // Long flag argument.
        auto it = (arg_cfg.long_map.find(arg + 2));
        if (it != arg_cfg.long_map.end()) {
          iarg_entry = it->second;
        } else {
          report_unknown_arg(arg);
        }
        ++i;
      }
    }
  }
}
} // namespace args
} // namespace archprobe

17
src/assert.cpp Normal file
Просмотреть файл

@ -0,0 +1,17 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// NOTE: This is a modified excerpt of
// https://github.com/PENGUINLIONG/graphi-t/blob/d291c3d1ce3795fe4b305e5efd76b4f586d23e3b/src/assert.cpp;
// MIT-licensed by Rendong Liang.
#include "assert.hpp"
namespace archprobe {
// Keeps a copy of the failure message.
AssertionFailedException::AssertionFailedException(const std::string& msg)
  : msg(msg)
{
}

// Returns the stored message; the pointer refers to the member string.
const char* AssertionFailedException::what() const noexcept
{
  return msg.c_str();
}
} // namespace archprobe

391
src/cl.cpp Normal file
Просмотреть файл

@ -0,0 +1,391 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "cl.hpp"
#include "util.hpp"
#include "log.hpp"
#include "stats.hpp"
namespace archprobe {
// Translates an OpenCL status code into a short description.
// Every code at or below -69 is reported with the generic text
// "invalid something"; all remaining codes go through the switch below.
// Two message typos are corrected here ("profilng" and "work item_size").
CLException::CLException(cl_int code) {
  if (code <= -69) {
    msg = "invalid something";
    return;
  }
  switch (code) {
  case CL_INVALID_VALUE: msg = "invalid value"; break;
  case CL_INVALID_DEVICE_TYPE: msg = "invalid device type"; break;
  case CL_INVALID_PLATFORM: msg = "invalid platform"; break;
  case CL_INVALID_DEVICE: msg = "invalid device"; break;
  case CL_INVALID_CONTEXT: msg = "invalid context"; break;
  case CL_INVALID_QUEUE_PROPERTIES: msg = "invalid queue properties"; break;
  case CL_INVALID_COMMAND_QUEUE: msg = "invalid command queue"; break;
  case CL_INVALID_HOST_PTR: msg = "invalid host pointer"; break;
  case CL_INVALID_MEM_OBJECT: msg = "invalid memory object"; break;
  case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: msg = "invalid image format descriptor"; break;
  case CL_INVALID_IMAGE_SIZE: msg = "invalid image size"; break;
  case CL_INVALID_SAMPLER: msg = "invalid sampler"; break;
  case CL_INVALID_BINARY: msg = "invalid binary"; break;
  case CL_INVALID_BUILD_OPTIONS: msg = "invalid build options"; break;
  case CL_INVALID_PROGRAM: msg = "invalid program"; break;
  case CL_INVALID_PROGRAM_EXECUTABLE: msg = "invalid program executable"; break;
  case CL_INVALID_KERNEL_NAME: msg = "invalid kernel name"; break;
  case CL_INVALID_KERNEL_DEFINITION: msg = "invalid kernel definition"; break;
  case CL_INVALID_KERNEL: msg = "invalid kernel"; break;
  case CL_INVALID_ARG_INDEX: msg = "invalid arg index"; break;
  case CL_INVALID_ARG_VALUE: msg = "invalid arg value"; break;
  case CL_INVALID_ARG_SIZE: msg = "invalid arg size"; break;
  case CL_INVALID_KERNEL_ARGS: msg = "invalid kernel args"; break;
  case CL_INVALID_WORK_DIMENSION: msg = "invalid work dimension"; break;
  case CL_INVALID_WORK_GROUP_SIZE: msg = "invalid work group size"; break;
  case CL_INVALID_WORK_ITEM_SIZE: msg = "invalid work item size"; break;
  case CL_INVALID_GLOBAL_OFFSET: msg = "invalid global offset"; break;
  case CL_INVALID_EVENT_WAIT_LIST: msg = "invalid event wait list"; break;
  case CL_INVALID_EVENT: msg = "invalid event"; break;
  case CL_INVALID_OPERATION: msg = "invalid operation"; break;
  case CL_INVALID_GL_OBJECT: msg = "invalid gl object"; break;
  case CL_INVALID_BUFFER_SIZE: msg = "invalid buffer size"; break;
  case CL_INVALID_MIP_LEVEL: msg = "invalid mip level"; break;
  case CL_INVALID_GLOBAL_WORK_SIZE: msg = "invalid global work size"; break;
  case CL_INVALID_PROPERTY: msg = "invalid property"; break;
  case CL_INVALID_IMAGE_DESCRIPTOR: msg = "invalid image descriptor"; break;
  case CL_INVALID_COMPILER_OPTIONS: msg = "invalid compiler options"; break;
  case CL_INVALID_LINKER_OPTIONS: msg = "invalid linker options"; break;
  case CL_INVALID_DEVICE_PARTITION_COUNT: msg = "invalid device partition count"; break;
  case CL_DEVICE_NOT_FOUND: msg = "device not found"; break;
  case CL_DEVICE_NOT_AVAILABLE: msg = "device not available"; break;
  case CL_COMPILER_NOT_AVAILABLE: msg = "compiler not available"; break;
  case CL_MEM_OBJECT_ALLOCATION_FAILURE: msg = "memory object allocation failure"; break;
  case CL_OUT_OF_RESOURCES: msg = "out of resources"; break;
  case CL_OUT_OF_HOST_MEMORY: msg = "out of host memory"; break;
  case CL_PROFILING_INFO_NOT_AVAILABLE: msg = "profiling info not available"; break;
  case CL_MEM_COPY_OVERLAP: msg = "memory copy overlap"; break;
  case CL_IMAGE_FORMAT_MISMATCH: msg = "image format mismatch"; break;
  case CL_IMAGE_FORMAT_NOT_SUPPORTED: msg = "image format not supported"; break;
  case CL_BUILD_PROGRAM_FAILURE: msg = "build program failure"; break;
  case CL_MISALIGNED_SUB_BUFFER_OFFSET: msg = "misaligned sub-buffer offset"; break;
  case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: msg = "execution status error for events in wait list"; break;
  case CL_COMPILE_PROGRAM_FAILURE: msg = "compile program failure"; break;
  case CL_LINKER_NOT_AVAILABLE: msg = "linker not available"; break;
  case CL_LINK_PROGRAM_FAILURE: msg = "link program failure"; break;
  case CL_DEVICE_PARTITION_FAILED: msg = "device partition failed"; break;
  case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: msg = "kernel argument information not available"; break;
  default: msg = "unknown opencl error"; break;
  }
}
// Returns the description chosen by the constructor.
const char* CLException::what() const noexcept { return msg; }
// Global variables.
// One entry per (platform, device) pair found by `initialize()`; indexed by
// the device index taken by `desc_dev()` and `select_dev()`.
std::vector<DeviceStub> dev_stubs;
std::vector<cl_platform_id> _enum_platform_ids() {
cl_uint nplatform_id;
CL_ASSERT << clGetPlatformIDs(0, nullptr, &nplatform_id);
std::vector<cl_platform_id> platform_ids;
platform_ids.resize(nplatform_id);
CL_ASSERT << clGetPlatformIDs(nplatform_id, platform_ids.data(), nullptr);
return platform_ids;
}
// Reads a string-valued platform property.
// The first query returns the size in bytes including the terminating NUL, the
// second fills a buffer of that size, and the terminator is then dropped.
// A reported size of 0 yields an empty string; `len - 1` used to underflow in
// that case. The buffer is written through `&rv[0]` rather than a const-cast
// `data()`.
std::string _get_platform_info_str(
  cl_platform_id platform_id,
  cl_platform_info platform_info
) {
  size_t len = 0;
  CL_ASSERT << clGetPlatformInfo(platform_id, platform_info, 0, nullptr, &len);
  if (len == 0) {
    return std::string {};
  }
  std::string rv(len, '\0');
  CL_ASSERT << clGetPlatformInfo(platform_id, platform_info,
    len, &rv[0], nullptr);
  // Drop the terminator that was written into the last byte.
  rv.resize(len - 1);
  return rv;
}
// Lists the IDs of all devices (`CL_DEVICE_TYPE_ALL`) on a platform: one call
// to learn the count, a second call to fetch that many IDs.
std::vector<cl_device_id> _enum_dev_ids(cl_platform_id platform_id) {
  cl_uint count;
  CL_ASSERT << clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL,
    0, nullptr, &count);

  std::vector<cl_device_id> ids(count);
  CL_ASSERT << clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL,
    count, ids.data(), nullptr);
  return ids;
}
// Reads a string-valued device property.
// The first query returns the size in bytes including the terminating NUL, the
// second fills a buffer of that size, and the terminator is then dropped.
// A reported size of 0 yields an empty string; `len - 1` used to underflow in
// that case. The buffer is written through `&rv[0]` rather than a const-cast
// `data()`.
std::string _get_dev_info_str(
  cl_device_id device_id,
  cl_device_info device_info
) {
  size_t len = 0;
  CL_ASSERT << clGetDeviceInfo(device_id, device_info, 0, nullptr, &len);
  if (len == 0) {
    return std::string {};
  }
  std::string rv(len, '\0');
  CL_ASSERT << clGetDeviceInfo(device_id, device_info,
    len, &rv[0], nullptr);
  // Drop the terminator that was written into the last byte.
  rv.resize(len - 1);
  return rv;
}
// Enumerates every device of every OpenCL platform and appends one
// `DeviceStub` per device to `dev_stubs`. Each stub carries a description of
// the form "<platform> (<profile>) - <device> (<type>, <version>)".
void initialize() {
  auto platform_ids = _enum_platform_ids();
  for (auto platform_id : platform_ids) {
    auto platform_name =
      _get_platform_info_str(platform_id, CL_PLATFORM_NAME);
    auto platform_prof =
      _get_platform_info_str(platform_id, CL_PLATFORM_PROFILE);
    auto platform_exts =
      _get_platform_info_str(platform_id, CL_PLATFORM_EXTENSIONS);
    auto platform_desc =
      util::format(platform_name, " (", platform_prof, ") - ");

    auto dev_ids = _enum_dev_ids(platform_id);
    for (auto dev_id : dev_ids) {
      auto dev_name = _get_dev_info_str(dev_id, CL_DEVICE_NAME);
      auto dev_ver = _get_dev_info_str(dev_id, CL_DEVICE_VERSION);
      auto dev_exts = _get_dev_info_str(dev_id, CL_DEVICE_EXTENSIONS);

      cl_device_type dev_ty;
      CL_ASSERT << clGetDeviceInfo(dev_id, CL_DEVICE_TYPE,
        sizeof(dev_ty), &dev_ty, nullptr);
      const char* dev_ty_lit;
      switch (dev_ty) {
      case CL_DEVICE_TYPE_CPU: dev_ty_lit = "CPU"; break;
      case CL_DEVICE_TYPE_GPU: dev_ty_lit = "GPU"; break;
      case CL_DEVICE_TYPE_ACCELERATOR: dev_ty_lit = "Accelerator"; break;
      default: dev_ty_lit = "Unknown"; break;
      }

      // Use the readable type name. The raw `dev_ty` value used to be
      // formatted here, leaving `dev_ty_lit` computed but unused.
      auto desc = platform_desc +
        util::format(dev_name, " (", dev_ty_lit, ", ", dev_ver, ")");
      DeviceStub stub { platform_id, dev_id, platform_exts, dev_exts, desc };
      dev_stubs.emplace_back(std::move(stub));
    }
  }
  archprobe::log::info("initialized opencl environment");
}
// Returns the description of device `idx`, or an empty string when `idx` is
// out of range.
std::string desc_dev(uint32_t idx) {
  if (idx < dev_stubs.size()) {
    return dev_stubs[idx].desc;
  }
  return std::string {};
}
// Wraps device `idev` of `dev_stubs` in a `cl::Device` and logs the choice.
// The index is bounds-checked: an out-of-range `idev` now throws
// `std::out_of_range` (from `vector::at`) instead of reading past the end of
// the vector.
cl::Device select_dev(uint32_t idev) {
  const auto& dev_stub = archprobe::dev_stubs.at(idev);
  archprobe::log::info("selected device #", idev, ": ", dev_stub.desc);
  cl::Device dev(dev_stub.dev_id);
  return dev;
}
// Creates a context for `dev` on the platform that `dev` reports.
cl::Context create_ctxt(const cl::Device& dev) {
  // Zero-terminated property list: { CL_CONTEXT_PLATFORM, <platform>, 0 }.
  cl_context_properties props[] = {
    CL_CONTEXT_PLATFORM,
    (cl_context_properties)dev.getInfo<CL_DEVICE_PLATFORM>(),
    0,
  };
  cl_int status;
  cl::Context context(dev, props, nullptr, nullptr, &status);
  CL_ASSERT << status;
  return context;
}
// Creates a command queue on `ctxt` with `CL_QUEUE_PROFILING_ENABLE` set.
cl::CommandQueue create_cmd_queue(const cl::Context& ctxt) {
  cl_int status;
  cl::CommandQueue queue(ctxt, CL_QUEUE_PROFILING_ENABLE, &status);
  CL_ASSERT << status;
  return queue;
}
// Creates a program from the source text `src` and builds it for `dev` with
// `build_opts`. When the build status is `CL_BUILD_ERROR` the build log is
// written to the error log before the build's status code is asserted.
cl::Program create_program(
  const cl::Device& dev,
  const cl::Context& ctxt,
  const char* src,
  const char* build_opts
) {
  cl_int status;

  cl::Program::Sources src_list;
  src_list.push_back(src);
  cl::Program prog(ctxt, src_list, &status);
  CL_ASSERT << status;

  status = prog.build({dev}, build_opts);
  const bool build_failed =
    prog.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(dev) == CL_BUILD_ERROR;
  if (build_failed) {
    std::string build_log = prog.getBuildInfo<CL_PROGRAM_BUILD_LOG>(dev);
    archprobe::log::error(build_log);
  }
  CL_ASSERT << status;
  return prog;
}
// Looks up the kernel called `kernel_name` in `program`.
cl::Kernel create_kernel(cl::Program program, const std::string& kernel_name) {
  cl_int status;
  cl::Kernel found(program, kernel_name.c_str(), &status);
  CL_ASSERT << status;
  return found;
}
// Runs `kernel` `niter` times on `cmd_queue` and returns the median of the
// per-run profiling durations divided by 1000.
// Two extra untimed runs are issued first as a warm-up.
double bench_kernel(
  const cl::CommandQueue& cmd_queue,
  const cl::Kernel& kernel,
  const cl::NDRange& local_size,
  const cl::NDRange& global_size,
  uint32_t niter
) {
  std::vector<cl::Event> events;
  events.reserve(niter);
  auto run_kernel = [&]() {
    cl::Event event;
    CL_ASSERT << cmd_queue.enqueueNDRangeKernel(kernel, cl::NDRange(0, 0, 0),
      global_size, local_size, nullptr, &event);
    events.push_back(event);
  };

  // Warm-up runs; their events are discarded. The result of `finish()` is
  // checked so an execution failure is reported instead of being ignored.
  run_kernel();
  run_kernel();
  CL_ASSERT << cmd_queue.finish();
  events.clear();

  // Timed runs. The index has the same unsigned type as `niter`; it used to
  // be a signed `int` compared against an unsigned bound.
  for (uint32_t i = 0; i < niter; ++i) {
    run_kernel();
  }
  CL_ASSERT << cmd_queue.finish();

  archprobe::stats::MedianStats<double> time_median;
  for (const auto& event : events) {
    uint64_t start = event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
    uint64_t end = event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
    time_median.push(end - start);
  }
  double time = (double)time_median / 1000;
  events.clear();
  return time;
}
// Creates a `width` x `height` 2D image with the given flags and format; row
// pitch 0 and no host pointer are passed.
cl::Image2D create_img_2d(
  const cl::Context& ctxt,
  cl_mem_flags mem_flags,
  cl::ImageFormat img_fmt,
  uint32_t width,
  uint32_t height
) {
  cl_int status;
  cl::Image2D image(ctxt, mem_flags, img_fmt, width, height, 0, nullptr,
    &status);
  CL_ASSERT << status;
  return image;
}
// Creates a 1D image `width` texels wide with the given flags and format; no
// host pointer is passed.
cl::Image1D create_img_1d(
  const cl::Context& ctxt,
  cl_mem_flags mem_flags,
  cl::ImageFormat img_fmt,
  uint32_t width
) {
  cl_int status;
  cl::Image1D image(ctxt, mem_flags, img_fmt, width, nullptr, &status);
  CL_ASSERT << status;
  return image;
}
// Creates a buffer of `size` bytes with the given flags; no host pointer is
// passed.
cl::Buffer create_buf(
  const cl::Context& ctxt,
  cl_mem_flags mem_flags,
  size_t size
) {
  cl_int status;
  cl::Buffer buffer(ctxt, mem_flags, size, 0, &status);
  CL_ASSERT << status;
  return buffer;
}
// Maps the whole 2D image for reading and writing with a blocking call, and
// returns the mapped pointer together with the image extent and pitches.
MapImage map_img_2d(
  const cl::CommandQueue& cmd_queue,
  const cl::Image2D& img
) {
  const size_t width = img.getImageInfo<CL_IMAGE_WIDTH>();
  const size_t height = img.getImageInfo<CL_IMAGE_HEIGHT>();

  // Map everything from the origin up to (width, height, 1).
  cl::array<size_t, 3> origin {};
  cl::array<size_t, 3> region { width, height, 1 };

  size_t row_pitch;
  size_t slice_pitch;
  cl_int status;
  float* data = (float*)cmd_queue.enqueueMapImage(img, true,
    CL_MAP_READ | CL_MAP_WRITE, origin, region, &row_pitch, &slice_pitch,
    nullptr, nullptr, &status);
  CL_ASSERT << status;

  return MapImage { data, width, height, 1, row_pitch, slice_pitch };
}
// Enqueues the unmap of `mapped` from `img`, then resets `mapped` to its
// empty state.
void unmap_img_2d(
  const cl::CommandQueue& cmd_queue,
  const cl::Image2D& img,
  MapImage& mapped
) {
  CL_ASSERT << cmd_queue.enqueueUnmapMemObject(img, mapped);
  mapped = MapImage {};
}
// Maps the whole 1D image for reading and writing with a blocking call, and
// returns the mapped pointer together with the image extent and pitches.
MapImage map_img_1d(
  const cl::CommandQueue& cmd_queue,
  const cl::Image1D& img
) {
  const size_t width = img.getImageInfo<CL_IMAGE_WIDTH>();

  // Map everything from the origin up to (width, 1, 1).
  cl::array<size_t, 3> origin {};
  cl::array<size_t, 3> region { width, 1, 1 };

  size_t row_pitch;
  size_t slice_pitch;
  cl_int status;
  float* data = (float*)cmd_queue.enqueueMapImage(img, true,
    CL_MAP_READ | CL_MAP_WRITE, origin, region, &row_pitch, &slice_pitch,
    nullptr, nullptr, &status);
  CL_ASSERT << status;

  return MapImage { data, width, 1, 1, row_pitch, slice_pitch };
}
// Enqueues the unmap of `mapped` from `img`, then resets `mapped` to its
// empty state.
void unmap_img_1d(
  const cl::CommandQueue& cmd_queue,
  const cl::Image1D& img,
  MapImage& mapped
) {
  CL_ASSERT << cmd_queue.enqueueUnmapMemObject(img, mapped);
  mapped = MapImage {};
}
// Maps `size` bytes of `buf` starting at `offset` for reading and writing,
// using a blocking call.
MapBuffer map_buf(
  const cl::CommandQueue& cmd_queue,
  const cl::Buffer& buf,
  size_t offset,
  size_t size
) {
  cl_int status;
  auto data = (float*)cmd_queue.enqueueMapBuffer(buf, true,
    CL_MAP_READ | CL_MAP_WRITE, offset, size, nullptr, nullptr, &status);
  CL_ASSERT << status;
  return MapBuffer { data, size };
}
// Maps the entire buffer: offset 0 and the size reported by `CL_MEM_SIZE`.
MapBuffer map_buf(
  const cl::CommandQueue& cmd_queue,
  const cl::Buffer& buf
) {
  const size_t whole_size = buf.getInfo<CL_MEM_SIZE>();
  return map_buf(cmd_queue, buf, 0, whole_size);
}
// Enqueues the unmap of `mapped` from `buf`, then resets `mapped` to its
// empty state.
void unmap_buf(
  const cl::CommandQueue& cmd_queue,
  const cl::Buffer& buf,
  MapBuffer& mapped
) {
  CL_ASSERT << cmd_queue.enqueueUnmapMemObject(buf, mapped);
  mapped = MapBuffer {};
}
} // namespace archprobe

387
src/json.cpp Normal file
Просмотреть файл

@ -0,0 +1,387 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// NOTE: This is a modified excerpt of
// https://github.com/PENGUINLIONG/graphi-t/blob/da31ec530df07c9899e056eeced08a64062dcfce/src/json.cpp;
// MIT-licensed by Rendong Liang.
// JSON serialization/deserialization.
// @PENGUINLIONG
#include <sstream>
#include "log.hpp"
#include "json.hpp"
namespace archprobe {
namespace json {
// Keeps a copy of the message.
JsonException::JsonException(const char* msg)
  : msg(msg)
{
}

// Returns the stored message; the pointer refers to the member string.
const char* JsonException::what() const noexcept
{
  return msg.c_str();
}
// Builds an array literal from a braced list of values.
JsonArray::JsonArray(
  std::initializer_list<JsonValue>&& elems
) : inner(elems) {}

// Builds an object literal from a braced list of key/value pairs.
JsonObject::JsonObject(
  std::initializer_list<std::pair<const std::string, JsonValue>>&& entries
) : inner(std::move(entries)) {}
// Takes over the map of an object literal.
JsonValue::JsonValue(JsonObject&& obj) :
  ty(L_JSON_OBJECT),
  obj(std::move(obj.inner)) {}

// Takes over the elements of an array literal.
JsonValue::JsonValue(JsonArray&& arr) :
  ty(L_JSON_ARRAY),
  arr(std::move(arr.inner)) {}
// Kinds of token produced by `Tokenizer::next_token()`.
enum JsonTokenType {
// Set when no token could be read (end of input).
L_JSON_TOKEN_UNDEFINED,
// The literals `null`, `true` and `false`.
L_JSON_TOKEN_NULL,
L_JSON_TOKEN_TRUE,
L_JSON_TOKEN_FALSE,
// A quoted string; the text is in `JsonToken::str`.
L_JSON_TOKEN_STRING,
// A number; the value is in `JsonToken::num`.
L_JSON_TOKEN_NUMBER,
// Punctuation: `:` `,` `{` `}` `[` `]`.
L_JSON_TOKEN_COLON,
L_JSON_TOKEN_COMMA,
L_JSON_TOKEN_OPEN_BRACE,
L_JSON_TOKEN_CLOSE_BRACE,
L_JSON_TOKEN_OPEN_BRACKET,
L_JSON_TOKEN_CLOSE_BRACKET,
};
// A single token. Only the payload field matching `ty` is written by the
// tokenizer.
struct JsonToken {
JsonTokenType ty;
// Value of a `L_JSON_TOKEN_NUMBER` token.
double num;
// Text of a `L_JSON_TOKEN_STRING` token, with escapes already resolved.
std::string str;
};
// Splits a JSON text into tokens.
// The tokenizer keeps its own copy of the text in `lit`; `pos` and `end`
// iterate over that copy.
//
// Changes from the earlier version:
//   * a character that starts no token now throws instead of spinning forever
//     in the scanning loop;
//   * the `\\` escape is accepted inside strings;
//   * an upper-case `E` is accepted as the exponent marker of a number.
struct Tokenizer {
  std::string lit;
  std::string::const_iterator pos;
  std::string::const_iterator end;

  Tokenizer(const std::string& json) :
    lit(json),
    pos(lit.cbegin()),
    end(lit.cend()) {}

  // Check the range first before calling this method.
  // Compares the characters at `pos` with the NUL-terminated `head`.
  bool unsafe_starts_with(const char* head) {
    auto i = 0;
    while (*head != '\0') {
      if (pos[i++] != *(head++)) {
        return false;
      }
    }
    return true;
  }

  // Reads the next token into `out`.
  // Returns false (with `out.ty` set to `L_JSON_TOKEN_UNDEFINED`) when the
  // input is exhausted. Throws `JsonException` on an unterminated string, an
  // unsupported escape, or a character that cannot start a token.
  bool next_token(JsonToken& out) {
    std::stringstream ss;
    while (pos != end) {
      char c = *pos;

      // Ignore whitespaces.
      if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
        pos += 1;
        continue;
      }

      // Try parse scope punctuations.
      switch (c) {
      case ':': out.ty = L_JSON_TOKEN_COLON; pos += 1; return true;
      case ',': out.ty = L_JSON_TOKEN_COMMA; pos += 1; return true;
      case '{': out.ty = L_JSON_TOKEN_OPEN_BRACE; pos += 1; return true;
      case '}': out.ty = L_JSON_TOKEN_CLOSE_BRACE; pos += 1; return true;
      case '[': out.ty = L_JSON_TOKEN_OPEN_BRACKET; pos += 1; return true;
      case ']': out.ty = L_JSON_TOKEN_CLOSE_BRACKET; pos += 1; return true;
      }

      // Try parse numbers.
      if (c == '+' || c == '-' || (c >= '0' && c <= '9')) {
        out.ty = L_JSON_TOKEN_NUMBER;
        const int STATE_INTEGRAL = 0;
        const int STATE_FRACTION = 1;
        const int STATE_EXPONENT = 2;
        int state = STATE_INTEGRAL;
        // Collect the characters of the number; the text is converted by
        // `std::atof` once the first non-number character is reached.
        do {
          c = *pos;
          if (state == STATE_INTEGRAL) {
            if (c == '.') {
              state = STATE_FRACTION;
              ss.put(c);
              continue;
            }
            if (c == 'e' || c == 'E') {
              state = STATE_EXPONENT;
              ss.put(c);
              continue;
            }
            if (c != '+' && c != '-' && (c < '0' || c > '9')) {
              break;
            }
          } else if (state == STATE_FRACTION) {
            if (c == 'e' || c == 'E') {
              state = STATE_EXPONENT;
              ss.put(c);
              continue;
            }
            if (c < '0' || c > '9') {
              break;
            }
          } else if (state == STATE_EXPONENT) {
            if (c != '+' && c != '-' && (c < '0' || c > '9')) {
              break;
            }
          }
          ss.put(c);
        } while (++pos != end);
        out.num = std::atof(ss.str().c_str());
        return true;
      }

      // Try parse strings.
      if (c == '"') {
        out.ty = L_JSON_TOKEN_STRING;
        bool escape = false;
        while (++pos != end) {
          c = *pos;
          if (escape) {
            // `c` follows a backslash: translate the escape.
            switch (c) {
            case '"':
            case '/':
            case '\\':
              break;
            case 'b': c = '\b'; break;
            case 'f': c = '\f'; break;
            case 'n': c = '\n'; break;
            case 'r': c = '\r'; break;
            case 't': c = '\t'; break;
            case 'u':
              throw JsonException("unicode escape is not supported");
            default:
              throw JsonException("invalid escape charater");
            }
            escape = false;
          } else {
            if (c == '\\') {
              escape = true;
              continue;
            } else if (c == '"') {
              // Closing quote.
              out.str = ss.str();
              pos += 1;
              return true;
            }
          }
          ss.put(c);
        }
        throw JsonException("unexpected end of string");
      }

      // Try parse literals.
      if (pos + 4 <= end) {
        if (unsafe_starts_with("null")) {
          out.ty = L_JSON_TOKEN_NULL;
          pos += 4;
          return true;
        }
        if (unsafe_starts_with("true")) {
          out.ty = L_JSON_TOKEN_TRUE;
          pos += 4;
          return true;
        }
      }
      if (pos + 5 <= end) {
        if (unsafe_starts_with("false")) {
          out.ty = L_JSON_TOKEN_FALSE;
          pos += 5;
          return true;
        }
      }

      // Nothing matched. `pos` has not moved, so looping again would never
      // terminate; report the offending input instead.
      throw JsonException("unexpected character");
    }
    out.ty = L_JSON_TOKEN_UNDEFINED;
    return false;
  }
};
// Parses one JSON value starting at the tokenizer's current position.
//
// Returns true with the value in `out`, or false when the next token is a
// closing brace/bracket (the caller treats that as the end of an empty
// container). Throws `JsonException` on malformed input or when the input ends
// before a value has been read.
//
// Every array element and object field is parsed into a fresh `JsonValue`.
// Previously one value was reused after being moved from, so a later element
// started from whatever state the move left behind.
bool try_parse_impl(
  Tokenizer& tokenizer,
  JsonValue& out
) {
  JsonToken token;
  while (tokenizer.next_token(token)) {
    switch (token.ty) {
    case L_JSON_TOKEN_TRUE:
      out.ty = L_JSON_BOOLEAN;
      out.b = true;
      return true;
    case L_JSON_TOKEN_FALSE:
      out.ty = L_JSON_BOOLEAN;
      out.b = false;
      return true;
    case L_JSON_TOKEN_NULL:
      out.ty = L_JSON_NULL;
      return true;
    case L_JSON_TOKEN_STRING:
      out.ty = L_JSON_STRING;
      out.str = std::move(token.str);
      return true;
    case L_JSON_TOKEN_NUMBER:
      out.ty = L_JSON_NUMBER;
      out.num = token.num;
      return true;
    case L_JSON_TOKEN_OPEN_BRACKET:
      out.ty = L_JSON_ARRAY;
      for (;;) {
        JsonValue elem;
        if (!try_parse_impl(tokenizer, elem)) {
          // When the array has no element.
          break;
        }
        out.arr.emplace_back(std::move(elem));
        if (tokenizer.next_token(token)) {
          if (token.ty == L_JSON_TOKEN_COMMA) {
            continue;
          } else if (token.ty == L_JSON_TOKEN_CLOSE_BRACKET) {
            break;
          } else {
            throw JsonException("unexpected token in array");
          }
        } else {
          throw JsonException("unexpected end of array");
        }
      }
      return true;
    case L_JSON_TOKEN_OPEN_BRACE:
      out.ty = L_JSON_OBJECT;
      for (;;) {
        // Match the key.
        std::string key;
        if (tokenizer.next_token(token)) {
          if (token.ty == L_JSON_TOKEN_STRING) {
            key = std::move(token.str);
          } else if (token.ty == L_JSON_TOKEN_CLOSE_BRACE) {
            // The object has no field.
            break;
          } else {
            throw JsonException("unexpected object field key type");
          }
        } else {
          throw JsonException("unexpected end of object");
        }
        // Match the colon.
        if (!tokenizer.next_token(token)) {
          throw JsonException("unexpected end of object");
        }
        if (token.ty != L_JSON_TOKEN_COLON) {
          throw JsonException("unexpected token in object");
        }
        // Match the value.
        JsonValue field;
        if (!try_parse_impl(tokenizer, field)) {
          throw JsonException("unexpected end of object");
        }
        out.obj[key] = std::move(field);
        // Should we head for another round?
        if (tokenizer.next_token(token)) {
          if (token.ty == L_JSON_TOKEN_COMMA) {
            continue;
          } else if (token.ty == L_JSON_TOKEN_CLOSE_BRACE) {
            break;
          } else {
            throw JsonException("unexpected token in object");
          }
        } else {
          throw JsonException("unexpected end of object");
        }
      }
      return true;
    case L_JSON_TOKEN_CLOSE_BRACE:
    case L_JSON_TOKEN_CLOSE_BRACKET:
      return false;
    default:
      throw JsonException("unexpected token");
    }
  }
  // Reached only when the input ended before any token was read.
  throw JsonException("unexpected program state");
}
// Parses `json_lit` into a `JsonValue`.
// Throws `JsonException` when the text is empty, malformed, or begins with a
// closing brace/bracket.
JsonValue parse(const std::string& json_lit) {
  if (json_lit.empty()) {
    throw JsonException("json text is empty");
  }

  Tokenizer tokenizer(json_lit);
  JsonValue root;
  const bool parsed = try_parse_impl(tokenizer, root);
  if (!parsed) {
    throw JsonException("unexpected close token");
  }
  return root;
}
bool try_parse(const std::string& json_lit, JsonValue& out) {
try {
out = parse(json_lit);
} catch (JsonException e) {
log::error("failed to parse json: ", e.what());
return false;
}
return true;
}
// Writes `str` as a quoted JSON string.
// The quote, the backslash and the control characters that have a short
// escape (\b \f \n \r \t) are escaped; every other character is written
// unchanged.
static void print_str_impl(const std::string& str, std::stringstream& out) {
  out << '"';
  for (char c : str) {
    switch (c) {
    case '"': out << "\\\""; break;
    case '\\': out << "\\\\"; break;
    case '\b': out << "\\b"; break;
    case '\f': out << "\\f"; break;
    case '\n': out << "\\n"; break;
    case '\r': out << "\\r"; break;
    case '\t': out << "\\t"; break;
    default: out << c; break;
    }
  }
  out << '"';
}
// Serialises `json` into `out` in compact form (no whitespace).
// String values and object keys go through `print_str_impl`, so embedded
// quotes and backslashes no longer yield invalid JSON. Numbers are written
// with the stream's current formatting.
void print_impl(const JsonValue& json, std::stringstream& out) {
  switch (json.ty) {
  case L_JSON_NULL:
    out << "null";
    return;
  case L_JSON_BOOLEAN:
    out << (json.b ? "true" : "false");
    return;
  case L_JSON_NUMBER:
    out << json.num;
    return;
  case L_JSON_STRING:
    print_str_impl(json.str, out);
    return;
  case L_JSON_OBJECT:
    out << "{";
    {
      bool is_first_iter = true;
      for (const auto& pair : json.obj) {
        if (is_first_iter) {
          is_first_iter = false;
        } else {
          out << ",";
        }
        print_str_impl(pair.first, out);
        out << ":";
        print_impl(pair.second, out);
      }
    }
    out << "}";
    return;
  case L_JSON_ARRAY:
    out << "[";
    {
      bool is_first_iter = true;
      for (const auto& elem : json.arr) {
        if (is_first_iter) {
          is_first_iter = false;
        } else {
          out << ",";
        }
        print_impl(elem, out);
      }
    }
    out << "]";
    return;
  }
}
// Serialize `json` to its textual form by delegating to `print_impl` and
// returning everything it wrote.
std::string print(const JsonValue& json) {
  std::stringstream buffer;
  print_impl(json, buffer);
  return buffer.str();
}
} // namespace json
} // namespace archprobe

40
src/log.cpp Normal file
Просмотреть файл

@ -0,0 +1,40 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// NOTE: This is a modified excerpt of
// https://github.com/PENGUINLIONG/graphi-t/blob/0e3c1394b493db3e3d5b443c869545cac712827a/src/log.cpp;
// MIT-licensed by Rendong Liang.
#include "log.hpp"
namespace archprobe {
namespace log {
// Definitions of the logger's global state. The matching declarations are
// expected to come from "log.hpp" (`decltype(log_callback)` below relies on
// one being visible).
namespace detail {
// Callback installed through `set_log_callback`; starts out as null.
decltype(log_callback) log_callback = nullptr;
// Level assigned by `set_log_filter_level`.
LogLevel filter_lv;
// Current indentation amount; `push_indent`/`pop_indent` change it in steps
// of 4.
uint32_t indent;
} // namespace detail
// Store `cb` in `detail::log_callback`, replacing whatever was set before.
void set_log_callback(decltype(detail::log_callback) cb) {
detail::log_callback = cb;
}
// Store `lv` in `detail::filter_lv`.
// NOTE(review): presumably messages below this level are dropped by the
// logging front-end in "log.hpp" -- confirm there.
void set_log_filter_level(LogLevel lv) {
detail::filter_lv = lv;
}
// Increase the shared indentation amount by 4. Pair each call with a
// `pop_indent`.
void push_indent() {
detail::indent += 4;
}
// Decrease the shared indentation amount by 4, undoing one `push_indent`.
//
// `detail::indent` is unsigned, so an unmatched pop would otherwise wrap
// around to a huge value; the amount is clamped at zero instead.
void pop_indent() {
  detail::indent = detail::indent >= 4 ? detail::indent - 4 : 0;
}
} // namespace log
} // namespace archprobe

101
src/table.cpp Normal file
Просмотреть файл

@ -0,0 +1,101 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "table.hpp"
namespace archprobe {
namespace table {
// Construct a table with the given column headers and no data rows.
//
// `headers` is a named rvalue reference and therefore an lvalue inside the
// constructor; it must be moved explicitly or the vector would be copied.
Table::Table(std::vector<std::string>&& headers) :
  headers(std::move(headers)), rows() {}
// Construct a table from column headers and pre-built data rows.
//
// Both arguments are named rvalue references (lvalues inside the
// constructor), so they are moved explicitly to avoid deep copies.
Table::Table(
  std::vector<std::string>&& headers,
  std::vector<std::vector<double>>&& rows
) : headers(std::move(headers)), rows(std::move(rows)) {}
std::string Table::to_csv(uint32_t nsig_digit) const {
std::stringstream ss;
ss << std::setprecision(nsig_digit);
{
bool first_col = true;
for (const auto& header : headers) {
if (first_col) {
first_col = false;
} else {
ss << ",";
}
ss << header;
}
// Enforce newline character to be `\n`.
ss << '\n';
}
{
for (const auto& row : rows) {
bool first_col = true;
for (const auto& cell : row) {
if (first_col) {
first_col = false;
} else {
ss << ",";
}
ss << cell;
}
ss << '\n';
}
}
return ss.str();
}
// Split the remaining content of `ss` on ',' and return the fields as
// strings, in order.
//
// Reading stops once `peek()` reports end of input, so a trailing comma does
// not produce a final empty field, and an empty stream yields no fields.
std::vector<std::string> parse_header_row(std::istringstream& ss) {
  std::vector<std::string> fields;
  for (;;) {
    if (ss.peek() == EOF) {
      break;
    }
    std::string field;
    std::getline(ss, field, ',');
    fields.emplace_back(std::move(field));
  }
  return fields;
}
// Split the remaining content of `ss` on ',' and convert each field to a
// double with `std::atof`, in order.
//
// Reading stops once `peek()` reports end of input, so a trailing comma does
// not produce a final value, and an empty stream yields no values.
std::vector<double> parse_data_row(std::istringstream& ss) {
  std::vector<double> values;
  for (;;) {
    if (ss.peek() == EOF) {
      break;
    }
    std::string field;
    std::getline(ss, field, ',');
    const double value = std::atof(field.c_str());
    values.emplace_back(value);
  }
  return values;
}
// Build a `Table` from CSV text.
//
// The first line (if any) is split into header strings by
// `parse_header_row`; every following line is split into doubles by
// `parse_data_row`. Lines are delimited by '\n'. Empty input produces a table
// with no headers and no rows.
Table Table::from_csv(std::string csv) {
  std::istringstream text(csv);

  // First line: column headers.
  std::vector<std::string> headers;
  if (text.peek() != EOF) {
    std::string header_line;
    std::getline(text, header_line, '\n');
    std::istringstream header_stream(header_line);
    headers = parse_header_row(header_stream);
  }

  // Remaining lines: one data row each.
  std::vector<std::vector<double>> data_rows;
  while (text.peek() != EOF) {
    std::string row_line;
    std::getline(text, row_line, '\n');
    std::istringstream row_stream(row_line);
    data_rows.emplace_back(parse_data_row(row_stream));
  }

  return Table(std::move(headers), std::move(data_rows));
}
} // namespace table
} // namespace archprobe

53
src/util.cpp Normal file
Просмотреть файл

@ -0,0 +1,53 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// NOTE: This is a modified excerpt of
// https://github.com/PENGUINLIONG/graphi-t/blob/da31ec530df07c9899e056eeced08a64062dcfce/src/util.cpp;
// MIT-licensed by Rendong Liang.
#include "util.hpp"
#include "assert.hpp"
namespace archprobe {
namespace util {
// Read the whole file at `path` and return its bytes.
//
// The file is opened in binary mode positioned at its end so that `tellg`
// yields the file size. The project `assert` fires when the file cannot be
// opened, when its size cannot be determined, or when fewer bytes than
// expected could be read.
std::vector<uint8_t> load_file(const char* path) {
  std::ifstream f(path, std::ios::ate | std::ios::binary | std::ios::in);
  assert(f.is_open(), "unable to open file: ", path);
  // `tellg` reports -1 on failure; check before converting to an unsigned
  // size, which would otherwise request an enormous allocation.
  const std::streamoff size = f.tellg();
  assert(size >= 0, "unable to determine size of file: ", path);
  // Rewind with an explicit (offset, direction) pair. The single-argument
  // overload takes an absolute position, not a seek direction.
  f.seekg(0, std::ios::beg);
  std::vector<uint8_t> buf(static_cast<size_t>(size));
  f.read(reinterpret_cast<char*>(buf.data()), size);
  assert(f.gcount() == size, "unable to read file: ", path);
  // `f` is closed by its destructor.
  return buf;
}
// Read the whole file at `path` and return its content as a string.
//
// The file is opened in binary mode, so no newline translation is applied.
// The project `assert` fires when the file cannot be opened, when its size
// cannot be determined, or when fewer bytes than expected could be read.
std::string load_text(const char* path) {
  std::ifstream f(path, std::ios::ate | std::ios::binary | std::ios::in);
  assert(f.is_open(), "unable to open file: ", path);
  // `tellg` reports -1 on failure; check before converting to an unsigned
  // size, which would otherwise request an enormous allocation.
  const std::streamoff size = f.tellg();
  assert(size >= 0, "unable to determine size of file: ", path);
  // Rewind with an explicit (offset, direction) pair. The single-argument
  // overload takes an absolute position, not a seek direction.
  f.seekg(0, std::ios::beg);
  std::string buf(static_cast<size_t>(size), '\0');
  // `&buf[0]` gives a writable pointer; `data()` is const before C++17.
  f.read(&buf[0], size);
  assert(f.gcount() == size, "unable to read file: ", path);
  // `f` is closed by its destructor.
  return buf;
}
// Write `size` bytes starting at `data` to the file at `path`, truncating any
// existing content. The project `assert` fires when the file cannot be opened.
void save_file(const char* path, const void* data, size_t size) {
  const auto mode = std::ios::out | std::ios::binary | std::ios::trunc;
  std::ofstream sink(path, mode);
  assert(sink.is_open(), "unable to open file: ", path);
  const char* bytes = static_cast<const char*>(data);
  sink.write(bytes, size);
  sink.close();
}
// Write `txt` to the file at `path`, truncating any existing content. The
// file is opened in binary mode, so the text is stored without newline
// translation. The project `assert` fires when the file cannot be opened.
void save_text(const char* path, const std::string& txt) {
  const auto mode = std::ios::out | std::ios::binary | std::ios::trunc;
  std::ofstream sink(path, mode);
  assert(sink.is_open(), "unable to open file: ", path);
  sink << txt;
  sink.close();
}
} // namespace util
} // namespace archprobe

1
third/libopencl-stub Submodule

@ -0,0 +1 @@
Subproject commit 00c3a657d134447315c22d2cc344e2e2e9c4c026