зеркало из https://github.com/microsoft/X-Mem.git
Finished converting all int64_t and uint64_t to 32-bit versions to improve cross-platform compatibility later. I do not yet see any issues from this. Also, the benchmark kernel code is now officially very messy due to tons of OS- and architecture-specific dependencies and preprocessor "pound-defs" (#ifdef/#define) :(
This commit is contained in:
Родитель
edbaaab5e4
Коммит
a425635999
2
Doxyfile
2
Doxyfile
|
@ -38,7 +38,7 @@ PROJECT_NAME = X-Mem
|
|||
# could be handy for archiving the generated documentation or if some version
|
||||
# control system is used.
|
||||
|
||||
PROJECT_NUMBER = 2.1.9
|
||||
PROJECT_NUMBER = 2.1.10
|
||||
|
||||
# Using the PROJECT_BRIEF tag one can provide an optional one line description
|
||||
# for a project that appears at the top of each page and should give viewer a
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
README
|
||||
------------------------------------------------------------------------------------------------------------
|
||||
|
||||
X-Mem: Extensible Memory Benchmarking Tool v2.1.9
|
||||
X-Mem: Extensible Memory Benchmarking Tool v2.1.10
|
||||
------------------------------------------------------------------------------------------------------------
|
||||
|
||||
The flexible open-source research tool for characterizing memory hierarchy throughput, latency, and power.
|
||||
|
@ -10,7 +10,7 @@ Originally authored by Mark Gottscho (Email: <mgottscho@ucla.edu>) as a Summer 2
|
|||
|
||||
This project is under active development. Stay tuned for more updates.
|
||||
|
||||
PROJECT REVISION DATE: April 16, 2015.
|
||||
PROJECT REVISION DATE: April 21, 2015.
|
||||
|
||||
------------------------------------------------------------------------------------------------------------
|
||||
LICENSE
|
||||
|
|
Двоичные данные
X-Mem_Developer_Manual.pdf
Двоичные данные
X-Mem_Developer_Manual.pdf
Двоичный файл не отображается.
|
@ -50,7 +50,7 @@ LatencyWorker::LatencyWorker(
|
|||
void* mem_array,
|
||||
size_t len,
|
||||
#ifdef USE_SIZE_BASED_BENCHMARKS
|
||||
uint64_t passes_per_iteration,
|
||||
uint32_t passes_per_iteration,
|
||||
#endif
|
||||
RandomFunction kernel_fptr,
|
||||
RandomFunction kernel_dummy_fptr,
|
||||
|
@ -127,7 +127,7 @@ void LatencyWorker::run() {
|
|||
std::cerr << "WARNING: Failed to boost scheduling priority. Perhaps running in Administrator mode would help." << std::endl;
|
||||
|
||||
//Prime memory
|
||||
for (uint64_t i = 0; i < 4; i++) {
|
||||
for (uint32_t i = 0; i < 4; i++) {
|
||||
void* prime_start_address = mem_array;
|
||||
void* prime_end_address = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(mem_array) + len);
|
||||
forwSequentialRead_Word64(prime_start_address, prime_end_address); //dependent reads on the memory, make sure caches are ready, coherence, etc...
|
||||
|
|
|
@ -183,7 +183,7 @@ void LoadWorker::run() {
|
|||
start_tick = start_timer();
|
||||
UNROLL1024(
|
||||
(*kernel_fptr_seq)(start_address, end_address);
|
||||
start_address = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(mem_array)+(reinterpret_cast<uint64_t>(start_address)+bytes_per_pass) % len);
|
||||
start_address = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(mem_array)+(reinterpret_cast<uintptr_t>(start_address)+bytes_per_pass) % len);
|
||||
end_address = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(start_address) + bytes_per_pass);
|
||||
)
|
||||
stop_tick = stop_timer();
|
||||
|
@ -207,7 +207,7 @@ void LoadWorker::run() {
|
|||
start_tick = start_timer();
|
||||
UNROLL1024(
|
||||
(*kernel_dummy_fptr_seq)(start_address, end_address);
|
||||
start_address = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(mem_array)+(reinterpret_cast<uint64_t>(start_address)+bytes_per_pass) % len);
|
||||
start_address = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(mem_array)+(reinterpret_cast<uintptr_t>(start_address)+bytes_per_pass) % len);
|
||||
end_address = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(start_address) + bytes_per_pass);
|
||||
)
|
||||
stop_tick = stop_timer();
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
README
|
||||
------------------------------------------------------------------------------------------------------------
|
||||
|
||||
X-Mem: Extensible Memory Benchmarking Tool v2.1.9
|
||||
X-Mem: Extensible Memory Benchmarking Tool v2.1.10
|
||||
------------------------------------------------------------------------------------------------------------
|
||||
|
||||
The flexible open-source research tool for characterizing memory hierarchy throughput, latency, and power.
|
||||
|
@ -10,7 +10,7 @@ Originally authored by Mark Gottscho (Email: <mgottscho@ucla.edu>) as a Summer 2
|
|||
|
||||
This project is under active development. Stay tuned for more updates.
|
||||
|
||||
PROJECT REVISION DATE: April 16, 2015.
|
||||
PROJECT REVISION DATE: April 21, 2015.
|
||||
|
||||
------------------------------------------------------------------------------------------------------------
|
||||
LICENSE
|
||||
|
|
|
@ -46,13 +46,7 @@ Timer::Timer() :
|
|||
_ticks_per_ms(0),
|
||||
_ns_per_tick(0)
|
||||
{
|
||||
#ifdef ARCH_64BIT
|
||||
uint64_t
|
||||
#else
|
||||
uint32_t
|
||||
#endif
|
||||
start_tick, stop_tick;
|
||||
|
||||
tick_t start_tick, stop_tick;
|
||||
|
||||
start_tick = start_timer();
|
||||
#ifdef _WIN32
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -57,7 +57,7 @@ DelayInjectedLoadedLatencyBenchmark::DelayInjectedLoadedLatencyBenchmark(
|
|||
size_t len,
|
||||
uint32_t iterations,
|
||||
#ifdef USE_SIZE_BASED_BENCHMARKS
|
||||
uint64_t passes_per_iteration,
|
||||
uint32_t passes_per_iteration,
|
||||
#endif
|
||||
uint32_t num_worker_threads,
|
||||
uint32_t mem_node,
|
||||
|
@ -300,18 +300,18 @@ bool DelayInjectedLoadedLatencyBenchmark::_run_core() {
|
|||
bool iter_warning = false;
|
||||
|
||||
//Compute latency metric
|
||||
uint64_t lat_passes = workers[0]->getPasses();
|
||||
uint64_t lat_adjusted_ticks = workers[0]->getAdjustedTicks();
|
||||
uint64_t lat_elapsed_dummy_ticks = workers[0]->getElapsedDummyTicks();
|
||||
uint64_t lat_bytes_per_pass = workers[0]->getBytesPerPass();
|
||||
uint64_t lat_accesses_per_pass = lat_bytes_per_pass / 8;
|
||||
uint32_t lat_passes = workers[0]->getPasses();
|
||||
tick_t lat_adjusted_ticks = workers[0]->getAdjustedTicks();
|
||||
tick_t lat_elapsed_dummy_ticks = workers[0]->getElapsedDummyTicks();
|
||||
uint32_t lat_bytes_per_pass = workers[0]->getBytesPerPass();
|
||||
uint32_t lat_accesses_per_pass = lat_bytes_per_pass / 8;
|
||||
iter_warning |= workers[0]->hadWarning();
|
||||
|
||||
//Compute throughput generated by load threads
|
||||
uint64_t load_total_passes = 0;
|
||||
uint64_t load_total_adjusted_ticks = 0;
|
||||
uint64_t load_total_elapsed_dummy_ticks = 0;
|
||||
uint64_t load_bytes_per_pass = 0;
|
||||
uint32_t load_total_passes = 0;
|
||||
tick_t load_total_adjusted_ticks = 0;
|
||||
tick_t load_total_elapsed_dummy_ticks = 0;
|
||||
uint32_t load_bytes_per_pass = 0;
|
||||
double load_avg_adjusted_ticks = 0;
|
||||
for (uint32_t t = 1; t < _num_worker_threads; t++) {
|
||||
load_total_passes += workers[t]->getPasses();
|
||||
|
|
|
@ -54,7 +54,7 @@ namespace xmem {
|
|||
void* mem_array,
|
||||
size_t len,
|
||||
#ifdef USE_SIZE_BASED_BENCHMARKS
|
||||
uint64_t passes_per_iteration,
|
||||
uint32_t passes_per_iteration,
|
||||
#endif
|
||||
RandomFunction kernel_fptr,
|
||||
RandomFunction kernel_dummy_fptr,
|
||||
|
|
|
@ -45,7 +45,7 @@
|
|||
|
||||
namespace xmem {
|
||||
|
||||
#define VERSION "2.1.9"
|
||||
#define VERSION "2.1.10"
|
||||
|
||||
#if !defined(_WIN32) && !defined(__gnu_linux__)
|
||||
#error Neither Windows/GNULinux build environments were detected!
|
||||
|
|
|
@ -56,7 +56,7 @@ namespace xmem {
|
|||
size_t len,
|
||||
uint32_t iterations,
|
||||
#ifdef USE_SIZE_BASED_BENCHMARKS
|
||||
uint64_t passes_per_iteration,
|
||||
uint32_t passes_per_iteration,
|
||||
#endif
|
||||
uint32_t num_worker_threads,
|
||||
uint32_t mem_node,
|
||||
|
|
|
@ -100,8 +100,8 @@ void WindowsDRAMPowerReader::run() {
|
|||
}
|
||||
|
||||
calculateMetrics();
|
||||
uint64_t stop_tick = stop_timer();
|
||||
uint64_t elapsed_ticks = stop_tick - start_tick;
|
||||
tick_t stop_tick = stop_timer();
|
||||
tick_t elapsed_ticks = stop_tick - start_tick;
|
||||
Sleep(static_cast<DWORD>(_sampling_period - elapsed_ticks*g_ns_per_tick*1e-9*1000)); //Account for any loop overhead
|
||||
}
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче