Finished converting all int64_t and uint64_t to 32-bit versions to improve cross-platform compatibility later. I do not yet see any issues from this. Also, the benchmark kernel code is now officially very messy due to tons of OS and architectural dependencies and pound-defines (#define/#ifdef) :(

This commit is contained in:
Mark Gottscho 2015-04-21 18:44:51 -07:00
Родитель edbaaab5e4
Коммит a425635999
13 изменённых файлов: 560 добавлений и 253 удалений

Просмотреть файл

@@ -38,7 +38,7 @@ PROJECT_NAME = X-Mem
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
-PROJECT_NUMBER = 2.1.9
+PROJECT_NUMBER = 2.1.10
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a

Просмотреть файл

@@ -1,7 +1,7 @@
 README
 ------------------------------------------------------------------------------------------------------------
-X-Mem: Extensible Memory Benchmarking Tool v2.1.9
+X-Mem: Extensible Memory Benchmarking Tool v2.1.10
 ------------------------------------------------------------------------------------------------------------
 The flexible open-source research tool for characterizing memory hierarchy throughput, latency, and power.
@@ -10,7 +10,7 @@ Originally authored by Mark Gottscho (Email: <mgottscho@ucla.edu>) as a Summer 2
 This project is under active development. Stay tuned for more updates.
-PROJECT REVISION DATE: April 16, 2015.
+PROJECT REVISION DATE: April 21, 2015.
 ------------------------------------------------------------------------------------------------------------
 LICENSE

Двоичные данные
X-Mem_Developer_Manual.pdf

Двоичный файл не отображается.

Просмотреть файл

@@ -50,7 +50,7 @@ LatencyWorker::LatencyWorker(
 void* mem_array,
 size_t len,
 #ifdef USE_SIZE_BASED_BENCHMARKS
-uint64_t passes_per_iteration,
+uint32_t passes_per_iteration,
 #endif
 RandomFunction kernel_fptr,
 RandomFunction kernel_dummy_fptr,
@@ -127,7 +127,7 @@ void LatencyWorker::run() {
 std::cerr << "WARNING: Failed to boost scheduling priority. Perhaps running in Administrator mode would help." << std::endl;
 //Prime memory
-for (uint64_t i = 0; i < 4; i++) {
+for (uint32_t i = 0; i < 4; i++) {
 void* prime_start_address = mem_array;
 void* prime_end_address = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(mem_array) + len);
 forwSequentialRead_Word64(prime_start_address, prime_end_address); //dependent reads on the memory, make sure caches are ready, coherence, etc...

Просмотреть файл

@@ -183,7 +183,7 @@ void LoadWorker::run() {
 start_tick = start_timer();
 UNROLL1024(
 (*kernel_fptr_seq)(start_address, end_address);
-start_address = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(mem_array)+(reinterpret_cast<uint64_t>(start_address)+bytes_per_pass) % len);
+start_address = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(mem_array)+(reinterpret_cast<uintptr_t>(start_address)+bytes_per_pass) % len);
 end_address = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(start_address) + bytes_per_pass);
 )
 stop_tick = stop_timer();
@@ -207,7 +207,7 @@ void LoadWorker::run() {
 start_tick = start_timer();
 UNROLL1024(
 (*kernel_dummy_fptr_seq)(start_address, end_address);
-start_address = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(mem_array)+(reinterpret_cast<uint64_t>(start_address)+bytes_per_pass) % len);
+start_address = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(mem_array)+(reinterpret_cast<uintptr_t>(start_address)+bytes_per_pass) % len);
 end_address = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(start_address) + bytes_per_pass);
 )
 stop_tick = stop_timer();

Просмотреть файл

@@ -1,7 +1,7 @@
 README
 ------------------------------------------------------------------------------------------------------------
-X-Mem: Extensible Memory Benchmarking Tool v2.1.9
+X-Mem: Extensible Memory Benchmarking Tool v2.1.10
 ------------------------------------------------------------------------------------------------------------
 The flexible open-source research tool for characterizing memory hierarchy throughput, latency, and power.
@@ -10,7 +10,7 @@ Originally authored by Mark Gottscho (Email: <mgottscho@ucla.edu>) as a Summer 2
 This project is under active development. Stay tuned for more updates.
-PROJECT REVISION DATE: April 16, 2015.
+PROJECT REVISION DATE: April 21, 2015.
 ------------------------------------------------------------------------------------------------------------
 LICENSE

Просмотреть файл

@@ -46,13 +46,7 @@ Timer::Timer() :
 _ticks_per_ms(0),
 _ns_per_tick(0)
 {
-#ifdef ARCH_64BIT
-uint64_t
-#else
-uint32_t
-#endif
-start_tick, stop_tick;
+tick_t start_tick, stop_tick;
 start_tick = start_timer();
 #ifdef _WIN32

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@@ -57,7 +57,7 @@ DelayInjectedLoadedLatencyBenchmark::DelayInjectedLoadedLatencyBenchmark(
 size_t len,
 uint32_t iterations,
 #ifdef USE_SIZE_BASED_BENCHMARKS
-uint64_t passes_per_iteration,
+uint32_t passes_per_iteration,
 #endif
 uint32_t num_worker_threads,
 uint32_t mem_node,
@@ -300,18 +300,18 @@ bool DelayInjectedLoadedLatencyBenchmark::_run_core() {
 bool iter_warning = false;
 //Compute latency metric
-uint64_t lat_passes = workers[0]->getPasses();
-uint64_t lat_adjusted_ticks = workers[0]->getAdjustedTicks();
-uint64_t lat_elapsed_dummy_ticks = workers[0]->getElapsedDummyTicks();
-uint64_t lat_bytes_per_pass = workers[0]->getBytesPerPass();
-uint64_t lat_accesses_per_pass = lat_bytes_per_pass / 8;
+uint32_t lat_passes = workers[0]->getPasses();
+tick_t lat_adjusted_ticks = workers[0]->getAdjustedTicks();
+tick_t lat_elapsed_dummy_ticks = workers[0]->getElapsedDummyTicks();
+uint32_t lat_bytes_per_pass = workers[0]->getBytesPerPass();
+uint32_t lat_accesses_per_pass = lat_bytes_per_pass / 8;
 iter_warning |= workers[0]->hadWarning();
 //Compute throughput generated by load threads
-uint64_t load_total_passes = 0;
-uint64_t load_total_adjusted_ticks = 0;
-uint64_t load_total_elapsed_dummy_ticks = 0;
-uint64_t load_bytes_per_pass = 0;
+uint32_t load_total_passes = 0;
+tick_t load_total_adjusted_ticks = 0;
+tick_t load_total_elapsed_dummy_ticks = 0;
+uint32_t load_bytes_per_pass = 0;
 double load_avg_adjusted_ticks = 0;
 for (uint32_t t = 1; t < _num_worker_threads; t++) {
 load_total_passes += workers[t]->getPasses();

Просмотреть файл

@@ -54,7 +54,7 @@ namespace xmem {
 void* mem_array,
 size_t len,
 #ifdef USE_SIZE_BASED_BENCHMARKS
-uint64_t passes_per_iteration,
+uint32_t passes_per_iteration,
 #endif
 RandomFunction kernel_fptr,
 RandomFunction kernel_dummy_fptr,

Просмотреть файл

@@ -45,7 +45,7 @@
 namespace xmem {
-#define VERSION "2.1.9"
+#define VERSION "2.1.10"
 #if !defined(_WIN32) && !defined(__gnu_linux__)
 #error Neither Windows/GNULinux build environments were detected!

Просмотреть файл

@@ -56,7 +56,7 @@ namespace xmem {
 size_t len,
 uint32_t iterations,
 #ifdef USE_SIZE_BASED_BENCHMARKS
-uint64_t passes_per_iteration,
+uint32_t passes_per_iteration,
 #endif
 uint32_t num_worker_threads,
 uint32_t mem_node,

Просмотреть файл

@@ -100,8 +100,8 @@ void WindowsDRAMPowerReader::run() {
 }
 calculateMetrics();
-uint64_t stop_tick = stop_timer();
-uint64_t elapsed_ticks = stop_tick - start_tick;
+tick_t stop_tick = stop_timer();
+tick_t elapsed_ticks = stop_tick - start_tick;
 Sleep(static_cast<DWORD>(_sampling_period - elapsed_ticks*g_ns_per_tick*1e-9*1000)); //Account for any loop overhead
 }
 }