Finished variable/function renaming for the Benchmark, LatencyBenchmark, ThroughputBenchmark, and DelayInjectedLoadedLatencyBenchmark classes.

Mark Gottscho 2016-04-05 02:01:51 -07:00
Parent de5dff8803
Commit babee8e73f
10 changed files with 119 additions and 115 deletions
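The renaming follows a consistent convention: member variables move to snake_case with a trailing underscore, and member functions move to camelCase. A minimal sketch of the before/after style, using a hypothetical class with names drawn from the diff below (illustrative only, not code from the X-Mem tree):

#include <cstdint>

// Hypothetical class showing the naming convention applied by this commit.
class ExampleBenchmark {
public:
    bool runCore();               // was: _run_core()
    void reportResults() const;   // was: report_results()
private:
    void* mem_array_;             // was: _mem_array
    uint32_t num_worker_threads_; // was: _num_worker_threads
    bool has_run_;                // was: hasRun_
};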

View file

@@ -38,7 +38,7 @@ PROJECT_NAME = X-Mem
# could be handy for archiving the generated documentation or if some version
# control system is used.
PROJECT_NUMBER = 2.4.0
PROJECT_NUMBER = 2.4.1
# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a

View file

@@ -1,12 +1,12 @@
README
------------------------------------------------------------------------------------------------------------
X-Mem: A Cross-Platform and Extensible Memory Characterization Tool for the Cloud v2.4.0
X-Mem: A Cross-Platform and Extensible Memory Characterization Tool for the Cloud v2.4.1
------------------------------------------------------------------------------------------------------------
X-Mem is a flexible open-source research tool for characterizing memory hierarchy throughput, latency, power, and more. The tool was developed jointly by Microsoft and the UCLA NanoCAD Lab. This project was started by Mark Gottscho (Email: mgottscho@ucla.edu) as a Summer 2014 PhD intern at Microsoft Research. X-Mem is released freely and open-source under the MIT License. The project is under active development.
PROJECT REVISION DATE: April 4, 2016
PROJECT REVISION DATE: April 5, 2016
------------------------------------------------------------------------------------------------------------
RESEARCH PAPER & ATTRIBUTION

View file

@@ -75,11 +75,11 @@ Benchmark::Benchmark(
metric_on_iter_(),
mean_metric_(0),
min_metric_(0),
25_percentile_metric_(0),
percentile_25_metric_(0),
median_metric_(0),
75_percentile_metric_(0),
95_percentile_metric_(0),
99_percentile_metric_(0),
percentile_75_metric_(0),
percentile_95_metric_(0),
percentile_99_metric_(0),
max_metric_(0),
mode_metric_(0),
metric_units_(metric_units),
@@ -87,7 +87,7 @@ Benchmark::Benchmark(
peak_dram_power_socket_(),
name_(name),
obj_valid_(false),
hasRun_(false),
has_run_(false),
warning_(false)
{
@@ -107,7 +107,7 @@ bool Benchmark::run() {
//Write to all of the memory region of interest to make sure
//pages are resident in physical memory and are not shared
forwSequentialWrite_Word32(_mem_array,
forwSequentialWrite_Word32(mem_array_,
reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(mem_array_) + len_));
bool success = runCore();
@@ -191,7 +191,7 @@ void Benchmark::reportBenchmarkInfo() const {
std::cout << "read";
break;
case WRITE:
if (_pattern_mode == RANDOM) //special case
if (pattern_mode_ == RANDOM) //special case
std::cout << "read+write";
else
std::cout << "write";
@@ -215,7 +215,7 @@ void Benchmark::reportResults() const {
std::cout << "***" << std::endl;
std::cout << std::endl;
if (hasRun_) {
if (has_run_) {
for (uint32_t i = 0; i < iterations_; i++) {
std::printf("Iter #%4d: %0.3f %s", i, metric_on_iter_[i], metric_units_.c_str());
//std::cout << "Iter #" << i << ": " << metric_on_iter_[i] << " " << metric_units_;
@@ -237,7 +237,7 @@ void Benchmark::reportResults() const {
std::cout << " (WARNING)";
std::cout << std::endl;
std::cout << "25th Percentile: " << 25_percentile_metric_ << " " << metric_units_;
std::cout << "25th Percentile: " << percentile_25_metric_ << " " << metric_units_;
if (warning_)
std::cout << " (WARNING)";
std::cout << std::endl;
@@ -247,17 +247,17 @@ void Benchmark::reportResults() const {
std::cout << " (WARNING)";
std::cout << std::endl;
std::cout << "75th Percentile: " << 75_percentile_metric_ << " " << metric_units_;
std::cout << "75th Percentile: " << percentile_75_metric_ << " " << metric_units_;
if (warning_)
std::cout << " (WARNING)";
std::cout << std::endl;
std::cout << "95th Percentile: " << 95_percentile_metric_ << " " << metric_units_;
std::cout << "95th Percentile: " << percentile_95_metric_ << " " << metric_units_;
if (warning_)
std::cout << " (WARNING)";
std::cout << std::endl;
std::cout << "99th Percentile: " << 99_percentile_metric_ << " " << metric_units_;
std::cout << "99th Percentile: " << percentile_99_metric_ << " " << metric_units_;
if (warning_)
std::cout << " (WARNING)";
std::cout << std::endl;
@@ -314,7 +314,7 @@ double Benchmark::getMinMetric() const {
double Benchmark::get25PercentileMetric() const {
if (has_run_)
return 25_percentile_metric_;
return percentile_25_metric_;
else //bad call
return -1;
}
@@ -328,21 +328,21 @@ double Benchmark::getMedianMetric() const {
double Benchmark::get75PercentileMetric() const {
if (has_run_)
return 75_percentile_metric_;
return percentile_75_metric_;
else //bad call
return -1;
}
double Benchmark::get95PercentileMetric() const {
if (has_run_)
return 95_percentile_metric_;
return percentile_95_metric_;
else //bad call
return -1;
}
double Benchmark::get99PercentileMetric() const {
if (has_run_)
return 99_percentile_metric_;
return percentile_99_metric_;
else //bad call
return -1;
}
@@ -433,11 +433,11 @@ void Benchmark::computeMetrics() {
//Compute percentiles
min_metric_ = sortedMetrics.front();
25_percentile_metric_ = sortedMetrics[sortedMetrics.size()/4];
75_percentile_metric_ = sortedMetrics[sortedMetrics.size()*3/4];
percentile_25_metric_ = sortedMetrics[sortedMetrics.size()/4];
percentile_75_metric_ = sortedMetrics[sortedMetrics.size()*3/4];
median_metric_ = sortedMetrics[sortedMetrics.size()/2];
95_percentile_metric_ = sortedMetrics[sortedMetrics.size()*95/100];
99_percentile_metric_ = sortedMetrics[sortedMetrics.size()*99/100];
percentile_95_metric_ = sortedMetrics[sortedMetrics.size()*95/100];
percentile_99_metric_ = sortedMetrics[sortedMetrics.size()*99/100];
max_metric_ = sortedMetrics.back();
//Compute mode
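For context on the percentile members renamed above: computeMetrics() sorts the per-iteration metrics and picks approximate percentiles by indexing the sorted vector at size()*p/100. A minimal sketch of that indexing, using a hypothetical helper that is not part of the commit:

#include <cstdint>
#include <vector>

// Hypothetical helper mirroring the indexing in Benchmark::computeMetrics():
// element size()*p/100 of the sorted metrics, with truncating integer division.
double percentileFromSorted(const std::vector<double>& sorted_metrics, uint32_t p) {
    return sorted_metrics[(sorted_metrics.size() * p) / 100];
}
// e.g., percentile_25_metric_ corresponds to percentileFromSorted(sortedMetrics, 25)
//       and percentile_99_metric_ to percentileFromSorted(sortedMetrics, 99).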

View file

@@ -192,7 +192,7 @@ bool BenchmarkManager::runThroughputBenchmarks() {
for (uint32_t i = 0; i < __tp_benchmarks.size(); i++) {
__tp_benchmarks[i]->run();
__tp_benchmarks[i]->report_results(); //to console
__tp_benchmarks[i]->reportResults(); //to console
//Write to results file if necessary
if (__config.useOutputFile()) {
@@ -306,7 +306,7 @@ bool BenchmarkManager::runLatencyBenchmarks() {
for (uint32_t i = 0; i < __lat_benchmarks.size(); i++) {
__lat_benchmarks[i]->run();
__lat_benchmarks[i]->report_results(); //to console
__lat_benchmarks[i]->reportResults(); //to console
//Write to results file if necessary
if (__config.useOutputFile()) {
@@ -775,7 +775,7 @@ bool BenchmarkManager::runExtDelayInjectedLoadedLatencyBenchmark() {
//Run benchmarks
for (uint32_t i = 0; i < del_lat_benchmarks.size(); i++) {
del_lat_benchmarks[i]->run();
del_lat_benchmarks[i]->report_results(); //to console
del_lat_benchmarks[i]->reportResults(); //to console
//Write to results file if necessary
if (__config.useOutputFile()) {

View file

@@ -87,7 +87,7 @@ LatencyBenchmark::LatencyBenchmark(
load_metric_on_iter_.push_back(0);
}
void LatencyBenchmark::report_benchmark_info() const {
void LatencyBenchmark::reportBenchmarkInfo() const {
std::cout << "CPU NUMA Node: " << cpu_node_ << std::endl;
std::cout << "Memory NUMA Node: " << mem_node_ << std::endl;
std::cout << "Latency measurement chunk size: ";
@@ -168,7 +168,7 @@ void LatencyBenchmark::report_benchmark_info() const {
}
void LatencyBenchmark::report_results() const {
void LatencyBenchmark::reportResults() const {
std::cout << std::endl;
std::cout << "*** RESULTS";
std::cout << "***" << std::endl;
@@ -196,7 +196,7 @@ void LatencyBenchmark::report_results() const {
std::cout << " (WARNING)";
std::cout << std::endl;
std::cout << "25th Percentile: " << 25_percentile_metric_ << " " << metric_units_;
std::cout << "25th Percentile: " << percentile_25_metric_ << " " << metric_units_;
if (warning_)
std::cout << " (WARNING)";
std::cout << std::endl;
@@ -206,17 +206,17 @@ void LatencyBenchmark::report_results() const {
std::cout << " (WARNING)";
std::cout << std::endl;
std::cout << "75th Percentile: " << 75_percentile_metric_ << " " << metric_units_;
std::cout << "75th Percentile: " << percentile_75_metric_ << " " << metric_units_;
if (warning_)
std::cout << " (WARNING)";
std::cout << std::endl;
std::cout << "95th Percentile: " << 95_percentile_metric_ << " " << metric_units_;
std::cout << "95th Percentile: " << percentile_95_metric_ << " " << metric_units_;
if (warning_)
std::cout << " (WARNING)";
std::cout << std::endl;
std::cout << "99th Percentile: " << 99_percentile_metric_ << " " << metric_units_;
std::cout << "99th Percentile: " << percentile_99_metric_ << " " << metric_units_;
if (warning_)
std::cout << " (WARNING)";
std::cout << std::endl;
@@ -260,8 +260,8 @@ double LatencyBenchmark::getMeanLoadMetric() const {
return -1;
}
bool LatencyBenchmark::_run_core() {
size_t len_per_thread = len_ / _num_worker_threads; //Carve up memory space so each worker has its own area to play in
bool LatencyBenchmark::runCore() {
size_t len_per_thread = len_ / num_worker_threads_; //Carve up memory space so each worker has its own area to play in
//Set up latency measurement kernel function pointers
RandomFunction lat_kernel_fptr = &chasePointers;
@@ -295,7 +295,7 @@ bool LatencyBenchmark::_run_core() {
std::cerr << "ERROR: Failed to find appropriate benchmark kernel." << std::endl;
return false;
}
} else if (_pattern_mode == RANDOM) {
} else if (pattern_mode_ == RANDOM) {
if (!determineRandomKernel(rw_mode_, chunk_size_, &load_kernel_fptr_ran, &load_kernel_dummy_fptr_ran)) {
std::cerr << "ERROR: Failed to find appropriate benchmark kernel." << std::endl;
return false;
@@ -395,7 +395,7 @@ bool LatencyBenchmark::_run_core() {
tick_t load_total_elapsed_dummy_ticks = 0;
uint32_t load_bytes_per_pass = 0;
double load_avg_adjusted_ticks = 0;
for (uint32_t t = 1; t < _num_worker_threads; t++) {
for (uint32_t t = 1; t < num_worker_threads_; t++) {
load_total_passes += workers[t]->getPasses();
load_total_adjusted_ticks += workers[t]->getAdjustedTicks();
load_total_elapsed_dummy_ticks += workers[t]->getElapsedDummyTicks();
@@ -405,7 +405,7 @@ bool LatencyBenchmark::_run_core() {
//Compute load metrics for this iteration
load_avg_adjusted_ticks = static_cast<double>(load_total_adjusted_ticks) / (num_worker_threads_-1);
if (_num_worker_threads > 1)
if (num_worker_threads_ > 1)
load_metric_on_iter_[i] = (((static_cast<double>(load_total_passes) * static_cast<double>(load_bytes_per_pass)) / static_cast<double>(MB))) / ((load_avg_adjusted_ticks * g_ns_per_tick) / 1e9);
if (iterwarning)

View file

@@ -96,8 +96,8 @@ bool ThroughputBenchmark::runCore() {
//Build pointer indices. Note that the pointers for each thread must stay within its respective region, otherwise sharing may occur.
for (uint32_t i = 0; i < num_worker_threads_; i++) {
if (!buildRandomPointerPermutation(reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(_mem_array) + i*len_per_thread), //casts to silence compiler warnings
reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(_mem_array) + (i+1)*len_per_thread), //casts to silence compiler warnings
if (!buildRandomPointerPermutation(reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(mem_array_) + i*len_per_thread), //casts to silence compiler warnings
reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(mem_array_) + (i+1)*len_per_thread), //casts to silence compiler warnings
chunk_size_)) {
std::cerr << "ERROR: Failed to build a random pointer permutation for a worker thread!" << std::endl;
return false;
@@ -115,7 +115,7 @@ bool ThroughputBenchmark::runCore() {
//Start power measurement
if (g_verbose)
std::cout << "Starting power measurement threads...";
if (!_start_power_threads()) {
if (!startPowerThreads()) {
if (g_verbose)
std::cout << "FAIL" << std::endl;
std::cerr << "WARNING: Failed to start power measurement threads." << std::endl;
@@ -127,21 +127,21 @@ bool ThroughputBenchmark::runCore() {
std::cout << "Running benchmark." << std::endl << std::endl;
//Do a bunch of iterations of the core benchmark routines
for (uint32_t i = 0; i < _iterations; i++) {
for (uint32_t i = 0; i < iterations_; i++) {
//Create workers and worker threads
for (uint32_t t = 0; t < num_worker_threads_; t++) {
void* thread_mem_array = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(_mem_array) + t * len_per_thread);
int32_t cpu_id = cpu_id_in_numa_node(_cpu_node, t);
void* threadmem_array_ = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(mem_array_) + t * len_per_thread);
int32_t cpu_id = cpu_id_in_numa_node(cpu_node_, t);
if (cpu_id < 0)
std::cerr << "WARNING: Failed to find logical CPU " << t << " in NUMA node " << _cpu_node << std::endl;
std::cerr << "WARNING: Failed to find logical CPU " << t << " in NUMA node " << cpu_node_ << std::endl;
if (pattern_mode_ == SEQUENTIAL)
workers.push_back(new LoadWorker(thread_mem_array,
workers.push_back(new LoadWorker(threadmem_array_,
len_per_thread,
kernel_fptr_seq,
kernel_dummy_fptr_seq,
cpu_id));
else if (pattern_mode_ == RANDOM)
workers.push_back(new LoadWorker(thread_mem_array,
workers.push_back(new LoadWorker(threadmem_array_,
len_per_thread,
kernel_fptr_ran,
kernel_dummy_fptr_ran,
@@ -177,7 +177,7 @@ bool ThroughputBenchmark::runCore() {
avg_adjusted_ticks = total_adjusted_ticks / num_worker_threads_;
if (iter_warning)
_warning = true;
warning_ = true;
if (g_verbose ) { //Report duration for this iteration
std::cout << "Iter " << i+1 << " had " << total_passes << " passes in total across " << num_worker_threads_ << " threads, with " << bytes_per_pass << " bytes touched per pass:";

View file

@@ -80,33 +80,33 @@ DelayInjectedLoadedLatencyBenchmark::DelayInjectedLoadedLatencyBenchmark(
dram_power_readers,
name
),
__delay(delay)
delay_(delay)
{
}
void DelayInjectedLoadedLatencyBenchmark::report_benchmark_info() const {
LatencyBenchmark::report_benchmark_info();
std::cout << "Load worker kernel delay value: " << __delay << std::endl;
void DelayInjectedLoadedLatencyBenchmark::reportBenchmarkInfo() const {
LatencyBenchmark::reportBenchmarkInfo();
std::cout << "Load worker kernel delay value: " << delay_ << std::endl;
}
uint32_t DelayInjectedLoadedLatencyBenchmark::getDelay() const {
return __delay;
return delay_;
}
bool DelayInjectedLoadedLatencyBenchmark::_run_core() {
size_t len_per_thread = _len / _num_worker_threads; //Carve up memory space so each worker has its own area to play in
bool DelayInjectedLoadedLatencyBenchmark::runCore() {
size_t len_per_thread = len_ / num_worker_threads_; //Carve up memory space so each worker has its own area to play in
//Set up latency measurement kernel function pointers
RandomFunction lat_kernel_fptr = &chasePointers;
RandomFunction lat_kernel_dummy_fptr = &dummy_chasePointers;
//Initialize memory regions for all threads by writing to them, causing the memory to be physically resident.
forwSequentialWrite_Word32(_mem_array,
reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(_mem_array)+_len)); //static casts to silence compiler warnings
forwSequentialWrite_Word32(mem_array_,
reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(mem_array_)+len_)); //static casts to silence compiler warnings
//Build pointer indices for random-access latency thread. We assume that latency thread is the first one, so we use beginning of memory region.
if (!buildRandomPointerPermutation(_mem_array,
reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(_mem_array)+len_per_thread), //static casts to silence compiler warnings
if (!buildRandomPointerPermutation(mem_array_,
reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(mem_array_)+len_per_thread), //static casts to silence compiler warnings
#ifndef HAS_WORD_64 //special case: 32-bit architectures
CHUNK_32b)) {
#endif
@@ -120,10 +120,10 @@ bool DelayInjectedLoadedLatencyBenchmark::_run_core() {
//Set up load generation kernel function pointers
SequentialFunction load_kernel_fptr = NULL;
SequentialFunction load_kernel_dummy_fptr = NULL;
if (_num_worker_threads > 1) { //If we only have one worker thread, it is used for latency measurement only, and no load threads will be used.
switch (_chunk_size) {
if (num_worker_threads_ > 1) { //If we only have one worker thread, it is used for latency measurement only, and no load threads will be used.
switch (chunk_size_) {
case CHUNK_32b:
switch (__delay) {
switch (delay_) {
case 0:
load_kernel_fptr = &forwSequentialRead_Word32; //not an extended kernel
load_kernel_dummy_fptr = &dummy_forwSequentialLoop_Word32; //not an extended kernel
@@ -179,7 +179,7 @@ bool DelayInjectedLoadedLatencyBenchmark::_run_core() {
break;
#ifdef HAS_WORD_64
case CHUNK_64b:
switch (__delay) {
switch (delay_) {
case 0:
load_kernel_fptr = &forwSequentialRead_Word64; //not an extended kernel
load_kernel_dummy_fptr = &dummy_forwSequentialLoop_Word64; //not an extended kernel
@@ -236,7 +236,7 @@ bool DelayInjectedLoadedLatencyBenchmark::_run_core() {
#endif
#ifdef HAS_WORD_128
case CHUNK_128b:
switch (__delay) {
switch (delay_) {
case 0:
load_kernel_fptr = &forwSequentialRead_Word128; //not an extended kernel
load_kernel_dummy_fptr = &dummy_forwSequentialLoop_Word128; //not an extended kernel
@@ -293,7 +293,7 @@ bool DelayInjectedLoadedLatencyBenchmark::_run_core() {
#endif
#ifdef HAS_WORD_256
case CHUNK_256b:
switch (__delay) {
switch (delay_) {
case 0:
load_kernel_fptr = &forwSequentialRead_Word256; //not an extended kernel
load_kernel_dummy_fptr = &dummy_forwSequentialLoop_Word256; //not an extended kernel
@@ -362,7 +362,7 @@ bool DelayInjectedLoadedLatencyBenchmark::_run_core() {
if (g_verbose)
std::cout << "Starting power measurement threads...";
if (!_start_power_threads()) {
if (!startPowerThreads()) {
if (g_verbose)
std::cout << "FAIL" << std::endl;
std::cerr << "WARNING: Failed to start power threads." << std::endl;
@@ -374,22 +374,22 @@ bool DelayInjectedLoadedLatencyBenchmark::_run_core() {
std::cout << "Running benchmark." << std::endl << std::endl;
//Do a bunch of iterations of the core benchmark routine
for (uint32_t i = 0; i < _iterations; i++) {
for (uint32_t i = 0; i < iterations_; i++) {
//Create load workers and load worker threads
for (uint32_t t = 0; t < _num_worker_threads; t++) {
void* thread_mem_array = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(_mem_array) + t*len_per_thread);
int32_t cpu_id = cpu_id_in_numa_node(_cpu_node, t);
for (uint32_t t = 0; t < num_worker_threads_; t++) {
void* threadmem_array_ = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(mem_array_) + t*len_per_thread);
int32_t cpu_id = cpu_id_in_numa_node(cpu_node_, t);
if (cpu_id < 0)
std::cerr << "WARNING: Failed to find logical CPU " << t << " in NUMA node " << _cpu_node << std::endl;
std::cerr << "WARNING: Failed to find logical CPU " << t << " in NUMA node " << cpu_node_ << std::endl;
if (t == 0) { //special case: thread 0 is always latency thread
workers.push_back(new LatencyWorker(thread_mem_array,
workers.push_back(new LatencyWorker(threadmem_array_,
len_per_thread,
lat_kernel_fptr,
lat_kernel_dummy_fptr,
cpu_id));
} else {
workers.push_back(new LoadWorker(thread_mem_array,
workers.push_back(new LoadWorker(threadmem_array_,
len_per_thread,
load_kernel_fptr,
load_kernel_dummy_fptr,
@@ -399,16 +399,16 @@ bool DelayInjectedLoadedLatencyBenchmark::_run_core() {
}
//Start worker threads! gogogo
for (uint32_t t = 0; t < _num_worker_threads; t++)
for (uint32_t t = 0; t < num_worker_threads_; t++)
worker_threads[t]->create_and_start();
//Wait for all threads to complete
for (uint32_t t = 0; t < _num_worker_threads; t++)
for (uint32_t t = 0; t < num_worker_threads_; t++)
if (!worker_threads[t]->join())
std::cerr << "WARNING: A worker thread failed to complete correctly!" << std::endl;
//Compute metrics for this iteration
bool iter_warning = false;
bool iterwarning_ = false;
//Compute latency metric
uint32_t lat_passes = workers[0]->getPasses();
@@ -416,7 +416,7 @@ bool DelayInjectedLoadedLatencyBenchmark::_run_core() {
tick_t lat_elapsed_dummy_ticks = workers[0]->getElapsedDummyTicks();
uint32_t lat_bytes_per_pass = workers[0]->getBytesPerPass();
uint32_t lat_accesses_per_pass = lat_bytes_per_pass / 8;
iter_warning |= workers[0]->hadWarning();
iterwarning_ |= workers[0]->hadWarning();
//Compute throughput generated by load threads
uint32_t load_total_passes = 0;
@@ -424,65 +424,65 @@ bool DelayInjectedLoadedLatencyBenchmark::_run_core() {
tick_t load_total_elapsed_dummy_ticks = 0;
uint32_t load_bytes_per_pass = 0;
double load_avg_adjusted_ticks = 0;
for (uint32_t t = 1; t < _num_worker_threads; t++) {
for (uint32_t t = 1; t < num_worker_threads_; t++) {
load_total_passes += workers[t]->getPasses();
load_total_adjusted_ticks += workers[t]->getAdjustedTicks();
load_total_elapsed_dummy_ticks += workers[t]->getElapsedDummyTicks();
load_bytes_per_pass = workers[t]->getBytesPerPass(); //all should be the same.
iter_warning |= workers[t]->hadWarning();
iterwarning_ |= workers[t]->hadWarning();
}
//Compute load metrics for this iteration
load_avg_adjusted_ticks = static_cast<double>(load_total_adjusted_ticks) / (_num_worker_threads-1);
if (_num_worker_threads > 1)
_loadMetricOnIter[i] = (((static_cast<double>(load_total_passes) * static_cast<double>(load_bytes_per_pass)) / static_cast<double>(MB))) / ((load_avg_adjusted_ticks * g_ns_per_tick) / 1e9);
load_avg_adjusted_ticks = static_cast<double>(load_total_adjusted_ticks) / (num_worker_threads_-1);
if (num_worker_threads_ > 1)
load_metric_on_iter_[i] = (((static_cast<double>(load_total_passes) * static_cast<double>(load_bytes_per_pass)) / static_cast<double>(MB))) / ((load_avg_adjusted_ticks * g_ns_per_tick) / 1e9);
if (iter_warning)
_warning = true;
if (iterwarning_)
warning_ = true;
if (g_verbose) { //Report metrics for this iteration
//Latency thread
std::cout << "Iter " << i+1 << " had " << lat_passes << " latency measurement passes, with " << lat_accesses_per_pass << " accesses per pass:";
if (iter_warning) std::cout << " -- WARNING";
if (iterwarning_) std::cout << " -- WARNING";
std::cout << std::endl;
std::cout << "...lat clock ticks == " << lat_adjusted_ticks << " (adjusted by -" << lat_elapsed_dummy_ticks << ")";
if (iter_warning) std::cout << " -- WARNING";
if (iterwarning_) std::cout << " -- WARNING";
std::cout << std::endl;
std::cout << "...lat ns == " << lat_adjusted_ticks * g_ns_per_tick << " (adjusted by -" << lat_elapsed_dummy_ticks * g_ns_per_tick << ")";
if (iter_warning) std::cout << " -- WARNING";
if (iterwarning_) std::cout << " -- WARNING";
std::cout << std::endl;
std::cout << "...lat sec == " << lat_adjusted_ticks * g_ns_per_tick / 1e9 << " (adjusted by -" << lat_elapsed_dummy_ticks * g_ns_per_tick / 1e9 << ")";
if (iter_warning) std::cout << " -- WARNING";
if (iterwarning_) std::cout << " -- WARNING";
std::cout << std::endl;
//Load threads
if (_num_worker_threads > 1) {
if (num_worker_threads_ > 1) {
std::cout << "Iter " << i+1 << " had " << load_total_passes << " total load generation passes, with " << load_bytes_per_pass << " bytes per pass:";
if (iter_warning) std::cout << " -- WARNING";
if (iterwarning_) std::cout << " -- WARNING";
std::cout << std::endl;
std::cout << "...load total clock ticks across " << _num_worker_threads-1 << " threads == " << load_total_adjusted_ticks << " (adjusted by -" << load_total_elapsed_dummy_ticks << ")";
if (iter_warning) std::cout << " -- WARNING";
std::cout << "...load total clock ticks across " << num_worker_threads_-1 << " threads == " << load_total_adjusted_ticks << " (adjusted by -" << load_total_elapsed_dummy_ticks << ")";
if (iterwarning_) std::cout << " -- WARNING";
std::cout << std::endl;
std::cout << "...load total ns across " << _num_worker_threads-1 << " threads == " << load_total_adjusted_ticks * g_ns_per_tick << " (adjusted by -" << load_total_elapsed_dummy_ticks * g_ns_per_tick << ")";
if (iter_warning) std::cout << " -- WARNING";
std::cout << "...load total ns across " << num_worker_threads_-1 << " threads == " << load_total_adjusted_ticks * g_ns_per_tick << " (adjusted by -" << load_total_elapsed_dummy_ticks * g_ns_per_tick << ")";
if (iterwarning_) std::cout << " -- WARNING";
std::cout << std::endl;
std::cout << "...load total sec across " << _num_worker_threads-1 << " threads == " << load_total_adjusted_ticks * g_ns_per_tick / 1e9 << " (adjusted by -" << load_total_elapsed_dummy_ticks * g_ns_per_tick / 1e9 << ")";
if (iter_warning) std::cout << " -- WARNING";
std::cout << "...load total sec across " << num_worker_threads_-1 << " threads == " << load_total_adjusted_ticks * g_ns_per_tick / 1e9 << " (adjusted by -" << load_total_elapsed_dummy_ticks * g_ns_per_tick / 1e9 << ")";
if (iterwarning_) std::cout << " -- WARNING";
std::cout << std::endl;
}
}
//Compute overall metrics for this iteration
_metricOnIter[i] = static_cast<double>(lat_adjusted_ticks * g_ns_per_tick) / static_cast<double>(lat_accesses_per_pass * lat_passes);
metric_on_iter_[i] = static_cast<double>(lat_adjusted_ticks * g_ns_per_tick) / static_cast<double>(lat_accesses_per_pass * lat_passes);
//Clean up workers and threads for this iteration
for (uint32_t t = 0; t < _num_worker_threads; t++) {
for (uint32_t t = 0; t < num_worker_threads_; t++) {
delete worker_threads[t];
delete workers[t];
}
@@ -496,7 +496,7 @@ bool DelayInjectedLoadedLatencyBenchmark::_run_core() {
std::cout << "Stopping power measurement threads...";
}
if (!_stop_power_threads()) {
if (!stopPowerThreads()) {
if (g_verbose)
std::cout << "FAIL" << std::endl;
std::cerr << "WARNING: Failed to stop power measurement threads." << std::endl;
@@ -504,13 +504,13 @@ bool DelayInjectedLoadedLatencyBenchmark::_run_core() {
std::cout << "done" << std::endl;
//Run metadata
_hasRun = true;
has_run_ = true;
//Get mean load metrics -- these aren't part of Benchmark class thus not covered by _computeMetrics()
_computeMetrics();
for (uint32_t i = 0; i < _iterations; i++)
_meanLoadMetric += _loadMetricOnIter[i];
_meanLoadMetric /= static_cast<double>(_iterations);
//Get mean load metrics -- these aren't part of Benchmark class thus not covered by computeMetrics()
computeMetrics();
for (uint32_t i = 0; i < iterations_; i++)
mean_load_metric_ += load_metric_on_iter_[i];
mean_load_metric_ /= static_cast<double>(iterations_);
return true;
}

View file

@@ -319,11 +319,11 @@ namespace xmem {
std::vector<double> metric_on_iter_; /**< Metrics for each iteration of the benchmark. Unit-less because any benchmark can set this metric as needed. It is up to the descendant class to interpret units. */
double mean_metric_; /**< Average metric over all iterations. Unit-less because any benchmark can set this metric as needed. It is up to the descendant class to interpret units. */
double min_metric_; /**< Minimum metric over all iterations. Unit-less because any benchmark can set this metric as needed. It is up to the descendant class to interpret units. */
double 25_percentile_metric_; /**< 25th percentile metric over all iterations. Unit-less because any benchmark can set this metric as needed. It is up to the descendant class to interpret units. */
double percentile_25_metric_; /**< 25th percentile metric over all iterations. Unit-less because any benchmark can set this metric as needed. It is up to the descendant class to interpret units. */
double median_metric_; /**< Median metric over all iterations. Unit-less because any benchmark can set this metric as needed. It is up to the descendant class to interpret units. */
double 75_percentile_metric_; /**< 75th percentile metric over all iterations. Unit-less because any benchmark can set this metric as needed. It is up to the descendant class to interpret units. */
double 95_percentile_metric_; /**< 95th percentile metric over all iterations. Unit-less because any benchmark can set this metric as needed. It is up to the descendant class to interpret units. */
double 99_percentile_metric_; /**< 99th percentile metric over all iterations. Unit-less because any benchmark can set this metric as needed. It is up to the descendant class to interpret units. */
double percentile_75_metric_; /**< 75th percentile metric over all iterations. Unit-less because any benchmark can set this metric as needed. It is up to the descendant class to interpret units. */
double percentile_95_metric_; /**< 95th percentile metric over all iterations. Unit-less because any benchmark can set this metric as needed. It is up to the descendant class to interpret units. */
double percentile_99_metric_; /**< 99th percentile metric over all iterations. Unit-less because any benchmark can set this metric as needed. It is up to the descendant class to interpret units. */
double max_metric_; /**< Maximum metric over all iterations. Unit-less because any benchmark can set this metric as needed. It is up to the descendant class to interpret units. */
double mode_metric_; /**< Mode metric over all iterations. Unit-less because any benchmark can set this metric as needed. It is up to the descendant class to interpret units. */
std::string metric_units_; /**< String representing the units of measurement for the metric. */

View file

@@ -31,8 +31,8 @@
#ifdef EXT_DELAY_INJECTED_LOADED_LATENCY_BENCHMARK
#ifndef __DELAY_INJECTED_LOADED_LATENCY_BENCHMARK_H
#define __DELAY_INJECTED_LOADED_LATENCY_BENCHMARK_H
#ifndef DELAY_INJECTED_LOADED_LATENCY_BENCHMARK_H
#define DELAY_INJECTED_LOADED_LATENCY_BENCHMARK_H
//Headers
#include <LatencyBenchmark.h>
@@ -74,7 +74,7 @@ namespace xmem {
/**
* @brief Reports benchmark configuration details to the console.
*/
virtual void report_benchmark_info() const;
virtual void reportBenchmarkInfo() const;
/**
* @brief Gets the delay injection used in load thread kernels. A delay of 5 corresponds to 5 nop instructions.
@@ -83,10 +83,10 @@ namespace xmem {
uint32_t getDelay() const;
protected:
virtual bool _run_core();
virtual bool runCore();
private:
uint32_t __delay; /**< Number of nops to insert between load thread memory instructions. This is a form of delay injection to reduce memory loading. */
uint32_t delay_; /**< Number of nops to insert between load thread memory instructions. This is a form of delay injection to reduce memory loading. */
};
};

View file

@@ -105,6 +105,7 @@ void xmem::print_welcome_message() {
* @brief The main entry point to the program.
*/
int main(int argc, char* argv[]) {
bool configSuccess = false;
try {
init_globals();
print_welcome_message();
@@ -117,7 +118,7 @@ int main(int argc, char* argv[]) {
//Configure runtime based on user inputs
Configurator config;
bool configSuccess = !config.configureFromInput(argc, argv);
configSuccess = !config.configureFromInput(argc, argv);
if (configSuccess) {
if (g_verbose) {
@@ -176,5 +177,8 @@ int main(int argc, char* argv[]) {
return EXIT_FAILURE;
}
(configSuccess) ? return EXIT_SUCCESS : return EXIT_FAILURE;
if (configSuccess)
return EXIT_SUCCESS;
else
return EXIT_FAILURE;
}
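The final hunk also fixes the program's exit path: the removed line used return inside a conditional expression, which is not valid C++, so the commit replaces it with an explicit if/else. An equivalent single-statement form, shown here only as a sketch with a hypothetical helper, moves the ternary inside the return:

#include <cstdlib>

// Sketch only: equivalent to the if/else added in the diff. The ternary
// selects a value and the single return statement uses it, unlike the
// removed "(configSuccess) ? return EXIT_SUCCESS : return EXIT_FAILURE;".
int exitCode(bool configSuccess) {
    return configSuccess ? EXIT_SUCCESS : EXIT_FAILURE;
}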