Finished converting all int64_t and uint64_t to their 32-bit versions to improve cross-platform compatibility later. I do not yet see any issues from this. Also, the benchmark kernel code is now officially very messy due to tons of OS and architectural dependencies and preprocessor #defines :(

2015-04-21 18:44:51 -07:00 · 2015-04-21 18:44:51 -07:00 · a425635999
--- a/2
+++ b/2
@ -38,7 +38,7 @@ PROJECT_NAME           = X-Mem
 # could be handy for archiving the generated documentation or if some version
 # control system is used.

-PROJECT_NUMBER         = 2.1.9
+PROJECT_NUMBER         = 2.1.10

 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
--- a/README.md
+++ b/README.md
@ -1,7 +1,7 @@
 README
 ------------------------------------------------------------------------------------------------------------

-X-Mem: Extensible Memory Benchmarking Tool v2.1.9
+X-Mem: Extensible Memory Benchmarking Tool v2.1.10
 ------------------------------------------------------------------------------------------------------------

 The flexible open-source research tool for characterizing memory hierarchy throughput, latency, and power. 
@ -10,7 +10,7 @@ Originally authored by Mark Gottscho (Email: <mgottscho@ucla.edu>) as a Summer 2

 This project is under active development. Stay tuned for more updates.

-PROJECT REVISION DATE: April 16, 2015.
+PROJECT REVISION DATE: April 21, 2015.

 ------------------------------------------------------------------------------------------------------------
 LICENSE
--- a/X-Mem_Developer_Manual.pdf
+++ b/X-Mem_Developer_Manual.pdf
--- a/src/LatencyWorker.cpp
+++ b/src/LatencyWorker.cpp
@ -50,7 +50,7 @@ LatencyWorker::LatencyWorker(
 		void* mem_array,
 		size_t len,
 	#ifdef USE_SIZE_BASED_BENCHMARKS
-		uint64_t passes_per_iteration,
+		uint32_t passes_per_iteration,
 	#endif
 		RandomFunction kernel_fptr,
 		RandomFunction kernel_dummy_fptr,
@ -127,7 +127,7 @@ void LatencyWorker::run() {
 		std::cerr << "WARNING: Failed to boost scheduling priority. Perhaps running in Administrator mode would help." << std::endl;

 	//Prime memory
-	for (uint64_t i = 0; i < 4; i++) {
+	for (uint32_t i = 0; i < 4; i++) {
 		void* prime_start_address = mem_array; 
 		void* prime_end_address = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(mem_array) + len);
 		forwSequentialRead_Word64(prime_start_address, prime_end_address); //dependent reads on the memory, make sure caches are ready, coherence, etc...
--- a/src/LoadWorker.cpp
+++ b/src/LoadWorker.cpp
@ -183,7 +183,7 @@ void LoadWorker::run() {
 			start_tick = start_timer();
 			UNROLL1024(
 				(*kernel_fptr_seq)(start_address, end_address);
-				start_address = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(mem_array)+(reinterpret_cast<uint64_t>(start_address)+bytes_per_pass) % len);
+				start_address = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(mem_array)+(reinterpret_cast<uintptr_t>(start_address)+bytes_per_pass) % len);
 				end_address = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(start_address) + bytes_per_pass);
 			)
 			stop_tick = stop_timer();
@ -207,7 +207,7 @@ void LoadWorker::run() {
 			start_tick = start_timer();
 			UNROLL1024(
 				(*kernel_dummy_fptr_seq)(start_address, end_address);
-				start_address = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(mem_array)+(reinterpret_cast<uint64_t>(start_address)+bytes_per_pass) % len);
+				start_address = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(mem_array)+(reinterpret_cast<uintptr_t>(start_address)+bytes_per_pass) % len);
 				end_address = reinterpret_cast<void*>(reinterpret_cast<uint8_t*>(start_address) + bytes_per_pass);
 			)
 			stop_tick = stop_timer();
--- a/src/README.md
+++ b/src/README.md
@ -1,7 +1,7 @@
 README
 ------------------------------------------------------------------------------------------------------------

-X-Mem: Extensible Memory Benchmarking Tool v2.1.9
+X-Mem: Extensible Memory Benchmarking Tool v2.1.10
 ------------------------------------------------------------------------------------------------------------

 The flexible open-source research tool for characterizing memory hierarchy throughput, latency, and power. 
@ -10,7 +10,7 @@ Originally authored by Mark Gottscho (Email: <mgottscho@ucla.edu>) as a Summer 2

 This project is under active development. Stay tuned for more updates.

-PROJECT REVISION DATE: April 16, 2015.
+PROJECT REVISION DATE: April 21, 2015.

 ------------------------------------------------------------------------------------------------------------
 LICENSE
--- a/src/Timer.cpp
+++ b/src/Timer.cpp
@ -46,13 +46,7 @@ Timer::Timer() :
 	_ticks_per_ms(0),
 	_ns_per_tick(0)
 {	
-#ifdef ARCH_64BIT
-	uint64_t
-#else
-	uint32_t
-#endif
-	start_tick, stop_tick;
-
+	tick_t start_tick, stop_tick;
 	
 	start_tick = start_timer();
 #ifdef _WIN32
--- a/src/benchmark_kernels.cpp
+++ b/src/benchmark_kernels.cpp
--- a/src/ext/DelayInjectedLoadedLatencyBenchmark/DelayInjectedLoadedLatencyBenchmark.cpp
+++ b/src/ext/DelayInjectedLoadedLatencyBenchmark/DelayInjectedLoadedLatencyBenchmark.cpp
@ -57,7 +57,7 @@ DelayInjectedLoadedLatencyBenchmark::DelayInjectedLoadedLatencyBenchmark(
 		size_t len,
 		uint32_t iterations,
 #ifdef USE_SIZE_BASED_BENCHMARKS
-		uint64_t passes_per_iteration,
+		uint32_t passes_per_iteration,
 #endif
 		uint32_t num_worker_threads,
 		uint32_t mem_node,
@ -300,18 +300,18 @@ bool DelayInjectedLoadedLatencyBenchmark::_run_core() {
 		bool iter_warning = false;

 		//Compute latency metric
-		uint64_t lat_passes = workers[0]->getPasses();	
-		uint64_t lat_adjusted_ticks = workers[0]->getAdjustedTicks();
-		uint64_t lat_elapsed_dummy_ticks = workers[0]->getElapsedDummyTicks();
-		uint64_t lat_bytes_per_pass = workers[0]->getBytesPerPass();
-		uint64_t lat_accesses_per_pass = lat_bytes_per_pass / 8;
+		uint32_t lat_passes = workers[0]->getPasses();	
+		tick_t lat_adjusted_ticks = workers[0]->getAdjustedTicks();
+		tick_t lat_elapsed_dummy_ticks = workers[0]->getElapsedDummyTicks();
+		uint32_t lat_bytes_per_pass = workers[0]->getBytesPerPass();
+		uint32_t lat_accesses_per_pass = lat_bytes_per_pass / 8;
 		iter_warning |= workers[0]->hadWarning();
 		
 		//Compute throughput generated by load threads
-		uint64_t load_total_passes = 0;
-		uint64_t load_total_adjusted_ticks = 0;
-		uint64_t load_total_elapsed_dummy_ticks = 0;
-		uint64_t load_bytes_per_pass = 0;
+		uint32_t load_total_passes = 0;
+		tick_t load_total_adjusted_ticks = 0;
+		tick_t load_total_elapsed_dummy_ticks = 0;
+		uint32_t load_bytes_per_pass = 0;
 		double load_avg_adjusted_ticks = 0;
 		for (uint32_t t = 1; t < _num_worker_threads; t++) {
 			load_total_passes += workers[t]->getPasses();
--- a/src/include/LatencyWorker.h
+++ b/src/include/LatencyWorker.h
@ -54,7 +54,7 @@ namespace xmem {
 				void* mem_array,
 				size_t len,
 #ifdef USE_SIZE_BASED_BENCHMARKS
-				uint64_t passes_per_iteration,
+				uint32_t passes_per_iteration,
 #endif
 				RandomFunction kernel_fptr,
 				RandomFunction kernel_dummy_fptr,
--- a/src/include/common.h
+++ b/src/include/common.h
@ -45,7 +45,7 @@

 namespace xmem {

-#define VERSION "2.1.9"
+#define VERSION "2.1.10"

 #if !defined(_WIN32) && !defined(__gnu_linux__)
 #error Neither Windows/GNULinux build environments were detected!
--- a/src/include/ext/DelayInjectedLoadedLatencyBenchmark/DelayInjectedLoadedLatencyBenchmark.h
+++ b/src/include/ext/DelayInjectedLoadedLatencyBenchmark/DelayInjectedLoadedLatencyBenchmark.h
@ -56,7 +56,7 @@ namespace xmem {
 			size_t len,
 			uint32_t iterations,
 #ifdef USE_SIZE_BASED_BENCHMARKS
-			uint64_t passes_per_iteration,
+			uint32_t passes_per_iteration,
 #endif
 			uint32_t num_worker_threads,
 			uint32_t mem_node,
--- a/src/win/WindowsDRAMPowerReader.cpp
+++ b/src/win/WindowsDRAMPowerReader.cpp
@ -100,8 +100,8 @@ void WindowsDRAMPowerReader::run() {
 			}

 			calculateMetrics();
-			uint64_t stop_tick = stop_timer();
-			uint64_t elapsed_ticks = stop_tick - start_tick;
+			tick_t stop_tick = stop_timer();
+			tick_t elapsed_ticks = stop_tick - start_tick;
 			Sleep(static_cast<DWORD>(_sampling_period - elapsed_ticks*g_ns_per_tick*1e-9*1000)); //Account for any loop overhead
 		}
 	}