Porting the init sequence for querying system information to Linux. Right now it is in rough, band-aid shape. Also, on Linux the program segfaults in ThroughputBenchmark and crashes with an illegal-instruction error in LatencyBenchmark.

This commit is contained in:
Mark Gottscho 2015-01-17 16:21:41 -08:00
Родитель 439f11eec8
Коммит aaf39cbd5b
7 изменённых файлов: 977 добавлений и 882 удалений

Просмотреть файл

@ -9,7 +9,7 @@ env = Environment()
# Customize build settings
# LINUX
env.Append(CPPFLAGS = '-Wall -g -O3 -std=c++11 -mavx')
env.Append(CPPFLAGS = '-Wall -g -O3 -std=c++11 -mavx -mavx2')
env.Append(CPPPATH = ['src/include', '/usr/include'])
env.Append(LIBS = ['pthread', 'numa'])

Просмотреть файл

@ -52,10 +52,17 @@
namespace xmem {
namespace common {
size_t g_page_size; /**< Default page size on the system, in bytes. */
#ifdef USE_LARGE_PAGES
size_t g_large_page_size; /**< Large page size on the system, in bytes. */
#endif
uint32_t g_num_nodes; /**< Number of NUMA nodes in the system. */
uint32_t g_num_logical_cpus; /**< Number of logical CPU cores in the system. */
uint32_t g_num_logical_cpus; /**< Number of logical CPU cores in the system. This may be different than physical CPUs, e.g. Intel hyperthreading. */
uint32_t g_num_physical_cpus; /**< Number of physical CPU cores in the system. */
uint32_t g_num_physical_packages; /**< Number of physical CPU packages in the system. Generally this is the same as number of NUMA nodes, unless UMA emulation is done in hardware. */
uint32_t g_total_l1_caches; /**< Total number of L1 caches in the system. */
uint32_t g_total_l2_caches; /**< Total number of L2 caches in the system. */
uint32_t g_total_l3_caches; /**< Total number of L3 caches in the system. */
uint32_t g_total_l4_caches; /**< Total number of L4 caches in the system. */
uint32_t g_starting_test_index; /**< Numeric identifier for the first benchmark test. */
uint32_t g_test_index; /**< Numeric identifier for the current benchmark test. */
};
@ -110,6 +117,9 @@ void xmem::common::print_compile_time_options() {
#ifdef _WIN64
std::cout << "Win64" << std::endl;
#endif
#ifdef __unix__
std::cout << "Unix" << std::endl;
#endif
#ifdef ARCH_INTEL_X86
std::cout << "ARCH_INTEL_X86" << std::endl;
#endif
@ -382,3 +392,69 @@ size_t xmem::common::compute_number_of_passes(size_t working_set_size_KB) {
passes = 1;
return passes;
}
/**
 * @brief Sets up the global system-information variables at runtime.
 *
 * Every global is first assigned its compile-time DEFAULT_* value. On Unix builds the
 * values are then overwritten with what libnuma and sysconf() report. Any sysconf()
 * query that fails leaves the corresponding default in place instead of storing the
 * -1 error code into an unsigned global.
 *
 * On Unix builds the process is terminated with exit(-1) if the NUMA API is unavailable.
 *
 * @returns 0 on success.
 */
int32_t xmem::common::query_sys_info() {
#ifdef VERBOSE
    std::cout << std::endl;
    std::cout << "Initializing default system information...";
#endif
    //Initialize to defaults. These remain in effect for anything that cannot be queried below.
    g_num_nodes = DEFAULT_NUM_NODES;
    g_num_physical_packages = DEFAULT_NUM_PHYSICAL_PACKAGES;
    g_num_physical_cpus = DEFAULT_NUM_PHYSICAL_CPUS;
    g_num_logical_cpus = DEFAULT_NUM_LOGICAL_CPUS;
    g_total_l1_caches = DEFAULT_NUM_L1_CACHES;
    g_total_l2_caches = DEFAULT_NUM_L2_CACHES;
    g_total_l3_caches = DEFAULT_NUM_L3_CACHES;
    g_total_l4_caches = DEFAULT_NUM_L4_CACHES;
    g_page_size = DEFAULT_PAGE_SIZE;
#ifdef USE_LARGE_PAGES
    g_large_page_size = DEFAULT_LARGE_PAGE_SIZE;
#endif

#ifdef VERBOSE
    std::cout << "done" << std::endl;
    std::cout << "Querying system information...";
#endif

#ifdef _WIN32
    //TODO: refactor from win_common_third_party.cpp
#endif

#ifdef __unix__
    //Check that NUMA is available. Other libnuma calls are undefined if this fails.
    if (numa_available() == -1) {
        std::cout << "FATAL: NUMA API is not available on this system." << std::endl;
        exit(-1);
    }

    g_num_nodes = static_cast<uint32_t>(numa_max_node() + 1);
    g_num_physical_packages = g_num_nodes; //FIXME: this is totally a bandaid

    //FIXME: this isn't really portable -- requires glibc extensions to sysconf()
    //sysconf() returns -1 on failure; only accept a positive count so the unsigned global never wraps around.
    const long online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
    if (online_cpus > 0) {
        g_num_logical_cpus = static_cast<uint32_t>(online_cpus);
    }

    //FIXME: this is totally a bandaid and assumes something like Intel HyperThreading
    g_num_physical_cpus = g_num_logical_cpus / 2;
    if (g_num_physical_cpus == 0) {
        g_num_physical_cpus = 1; //A machine with a single logical CPU still has one physical core.
    }

    g_total_l1_caches = g_num_physical_cpus; //FIXME: this is totally a bandaid
    g_total_l2_caches = g_num_physical_cpus; //FIXME: this is totally a bandaid
    g_total_l3_caches = 1; //FIXME: this is totally a bandaid
    g_total_l4_caches = 0; //FIXME: this is totally a bandaid

    //Keep the default page size if the query fails rather than storing a wrapped-around -1.
    const long queried_page_size = sysconf(_SC_PAGESIZE);
    if (queried_page_size > 0) {
        g_page_size = static_cast<size_t>(queried_page_size);
    }
#ifdef USE_LARGE_PAGES
    //g_large_page_size = //FIXME: implement
#endif
#endif

#ifdef VERBOSE
    std::cout << "done" << std::endl;
    std::cout << "Number of NUMA nodes: " << g_num_nodes << std::endl;
    std::cout << "Number of physical processor packages: " << g_num_physical_packages << std::endl;
    std::cout << "Number of physical processor cores: " << g_num_physical_cpus << std::endl;
    std::cout << "Number of logical processor cores: " << g_num_logical_cpus << std::endl;
    std::cout << "Number of processor L1/L2/L3/L4 caches: " << g_total_l1_caches << "/" << g_total_l2_caches << "/" << g_total_l3_caches << "/" << g_total_l4_caches << std::endl;
#ifdef USE_LARGE_PAGES
    std::cout << "(Large) page size to be used for benchmarks: " << g_large_page_size << " B" << std::endl;
#else
    std::cout << "(Regular) page size to be used for benchmarks: " << g_page_size << " B" << std::endl;
#endif
#endif

    return 0;
}

Просмотреть файл

@ -129,11 +129,18 @@ namespace xmem {
#define GB_4 4294967296
//Default compile-time constants
#define DEFAULT_PAGE_SIZE 4096 /**< Default platform page size in bytes. This generally should not be relied on, but is a failsafe. */
#define DEFAULT_PAGE_SIZE 4*KB /**< Default platform page size in bytes. This generally should not be relied on, but is a failsafe. */
#define DEFAULT_LARGE_PAGE_SIZE 2*MB /**< Default platform large page size in bytes. This generally should not be relied on, but is a failsafe. */
#define DEFAULT_WORKING_SET_SIZE DEFAULT_PAGE_SIZE /**< Default working set size in bytes. */
#define DEFAULT_NUM_CPUS 1 /**< Default number of logical CPU cores. */
#define DEFAULT_NUM_NODES 1 /**< Default number of NUMA nodes. */
#define DEFAULT_THREAD_JOIN_TIMEOUT 600000 /**< Default number of milliseconds to wait for a thread to join. Negative values mean indefinite wait. */
#define DEFAULT_NUM_PHYSICAL_PACKAGES 1 /**< Default number of physical packages. */
#define DEFAULT_NUM_PHYSICAL_CPUS 1 /**< Default number of physical CPU cores. */
#define DEFAULT_NUM_LOGICAL_CPUS 1 /**< Default number of logical CPU cores. */
#define DEFAULT_NUM_L1_CACHES 0 /**< Default number of L1 caches. */
#define DEFAULT_NUM_L2_CACHES 0 /**< Default number of L2 caches. */
#define DEFAULT_NUM_L3_CACHES 0 /**< Default number of L3 caches. */
#define DEFAULT_NUM_L4_CACHES 0 /**< Default number of L4 caches. */
#define DEFAULT_THREAD_JOIN_TIMEOUT 600000 /**< Default number of milliseconds to wait for a thread to join. Negative values mean indefinite wait. TODO: remove this */
#define MIN_ELAPSED_TICKS 10000 /**< If any routine measured fewer than this number of ticks its results should be viewed with suspicion. This is because the latency of the timer itself will matter. */
@ -202,9 +209,12 @@ namespace xmem {
#ifdef ARCH_INTEL_X86_64 //DO NOT COMMENT THIS OUT
//#define USE_CHUNK_64b /**< RECOMMENDED DISABLED. Use 64-bit chunks. */
//#define USE_CHUNK_128b /**< RECOMMENDED DISABLED. Use 128-bit chunks. x86-64 processors with SSE only. TODO: Not yet implemented. */
#ifdef ARCH_INTEL_X86_64_AVX
#ifdef ARCH_INTEL_X86_64_AVX //TODO: Is this supposed to be AVX2 instead of AVX?
#define USE_CHUNK_256b /**< RECOMMENDED ENABLED. Use 256-bit chunks. x86-64 processors only with AVX ISA extensions. */
#endif
#ifdef ARCH_INTEL_X86_64_AVX512
//#define USE_CHUNK_512b /**< TODO. Not yet implemented. */
#endif
#endif //DO NOT COMMENT THIS OUT
//Throughput benchmark access patterns
@ -447,6 +457,12 @@ namespace xmem {
* @returns False if the default value has to be used because the appropriate values could not be queried successfully from the OS.
*/
bool config_page_size();
/**
* @brief Sets up global variables based on system information at runtime.
*
* The globals are first assigned their compile-time DEFAULT_* values and are then overwritten with values queried from the operating system where a platform-specific implementation exists.
* On Unix builds the process is terminated via exit() if the NUMA API is reported as unavailable.
*
* @returns 0 on success.
*/
int32_t query_sys_info();
};
};

Просмотреть файл

@ -39,7 +39,7 @@
#include <BenchmarkManager.h>
#ifdef _WIN32
//FIXME
//FIXME. Clean this up.
#include <win/win_common.h>
#include <win/win_common_third_party.h>
#endif
@ -60,13 +60,16 @@ int main(int argc, char* argv[]) {
common::print_compile_time_options();
#endif
//FIXME. Clean this up.
#ifdef _WIN32
//FIXME
if (common::win::third_party::query_sys_info()) {
#endif
#ifdef __unix__
if (common::query_sys_info()) {
#endif
std::cerr << "ERROR occurred while querying CPU information." << std::endl;
return -1;
}
#endif
config::Configurator config;
bool configSuccess = !config.configureFromInput(argc, argv);