Changes to ARM build. It can now do 64-bit and 128-bit operations. Still need to do a Windows ARM build and validate generated assembly code.

This commit is contained in:
Mark Gottscho 2015-04-24 16:18:54 -07:00
Родитель ff9f4bcc86
Коммит 7cad48d089
11 изменённых файлов: 25 добавлений и 14 удалений

Просмотреть файл

@ -38,7 +38,7 @@ PROJECT_NAME = X-Mem
# could be handy for archiving the generated documentation or if some version
# control system is used.
PROJECT_NUMBER = 2.1.15
PROJECT_NUMBER = 2.1.16
# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a

Просмотреть файл

@ -1,7 +1,7 @@
README
------------------------------------------------------------------------------------------------------------
X-Mem: Extensible Memory Benchmarking Tool v2.1.15
X-Mem: Extensible Memory Benchmarking Tool v2.1.16
------------------------------------------------------------------------------------------------------------
The flexible open-source research tool for characterizing memory hierarchy throughput, latency, and power.
@ -10,7 +10,7 @@ Originally authored by Mark Gottscho (Email: <mgottscho@ucla.edu>) as a Summer 2
This project is under active development. Stay tuned for more updates.
PROJECT REVISION DATE: April 23, 2015.
PROJECT REVISION DATE: April 24, 2015.
------------------------------------------------------------------------------------------------------------
LICENSE

Просмотреть файл

@ -9,7 +9,7 @@ env = Environment(CXX = "arm-linux-gnueabihf-g++-4.8")
# Customize build settings
# LINUX
env.Append(CPPFLAGS = '-Wall -Wno-unused-but-set-variable -Wno-unused-variable -g -O3 -std=c++11 -fabi-version=6')
# Hard-float ABI with NEON. Only one -mfpu value takes effect (the last one given to GCC),
# so a single -mfpu=neon is passed rather than a vfpv3/neon pair.
env.Append(CPPFLAGS = '-Wall -Wno-unused-but-set-variable -Wno-unused-variable -g -O3 -std=c++11 -fabi-version=6 -mfloat-abi=hard -mfpu=neon')
env.Append(CPPPATH = ['src/include'])
env.Append(CPPPATH = ['src/include/ext/DelayInjectedLoadedLatencyBenchmark']) # Extension: Delay-injected loaded latency benchmark
env.Append(CPPPATH = ['src/include/ext/StreamBenchmark']) # Extension: Stream benchmark

Двоичные данные
X-Mem_Developer_Manual.pdf

Двоичный файл не отображается.

Двоичные данные
bin/xmem-linux-arm

Двоичный файл не отображается.

Двоичные данные
bin/xmem-linux-x64

Двоичный файл не отображается.

Двоичные данные
bin/xmem-linux-x64_avx

Двоичный файл не отображается.

Двоичные данные
bin/xmem-linux-x86

Двоичный файл не отображается.

Просмотреть файл

@ -1,7 +1,7 @@
README
------------------------------------------------------------------------------------------------------------
X-Mem: Extensible Memory Benchmarking Tool v2.1.15
X-Mem: Extensible Memory Benchmarking Tool v2.1.16
------------------------------------------------------------------------------------------------------------
The flexible open-source research tool for characterizing memory hierarchy throughput, latency, and power.
@ -10,7 +10,7 @@ Originally authored by Mark Gottscho (Email: <mgottscho@ucla.edu>) as a Summer 2
This project is under active development. Stay tuned for more updates.
PROJECT REVISION DATE: April 23, 2015.
PROJECT REVISION DATE: April 24, 2015.
------------------------------------------------------------------------------------------------------------
LICENSE

Просмотреть файл

@ -51,6 +51,10 @@
#include <smmintrin.h>
#endif
#if defined(ARCH_ARM) && defined(ARCH_ARM_NEON)
#include <arm_neon.h>
#endif
using namespace xmem;
#if defined(__gnu_linux__) && defined(ARCH_INTEL_X86_64) && (defined(HAS_WORD_128) || defined(HAS_WORD_256))
@ -65,7 +69,12 @@ using namespace xmem;
#define my_64b_extractLSB_256b(w) _mm256_extract_epi64(w, 0) //AVX intrinsic. NOTE(review): appears to have no single corresponding instruction (the compiler emits a short sequence) -- confirm against the Intel Intrinsics Guide. Header: immintrin.h
#endif
//TODO: ARM intrinsics?
#if defined(ARCH_ARM) && defined(ARCH_ARM_NEON)
/* NEON helpers for building and reading 128-bit words. Header: arm_neon.h
 *
 * The 128-bit word is expected to be a uint64x2_t.
 * NOTE(review): a and b are assumed to be 64-bit scalars, and w a uint64x2_t -- confirm against callers.
 * NOTE(review): a lands in the low 64-bit lane and b in the high lane; confirm this matches the
 * operand order callers expect from the non-ARM variant of this macro.
 */
#define my_64b_set_128b_word(a, b) vcombine_u64(vcreate_u64(a), vcreate_u64(b)) //vcreate_u64 wraps each scalar in a 64-bit vector; vcombine_u64 joins (low, high) halves
//vget_low_* would return the lower 64-bit *vector* half, not a scalar, so lane 0 is extracted explicitly instead.
#define my_32b_extractLSB_128b(w) vgetq_lane_u32(vreinterpretq_u32_u64(w), 0) //NEON intrinsic, corresponds to "vmov" instruction. Header: arm_neon.h
#define my_64b_extractLSB_128b(w) vgetq_lane_u64(w, 0) //NEON intrinsic, corresponds to "vmov" instruction. Header: arm_neon.h
#endif
#if defined(_WIN32) && defined(ARCH_INTEL_X86_64)
/* Hand-coded assembly functions for the 128-bit and 256-bit benchmark kernels on Windows x86-64 where applicable.

Просмотреть файл

@ -43,9 +43,13 @@
#include <immintrin.h> //for Intel __m256i datatype
#endif
#if defined(__arm__) || defined(_M_ARM)
#include <arm_neon.h> //For ARM uint64x2_t datatype
#endif
namespace xmem {
#define VERSION "2.1.15"
#define VERSION "2.1.16"
#if !defined(_WIN32) && !defined(__gnu_linux__)
#error Neither Windows/GNULinux build environments were detected!
@ -331,10 +335,8 @@ namespace xmem {
extern double g_ns_per_tick;
//Typedef the platform specific stuff to word sizes to match 4 different chunk options
//TODO: better way to detect 64-bit ARM other than NEON extensions?
#if defined(ARCH_64BIT) || defined(ARCH_ARM_NEON)
#define HAS_WORD_64
#endif
#define HAS_WORD_64 //For now, assume this is always available, even on native 32-bit architectures. The compiler will emulate 64-bit operations.
#if defined(ARCH_INTEL_AVX) || defined(ARCH_ARM_NEON)
#define HAS_WORD_128
#endif
@ -350,8 +352,8 @@ namespace xmem {
#ifdef ARCH_INTEL
typedef __m128i Word128_t;
#endif
#ifdef ARCH_ARM
#error TODO: Implement for ARM
#if defined(ARCH_ARM) && defined(ARCH_ARM_NEON)
typedef uint64x2_t Word128_t;
#endif
#endif
#ifdef HAS_WORD_256