зеркало из https://github.com/microsoft/X-Mem.git
Changes to ARM build. It can now do 64-bit and 128-bit operations. Still need to do a Windows ARM build and validate generated assembly code.
This commit is contained in:
Родитель
ff9f4bcc86
Коммит
7cad48d089
2
Doxyfile
2
Doxyfile
|
@ -38,7 +38,7 @@ PROJECT_NAME = X-Mem
|
|||
# could be handy for archiving the generated documentation or if some version
|
||||
# control system is used.
|
||||
|
||||
PROJECT_NUMBER = 2.1.15
|
||||
PROJECT_NUMBER = 2.1.16
|
||||
|
||||
# Using the PROJECT_BRIEF tag one can provide an optional one line description
|
||||
# for a project that appears at the top of each page and should give viewer a
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
README
|
||||
------------------------------------------------------------------------------------------------------------
|
||||
|
||||
X-Mem: Extensible Memory Benchmarking Tool v2.1.15
|
||||
X-Mem: Extensible Memory Benchmarking Tool v2.1.16
|
||||
------------------------------------------------------------------------------------------------------------
|
||||
|
||||
The flexible open-source research tool for characterizing memory hierarchy throughput, latency, and power.
|
||||
|
@ -10,7 +10,7 @@ Originally authored by Mark Gottscho (Email: <mgottscho@ucla.edu>) as a Summer 2
|
|||
|
||||
This project is under active development. Stay tuned for more updates.
|
||||
|
||||
PROJECT REVISION DATE: April 23, 2015.
|
||||
PROJECT REVISION DATE: April 24, 2015.
|
||||
|
||||
------------------------------------------------------------------------------------------------------------
|
||||
LICENSE
|
||||
|
|
|
@ -9,7 +9,7 @@ env = Environment(CXX = "arm-linux-gnueabihf-g++-4.8")
|
|||
# Customize build settings
|
||||
|
||||
# LINUX
|
||||
env.Append(CPPFLAGS = '-Wall -Wno-unused-but-set-variable -Wno-unused-variable -g -O3 -std=c++11 -fabi-version=6')
|
||||
# Compiler flags for the ARM Linux build: hard-float ABI with the NEON FPU.
# Only one -mfpu option is passed: when several are given the last one takes
# effect, so the former "-mfpu=vfpv3" was overridden by "-mfpu=neon" anyway.
env.Append(CPPFLAGS = '-Wall -Wno-unused-but-set-variable -Wno-unused-variable -g -O3 -std=c++11 -fabi-version=6 -mfloat-abi=hard -mfpu=neon')
|
||||
env.Append(CPPPATH = ['src/include'])
|
||||
env.Append(CPPPATH = ['src/include/ext/DelayInjectedLoadedLatencyBenchmark']) # Extension: Delay-injected loaded latency benchmark
|
||||
env.Append(CPPPATH = ['src/include/ext/StreamBenchmark']) # Extension: Stream benchmark
|
||||
|
|
Двоичные данные
X-Mem_Developer_Manual.pdf
Двоичные данные
X-Mem_Developer_Manual.pdf
Двоичный файл не отображается.
Двоичные данные
bin/xmem-linux-arm
Двоичные данные
bin/xmem-linux-arm
Двоичный файл не отображается.
Двоичные данные
bin/xmem-linux-x64
Двоичные данные
bin/xmem-linux-x64
Двоичный файл не отображается.
Двоичные данные
bin/xmem-linux-x64_avx
Двоичные данные
bin/xmem-linux-x64_avx
Двоичный файл не отображается.
Двоичные данные
bin/xmem-linux-x86
Двоичные данные
bin/xmem-linux-x86
Двоичный файл не отображается.
|
@ -1,7 +1,7 @@
|
|||
README
|
||||
------------------------------------------------------------------------------------------------------------
|
||||
|
||||
X-Mem: Extensible Memory Benchmarking Tool v2.1.15
|
||||
X-Mem: Extensible Memory Benchmarking Tool v2.1.16
|
||||
------------------------------------------------------------------------------------------------------------
|
||||
|
||||
The flexible open-source research tool for characterizing memory hierarchy throughput, latency, and power.
|
||||
|
@ -10,7 +10,7 @@ Originally authored by Mark Gottscho (Email: <mgottscho@ucla.edu>) as a Summer 2
|
|||
|
||||
This project is under active development. Stay tuned for more updates.
|
||||
|
||||
PROJECT REVISION DATE: April 23, 2015.
|
||||
PROJECT REVISION DATE: April 24, 2015.
|
||||
|
||||
------------------------------------------------------------------------------------------------------------
|
||||
LICENSE
|
||||
|
|
|
@ -51,6 +51,10 @@
|
|||
#include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
#if defined(ARCH_ARM) && defined(ARCH_ARM_NEON)
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
using namespace xmem;
|
||||
|
||||
#if defined(__gnu_linux__) && defined(ARCH_INTEL_X86_64) && (defined(HAS_WORD_128) || defined(HAS_WORD_256))
|
||||
|
@ -65,7 +69,12 @@ using namespace xmem;
|
|||
//Extracts 64-bit element 0 (the least-significant 64 bits) of a 256-bit word.
#define my_64b_extractLSB_256b(w) _mm256_extract_epi64(w, 0) //AVX intrinsic. NOTE(review): the Intel Intrinsics Guide appears to list this as an instruction sequence rather than a single instruction -- confirm. Header: immintrin.h
|
||||
#endif
|
||||
|
||||
//TODO: ARM intrinsics?
|
||||
#if defined(ARCH_ARM) && defined(ARCH_ARM_NEON)
|
||||
//Builds a 128-bit NEON word from two 64-bit halves.
//NOTE(review): vcombine_u64 takes (low, high) operands of type uint64x1_t, not plain uint64_t scalars -- confirm what callers pass and that the half order matches the Intel variant of this macro.
#define my_64b_set_128b_word(a, b) vcombine_u64(a, b) //NEON intrinsic. Header: arm_neon.h
|
||||
|
||||
//Extracts the least-significant 32 bits of a 128-bit NEON word (Word128_t, i.e. uint64x2_t) as a scalar uint32_t.
//The word is first reinterpreted as four 32-bit lanes (no instruction is generated for the reinterpret) and lane 0 is read.
//vget_low_u32 must not be used here: it expects a uint32x4_t and returns the whole lower 64-bit half as a uint32x2_t vector.
//NOTE(review): lane 0 holding the low 32 bits assumes a little-endian target -- confirm for any big-endian build.
#define my_32b_extractLSB_128b(w) vgetq_lane_u32(vreinterpretq_u32_u64(w), 0) //NEON intrinsics, lane read corresponds to "vmov" instruction. Header: arm_neon.h
|
||||
//Extracts the least-significant 64 bits of a 128-bit NEON word (Word128_t, i.e. uint64x2_t) as a scalar uint64_t.
//vgetq_lane_u64 is used instead of vget_low_u64 because the latter returns a uint64x1_t, which is only
//interchangeable with a scalar on some toolchains; reading lane 0 yields a true uint64_t on all of them.
#define my_64b_extractLSB_128b(w) vgetq_lane_u64(w, 0) //NEON intrinsic, corresponds to "vmov" instruction. Header: arm_neon.h
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32) && defined(ARCH_INTEL_X86_64)
|
||||
/* Hand-coded assembly functions for the 128-bit and 256-bit benchmark kernels on Windows x86-64 where applicable.
|
||||
|
|
|
@ -43,9 +43,13 @@
|
|||
#include <immintrin.h> //for Intel __m256i datatype
|
||||
#endif
|
||||
|
||||
#if defined(__arm__) || defined(_M_ARM)
|
||||
#include <arm_neon.h> //For ARM uint64x2_t datatype
|
||||
#endif
|
||||
|
||||
namespace xmem {
|
||||
|
||||
#define VERSION "2.1.15"
|
||||
#define VERSION "2.1.16"
|
||||
|
||||
#if !defined(_WIN32) && !defined(__gnu_linux__)
|
||||
#error Neither Windows/GNULinux build environments were detected!
|
||||
|
@ -331,10 +335,8 @@ namespace xmem {
|
|||
extern double g_ns_per_tick;
|
||||
|
||||
//Typedef the platform-specific types to word sizes that match the 4 different chunk options
|
||||
//TODO: better way to detect 64-bit ARM other than NEON extensions?
|
||||
#if defined(ARCH_64BIT) || defined(ARCH_ARM_NEON)
|
||||
#define HAS_WORD_64
|
||||
#endif
|
||||
#define HAS_WORD_64 //For now, assume this is always available, even on native 32-bit architectures. The compiler will emulate 64-bit operations.
|
||||
|
||||
#if defined(ARCH_INTEL_AVX) || defined(ARCH_ARM_NEON)
|
||||
#define HAS_WORD_128
|
||||
#endif
|
||||
|
@ -350,8 +352,8 @@ namespace xmem {
|
|||
#ifdef ARCH_INTEL
|
||||
typedef __m128i Word128_t;
|
||||
#endif
|
||||
#ifdef ARCH_ARM
|
||||
#error TODO: Implement for ARM
|
||||
#if defined(ARCH_ARM) && defined(ARCH_ARM_NEON)
|
||||
typedef uint64x2_t Word128_t; //128-bit word on ARM with NEON: the uint64x2_t datatype from arm_neon.h
|
||||
#endif
|
||||
#endif
|
||||
#ifdef HAS_WORD_256
|
||||
|
|
Загрузка…
Ссылка в новой задаче