This commit is contained in:
Mark Gottscho 2015-04-23 15:23:59 -07:00
Родитель 5d537ae69e
Коммит 6a67eb4c98
21 изменённых файлов: 74 добавлений и 78 удалений

Просмотреть файл

@ -23,11 +23,9 @@ env.Append(CPPPATH = ['src/include/ext/StreamBenchmark']) # Extension: Stream be
# List all source files that are built into the xmem executable.
# Besides the C++ sources this also picks up the hand-written assembly files.
sources = [
Glob('src/win/x86_64/*.asm'), # Have some hand-coded assembler files for Windows x86-64 only
Glob('src/*.cpp'), # Sources at the top level of src/
Glob('src/ext/*/*.cpp'), # All extensions
Glob('src/win/*.cpp'), # Sources under src/win/ (presumably Windows-specific, going by the directory name)
Glob('src/x86_64/*.cpp') # Sources under src/x86_64/ (presumably x86-64-specific, going by the directory name)
]
# Build a single program named 'xmem' from every file matched above.
defaultBuild = env.Program(target = 'xmem', source = sources)

Просмотреть файл

@ -27,7 +27,6 @@ sources = [
Glob('src/*.cpp'),
Glob('src/ext/*/*.cpp'), # All extensions
Glob('src/win/*.cpp'),
Glob('src/x86_64/*.cpp')
]
defaultBuild = env.Program(target = 'xmem', source = sources)

Просмотреть файл

@ -23,11 +23,9 @@ env.Append(CPPPATH = ['src/include/ext/StreamBenchmark']) # Extension: Stream be
# List all C++ source files that are built into the xmem executable.
# The two commented-out Glob entries below are deliberately left out of this
# build script, so neither the assembly files nor src/x86_64/ are compiled here.
sources = [
#Glob('src/win/x86_64/*.asm'), # Have some hand-coded assembler files for Windows x86-64 only
Glob('src/*.cpp'), # Sources at the top level of src/
Glob('src/ext/*/*.cpp'), # All extensions
Glob('src/win/*.cpp'), # Sources under src/win/ (presumably Windows-specific, going by the directory name)
#Glob('src/x86_64/*.cpp')
]
# Build a single program named 'xmem' from every file matched above.
defaultBuild = env.Program(target = 'xmem', source = sources)

Двоичные данные
bin/xmem-win-x64.exe

Двоичный файл не отображается.

Двоичные данные
bin/xmem-win-x64_avx.exe

Двоичный файл не отображается.

Двоичные данные
bin/xmem-win-x86.exe

Двоичный файл не отображается.

Просмотреть файл

@ -44,13 +44,15 @@
#include <random>
#include <algorithm>
#include <time.h>
#if defined(__gnu_linux__) && defined(ARCH_INTEL_X86_64) && (defined(HAS_WORD_128) || defined(HAS_WORD_256))
#if defined(ARCH_INTEL_X86_64) && (defined(HAS_WORD_128) || defined(HAS_WORD_256))
//Intel intrinsics
#include <emmintrin.h>
#include <immintrin.h>
#include <smmintrin.h>
#endif
using namespace xmem;
#if defined(__gnu_linux__) && defined(ARCH_INTEL_X86_64) && (defined(HAS_WORD_128) || defined(HAS_WORD_256))
//Thin wrappers that assemble a 128-bit or 256-bit SIMD word out of 32-bit integers.
//Both underlying intrinsics take their elements most-significant first, so argument `a` ends up in the highest 32 bits of the result.
#define my_32b_set_128b_word(a, b, c, d) _mm_set_epi32(a, b, c, d) //SSE2 intrinsic. Composite: the compiler emits an instruction sequence, there is no single corresponding instruction. Header: emmintrin.h
#define my_32b_set_256b_word(a, b, c, d, e, f, g, h) _mm256_set_epi32(a, b, c, d, e, f, g, h) //AVX intrinsic. Composite: the compiler emits an instruction sequence, there is no single corresponding instruction. Header: immintrin.h
@ -78,35 +80,34 @@
//Declarations of the kernels that are implemented in separate assembly files
//(each has a `proc`/`endp` definition of the same name). extern "C" keeps the
//names unmangled so that they match the assembly symbols.
//
//Argument order: forward kernels take (first_word, last_word) and reverse
//kernels take (last_word, first_word). This matches the assembly header
//comments, which describe rcx (the first argument) as the address of the first
//word for forward kernels and of the last word for reverse kernels.
//
//Return value: the visible assembly epilogues do `xor eax,eax` before `ret`,
//i.e. they return 0.
#ifdef HAS_WORD_128
//128-bit
extern "C" int win_x86_64_asm_forwSequentialRead_Word128(Word128_t* first_word, Word128_t* last_word);
extern "C" int win_x86_64_asm_revSequentialRead_Word128(Word128_t* last_word, Word128_t* first_word);
extern "C" int win_x86_64_asm_forwSequentialWrite_Word128(Word128_t* first_word, Word128_t* last_word);
extern "C" int win_x86_64_asm_revSequentialWrite_Word128(Word128_t* last_word, Word128_t* first_word);
extern "C" int32_t win_x86_64_asm_forwSequentialRead_Word128(Word128_t* first_word, Word128_t* last_word);
extern "C" int32_t win_x86_64_asm_revSequentialRead_Word128(Word128_t* last_word, Word128_t* first_word);
extern "C" int32_t win_x86_64_asm_forwSequentialWrite_Word128(Word128_t* first_word, Word128_t* last_word);
extern "C" int32_t win_x86_64_asm_revSequentialWrite_Word128(Word128_t* last_word, Word128_t* first_word);
#endif
#ifdef HAS_WORD_256
//256-bit
extern "C" int win_x86_64_asm_forwSequentialRead_Word256(Word256_t* first_word, Word256_t* last_word);
extern "C" int win_x86_64_asm_revSequentialRead_Word256(Word256_t* last_word, Word256_t* first_word);
extern "C" int win_x86_64_asm_forwSequentialWrite_Word256(Word256_t* first_word, Word256_t* last_word);
extern "C" int win_x86_64_asm_revSequentialWrite_Word256(Word256_t* last_word, Word256_t* first_word);
extern "C" int32_t win_x86_64_asm_forwSequentialRead_Word256(Word256_t* first_word, Word256_t* last_word);
extern "C" int32_t win_x86_64_asm_revSequentialRead_Word256(Word256_t* last_word, Word256_t* first_word);
extern "C" int32_t win_x86_64_asm_forwSequentialWrite_Word256(Word256_t* first_word, Word256_t* last_word);
extern "C" int32_t win_x86_64_asm_revSequentialWrite_Word256(Word256_t* last_word, Word256_t* first_word);
#endif
//Dummies
//Presumably loop-only counterparts of the kernels above, used to measure loop overhead -- TODO confirm.
//NOTE(review): the assembly for the reverse dummy loops documents rcx (the first
//argument) as the address of the LAST word, yet the reverse dummies below name
//their parameters (first_word, last_word). The names look swapped relative to
//the non-dummy reverse kernels; confirm against the call sites before relying on them.
#ifdef HAS_WORD_128
//128-bit
extern "C" int win_x86_64_asm_dummy_forwSequentialLoop_Word128(Word128_t* first_word, Word128_t* last_word);
extern "C" int win_x86_64_asm_dummy_revSequentialLoop_Word128(Word128_t* first_word, Word128_t* last_word);
extern "C" int32_t win_x86_64_asm_dummy_forwSequentialLoop_Word128(Word128_t* first_word, Word128_t* last_word);
extern "C" int32_t win_x86_64_asm_dummy_revSequentialLoop_Word128(Word128_t* first_word, Word128_t* last_word);
#endif
#ifdef HAS_WORD_256
//256-bit
extern "C" int win_x86_64_asm_dummy_forwSequentialLoop_Word256(Word256_t* first_word, Word256_t* last_word);
extern "C" int win_x86_64_asm_dummy_revSequentialLoop_Word256(Word256_t* first_word, Word256_t* last_word);
extern "C" int32_t win_x86_64_asm_dummy_forwSequentialLoop_Word256(Word256_t* first_word, Word256_t* last_word);
extern "C" int32_t win_x86_64_asm_dummy_revSequentialLoop_Word256(Word256_t* first_word, Word256_t* last_word);
#endif
#endif
using namespace xmem;
bool xmem::determineSequentialKernel(rw_mode_t rw_mode, chunk_size_t chunk_size, int32_t stride_size, SequentialFunction* kernel_function, SequentialFunction* dummy_kernel_function) {
switch (rw_mode) {

Просмотреть файл

@ -147,20 +147,20 @@ void xmem::print_compile_time_options() {
#ifdef ARCH_INTEL_X86_64
std::cout << "ARCH_INTEL_X86_64" << std::endl;
#endif
#ifdef ARCH_INTEL_X86_64_SSE
std::cout << "ARCH_INTEL_X86_64_SSE" << std::endl;
#ifdef ARCH_INTEL_SSE
std::cout << "ARCH_INTEL_SSE" << std::endl;
#endif
#ifdef ARCH_INTEL_X86_64_SSE2
std::cout << "ARCH_INTEL_X86_64_SSE2" << std::endl;
#ifdef ARCH_INTEL_SSE2
std::cout << "ARCH_INTEL_SSE2" << std::endl;
#endif
#ifdef ARCH_INTEL_X86_64_SSE3
std::cout << "ARCH_INTEL_X86_64_SSE3" << std::endl;
#ifdef ARCH_INTEL_SSE3
std::cout << "ARCH_INTEL_SSE3" << std::endl;
#endif
#ifdef ARCH_INTEL_X86_64_AVX
std::cout << "ARCH_INTEL_X86_64_AVX" << std::endl;
#ifdef ARCH_INTEL_AVX
std::cout << "ARCH_INTEL_AVX" << std::endl;
#endif
#ifdef ARCH_INTEL_X86_64_AVX2
std::cout << "ARCH_INTEL_X86_64_AVX2" << std::endl;
#ifdef ARCH_INTEL_AVX2
std::cout << "ARCH_INTEL_AVX2" << std::endl;
#endif
#ifdef ARCH_AMD64
std::cout << "ARCH_AMD64" << std::endl;

Просмотреть файл

@ -36,11 +36,11 @@
#ifdef _WIN32
#include <windows.h>
#include <intrin.h>
#endif
#if defined(__gnu_linux__) && (defined(_M_IX64) || defined(__x86_64__))
#include <immintrin.h>
#if defined(__x86_64__) || defined(_M_X64)
#include <emmintrin.h> //for Intel __m128i datatype
#include <immintrin.h> //for Intel __m256i datatype
#endif
namespace xmem {
@ -66,25 +66,25 @@ namespace xmem {
#define HAS_NUMA
#endif
#ifdef _M_IX86_FP //Intel x86-64 SSE2 extensions
#ifdef _M_IX86_FP //Intel SSE/SSE2 extensions
#define ARCH_INTEL
#if _M_IX86_FP == 1
#define ARCH_INTEL_X86_64_SSE
#define ARCH_INTEL_SSE
#endif
#if _M_IX86_FP == 2
#define ARCH_INTEL_X86_64_SSE
#define ARCH_INTEL_X86_64_SSE2
#define ARCH_INTEL_SSE
#define ARCH_INTEL_SSE2
#endif
#endif
#ifdef __AVX__ //Intel x86-64 AVX extensions
#ifdef __AVX__ //Intel AVX extensions
#define ARCH_INTEL
#define ARCH_INTEL_X86_64_AVX
#define ARCH_INTEL_AVX
#endif
#ifdef __AVX2__ //Intel x86-64 AVX2 extensions
#ifdef __AVX2__ //Intel AVX2 extensions
#define ARCH_INTEL
#define ARCH_INTEL_X86_64_AVX2
#define ARCH_INTEL_AVX2
#endif
#ifdef _M_AMD64 //AMD64
@ -125,29 +125,29 @@ namespace xmem {
#define HAS_NUMA
#endif
#ifdef __SSE__ //Intel x86-64 SSE extensions
#ifdef __SSE__ //Intel SSE extensions
#define ARCH_INTEL
#define ARCH_INTEL_X86_64_SSE
#define ARCH_INTEL_SSE
#endif
#ifdef __SSE2__ //Intel x86-64 SSE2 extensions
#ifdef __SSE2__ //Intel SSE2 extensions
#define ARCH_INTEL
#define ARCH_INTEL_X86_64_SSE2
#define ARCH_INTEL_SSE2
#endif
#ifdef __SSE3__ //Intel x86-64 SSE3 extensions
#ifdef __SSE3__ //Intel SSE3 extensions
#define ARCH_INTEL
#define ARCH_INTEL_X86_64_SSE3
#define ARCH_INTEL_SSE3
#endif
#ifdef __AVX__ //Intel x86-64 AVX extensions
#ifdef __AVX__ //Intel AVX extensions
#define ARCH_INTEL
#define ARCH_INTEL_X86_64_AVX
#define ARCH_INTEL_AVX
#endif
#ifdef __AVX2__ //Intel x86-64 AVX2 extensions
#ifdef __AVX2__ //Intel AVX2 extensions
#define ARCH_INTEL
#define ARCH_INTEL_X86_64_AVX2
#define ARCH_INTEL_AVX2
#endif
#ifdef __amd64__ //AMD64
@ -333,10 +333,10 @@ namespace xmem {
#if defined(ARCH_64BIT) || defined(ARCH_ARM_NEON)
#define HAS_WORD_64
#endif
#if defined(ARCH_INTEL_X86_64_AVX) || defined(ARCH_ARM_NEON)
#if defined(ARCH_INTEL_AVX) || defined(ARCH_ARM_NEON)
#define HAS_WORD_128
#endif
#ifdef ARCH_INTEL_X86_64_AVX
#ifdef ARCH_INTEL_AVX
#define HAS_WORD_256
#endif
@ -349,11 +349,11 @@ namespace xmem {
typedef __m128i Word128_t;
#endif
#ifdef ARCH_ARM
#error TODO: Implement ARM NEON support for 128-bit memory operations.
#error TODO: Implement for ARM
#endif
#endif
#ifdef HAS_WORD_256
#ifdef ARCH_INTEL_X86_64_AVX
#ifdef ARCH_INTEL
typedef __m256i Word256_t; //Not possible on current ARM systems.
#endif
#ifdef ARCH_ARM

Просмотреть файл

@ -23,7 +23,7 @@
.code
win_asm_dummy_forwSequentialLoop_Word128 proc
win_x86_64_asm_dummy_forwSequentialLoop_Word128 proc
; Arguments:
; rcx is address of the first 128-bit word in the array
@ -49,5 +49,5 @@ done:
xor eax,eax ; return 0
ret
win_asm_dummy_forwSequentialLoop_Word128 endp
win_x86_64_asm_dummy_forwSequentialLoop_Word128 endp
end

Просмотреть файл

@ -23,7 +23,7 @@
.code
win_asm_dummy_forwSequentialLoop_Word256 proc
win_x86_64_asm_dummy_forwSequentialLoop_Word256 proc
; Arguments:
; rcx is address of the first 256-bit word in the array
@ -49,5 +49,5 @@ done:
xor eax,eax ; return 0
ret
win_asm_dummy_forwSequentialLoop_Word256 endp
win_x86_64_asm_dummy_forwSequentialLoop_Word256 endp
end

Просмотреть файл

@ -23,7 +23,7 @@
.code
win_asm_dummy_revSequentialLoop_Word128 proc
win_x86_64_asm_dummy_revSequentialLoop_Word128 proc
; Arguments:
; rcx is address of the last 128-bit word in the array
@ -49,5 +49,5 @@ done:
xor eax,eax ; return 0
ret
win_asm_dummy_revSequentialLoop_Word128 endp
win_x86_64_asm_dummy_revSequentialLoop_Word128 endp
end

Просмотреть файл

@ -21,7 +21,7 @@
; SOFTWARE.
.code
win_asm_dummy_revSequentialLoop_Word256 proc
win_x86_64_asm_dummy_revSequentialLoop_Word256 proc
; Arguments:
; rcx is address of the last 256-bit word in the array
@ -47,5 +47,5 @@ done:
xor eax,eax ; return 0
ret
win_asm_dummy_revSequentialLoop_Word256 endp
win_x86_64_asm_dummy_revSequentialLoop_Word256 endp
end

Просмотреть файл

@ -21,7 +21,7 @@
; SOFTWARE.
.code
win_asm_forwSequentialRead_Word128 proc
win_x86_64_asm_forwSequentialRead_Word128 proc
; Arguments:
; rcx is address of the first 128-bit word in the array
@ -303,5 +303,5 @@ done:
xor eax,eax ; return 0
ret
win_asm_forwSequentialRead_Word128 endp
win_x86_64_asm_forwSequentialRead_Word128 endp
end

Просмотреть файл

@ -21,7 +21,7 @@
; SOFTWARE.
.code
win_asm_forwSequentialRead_Word256 proc
win_x86_64_asm_forwSequentialRead_Word256 proc
; Arguments:
; rcx is address of the first 256-bit word in the array
@ -174,5 +174,5 @@ done:
xor eax,eax ; return 0
ret
win_asm_forwSequentialRead_Word256 endp
win_x86_64_asm_forwSequentialRead_Word256 endp
end

Просмотреть файл

@ -21,7 +21,7 @@
; SOFTWARE.
.code
win_asm_forwSequentialWrite_Word128 proc
win_x86_64_asm_forwSequentialWrite_Word128 proc
; Arguments:
; rcx is address of the first 128-bit word in the array
@ -312,5 +312,5 @@ done:
xor eax,eax ; return 0
ret
win_asm_forwSequentialWrite_Word128 endp
win_x86_64_asm_forwSequentialWrite_Word128 endp
end

Просмотреть файл

@ -21,7 +21,7 @@
; SOFTWARE.
.code
win_asm_forwSequentialWrite_Word256 proc
win_x86_64_asm_forwSequentialWrite_Word256 proc
; Arguments:
; rcx is address of the first 256-bit word in the array
@ -189,5 +189,5 @@ done:
xor eax,eax ; return 0
ret
win_asm_forwSequentialWrite_Word256 endp
win_x86_64_asm_forwSequentialWrite_Word256 endp
end

Просмотреть файл

@ -21,7 +21,7 @@
; SOFTWARE.
.code
win_asm_revSequentialRead_Word128 proc
win_x86_64_asm_revSequentialRead_Word128 proc
; Arguments:
; rcx is address of the last 128-bit word in the array
@ -303,5 +303,5 @@ done:
xor eax,eax ; return 0
ret
win_asm_revSequentialRead_Word128 endp
win_x86_64_asm_revSequentialRead_Word128 endp
end

Просмотреть файл

@ -21,7 +21,7 @@
; SOFTWARE.
.code
win_asm_revSequentialRead_Word256 proc
win_x86_64_asm_revSequentialRead_Word256 proc
; Arguments:
; rcx is address of the last 256-bit word in the array
@ -176,5 +176,5 @@ done:
xor eax,eax ; return 0
ret
win_asm_revSequentialRead_Word256 endp
win_x86_64_asm_revSequentialRead_Word256 endp
end

Просмотреть файл

@ -21,7 +21,7 @@
; SOFTWARE.
.code
win_asm_revSequentialWrite_Word128 proc
win_x86_64_asm_revSequentialWrite_Word128 proc
; Arguments:
; rcx is address of the last 128-bit word in the array
@ -312,5 +312,5 @@ done:
xor eax,eax ; return 0
ret
win_asm_revSequentialWrite_Word128 endp
win_x86_64_asm_revSequentialWrite_Word128 endp
end

Просмотреть файл

@ -21,7 +21,7 @@
; SOFTWARE.
.code
win_asm_revSequentialWrite_Word256 proc
win_x86_64_asm_revSequentialWrite_Word256 proc
; Arguments:
; rcx is address of the last 256-bit word in the array
@ -189,5 +189,5 @@ done:
xor eax,eax ; return 0
ret
win_asm_revSequentialWrite_Word256 endp
win_x86_64_asm_revSequentialWrite_Word256 endp
end