зеркало из https://github.com/microsoft/X-Mem.git
quick windows fixes
This commit is contained in:
Родитель
5d537ae69e
Коммит
6a67eb4c98
|
@ -23,11 +23,9 @@ env.Append(CPPPATH = ['src/include/ext/StreamBenchmark']) # Extension: Stream be
|
|||
|
||||
# List all C++ source files
|
||||
sources = [
|
||||
Glob('src/win/x86_64/*.asm'), # Have some hand-coded assembler files for Windows x86-64 only
|
||||
Glob('src/*.cpp'),
|
||||
Glob('src/ext/*/*.cpp'), # All extensions
|
||||
Glob('src/win/*.cpp'),
|
||||
Glob('src/x86_64/*.cpp')
|
||||
]
|
||||
|
||||
defaultBuild = env.Program(target = 'xmem', source = sources)
|
||||
|
|
|
@ -27,7 +27,6 @@ sources = [
|
|||
Glob('src/*.cpp'),
|
||||
Glob('src/ext/*/*.cpp'), # All extensions
|
||||
Glob('src/win/*.cpp'),
|
||||
Glob('src/x86_64/*.cpp')
|
||||
]
|
||||
|
||||
defaultBuild = env.Program(target = 'xmem', source = sources)
|
||||
|
|
|
@ -23,11 +23,9 @@ env.Append(CPPPATH = ['src/include/ext/StreamBenchmark']) # Extension: Stream be
|
|||
|
||||
# List all C++ source files
|
||||
sources = [
|
||||
#Glob('src/win/x86_64/*.asm'), # Have some hand-coded assembler files for Windows x86-64 only
|
||||
Glob('src/*.cpp'),
|
||||
Glob('src/ext/*/*.cpp'), # All extensions
|
||||
Glob('src/win/*.cpp'),
|
||||
#Glob('src/x86_64/*.cpp')
|
||||
]
|
||||
|
||||
defaultBuild = env.Program(target = 'xmem', source = sources)
|
||||
|
|
Двоичные данные
bin/xmem-win-x64.exe
Двоичные данные
bin/xmem-win-x64.exe
Двоичный файл не отображается.
Двоичные данные
bin/xmem-win-x64_avx.exe
Двоичные данные
bin/xmem-win-x64_avx.exe
Двоичный файл не отображается.
Двоичные данные
bin/xmem-win-x86.exe
Двоичные данные
bin/xmem-win-x86.exe
Двоичный файл не отображается.
|
@ -44,13 +44,15 @@
|
|||
#include <random>
|
||||
#include <algorithm>
|
||||
#include <time.h>
|
||||
#if defined(__gnu_linux__) && defined(ARCH_INTEL_X86_64) && (defined(HAS_WORD_128) || defined(HAS_WORD_256))
|
||||
#if defined(ARCH_INTEL_X86_64) && (defined(HAS_WORD_128) || defined(HAS_WORD_256))
|
||||
//Intel intrinsics
|
||||
#include <emmintrin.h>
|
||||
#include <immintrin.h>
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
using namespace xmem;
|
||||
|
||||
#if defined(__gnu_linux__) && defined(ARCH_INTEL_X86_64) && (defined(HAS_WORD_128) || defined(HAS_WORD_256))
|
||||
#define my_32b_set_128b_word(a, b, c, d) _mm_set_epi32(a, b, c, d) //SSE2 intrinsic, corresponds to ??? instruction. Header: emmintrin.h
|
||||
#define my_32b_set_256b_word(a, b, c, d, e, f, g, h) _mm256_set_epi32(a, b, c, d, e, f, g, h) //AVX intrinsic, corresponds to ??? instruction. Header: immintrin.h
|
||||
|
@ -78,35 +80,34 @@
|
|||
|
||||
#ifdef HAS_WORD_128
|
||||
//128-bit
|
||||
extern "C" int win_x86_64_asm_forwSequentialRead_Word128(Word128_t* first_word, Word128_t* last_word);
|
||||
extern "C" int win_x86_64_asm_revSequentialRead_Word128(Word128_t* last_word, Word128_t* first_word);
|
||||
extern "C" int win_x86_64_asm_forwSequentialWrite_Word128(Word128_t* first_word, Word128_t* last_word);
|
||||
extern "C" int win_x86_64_asm_revSequentialWrite_Word128(Word128_t* last_word, Word128_t* first_word);
|
||||
extern "C" int32_t win_x86_64_asm_forwSequentialRead_Word128(Word128_t* first_word, Word128_t* last_word);
|
||||
extern "C" int32_t win_x86_64_asm_revSequentialRead_Word128(Word128_t* last_word, Word128_t* first_word);
|
||||
extern "C" int32_t win_x86_64_asm_forwSequentialWrite_Word128(Word128_t* first_word, Word128_t* last_word);
|
||||
extern "C" int32_t win_x86_64_asm_revSequentialWrite_Word128(Word128_t* last_word, Word128_t* first_word);
|
||||
#endif
|
||||
|
||||
#ifdef HAS_WORD_256
|
||||
//256-bit
|
||||
extern "C" int win_x86_64_asm_forwSequentialRead_Word256(Word256_t* first_word, Word256_t* last_word);
|
||||
extern "C" int win_x86_64_asm_revSequentialRead_Word256(Word256_t* last_word, Word256_t* first_word);
|
||||
extern "C" int win_x86_64_asm_forwSequentialWrite_Word256(Word256_t* first_word, Word256_t* last_word);
|
||||
extern "C" int win_x86_64_asm_revSequentialWrite_Word256(Word256_t* last_word, Word256_t* first_word);
|
||||
extern "C" int32_t win_x86_64_asm_forwSequentialRead_Word256(Word256_t* first_word, Word256_t* last_word);
|
||||
extern "C" int32_t win_x86_64_asm_revSequentialRead_Word256(Word256_t* last_word, Word256_t* first_word);
|
||||
extern "C" int32_t win_x86_64_asm_forwSequentialWrite_Word256(Word256_t* first_word, Word256_t* last_word);
|
||||
extern "C" int32_t win_x86_64_asm_revSequentialWrite_Word256(Word256_t* last_word, Word256_t* first_word);
|
||||
#endif
|
||||
|
||||
//Dummies
|
||||
#ifdef HAS_WORD_128
|
||||
//128-bit
|
||||
extern "C" int win_x86_64_asm_dummy_forwSequentialLoop_Word128(Word128_t* first_word, Word128_t* last_word);
|
||||
extern "C" int win_x86_64_asm_dummy_revSequentialLoop_Word128(Word128_t* first_word, Word128_t* last_word);
|
||||
extern "C" int32_t win_x86_64_asm_dummy_forwSequentialLoop_Word128(Word128_t* first_word, Word128_t* last_word);
|
||||
extern "C" int32_t win_x86_64_asm_dummy_revSequentialLoop_Word128(Word128_t* first_word, Word128_t* last_word);
|
||||
#endif
|
||||
|
||||
#ifdef HAS_WORD_256
|
||||
//256-bit
|
||||
extern "C" int win_x86_64_asm_dummy_forwSequentialLoop_Word256(Word256_t* first_word, Word256_t* last_word);
|
||||
extern "C" int win_x86_64_asm_dummy_revSequentialLoop_Word256(Word256_t* first_word, Word256_t* last_word);
|
||||
extern "C" int32_t win_x86_64_asm_dummy_forwSequentialLoop_Word256(Word256_t* first_word, Word256_t* last_word);
|
||||
extern "C" int32_t win_x86_64_asm_dummy_revSequentialLoop_Word256(Word256_t* first_word, Word256_t* last_word);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
using namespace xmem;
|
||||
|
||||
bool xmem::determineSequentialKernel(rw_mode_t rw_mode, chunk_size_t chunk_size, int32_t stride_size, SequentialFunction* kernel_function, SequentialFunction* dummy_kernel_function) {
|
||||
switch (rw_mode) {
|
||||
|
|
|
@ -147,20 +147,20 @@ void xmem::print_compile_time_options() {
|
|||
#ifdef ARCH_INTEL_X86_64
|
||||
std::cout << "ARCH_INTEL_X86_64" << std::endl;
|
||||
#endif
|
||||
#ifdef ARCH_INTEL_X86_64_SSE
|
||||
std::cout << "ARCH_INTEL_X86_64_SSE" << std::endl;
|
||||
#ifdef ARCH_INTEL_SSE
|
||||
std::cout << "ARCH_INTEL_SSE" << std::endl;
|
||||
#endif
|
||||
#ifdef ARCH_INTEL_X86_64_SSE2
|
||||
std::cout << "ARCH_INTEL_X86_64_SSE2" << std::endl;
|
||||
#ifdef ARCH_INTEL_SSE2
|
||||
std::cout << "ARCH_INTEL_SSE2" << std::endl;
|
||||
#endif
|
||||
#ifdef ARCH_INTEL_X86_64_SSE3
|
||||
std::cout << "ARCH_INTEL_X86_64_SSE3" << std::endl;
|
||||
#ifdef ARCH_INTEL_SSE3
|
||||
std::cout << "ARCH_INTEL_SSE3" << std::endl;
|
||||
#endif
|
||||
#ifdef ARCH_INTEL_X86_64_AVX
|
||||
std::cout << "ARCH_INTEL_X86_64_AVX" << std::endl;
|
||||
#ifdef ARCH_INTEL_AVX
|
||||
std::cout << "ARCH_INTEL_AVX" << std::endl;
|
||||
#endif
|
||||
#ifdef ARCH_INTEL_X86_64_AVX2
|
||||
std::cout << "ARCH_INTEL_X86_64_AVX2" << std::endl;
|
||||
#ifdef ARCH_INTEL_AVX2
|
||||
std::cout << "ARCH_INTEL_AVX2" << std::endl;
|
||||
#endif
|
||||
#ifdef ARCH_AMD64
|
||||
std::cout << "ARCH_AMD64" << std::endl;
|
||||
|
|
|
@ -36,11 +36,11 @@
|
|||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#if defined(__gnu_linux__) && (defined(_M_IX64) || defined(__x86_64__))
|
||||
#include <immintrin.h>
|
||||
#if defined(__x86_64__) || defined(_M_X64)
|
||||
#include <emmintrin.h> //for Intel __m128i datatype
|
||||
#include <immintrin.h> //for Intel __m256i datatype
|
||||
#endif
|
||||
|
||||
namespace xmem {
|
||||
|
@ -66,25 +66,25 @@ namespace xmem {
|
|||
#define HAS_NUMA
|
||||
#endif
|
||||
|
||||
#ifdef _M_IX86_FP //Intel x86-64 SSE2 extensions
|
||||
#ifdef _M_IX86_FP //Intel SSE/SSE2 extensions
|
||||
#define ARCH_INTEL
|
||||
#if _M_IX86_FP == 1
|
||||
#define ARCH_INTEL_X86_64_SSE
|
||||
#define ARCH_INTEL_SSE
|
||||
#endif
|
||||
#if _M_IX86_FP == 2
|
||||
#define ARCH_INTEL_X86_64_SSE
|
||||
#define ARCH_INTEL_X86_64_SSE2
|
||||
#define ARCH_INTEL_SSE
|
||||
#define ARCH_INTEL_SSE2
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __AVX__ //Intel x86-64 AVX extensions
|
||||
#ifdef __AVX__ //Intel AVX extensions
|
||||
#define ARCH_INTEL
|
||||
#define ARCH_INTEL_X86_64_AVX
|
||||
#define ARCH_INTEL_AVX
|
||||
#endif
|
||||
|
||||
#ifdef __AVX2__ //Intel x86-64 AVX2 extensions
|
||||
#ifdef __AVX2__ //Intel AVX2 extensions
|
||||
#define ARCH_INTEL
|
||||
#define ARCH_INTEL_X86_64_AVX2
|
||||
#define ARCH_INTEL_AVX2
|
||||
#endif
|
||||
|
||||
#ifdef _M_AMD64 //AMD64
|
||||
|
@ -125,29 +125,29 @@ namespace xmem {
|
|||
#define HAS_NUMA
|
||||
#endif
|
||||
|
||||
#ifdef __SSE__ //Intel x86-64 SSE extensions
|
||||
#ifdef __SSE__ //Intel SSE extensions
|
||||
#define ARCH_INTEL
|
||||
#define ARCH_INTEL_X86_64_SSE
|
||||
#define ARCH_INTEL_SSE
|
||||
#endif
|
||||
|
||||
#ifdef __SSE2__ //Intel x86-64 SSE2 extensions
|
||||
#ifdef __SSE2__ //Intel SSE2 extensions
|
||||
#define ARCH_INTEL
|
||||
#define ARCH_INTEL_X86_64_SSE2
|
||||
#define ARCH_INTEL_SSE2
|
||||
#endif
|
||||
|
||||
#ifdef __SSE3__ //Intel x86-64 SSE3 extensions
|
||||
#ifdef __SSE3__ //Intel SSE3 extensions
|
||||
#define ARCH_INTEL
|
||||
#define ARCH_INTEL_X86_64_SSE3
|
||||
#define ARCH_INTEL_SSE3
|
||||
#endif
|
||||
|
||||
#ifdef __AVX__ //Intel x86-64 AVX extensions
|
||||
#ifdef __AVX__ //Intel AVX extensions
|
||||
#define ARCH_INTEL
|
||||
#define ARCH_INTEL_X86_64_AVX
|
||||
#define ARCH_INTEL_AVX
|
||||
#endif
|
||||
|
||||
#ifdef __AVX2__ //Intel x86-64 AVX2 extensions
|
||||
#ifdef __AVX2__ //Intel AVX2 extensions
|
||||
#define ARCH_INTEL
|
||||
#define ARCH_INTEL_X86_64_AVX2
|
||||
#define ARCH_INTEL_AVX2
|
||||
#endif
|
||||
|
||||
#ifdef __amd64__ //AMD64
|
||||
|
@ -333,10 +333,10 @@ namespace xmem {
|
|||
#if defined(ARCH_64BIT) || defined(ARCH_ARM_NEON)
|
||||
#define HAS_WORD_64
|
||||
#endif
|
||||
#if defined(ARCH_INTEL_X86_64_AVX) || defined(ARCH_ARM_NEON)
|
||||
#if defined(ARCH_INTEL_AVX) || defined(ARCH_ARM_NEON)
|
||||
#define HAS_WORD_128
|
||||
#endif
|
||||
#ifdef ARCH_INTEL_X86_64_AVX
|
||||
#ifdef ARCH_INTEL_AVX
|
||||
#define HAS_WORD_256
|
||||
#endif
|
||||
|
||||
|
@ -349,11 +349,11 @@ namespace xmem {
|
|||
typedef __m128i Word128_t;
|
||||
#endif
|
||||
#ifdef ARCH_ARM
|
||||
#error TODO: Implement ARM NEON support for 128-bit memory operations.
|
||||
#error TODO: Implement for ARM
|
||||
#endif
|
||||
#endif
|
||||
#ifdef HAS_WORD_256
|
||||
#ifdef ARCH_INTEL_X86_64_AVX
|
||||
#ifdef ARCH_INTEL
|
||||
typedef __m256i Word256_t; //Not possible on current ARM systems.
|
||||
#endif
|
||||
#ifdef ARCH_ARM
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
|
||||
|
||||
.code
|
||||
win_asm_dummy_forwSequentialLoop_Word128 proc
|
||||
win_x86_64_asm_dummy_forwSequentialLoop_Word128 proc
|
||||
|
||||
; Arguments:
|
||||
; rcx is address of the first 128-bit word in the array
|
||||
|
@ -49,5 +49,5 @@ done:
|
|||
xor eax,eax ; return 0
|
||||
ret
|
||||
|
||||
win_asm_dummy_forwSequentialLoop_Word128 endp
|
||||
win_x86_64_asm_dummy_forwSequentialLoop_Word128 endp
|
||||
end
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
|
||||
|
||||
.code
|
||||
win_asm_dummy_forwSequentialLoop_Word256 proc
|
||||
win_x86_64_asm_dummy_forwSequentialLoop_Word256 proc
|
||||
|
||||
; Arguments:
|
||||
; rcx is address of the first 256-bit word in the array
|
||||
|
@ -49,5 +49,5 @@ done:
|
|||
xor eax,eax ; return 0
|
||||
ret
|
||||
|
||||
win_asm_dummy_forwSequentialLoop_Word256 endp
|
||||
win_x86_64_asm_dummy_forwSequentialLoop_Word256 endp
|
||||
end
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
|
||||
|
||||
.code
|
||||
win_asm_dummy_revSequentialLoop_Word128 proc
|
||||
win_x86_64_asm_dummy_revSequentialLoop_Word128 proc
|
||||
|
||||
; Arguments:
|
||||
; rcx is address of the last 128-bit word in the array
|
||||
|
@ -49,5 +49,5 @@ done:
|
|||
xor eax,eax ; return 0
|
||||
ret
|
||||
|
||||
win_asm_dummy_revSequentialLoop_Word128 endp
|
||||
win_x86_64_asm_dummy_revSequentialLoop_Word128 endp
|
||||
end
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
; SOFTWARE.
|
||||
|
||||
.code
|
||||
win_asm_dummy_revSequentialLoop_Word256 proc
|
||||
win_x86_64_asm_dummy_revSequentialLoop_Word256 proc
|
||||
|
||||
; Arguments:
|
||||
; rcx is address of the last 256-bit word in the array
|
||||
|
@ -47,5 +47,5 @@ done:
|
|||
xor eax,eax ; return 0
|
||||
ret
|
||||
|
||||
win_asm_dummy_revSequentialLoop_Word256 endp
|
||||
win_x86_64_asm_dummy_revSequentialLoop_Word256 endp
|
||||
end
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
; SOFTWARE.
|
||||
|
||||
.code
|
||||
win_asm_forwSequentialRead_Word128 proc
|
||||
win_x86_64_asm_forwSequentialRead_Word128 proc
|
||||
|
||||
; Arguments:
|
||||
; rcx is address of the first 128-bit word in the array
|
||||
|
@ -303,5 +303,5 @@ done:
|
|||
xor eax,eax ; return 0
|
||||
ret
|
||||
|
||||
win_asm_forwSequentialRead_Word128 endp
|
||||
win_x86_64_asm_forwSequentialRead_Word128 endp
|
||||
end
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
; SOFTWARE.
|
||||
|
||||
.code
|
||||
win_asm_forwSequentialRead_Word256 proc
|
||||
win_x86_64_asm_forwSequentialRead_Word256 proc
|
||||
|
||||
; Arguments:
|
||||
; rcx is address of the first 256-bit word in the array
|
||||
|
@ -174,5 +174,5 @@ done:
|
|||
xor eax,eax ; return 0
|
||||
ret
|
||||
|
||||
win_asm_forwSequentialRead_Word256 endp
|
||||
win_x86_64_asm_forwSequentialRead_Word256 endp
|
||||
end
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
; SOFTWARE.
|
||||
|
||||
.code
|
||||
win_asm_forwSequentialWrite_Word128 proc
|
||||
win_x86_64_asm_forwSequentialWrite_Word128 proc
|
||||
|
||||
; Arguments:
|
||||
; rcx is address of the first 128-bit word in the array
|
||||
|
@ -312,5 +312,5 @@ done:
|
|||
xor eax,eax ; return 0
|
||||
ret
|
||||
|
||||
win_asm_forwSequentialWrite_Word128 endp
|
||||
win_x86_64_asm_forwSequentialWrite_Word128 endp
|
||||
end
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
; SOFTWARE.
|
||||
|
||||
.code
|
||||
win_asm_forwSequentialWrite_Word256 proc
|
||||
win_x86_64_asm_forwSequentialWrite_Word256 proc
|
||||
|
||||
; Arguments:
|
||||
; rcx is address of the first 256-bit word in the array
|
||||
|
@ -189,5 +189,5 @@ done:
|
|||
xor eax,eax ; return 0
|
||||
ret
|
||||
|
||||
win_asm_forwSequentialWrite_Word256 endp
|
||||
win_x86_64_asm_forwSequentialWrite_Word256 endp
|
||||
end
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
; SOFTWARE.
|
||||
|
||||
.code
|
||||
win_asm_revSequentialRead_Word128 proc
|
||||
win_x86_64_asm_revSequentialRead_Word128 proc
|
||||
|
||||
; Arguments:
|
||||
; rcx is address of the last 128-bit word in the array
|
||||
|
@ -303,5 +303,5 @@ done:
|
|||
xor eax,eax ; return 0
|
||||
ret
|
||||
|
||||
win_asm_revSequentialRead_Word128 endp
|
||||
win_x86_64_asm_revSequentialRead_Word128 endp
|
||||
end
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
; SOFTWARE.
|
||||
|
||||
.code
|
||||
win_asm_revSequentialRead_Word256 proc
|
||||
win_x86_64_asm_revSequentialRead_Word256 proc
|
||||
|
||||
; Arguments:
|
||||
; rcx is address of the last 256-bit word in the array
|
||||
|
@ -176,5 +176,5 @@ done:
|
|||
xor eax,eax ; return 0
|
||||
ret
|
||||
|
||||
win_asm_revSequentialRead_Word256 endp
|
||||
win_x86_64_asm_revSequentialRead_Word256 endp
|
||||
end
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
; SOFTWARE.
|
||||
|
||||
.code
|
||||
win_asm_revSequentialWrite_Word128 proc
|
||||
win_x86_64_asm_revSequentialWrite_Word128 proc
|
||||
|
||||
; Arguments:
|
||||
; rcx is address of the last 128-bit word in the array
|
||||
|
@ -312,5 +312,5 @@ done:
|
|||
xor eax,eax ; return 0
|
||||
ret
|
||||
|
||||
win_asm_revSequentialWrite_Word128 endp
|
||||
win_x86_64_asm_revSequentialWrite_Word128 endp
|
||||
end
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
; SOFTWARE.
|
||||
|
||||
.code
|
||||
win_asm_revSequentialWrite_Word256 proc
|
||||
win_x86_64_asm_revSequentialWrite_Word256 proc
|
||||
|
||||
; Arguments:
|
||||
; rcx is address of the last 256-bit word in the array
|
||||
|
@ -189,5 +189,5 @@ done:
|
|||
xor eax,eax ; return 0
|
||||
ret
|
||||
|
||||
win_asm_revSequentialWrite_Word256 endp
|
||||
win_x86_64_asm_revSequentialWrite_Word256 endp
|
||||
end
|
||||
|
|
Загрузка…
Ссылка в новой задаче