crypto: crc32-pclmul - remove useless relative addressing
In 32-bit mode, the x86 architecture can hold full 32-bit pointers, so the code that copies the current address into the %ecx register and then uses %ecx-relative addressing is useless; we can simply use absolute addressing instead.

Furthermore, the processor keeps a return-address stack for branch prediction. Using a call instruction and then popping the return address desynchronizes that stack and causes branch-prediction misses.

This patch also moves the constant data to the .rodata section.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
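For illustration only, a minimal sketch (not the kernel code) of the two addressing patterns the commit message contrasts, in 32-bit AT&T/GAS assembly. The labels (old_const, anchor, load_old, new_const, load_new) and the constant data are invented placeholders, and the sketch assumes it is assembled as 32-bit code (e.g. gcc -m32 -c example.S):

/* Old pattern: the constant lives next to the code in .text and is
 * addressed relative to a label whose address is obtained with a
 * call/pop pair.  The call pushes a return address that is popped
 * right away, which desynchronizes the CPU's return-address stack
 * and causes branch-prediction misses. */
        .text
        .align  16
old_const:
        .quad   0x0000000154442bd4, 0   /* placeholder data */

        .globl  load_old
load_old:
        call    anchor
anchor:
        pop     %ecx                    /* %ecx = address of 'anchor' */
        movdqa  old_const - anchor(%ecx), %xmm0
        ret

/* New pattern: 32-bit code can hold full 32-bit pointers, so the
 * constant can move to .rodata and be addressed absolutely. */
        .section .rodata
        .align  16
new_const:
        .quad   0x0000000154442bd4, 0   /* placeholder data */

        .text
        .globl  load_new
load_new:
        movdqa  new_const, %xmm0
        ret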
Parent: c07f7c29d1
Commit: 5e1a646204
@@ -41,6 +41,7 @@
 #include <asm/inst.h>
 
 
+.section .rodata
 .align 16
 /*
  * [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4
@@ -111,19 +112,13 @@ ENTRY(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
         pxor    CONSTANT, %xmm1
         sub     $0x40, LEN
         add     $0x40, BUF
-#ifndef __x86_64__
-        /* This is for position independent code(-fPIC) support for 32bit */
-        call    delta
-delta:
-        pop     %ecx
-#endif
         cmp     $0x40, LEN
         jb      less_64
 
 #ifdef __x86_64__
         movdqa  .Lconstant_R2R1(%rip), CONSTANT
 #else
-        movdqa  .Lconstant_R2R1 - delta(%ecx), CONSTANT
+        movdqa  .Lconstant_R2R1, CONSTANT
 #endif
 
 loop_64:/* 64 bytes Full cache line folding */
@@ -172,7 +167,7 @@ less_64:/* Folding cache line into 128bit */
 #ifdef __x86_64__
         movdqa  .Lconstant_R4R3(%rip), CONSTANT
 #else
-        movdqa  .Lconstant_R4R3 - delta(%ecx), CONSTANT
+        movdqa  .Lconstant_R4R3, CONSTANT
 #endif
         prefetchnta     (BUF)
 
@@ -220,8 +215,8 @@ fold_64:
         movdqa  .Lconstant_R5(%rip), CONSTANT
         movdqa  .Lconstant_mask32(%rip), %xmm3
 #else
-        movdqa  .Lconstant_R5 - delta(%ecx), CONSTANT
-        movdqa  .Lconstant_mask32 - delta(%ecx), %xmm3
+        movdqa  .Lconstant_R5, CONSTANT
+        movdqa  .Lconstant_mask32, %xmm3
 #endif
         psrldq  $0x04, %xmm2
         pand    %xmm3, %xmm1
@@ -232,7 +227,7 @@ fold_64:
 #ifdef __x86_64__
         movdqa  .Lconstant_RUpoly(%rip), CONSTANT
 #else
-        movdqa  .Lconstant_RUpoly - delta(%ecx), CONSTANT
+        movdqa  .Lconstant_RUpoly, CONSTANT
 #endif
         movdqa  %xmm1, %xmm2
         pand    %xmm3, %xmm1