crypto: crc32-pclmul - remove useless relative addressing

In 32-bit mode, x86 instructions can encode full 32-bit absolute
addresses. Therefore, the code that copies the current address into the
%ecx register and then uses %ecx-relative addressing is useless; we can
just use absolute addressing.
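
As a condensed sketch of the change (the delta label, constant names and
the CONSTANT macro are the ones used in the patch below), the old 32-bit
path computed the load address at run time, while the new one lets the
assembler emit the absolute address:

	/* old: materialize the current address in %ecx, then
	 * address the constant relative to it */
	call	delta
delta:
	pop	%ecx
	movdqa	.Lconstant_R2R1 - delta(%ecx), CONSTANT

	/* new: a 32-bit displacement already reaches the symbol */
	movdqa	.Lconstant_R2R1, CONSTANT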

The processors keep a stack of return addresses for branch prediction.
If we use a call instruction and then pop the return address instead of
returning, it desynchronizes the return stack and causes branch
prediction misses.
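
Roughly, the mismatch looks like this (illustrative only; the return
stack buffer is a microarchitectural predictor, not architectural
state):

	call	delta		/* predictor pushes the address of the
				 * next instruction, expecting a ret */
delta:
	pop	%ecx		/* the return address is consumed by pop,
				 * so no ret ever matches the push */
	...
	ret			/* from here on, ret predictions pop
				 * stale entries and mispredict */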

This patch also moves the data to the .rodata section.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Authored by Mikulas Patocka on 2017-09-06 22:41:21 -04:00; committed by Herbert Xu
Parent: c07f7c29d1
Commit: 5e1a646204
1 changed file with 6 additions and 11 deletions


arch/x86/crypto/crc32-pclmul_asm.S

@@ -41,6 +41,7 @@
 #include <asm/inst.h>
 
 
+.section .rodata
 .align 16
 /*
  * [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4
@@ -111,19 +112,13 @@ ENTRY(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
 	pxor    CONSTANT, %xmm1
 	sub     $0x40, LEN
 	add     $0x40, BUF
-#ifndef __x86_64__
-	/* This is for position independent code(-fPIC) support for 32bit */
-	call    delta
-delta:
-	pop     %ecx
-#endif
 	cmp     $0x40, LEN
 	jb      less_64
 
 #ifdef __x86_64__
 	movdqa .Lconstant_R2R1(%rip), CONSTANT
 #else
-	movdqa .Lconstant_R2R1 - delta(%ecx), CONSTANT
+	movdqa .Lconstant_R2R1, CONSTANT
 #endif
 
 loop_64:/* 64 bytes Full cache line folding */
@@ -172,7 +167,7 @@ less_64:/* Folding cache line into 128bit */
 #ifdef __x86_64__
 	movdqa .Lconstant_R4R3(%rip), CONSTANT
 #else
-	movdqa .Lconstant_R4R3 - delta(%ecx), CONSTANT
+	movdqa .Lconstant_R4R3, CONSTANT
 #endif
 
 	prefetchnta     (BUF)
@@ -220,8 +215,8 @@ fold_64:
 	movdqa  .Lconstant_R5(%rip), CONSTANT
 	movdqa  .Lconstant_mask32(%rip), %xmm3
 #else
-	movdqa  .Lconstant_R5 - delta(%ecx), CONSTANT
-	movdqa  .Lconstant_mask32 - delta(%ecx), %xmm3
+	movdqa  .Lconstant_R5, CONSTANT
+	movdqa  .Lconstant_mask32, %xmm3
 #endif
 	psrldq  $0x04, %xmm2
 	pand    %xmm3, %xmm1
@@ -232,7 +227,7 @@ fold_64:
 #ifdef __x86_64__
 	movdqa  .Lconstant_RUpoly(%rip), CONSTANT
 #else
-	movdqa  .Lconstant_RUpoly - delta(%ecx), CONSTANT
+	movdqa  .Lconstant_RUpoly, CONSTANT
 #endif
 	movdqa  %xmm1, %xmm2
 	pand    %xmm3, %xmm1