crypto: aesni - Fix out-of-bounds access of the AAD buffer in generic-gcm-aesni
The aesni_gcm_enc/dec functions can access memory after the end of
the AAD buffer if the AAD length is not a multiple of 4 bytes.
It didn't matter with rfc4106-gcm-aesni as in that case the AAD was
always followed by the 8 byte IV, but that is no longer the case with
generic-gcm-aesni. This can potentially result in accessing a page that
is not mapped, thus crashing the machine. This patch fixes
that by reading the last <16 byte block of the AAD byte-by-byte and,
if the block is at least 8 bytes long, optionally via an 8-byte load.
Fixes: 0487ccac ("crypto: aesni - make non-AVX AES-GCM work with any aadlen")
Cc: <stable@vger.kernel.org>
Signed-off-by: Junaid Shahid <junaids@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
Родитель
b20209c91e
Коммит
1ecdd37e30
|
@ -89,30 +89,6 @@ SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
|
|||
ALL_F: .octa 0xffffffffffffffffffffffffffffffff
|
||||
.octa 0x00000000000000000000000000000000
|
||||
|
||||
.section .rodata
|
||||
.align 16
|
||||
.type aad_shift_arr, @object
|
||||
.size aad_shift_arr, 272
|
||||
aad_shift_arr:
|
||||
.octa 0xffffffffffffffffffffffffffffffff
|
||||
.octa 0xffffffffffffffffffffffffffffff0C
|
||||
.octa 0xffffffffffffffffffffffffffff0D0C
|
||||
.octa 0xffffffffffffffffffffffffff0E0D0C
|
||||
.octa 0xffffffffffffffffffffffff0F0E0D0C
|
||||
.octa 0xffffffffffffffffffffff0C0B0A0908
|
||||
.octa 0xffffffffffffffffffff0D0C0B0A0908
|
||||
.octa 0xffffffffffffffffff0E0D0C0B0A0908
|
||||
.octa 0xffffffffffffffff0F0E0D0C0B0A0908
|
||||
.octa 0xffffffffffffff0C0B0A090807060504
|
||||
.octa 0xffffffffffff0D0C0B0A090807060504
|
||||
.octa 0xffffffffff0E0D0C0B0A090807060504
|
||||
.octa 0xffffffff0F0E0D0C0B0A090807060504
|
||||
.octa 0xffffff0C0B0A09080706050403020100
|
||||
.octa 0xffff0D0C0B0A09080706050403020100
|
||||
.octa 0xff0E0D0C0B0A09080706050403020100
|
||||
.octa 0x0F0E0D0C0B0A09080706050403020100
|
||||
|
||||
|
||||
.text
|
||||
|
||||
|
||||
|
@ -303,62 +279,30 @@ _done_read_partial_block_\@:
|
|||
XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
|
||||
MOVADQ SHUF_MASK(%rip), %xmm14
|
||||
mov arg7, %r10 # %r10 = AAD
|
||||
mov arg8, %r12 # %r12 = aadLen
|
||||
mov %r12, %r11
|
||||
mov arg8, %r11 # %r11 = aadLen
|
||||
pxor %xmm\i, %xmm\i
|
||||
pxor \XMM2, \XMM2
|
||||
|
||||
cmp $16, %r11
|
||||
jl _get_AAD_rest8\num_initial_blocks\operation
|
||||
jl _get_AAD_rest\num_initial_blocks\operation
|
||||
_get_AAD_blocks\num_initial_blocks\operation:
|
||||
movdqu (%r10), %xmm\i
|
||||
PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
|
||||
pxor %xmm\i, \XMM2
|
||||
GHASH_MUL \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
|
||||
add $16, %r10
|
||||
sub $16, %r12
|
||||
sub $16, %r11
|
||||
cmp $16, %r11
|
||||
jge _get_AAD_blocks\num_initial_blocks\operation
|
||||
|
||||
movdqu \XMM2, %xmm\i
|
||||
|
||||
/* read the last <16B of AAD */
|
||||
_get_AAD_rest\num_initial_blocks\operation:
|
||||
cmp $0, %r11
|
||||
je _get_AAD_done\num_initial_blocks\operation
|
||||
|
||||
pxor %xmm\i,%xmm\i
|
||||
|
||||
/* read the last <16B of AAD. since we have at least 4B of
|
||||
data right after the AAD (the ICV, and maybe some CT), we can
|
||||
read 4B/8B blocks safely, and then get rid of the extra stuff */
|
||||
_get_AAD_rest8\num_initial_blocks\operation:
|
||||
cmp $4, %r11
|
||||
jle _get_AAD_rest4\num_initial_blocks\operation
|
||||
movq (%r10), \TMP1
|
||||
add $8, %r10
|
||||
sub $8, %r11
|
||||
pslldq $8, \TMP1
|
||||
psrldq $8, %xmm\i
|
||||
pxor \TMP1, %xmm\i
|
||||
jmp _get_AAD_rest8\num_initial_blocks\operation
|
||||
_get_AAD_rest4\num_initial_blocks\operation:
|
||||
cmp $0, %r11
|
||||
jle _get_AAD_rest0\num_initial_blocks\operation
|
||||
mov (%r10), %eax
|
||||
movq %rax, \TMP1
|
||||
add $4, %r10
|
||||
sub $4, %r10
|
||||
pslldq $12, \TMP1
|
||||
psrldq $4, %xmm\i
|
||||
pxor \TMP1, %xmm\i
|
||||
_get_AAD_rest0\num_initial_blocks\operation:
|
||||
/* finalize: shift out the extra bytes we read, and align
|
||||
left. since pslldq can only shift by an immediate, we use
|
||||
vpshufb and an array of shuffle masks */
|
||||
movq %r12, %r11
|
||||
salq $4, %r11
|
||||
movdqu aad_shift_arr(%r11), \TMP1
|
||||
PSHUFB_XMM \TMP1, %xmm\i
|
||||
_get_AAD_rest_final\num_initial_blocks\operation:
|
||||
READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i
|
||||
PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
|
||||
pxor \XMM2, %xmm\i
|
||||
GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
|
||||
|
@ -562,62 +506,30 @@ _initial_blocks_done\num_initial_blocks\operation:
|
|||
XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
|
||||
MOVADQ SHUF_MASK(%rip), %xmm14
|
||||
mov arg7, %r10 # %r10 = AAD
|
||||
mov arg8, %r12 # %r12 = aadLen
|
||||
mov %r12, %r11
|
||||
mov arg8, %r11 # %r11 = aadLen
|
||||
pxor %xmm\i, %xmm\i
|
||||
pxor \XMM2, \XMM2
|
||||
|
||||
cmp $16, %r11
|
||||
jl _get_AAD_rest8\num_initial_blocks\operation
|
||||
jl _get_AAD_rest\num_initial_blocks\operation
|
||||
_get_AAD_blocks\num_initial_blocks\operation:
|
||||
movdqu (%r10), %xmm\i
|
||||
PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
|
||||
pxor %xmm\i, \XMM2
|
||||
GHASH_MUL \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
|
||||
add $16, %r10
|
||||
sub $16, %r12
|
||||
sub $16, %r11
|
||||
cmp $16, %r11
|
||||
jge _get_AAD_blocks\num_initial_blocks\operation
|
||||
|
||||
movdqu \XMM2, %xmm\i
|
||||
|
||||
/* read the last <16B of AAD */
|
||||
_get_AAD_rest\num_initial_blocks\operation:
|
||||
cmp $0, %r11
|
||||
je _get_AAD_done\num_initial_blocks\operation
|
||||
|
||||
pxor %xmm\i,%xmm\i
|
||||
|
||||
/* read the last <16B of AAD. since we have at least 4B of
|
||||
data right after the AAD (the ICV, and maybe some PT), we can
|
||||
read 4B/8B blocks safely, and then get rid of the extra stuff */
|
||||
_get_AAD_rest8\num_initial_blocks\operation:
|
||||
cmp $4, %r11
|
||||
jle _get_AAD_rest4\num_initial_blocks\operation
|
||||
movq (%r10), \TMP1
|
||||
add $8, %r10
|
||||
sub $8, %r11
|
||||
pslldq $8, \TMP1
|
||||
psrldq $8, %xmm\i
|
||||
pxor \TMP1, %xmm\i
|
||||
jmp _get_AAD_rest8\num_initial_blocks\operation
|
||||
_get_AAD_rest4\num_initial_blocks\operation:
|
||||
cmp $0, %r11
|
||||
jle _get_AAD_rest0\num_initial_blocks\operation
|
||||
mov (%r10), %eax
|
||||
movq %rax, \TMP1
|
||||
add $4, %r10
|
||||
sub $4, %r10
|
||||
pslldq $12, \TMP1
|
||||
psrldq $4, %xmm\i
|
||||
pxor \TMP1, %xmm\i
|
||||
_get_AAD_rest0\num_initial_blocks\operation:
|
||||
/* finalize: shift out the extra bytes we read, and align
|
||||
left. since pslldq can only shift by an immediate, we use
|
||||
vpshufb and an array of shuffle masks */
|
||||
movq %r12, %r11
|
||||
salq $4, %r11
|
||||
movdqu aad_shift_arr(%r11), \TMP1
|
||||
PSHUFB_XMM \TMP1, %xmm\i
|
||||
_get_AAD_rest_final\num_initial_blocks\operation:
|
||||
READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i
|
||||
PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
|
||||
pxor \XMM2, %xmm\i
|
||||
GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
|
||||
|
|
Загрузка…
Ссылка в новой задаче