crypto: arm64/aes-blk - revert NEON yield for skciphers
The reasoning of commit f10dc56c64
("crypto: arm64 - revert NEON yield for fast AEAD implementations") applies equally to skciphers: the walk API already guarantees that the input size of each call into the NEON code is bounded to the size of a page, and so there is no need for an additional TIF_NEED_RESCHED flag check inside the inner loop. So revert the skcipher changes to aes-modes.S (but retain the mac ones). This partially reverts commit 0c8f838a52.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
Родитель
557ecb4543
Коммит
6e7de6af91
|
@ -14,12 +14,12 @@
|
||||||
.align 4
|
.align 4
|
||||||
|
|
||||||
aes_encrypt_block4x:
|
aes_encrypt_block4x:
|
||||||
encrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
|
encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
|
||||||
ret
|
ret
|
||||||
ENDPROC(aes_encrypt_block4x)
|
ENDPROC(aes_encrypt_block4x)
|
||||||
|
|
||||||
aes_decrypt_block4x:
|
aes_decrypt_block4x:
|
||||||
decrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
|
decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
|
||||||
ret
|
ret
|
||||||
ENDPROC(aes_decrypt_block4x)
|
ENDPROC(aes_decrypt_block4x)
|
||||||
|
|
||||||
|
@ -31,71 +31,57 @@ ENDPROC(aes_decrypt_block4x)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
AES_ENTRY(aes_ecb_encrypt)
|
AES_ENTRY(aes_ecb_encrypt)
|
||||||
frame_push 5
|
stp x29, x30, [sp, #-16]!
|
||||||
|
mov x29, sp
|
||||||
|
|
||||||
mov x19, x0
|
enc_prepare w3, x2, x5
|
||||||
mov x20, x1
|
|
||||||
mov x21, x2
|
|
||||||
mov x22, x3
|
|
||||||
mov x23, x4
|
|
||||||
|
|
||||||
.Lecbencrestart:
|
|
||||||
enc_prepare w22, x21, x5
|
|
||||||
|
|
||||||
.LecbencloopNx:
|
.LecbencloopNx:
|
||||||
subs w23, w23, #4
|
subs w4, w4, #4
|
||||||
bmi .Lecbenc1x
|
bmi .Lecbenc1x
|
||||||
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
|
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
|
||||||
bl aes_encrypt_block4x
|
bl aes_encrypt_block4x
|
||||||
st1 {v0.16b-v3.16b}, [x19], #64
|
st1 {v0.16b-v3.16b}, [x0], #64
|
||||||
cond_yield_neon .Lecbencrestart
|
|
||||||
b .LecbencloopNx
|
b .LecbencloopNx
|
||||||
.Lecbenc1x:
|
.Lecbenc1x:
|
||||||
adds w23, w23, #4
|
adds w4, w4, #4
|
||||||
beq .Lecbencout
|
beq .Lecbencout
|
||||||
.Lecbencloop:
|
.Lecbencloop:
|
||||||
ld1 {v0.16b}, [x20], #16 /* get next pt block */
|
ld1 {v0.16b}, [x1], #16 /* get next pt block */
|
||||||
encrypt_block v0, w22, x21, x5, w6
|
encrypt_block v0, w3, x2, x5, w6
|
||||||
st1 {v0.16b}, [x19], #16
|
st1 {v0.16b}, [x0], #16
|
||||||
subs w23, w23, #1
|
subs w4, w4, #1
|
||||||
bne .Lecbencloop
|
bne .Lecbencloop
|
||||||
.Lecbencout:
|
.Lecbencout:
|
||||||
frame_pop
|
ldp x29, x30, [sp], #16
|
||||||
ret
|
ret
|
||||||
AES_ENDPROC(aes_ecb_encrypt)
|
AES_ENDPROC(aes_ecb_encrypt)
|
||||||
|
|
||||||
|
|
||||||
AES_ENTRY(aes_ecb_decrypt)
|
AES_ENTRY(aes_ecb_decrypt)
|
||||||
frame_push 5
|
stp x29, x30, [sp, #-16]!
|
||||||
|
mov x29, sp
|
||||||
|
|
||||||
mov x19, x0
|
dec_prepare w3, x2, x5
|
||||||
mov x20, x1
|
|
||||||
mov x21, x2
|
|
||||||
mov x22, x3
|
|
||||||
mov x23, x4
|
|
||||||
|
|
||||||
.Lecbdecrestart:
|
|
||||||
dec_prepare w22, x21, x5
|
|
||||||
|
|
||||||
.LecbdecloopNx:
|
.LecbdecloopNx:
|
||||||
subs w23, w23, #4
|
subs w4, w4, #4
|
||||||
bmi .Lecbdec1x
|
bmi .Lecbdec1x
|
||||||
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
|
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
|
||||||
bl aes_decrypt_block4x
|
bl aes_decrypt_block4x
|
||||||
st1 {v0.16b-v3.16b}, [x19], #64
|
st1 {v0.16b-v3.16b}, [x0], #64
|
||||||
cond_yield_neon .Lecbdecrestart
|
|
||||||
b .LecbdecloopNx
|
b .LecbdecloopNx
|
||||||
.Lecbdec1x:
|
.Lecbdec1x:
|
||||||
adds w23, w23, #4
|
adds w4, w4, #4
|
||||||
beq .Lecbdecout
|
beq .Lecbdecout
|
||||||
.Lecbdecloop:
|
.Lecbdecloop:
|
||||||
ld1 {v0.16b}, [x20], #16 /* get next ct block */
|
ld1 {v0.16b}, [x1], #16 /* get next ct block */
|
||||||
decrypt_block v0, w22, x21, x5, w6
|
decrypt_block v0, w3, x2, x5, w6
|
||||||
st1 {v0.16b}, [x19], #16
|
st1 {v0.16b}, [x0], #16
|
||||||
subs w23, w23, #1
|
subs w4, w4, #1
|
||||||
bne .Lecbdecloop
|
bne .Lecbdecloop
|
||||||
.Lecbdecout:
|
.Lecbdecout:
|
||||||
frame_pop
|
ldp x29, x30, [sp], #16
|
||||||
ret
|
ret
|
||||||
AES_ENDPROC(aes_ecb_decrypt)
|
AES_ENDPROC(aes_ecb_decrypt)
|
||||||
|
|
||||||
|
@ -108,100 +94,78 @@ AES_ENDPROC(aes_ecb_decrypt)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
AES_ENTRY(aes_cbc_encrypt)
|
AES_ENTRY(aes_cbc_encrypt)
|
||||||
frame_push 6
|
ld1 {v4.16b}, [x5] /* get iv */
|
||||||
|
enc_prepare w3, x2, x6
|
||||||
mov x19, x0
|
|
||||||
mov x20, x1
|
|
||||||
mov x21, x2
|
|
||||||
mov x22, x3
|
|
||||||
mov x23, x4
|
|
||||||
mov x24, x5
|
|
||||||
|
|
||||||
.Lcbcencrestart:
|
|
||||||
ld1 {v4.16b}, [x24] /* get iv */
|
|
||||||
enc_prepare w22, x21, x6
|
|
||||||
|
|
||||||
.Lcbcencloop4x:
|
.Lcbcencloop4x:
|
||||||
subs w23, w23, #4
|
subs w4, w4, #4
|
||||||
bmi .Lcbcenc1x
|
bmi .Lcbcenc1x
|
||||||
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
|
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
|
||||||
eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */
|
eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */
|
||||||
encrypt_block v0, w22, x21, x6, w7
|
encrypt_block v0, w3, x2, x6, w7
|
||||||
eor v1.16b, v1.16b, v0.16b
|
eor v1.16b, v1.16b, v0.16b
|
||||||
encrypt_block v1, w22, x21, x6, w7
|
encrypt_block v1, w3, x2, x6, w7
|
||||||
eor v2.16b, v2.16b, v1.16b
|
eor v2.16b, v2.16b, v1.16b
|
||||||
encrypt_block v2, w22, x21, x6, w7
|
encrypt_block v2, w3, x2, x6, w7
|
||||||
eor v3.16b, v3.16b, v2.16b
|
eor v3.16b, v3.16b, v2.16b
|
||||||
encrypt_block v3, w22, x21, x6, w7
|
encrypt_block v3, w3, x2, x6, w7
|
||||||
st1 {v0.16b-v3.16b}, [x19], #64
|
st1 {v0.16b-v3.16b}, [x0], #64
|
||||||
mov v4.16b, v3.16b
|
mov v4.16b, v3.16b
|
||||||
st1 {v4.16b}, [x24] /* return iv */
|
|
||||||
cond_yield_neon .Lcbcencrestart
|
|
||||||
b .Lcbcencloop4x
|
b .Lcbcencloop4x
|
||||||
.Lcbcenc1x:
|
.Lcbcenc1x:
|
||||||
adds w23, w23, #4
|
adds w4, w4, #4
|
||||||
beq .Lcbcencout
|
beq .Lcbcencout
|
||||||
.Lcbcencloop:
|
.Lcbcencloop:
|
||||||
ld1 {v0.16b}, [x20], #16 /* get next pt block */
|
ld1 {v0.16b}, [x1], #16 /* get next pt block */
|
||||||
eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */
|
eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */
|
||||||
encrypt_block v4, w22, x21, x6, w7
|
encrypt_block v4, w3, x2, x6, w7
|
||||||
st1 {v4.16b}, [x19], #16
|
st1 {v4.16b}, [x0], #16
|
||||||
subs w23, w23, #1
|
subs w4, w4, #1
|
||||||
bne .Lcbcencloop
|
bne .Lcbcencloop
|
||||||
.Lcbcencout:
|
.Lcbcencout:
|
||||||
st1 {v4.16b}, [x24] /* return iv */
|
st1 {v4.16b}, [x5] /* return iv */
|
||||||
frame_pop
|
|
||||||
ret
|
ret
|
||||||
AES_ENDPROC(aes_cbc_encrypt)
|
AES_ENDPROC(aes_cbc_encrypt)
|
||||||
|
|
||||||
|
|
||||||
AES_ENTRY(aes_cbc_decrypt)
|
AES_ENTRY(aes_cbc_decrypt)
|
||||||
frame_push 6
|
stp x29, x30, [sp, #-16]!
|
||||||
|
mov x29, sp
|
||||||
|
|
||||||
mov x19, x0
|
ld1 {v7.16b}, [x5] /* get iv */
|
||||||
mov x20, x1
|
dec_prepare w3, x2, x6
|
||||||
mov x21, x2
|
|
||||||
mov x22, x3
|
|
||||||
mov x23, x4
|
|
||||||
mov x24, x5
|
|
||||||
|
|
||||||
.Lcbcdecrestart:
|
|
||||||
ld1 {v7.16b}, [x24] /* get iv */
|
|
||||||
dec_prepare w22, x21, x6
|
|
||||||
|
|
||||||
.LcbcdecloopNx:
|
.LcbcdecloopNx:
|
||||||
subs w23, w23, #4
|
subs w4, w4, #4
|
||||||
bmi .Lcbcdec1x
|
bmi .Lcbcdec1x
|
||||||
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
|
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
|
||||||
mov v4.16b, v0.16b
|
mov v4.16b, v0.16b
|
||||||
mov v5.16b, v1.16b
|
mov v5.16b, v1.16b
|
||||||
mov v6.16b, v2.16b
|
mov v6.16b, v2.16b
|
||||||
bl aes_decrypt_block4x
|
bl aes_decrypt_block4x
|
||||||
sub x20, x20, #16
|
sub x1, x1, #16
|
||||||
eor v0.16b, v0.16b, v7.16b
|
eor v0.16b, v0.16b, v7.16b
|
||||||
eor v1.16b, v1.16b, v4.16b
|
eor v1.16b, v1.16b, v4.16b
|
||||||
ld1 {v7.16b}, [x20], #16 /* reload 1 ct block */
|
ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
|
||||||
eor v2.16b, v2.16b, v5.16b
|
eor v2.16b, v2.16b, v5.16b
|
||||||
eor v3.16b, v3.16b, v6.16b
|
eor v3.16b, v3.16b, v6.16b
|
||||||
st1 {v0.16b-v3.16b}, [x19], #64
|
st1 {v0.16b-v3.16b}, [x0], #64
|
||||||
st1 {v7.16b}, [x24] /* return iv */
|
|
||||||
cond_yield_neon .Lcbcdecrestart
|
|
||||||
b .LcbcdecloopNx
|
b .LcbcdecloopNx
|
||||||
.Lcbcdec1x:
|
.Lcbcdec1x:
|
||||||
adds w23, w23, #4
|
adds w4, w4, #4
|
||||||
beq .Lcbcdecout
|
beq .Lcbcdecout
|
||||||
.Lcbcdecloop:
|
.Lcbcdecloop:
|
||||||
ld1 {v1.16b}, [x20], #16 /* get next ct block */
|
ld1 {v1.16b}, [x1], #16 /* get next ct block */
|
||||||
mov v0.16b, v1.16b /* ...and copy to v0 */
|
mov v0.16b, v1.16b /* ...and copy to v0 */
|
||||||
decrypt_block v0, w22, x21, x6, w7
|
decrypt_block v0, w3, x2, x6, w7
|
||||||
eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
|
eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
|
||||||
mov v7.16b, v1.16b /* ct is next iv */
|
mov v7.16b, v1.16b /* ct is next iv */
|
||||||
st1 {v0.16b}, [x19], #16
|
st1 {v0.16b}, [x0], #16
|
||||||
subs w23, w23, #1
|
subs w4, w4, #1
|
||||||
bne .Lcbcdecloop
|
bne .Lcbcdecloop
|
||||||
.Lcbcdecout:
|
.Lcbcdecout:
|
||||||
st1 {v7.16b}, [x24] /* return iv */
|
st1 {v7.16b}, [x5] /* return iv */
|
||||||
frame_pop
|
ldp x29, x30, [sp], #16
|
||||||
ret
|
ret
|
||||||
AES_ENDPROC(aes_cbc_decrypt)
|
AES_ENDPROC(aes_cbc_decrypt)
|
||||||
|
|
||||||
|
@ -212,26 +176,19 @@ AES_ENDPROC(aes_cbc_decrypt)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
AES_ENTRY(aes_ctr_encrypt)
|
AES_ENTRY(aes_ctr_encrypt)
|
||||||
frame_push 6
|
stp x29, x30, [sp, #-16]!
|
||||||
|
mov x29, sp
|
||||||
|
|
||||||
mov x19, x0
|
enc_prepare w3, x2, x6
|
||||||
mov x20, x1
|
ld1 {v4.16b}, [x5]
|
||||||
mov x21, x2
|
|
||||||
mov x22, x3
|
|
||||||
mov x23, x4
|
|
||||||
mov x24, x5
|
|
||||||
|
|
||||||
.Lctrrestart:
|
|
||||||
enc_prepare w22, x21, x6
|
|
||||||
ld1 {v4.16b}, [x24]
|
|
||||||
|
|
||||||
umov x6, v4.d[1] /* keep swabbed ctr in reg */
|
umov x6, v4.d[1] /* keep swabbed ctr in reg */
|
||||||
rev x6, x6
|
rev x6, x6
|
||||||
|
cmn w6, w4 /* 32 bit overflow? */
|
||||||
|
bcs .Lctrloop
|
||||||
.LctrloopNx:
|
.LctrloopNx:
|
||||||
subs w23, w23, #4
|
subs w4, w4, #4
|
||||||
bmi .Lctr1x
|
bmi .Lctr1x
|
||||||
cmn w6, #4 /* 32 bit overflow? */
|
|
||||||
bcs .Lctr1x
|
|
||||||
add w7, w6, #1
|
add w7, w6, #1
|
||||||
mov v0.16b, v4.16b
|
mov v0.16b, v4.16b
|
||||||
add w8, w6, #2
|
add w8, w6, #2
|
||||||
|
@ -245,27 +202,25 @@ AES_ENTRY(aes_ctr_encrypt)
|
||||||
rev w9, w9
|
rev w9, w9
|
||||||
mov v2.s[3], w8
|
mov v2.s[3], w8
|
||||||
mov v3.s[3], w9
|
mov v3.s[3], w9
|
||||||
ld1 {v5.16b-v7.16b}, [x20], #48 /* get 3 input blocks */
|
ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
|
||||||
bl aes_encrypt_block4x
|
bl aes_encrypt_block4x
|
||||||
eor v0.16b, v5.16b, v0.16b
|
eor v0.16b, v5.16b, v0.16b
|
||||||
ld1 {v5.16b}, [x20], #16 /* get 1 input block */
|
ld1 {v5.16b}, [x1], #16 /* get 1 input block */
|
||||||
eor v1.16b, v6.16b, v1.16b
|
eor v1.16b, v6.16b, v1.16b
|
||||||
eor v2.16b, v7.16b, v2.16b
|
eor v2.16b, v7.16b, v2.16b
|
||||||
eor v3.16b, v5.16b, v3.16b
|
eor v3.16b, v5.16b, v3.16b
|
||||||
st1 {v0.16b-v3.16b}, [x19], #64
|
st1 {v0.16b-v3.16b}, [x0], #64
|
||||||
add x6, x6, #4
|
add x6, x6, #4
|
||||||
rev x7, x6
|
rev x7, x6
|
||||||
ins v4.d[1], x7
|
ins v4.d[1], x7
|
||||||
cbz w23, .Lctrout
|
cbz w4, .Lctrout
|
||||||
st1 {v4.16b}, [x24] /* return next CTR value */
|
|
||||||
cond_yield_neon .Lctrrestart
|
|
||||||
b .LctrloopNx
|
b .LctrloopNx
|
||||||
.Lctr1x:
|
.Lctr1x:
|
||||||
adds w23, w23, #4
|
adds w4, w4, #4
|
||||||
beq .Lctrout
|
beq .Lctrout
|
||||||
.Lctrloop:
|
.Lctrloop:
|
||||||
mov v0.16b, v4.16b
|
mov v0.16b, v4.16b
|
||||||
encrypt_block v0, w22, x21, x8, w7
|
encrypt_block v0, w3, x2, x8, w7
|
||||||
|
|
||||||
adds x6, x6, #1 /* increment BE ctr */
|
adds x6, x6, #1 /* increment BE ctr */
|
||||||
rev x7, x6
|
rev x7, x6
|
||||||
|
@ -273,22 +228,22 @@ AES_ENTRY(aes_ctr_encrypt)
|
||||||
bcs .Lctrcarry /* overflow? */
|
bcs .Lctrcarry /* overflow? */
|
||||||
|
|
||||||
.Lctrcarrydone:
|
.Lctrcarrydone:
|
||||||
subs w23, w23, #1
|
subs w4, w4, #1
|
||||||
bmi .Lctrtailblock /* blocks <0 means tail block */
|
bmi .Lctrtailblock /* blocks <0 means tail block */
|
||||||
ld1 {v3.16b}, [x20], #16
|
ld1 {v3.16b}, [x1], #16
|
||||||
eor v3.16b, v0.16b, v3.16b
|
eor v3.16b, v0.16b, v3.16b
|
||||||
st1 {v3.16b}, [x19], #16
|
st1 {v3.16b}, [x0], #16
|
||||||
bne .Lctrloop
|
bne .Lctrloop
|
||||||
|
|
||||||
.Lctrout:
|
.Lctrout:
|
||||||
st1 {v4.16b}, [x24] /* return next CTR value */
|
st1 {v4.16b}, [x5] /* return next CTR value */
|
||||||
.Lctrret:
|
ldp x29, x30, [sp], #16
|
||||||
frame_pop
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
.Lctrtailblock:
|
.Lctrtailblock:
|
||||||
st1 {v0.16b}, [x19]
|
st1 {v0.16b}, [x0]
|
||||||
b .Lctrret
|
ldp x29, x30, [sp], #16
|
||||||
|
ret
|
||||||
|
|
||||||
.Lctrcarry:
|
.Lctrcarry:
|
||||||
umov x7, v4.d[0] /* load upper word of ctr */
|
umov x7, v4.d[0] /* load upper word of ctr */
|
||||||
|
@ -321,16 +276,10 @@ CPU_LE( .quad 1, 0x87 )
|
||||||
CPU_BE( .quad 0x87, 1 )
|
CPU_BE( .quad 0x87, 1 )
|
||||||
|
|
||||||
AES_ENTRY(aes_xts_encrypt)
|
AES_ENTRY(aes_xts_encrypt)
|
||||||
frame_push 6
|
stp x29, x30, [sp, #-16]!
|
||||||
|
mov x29, sp
|
||||||
|
|
||||||
mov x19, x0
|
ld1 {v4.16b}, [x6]
|
||||||
mov x20, x1
|
|
||||||
mov x21, x2
|
|
||||||
mov x22, x3
|
|
||||||
mov x23, x4
|
|
||||||
mov x24, x6
|
|
||||||
|
|
||||||
ld1 {v4.16b}, [x24]
|
|
||||||
cbz w7, .Lxtsencnotfirst
|
cbz w7, .Lxtsencnotfirst
|
||||||
|
|
||||||
enc_prepare w3, x5, x8
|
enc_prepare w3, x5, x8
|
||||||
|
@ -339,17 +288,15 @@ AES_ENTRY(aes_xts_encrypt)
|
||||||
ldr q7, .Lxts_mul_x
|
ldr q7, .Lxts_mul_x
|
||||||
b .LxtsencNx
|
b .LxtsencNx
|
||||||
|
|
||||||
.Lxtsencrestart:
|
|
||||||
ld1 {v4.16b}, [x24]
|
|
||||||
.Lxtsencnotfirst:
|
.Lxtsencnotfirst:
|
||||||
enc_prepare w22, x21, x8
|
enc_prepare w3, x2, x8
|
||||||
.LxtsencloopNx:
|
.LxtsencloopNx:
|
||||||
ldr q7, .Lxts_mul_x
|
ldr q7, .Lxts_mul_x
|
||||||
next_tweak v4, v4, v7, v8
|
next_tweak v4, v4, v7, v8
|
||||||
.LxtsencNx:
|
.LxtsencNx:
|
||||||
subs w23, w23, #4
|
subs w4, w4, #4
|
||||||
bmi .Lxtsenc1x
|
bmi .Lxtsenc1x
|
||||||
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
|
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
|
||||||
next_tweak v5, v4, v7, v8
|
next_tweak v5, v4, v7, v8
|
||||||
eor v0.16b, v0.16b, v4.16b
|
eor v0.16b, v0.16b, v4.16b
|
||||||
next_tweak v6, v5, v7, v8
|
next_tweak v6, v5, v7, v8
|
||||||
|
@ -362,43 +309,35 @@ AES_ENTRY(aes_xts_encrypt)
|
||||||
eor v0.16b, v0.16b, v4.16b
|
eor v0.16b, v0.16b, v4.16b
|
||||||
eor v1.16b, v1.16b, v5.16b
|
eor v1.16b, v1.16b, v5.16b
|
||||||
eor v2.16b, v2.16b, v6.16b
|
eor v2.16b, v2.16b, v6.16b
|
||||||
st1 {v0.16b-v3.16b}, [x19], #64
|
st1 {v0.16b-v3.16b}, [x0], #64
|
||||||
mov v4.16b, v7.16b
|
mov v4.16b, v7.16b
|
||||||
cbz w23, .Lxtsencout
|
cbz w4, .Lxtsencout
|
||||||
st1 {v4.16b}, [x24]
|
|
||||||
cond_yield_neon .Lxtsencrestart
|
|
||||||
b .LxtsencloopNx
|
b .LxtsencloopNx
|
||||||
.Lxtsenc1x:
|
.Lxtsenc1x:
|
||||||
adds w23, w23, #4
|
adds w4, w4, #4
|
||||||
beq .Lxtsencout
|
beq .Lxtsencout
|
||||||
.Lxtsencloop:
|
.Lxtsencloop:
|
||||||
ld1 {v1.16b}, [x20], #16
|
ld1 {v1.16b}, [x1], #16
|
||||||
eor v0.16b, v1.16b, v4.16b
|
eor v0.16b, v1.16b, v4.16b
|
||||||
encrypt_block v0, w22, x21, x8, w7
|
encrypt_block v0, w3, x2, x8, w7
|
||||||
eor v0.16b, v0.16b, v4.16b
|
eor v0.16b, v0.16b, v4.16b
|
||||||
st1 {v0.16b}, [x19], #16
|
st1 {v0.16b}, [x0], #16
|
||||||
subs w23, w23, #1
|
subs w4, w4, #1
|
||||||
beq .Lxtsencout
|
beq .Lxtsencout
|
||||||
next_tweak v4, v4, v7, v8
|
next_tweak v4, v4, v7, v8
|
||||||
b .Lxtsencloop
|
b .Lxtsencloop
|
||||||
.Lxtsencout:
|
.Lxtsencout:
|
||||||
st1 {v4.16b}, [x24]
|
st1 {v4.16b}, [x6]
|
||||||
frame_pop
|
ldp x29, x30, [sp], #16
|
||||||
ret
|
ret
|
||||||
AES_ENDPROC(aes_xts_encrypt)
|
AES_ENDPROC(aes_xts_encrypt)
|
||||||
|
|
||||||
|
|
||||||
AES_ENTRY(aes_xts_decrypt)
|
AES_ENTRY(aes_xts_decrypt)
|
||||||
frame_push 6
|
stp x29, x30, [sp, #-16]!
|
||||||
|
mov x29, sp
|
||||||
|
|
||||||
mov x19, x0
|
ld1 {v4.16b}, [x6]
|
||||||
mov x20, x1
|
|
||||||
mov x21, x2
|
|
||||||
mov x22, x3
|
|
||||||
mov x23, x4
|
|
||||||
mov x24, x6
|
|
||||||
|
|
||||||
ld1 {v4.16b}, [x24]
|
|
||||||
cbz w7, .Lxtsdecnotfirst
|
cbz w7, .Lxtsdecnotfirst
|
||||||
|
|
||||||
enc_prepare w3, x5, x8
|
enc_prepare w3, x5, x8
|
||||||
|
@ -407,17 +346,15 @@ AES_ENTRY(aes_xts_decrypt)
|
||||||
ldr q7, .Lxts_mul_x
|
ldr q7, .Lxts_mul_x
|
||||||
b .LxtsdecNx
|
b .LxtsdecNx
|
||||||
|
|
||||||
.Lxtsdecrestart:
|
|
||||||
ld1 {v4.16b}, [x24]
|
|
||||||
.Lxtsdecnotfirst:
|
.Lxtsdecnotfirst:
|
||||||
dec_prepare w22, x21, x8
|
dec_prepare w3, x2, x8
|
||||||
.LxtsdecloopNx:
|
.LxtsdecloopNx:
|
||||||
ldr q7, .Lxts_mul_x
|
ldr q7, .Lxts_mul_x
|
||||||
next_tweak v4, v4, v7, v8
|
next_tweak v4, v4, v7, v8
|
||||||
.LxtsdecNx:
|
.LxtsdecNx:
|
||||||
subs w23, w23, #4
|
subs w4, w4, #4
|
||||||
bmi .Lxtsdec1x
|
bmi .Lxtsdec1x
|
||||||
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
|
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
|
||||||
next_tweak v5, v4, v7, v8
|
next_tweak v5, v4, v7, v8
|
||||||
eor v0.16b, v0.16b, v4.16b
|
eor v0.16b, v0.16b, v4.16b
|
||||||
next_tweak v6, v5, v7, v8
|
next_tweak v6, v5, v7, v8
|
||||||
|
@ -430,28 +367,26 @@ AES_ENTRY(aes_xts_decrypt)
|
||||||
eor v0.16b, v0.16b, v4.16b
|
eor v0.16b, v0.16b, v4.16b
|
||||||
eor v1.16b, v1.16b, v5.16b
|
eor v1.16b, v1.16b, v5.16b
|
||||||
eor v2.16b, v2.16b, v6.16b
|
eor v2.16b, v2.16b, v6.16b
|
||||||
st1 {v0.16b-v3.16b}, [x19], #64
|
st1 {v0.16b-v3.16b}, [x0], #64
|
||||||
mov v4.16b, v7.16b
|
mov v4.16b, v7.16b
|
||||||
cbz w23, .Lxtsdecout
|
cbz w4, .Lxtsdecout
|
||||||
st1 {v4.16b}, [x24]
|
|
||||||
cond_yield_neon .Lxtsdecrestart
|
|
||||||
b .LxtsdecloopNx
|
b .LxtsdecloopNx
|
||||||
.Lxtsdec1x:
|
.Lxtsdec1x:
|
||||||
adds w23, w23, #4
|
adds w4, w4, #4
|
||||||
beq .Lxtsdecout
|
beq .Lxtsdecout
|
||||||
.Lxtsdecloop:
|
.Lxtsdecloop:
|
||||||
ld1 {v1.16b}, [x20], #16
|
ld1 {v1.16b}, [x1], #16
|
||||||
eor v0.16b, v1.16b, v4.16b
|
eor v0.16b, v1.16b, v4.16b
|
||||||
decrypt_block v0, w22, x21, x8, w7
|
decrypt_block v0, w3, x2, x8, w7
|
||||||
eor v0.16b, v0.16b, v4.16b
|
eor v0.16b, v0.16b, v4.16b
|
||||||
st1 {v0.16b}, [x19], #16
|
st1 {v0.16b}, [x0], #16
|
||||||
subs w23, w23, #1
|
subs w4, w4, #1
|
||||||
beq .Lxtsdecout
|
beq .Lxtsdecout
|
||||||
next_tweak v4, v4, v7, v8
|
next_tweak v4, v4, v7, v8
|
||||||
b .Lxtsdecloop
|
b .Lxtsdecloop
|
||||||
.Lxtsdecout:
|
.Lxtsdecout:
|
||||||
st1 {v4.16b}, [x24]
|
st1 {v4.16b}, [x6]
|
||||||
frame_pop
|
ldp x29, x30, [sp], #16
|
||||||
ret
|
ret
|
||||||
AES_ENDPROC(aes_xts_decrypt)
|
AES_ENDPROC(aes_xts_decrypt)
|
||||||
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче