/* Mirror of https://github.com/mozilla/pjs.git (file listing: 1651 lines, 53 KiB).
   Note: the hosting page labels this file "ArmAsm", but the content below is
   x86-64 AT&T-syntax assembly. */
/* ***** BEGIN LICENSE BLOCK *****
|
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
*
|
|
* The contents of this file are subject to the Mozilla Public License Version
|
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
* http://www.mozilla.org/MPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
* for the specific language governing rights and limitations under the
|
|
* License.
|
|
*
|
|
* The Original Code is Network Security Services.
|
|
*
|
|
* The Initial Developer of the Original Code is
|
|
* Red Hat Inc.
|
|
* Portions created by the Initial Developer are Copyright (C) 2009
|
|
* the Initial Developer. All Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
* Ulrich Drepper <drepper@redhat.com>
|
|
*
|
|
* Alternatively, the contents of this file may be used under the terms of
|
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
* of those above. If you wish to allow use of your version of this file only
|
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
* use your version of this file under the terms of the MPL, indicate your
|
|
* decision by deleting the provisions above and replace them with the notice
|
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
* the provisions above, a recipient may use your version of this file under
|
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
*
|
|
* ***** END LICENSE BLOCK ***** */
|
|
|
|
/* All routines below are emitted into the code section. */
.text
|
|
|
|
/* Offsets into the cipher context.
   NOTE(review): in the visible part of this file these two names are only
   referenced from commented-out instructions; the live code uses the literal
   values 16 and 48 instead.  Keep the literals and these values in sync. */
#define IV_OFFSET 16
|
|
#define EXPANDED_KEY_OFFSET 48
|
|
|
|
|
|
/* intel_aes_encrypt_init_128 -- build the AES-128 encryption key schedule.
 *
 *   in  %rdi : the key (16 bytes, read with an unaligned load)
 *   in  %rsi : buffer for expanded key (11 round keys = 176 bytes written)
 *
 * Round key 0 is the key itself.  Each aeskeygenassist/key_expansion128 pair
 * below appends one more round key and advances %rsi by 16.  %eax is cleared
 * up front because key_expansion128 takes its all-zero vector from it.
 * The AES-NI instructions are emitted as raw bytes; the intended mnemonic is
 * given next to each one.
 *
 * Clobbers: %eax, %rsi, %xmm1-%xmm3, flags.
 */
        .type   intel_aes_encrypt_init_128,@function
        .globl  intel_aes_encrypt_init_128
        .align  16
intel_aes_encrypt_init_128:
        movups  (%rdi), %xmm1                   /* xmm1 = user key = round key 0 */
        movups  %xmm1, (%rsi)
        leaq    16(%rsi), %rsi                  /* rsi -> slot for round key 1 */
        xorl    %eax, %eax                      /* zero source used by the helper */

        .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01     /* aeskeygenassist $0x01, %xmm1, %xmm2 */
        call    key_expansion128                /* round key 1 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02     /* aeskeygenassist $0x02, %xmm1, %xmm2 */
        call    key_expansion128                /* round key 2 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04     /* aeskeygenassist $0x04, %xmm1, %xmm2 */
        call    key_expansion128                /* round key 3 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08     /* aeskeygenassist $0x08, %xmm1, %xmm2 */
        call    key_expansion128                /* round key 4 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10     /* aeskeygenassist $0x10, %xmm1, %xmm2 */
        call    key_expansion128                /* round key 5 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20     /* aeskeygenassist $0x20, %xmm1, %xmm2 */
        call    key_expansion128                /* round key 6 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40     /* aeskeygenassist $0x40, %xmm1, %xmm2 */
        call    key_expansion128                /* round key 7 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80     /* aeskeygenassist $0x80, %xmm1, %xmm2 */
        call    key_expansion128                /* round key 8 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b     /* aeskeygenassist $0x1b, %xmm1, %xmm2 */
        call    key_expansion128                /* round key 9 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36     /* aeskeygenassist $0x36, %xmm1, %xmm2 */
        call    key_expansion128                /* round key 10 */

        ret
        .size   intel_aes_encrypt_init_128, .-intel_aes_encrypt_init_128
|
|
|
|
|
|
/* intel_aes_decrypt_init_128 -- build the AES-128 decryption key schedule.
 *
 *   in  %rdi : the key (16 bytes, read with an unaligned load)
 *   in  %rsi : buffer for expanded key (11 round keys = 176 bytes written)
 *
 * The schedule is generated exactly as for encryption, but every round key
 * 1..9 is overwritten in place with its aesimc (InvMixColumns) transform as
 * soon as it has been produced.  Round keys 0 and 10 are stored untouched.
 * %xmm1 always holds the untransformed key, so the next expansion step is
 * not affected by the in-place rewrite.
 *
 * Clobbers: %eax, %rsi, %xmm1-%xmm3, flags.
 */
        .type   intel_aes_decrypt_init_128,@function
        .globl  intel_aes_decrypt_init_128
        .align  16
intel_aes_decrypt_init_128:
        movups  (%rdi), %xmm1                   /* xmm1 = user key = round key 0 */
        movups  %xmm1, (%rsi)
        leaq    16(%rsi), %rsi                  /* rsi -> slot for round key 1 */
        xorl    %eax, %eax                      /* zero source used by the helper */

        /* round key 1 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01     /* aeskeygenassist $0x01, %xmm1, %xmm2 */
        call    key_expansion128
        .byte 0x66,0x0f,0x38,0xdb,0xd1          /* aesimc %xmm1, %xmm2 */
        movups  %xmm2, -16(%rsi)                /* replace the key just stored */

        /* round key 2 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02     /* aeskeygenassist $0x02, %xmm1, %xmm2 */
        call    key_expansion128
        .byte 0x66,0x0f,0x38,0xdb,0xd1          /* aesimc %xmm1, %xmm2 */
        movups  %xmm2, -16(%rsi)

        /* round key 3 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04     /* aeskeygenassist $0x04, %xmm1, %xmm2 */
        call    key_expansion128
        .byte 0x66,0x0f,0x38,0xdb,0xd1          /* aesimc %xmm1, %xmm2 */
        movups  %xmm2, -16(%rsi)

        /* round key 4 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08     /* aeskeygenassist $0x08, %xmm1, %xmm2 */
        call    key_expansion128
        .byte 0x66,0x0f,0x38,0xdb,0xd1          /* aesimc %xmm1, %xmm2 */
        movups  %xmm2, -16(%rsi)

        /* round key 5 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10     /* aeskeygenassist $0x10, %xmm1, %xmm2 */
        call    key_expansion128
        .byte 0x66,0x0f,0x38,0xdb,0xd1          /* aesimc %xmm1, %xmm2 */
        movups  %xmm2, -16(%rsi)

        /* round key 6 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20     /* aeskeygenassist $0x20, %xmm1, %xmm2 */
        call    key_expansion128
        .byte 0x66,0x0f,0x38,0xdb,0xd1          /* aesimc %xmm1, %xmm2 */
        movups  %xmm2, -16(%rsi)

        /* round key 7 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40     /* aeskeygenassist $0x40, %xmm1, %xmm2 */
        call    key_expansion128
        .byte 0x66,0x0f,0x38,0xdb,0xd1          /* aesimc %xmm1, %xmm2 */
        movups  %xmm2, -16(%rsi)

        /* round key 8 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80     /* aeskeygenassist $0x80, %xmm1, %xmm2 */
        call    key_expansion128
        .byte 0x66,0x0f,0x38,0xdb,0xd1          /* aesimc %xmm1, %xmm2 */
        movups  %xmm2, -16(%rsi)

        /* round key 9 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b     /* aeskeygenassist $0x1b, %xmm1, %xmm2 */
        call    key_expansion128
        .byte 0x66,0x0f,0x38,0xdb,0xd1          /* aesimc %xmm1, %xmm2 */
        movups  %xmm2, -16(%rsi)

        /* round key 10: stored as generated, no aesimc */
        .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36     /* aeskeygenassist $0x36, %xmm1, %xmm2 */
        call    key_expansion128

        ret
        .size   intel_aes_decrypt_init_128, .-intel_aes_decrypt_init_128
|
|
|
|
|
|
/* key_expansion128 -- file-local helper: derive and store one AES-128 round key.
 *
 * Not a C-callable function; it uses a private register contract:
 *   in     %eax  : value placed in lane 0 of the scratch vector; both callers
 *                  in this file clear it to 0 beforehand
 *   in     %xmm2 : aeskeygenassist result for the previous round key
 *   in/out %xmm1 : previous round key on entry, new round key on exit
 *   in/out %rsi  : store address for the new round key; advanced by 16
 *
 * With the previous key words k0..k3 and t = dword 3 of %xmm2, the result is
 *   { k0^t, k0^k1^t, k0^k1^k2^t, k0^k1^k2^k3^t }.
 *
 * Clobbers: %xmm2, %xmm3, flags.
 */
        .type   key_expansion128,@function
        .align  16
key_expansion128:
        movd    %eax, %xmm3                     /* xmm3 = { eax, 0, 0, 0 } */
        pshufd  $0xff, %xmm2, %xmm2             /* broadcast dword 3 (t) to all lanes */
        shufps  $0x10, %xmm1, %xmm3             /* xmm3 = { 0, 0, k1, k0 } */
        pxor    %xmm3, %xmm1                    /* xmm1 = { k0, k1, k2^k1, k3^k0 } */
        shufps  $0x8c, %xmm1, %xmm3             /* xmm3 = { 0, k0, k0, k1^k2 } */
        pxor    %xmm2, %xmm1                    /* fold in t */
        pxor    %xmm3, %xmm1                    /* completes the running XOR */
        movdqu  %xmm1, (%rsi)                   /* store the new round key */
        addq    $16, %rsi
        ret
        .size   key_expansion128, .-key_expansion128
|
|
|
|
|
|
/* intel_aes_encrypt_ecb_128 -- AES-128 encryption, ECB mode.
 *
 *   in  %rdi : cx - context (round keys start at byte offset 48)
 *   in  %rsi : output - pointer to output buffer
 *   in  %rdx : outputLen - pointer to variable for length of output
 *              (filled by caller; not touched here)
 *   in  %rcx : maxOutputLen - length of output buffer (not examined here)
 *   in  %r8  : input - pointer to input buffer
 *   in  %r9  : inputLen - length of input buffer in bytes
 *   on stack : blocksize - AES blocksize (always 16, unused)
 *   out %eax : always 0
 *
 * Eight blocks are processed per iteration while at least 128 bytes remain;
 * the remainder is handled one block at a time.  The single-block loop ends
 * only when the offset equals inputLen exactly, so inputLen is expected to be
 * a multiple of 16 (NOTE(review): not checked here -- confirm the caller does).
 *
 * Clobbers: %rax, %rdi, %r10, %r11, %xmm1-%xmm12, flags.
 */
        .type   intel_aes_encrypt_ecb_128,@function
        .globl  intel_aes_encrypt_ecb_128
        .align  16
intel_aes_encrypt_ecb_128:
        leaq    48(%rdi), %rdi                  /* 48 = EXPANDED_KEY_OFFSET; rdi -> round key 0 */

        movdqu  (%rdi), %xmm2                   /* xmm2  = round key 0 */
        movdqu  160(%rdi), %xmm12               /* xmm12 = round key 10 (final round) */
        xorl    %eax, %eax                      /* rax = byte offset into input/output */
        cmpq    $128, %r9                       /* 128 = 8 blocks * 16 bytes */
        jb      .Lenc_ecb128_tail
        leaq    -128(%r9), %r11                 /* r11 = last offset with 8 blocks left */

.Lenc_ecb128_x8:
        movdqu  (%r8, %rax), %xmm3
        movdqu  16(%r8, %rax), %xmm4
        movdqu  32(%r8, %rax), %xmm5
        movdqu  48(%r8, %rax), %xmm6
        movdqu  64(%r8, %rax), %xmm7
        movdqu  80(%r8, %rax), %xmm8
        movdqu  96(%r8, %rax), %xmm9
        movdqu  112(%r8, %rax), %xmm10
        pxor    %xmm2, %xmm3                    /* initial AddRoundKey on all 8 blocks */
        pxor    %xmm2, %xmm4
        pxor    %xmm2, %xmm5
        pxor    %xmm2, %xmm6
        pxor    %xmm2, %xmm7
        pxor    %xmm2, %xmm8
        pxor    %xmm2, %xmm9
        pxor    %xmm2, %xmm10
        movq    $16, %r10                       /* r10 = offset of round key 1 */
.Lenc_ecb128_round:
        movdqu  (%rdi, %r10), %xmm1             /* xmm1 = current round key */
        .byte 0x66,0x0f,0x38,0xdc,0xd9          /* aesenc %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xdc,0xe1          /* aesenc %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdc,0xe9          /* aesenc %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xdc,0xf1          /* aesenc %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xdc,0xf9          /* aesenc %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1     /* aesenc %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9     /* aesenc %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1     /* aesenc %xmm1, %xmm10 */
        addq    $16, %r10
        cmpq    $160, %r10                      /* stop before round key 10 */
        jne     .Lenc_ecb128_round
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xdc     /* aesenclast %xmm12, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xe4     /* aesenclast %xmm12, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xec     /* aesenclast %xmm12, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xf4     /* aesenclast %xmm12, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xfc     /* aesenclast %xmm12, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xdd,0xc4     /* aesenclast %xmm12, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xdd,0xcc     /* aesenclast %xmm12, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xdd,0xd4     /* aesenclast %xmm12, %xmm10 */
        movdqu  %xmm3, (%rsi, %rax)
        movdqu  %xmm4, 16(%rsi, %rax)
        movdqu  %xmm5, 32(%rsi, %rax)
        movdqu  %xmm6, 48(%rsi, %rax)
        movdqu  %xmm7, 64(%rsi, %rax)
        movdqu  %xmm8, 80(%rsi, %rax)
        movdqu  %xmm9, 96(%rsi, %rax)
        movdqu  %xmm10, 112(%rsi, %rax)
        addq    $128, %rax
        cmpq    %r11, %rax
        jbe     .Lenc_ecb128_x8                 /* unsigned: another 8 blocks still fit */

.Lenc_ecb128_tail:
        cmpq    %rax, %r9
        je      .Lenc_ecb128_done               /* nothing left */

        /* Keep round keys 1..9 in registers for the one-block loop. */
        movdqu  16(%rdi), %xmm3
        movdqu  32(%rdi), %xmm4
        movdqu  48(%rdi), %xmm5
        movdqu  64(%rdi), %xmm6
        movdqu  80(%rdi), %xmm7
        movdqu  96(%rdi), %xmm8
        movdqu  112(%rdi), %xmm9
        movdqu  128(%rdi), %xmm10
        movdqu  144(%rdi), %xmm11

.Lenc_ecb128_x1:
        movdqu  (%r8, %rax), %xmm1
        pxor    %xmm2, %xmm1                    /* initial AddRoundKey */
        .byte 0x66,0x0f,0x38,0xdc,0xcb          /* aesenc %xmm3, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xcc          /* aesenc %xmm4, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xcd          /* aesenc %xmm5, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xce          /* aesenc %xmm6, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xcf          /* aesenc %xmm7, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8     /* aesenc %xmm8, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9     /* aesenc %xmm9, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xca     /* aesenc %xmm10, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb     /* aesenc %xmm11, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xcc     /* aesenclast %xmm12, %xmm1 */
        movdqu  %xmm1, (%rsi, %rax)
        addq    $16, %rax
        cmpq    %rax, %r9
        jne     .Lenc_ecb128_x1

.Lenc_ecb128_done:
        xorl    %eax, %eax                      /* return 0 */
        ret
        .size   intel_aes_encrypt_ecb_128, .-intel_aes_encrypt_ecb_128
|
|
|
|
|
|
/* intel_aes_decrypt_ecb_128 -- AES-128 decryption, ECB mode.
 *
 *   in  %rdi : cx - context (round keys start at byte offset 48)
 *   in  %rsi : output - pointer to output buffer
 *   in  %rdx : outputLen - pointer to variable for length of output
 *              (filled by caller; not touched here)
 *   in  %rcx : maxOutputLen - length of output buffer (not examined here)
 *   in  %r8  : input - pointer to input buffer
 *   in  %r9  : inputLen - length of input buffer in bytes
 *   on stack : blocksize - AES blocksize (always 16, unused)
 *   out %eax : always 0
 *
 * Round keys are applied in reverse order: key 10 is XORed in first, keys
 * 9..1 feed aesdec and key 0 feeds aesdeclast.  NOTE(review): this presumably
 * relies on the schedule written by intel_aes_decrypt_init_128 -- confirm
 * against the caller.  Eight blocks are processed per iteration while at
 * least 128 bytes remain, then one block at a time; inputLen is expected to
 * be a multiple of 16 (not checked here).
 *
 * Clobbers: %rax, %rdi, %r10, %r11, %xmm1-%xmm12, flags.
 */
        .type   intel_aes_decrypt_ecb_128,@function
        .globl  intel_aes_decrypt_ecb_128
        .align  16
intel_aes_decrypt_ecb_128:
        leaq    48(%rdi), %rdi                  /* 48 = EXPANDED_KEY_OFFSET; rdi -> round key 0 */

        movdqu  (%rdi), %xmm2                   /* xmm2  = round key 0 (used last) */
        movdqu  160(%rdi), %xmm12               /* xmm12 = round key 10 (used first) */
        xorl    %eax, %eax                      /* rax = byte offset into input/output */
        cmpq    $128, %r9                       /* 128 = 8 blocks * 16 bytes */
        jb      .Ldec_ecb128_tail
        leaq    -128(%r9), %r11                 /* r11 = last offset with 8 blocks left */

.Ldec_ecb128_x8:
        movdqu  (%r8, %rax), %xmm3
        movdqu  16(%r8, %rax), %xmm4
        movdqu  32(%r8, %rax), %xmm5
        movdqu  48(%r8, %rax), %xmm6
        movdqu  64(%r8, %rax), %xmm7
        movdqu  80(%r8, %rax), %xmm8
        movdqu  96(%r8, %rax), %xmm9
        movdqu  112(%r8, %rax), %xmm10
        pxor    %xmm12, %xmm3                   /* initial AddRoundKey with key 10 */
        pxor    %xmm12, %xmm4
        pxor    %xmm12, %xmm5
        pxor    %xmm12, %xmm6
        pxor    %xmm12, %xmm7
        pxor    %xmm12, %xmm8
        pxor    %xmm12, %xmm9
        pxor    %xmm12, %xmm10
        movq    $144, %r10                      /* r10 = offset of round key 9 */
.Ldec_ecb128_round:
        movdqu  (%rdi, %r10), %xmm1             /* xmm1 = current round key */
        .byte 0x66,0x0f,0x38,0xde,0xd9          /* aesdec %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xde,0xe1          /* aesdec %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xde,0xe9          /* aesdec %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xde,0xf1          /* aesdec %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xde,0xf9          /* aesdec %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc1     /* aesdec %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc9     /* aesdec %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xd1     /* aesdec %xmm1, %xmm10 */
        subq    $16, %r10
        jne     .Ldec_ecb128_round              /* stop once offset reaches 0 (key 0) */
        .byte 0x66,0x0f,0x38,0xdf,0xda          /* aesdeclast %xmm2, %xmm3 */
        .byte 0x66,0x0f,0x38,0xdf,0xe2          /* aesdeclast %xmm2, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdf,0xea          /* aesdeclast %xmm2, %xmm5 */
        .byte 0x66,0x0f,0x38,0xdf,0xf2          /* aesdeclast %xmm2, %xmm6 */
        .byte 0x66,0x0f,0x38,0xdf,0xfa          /* aesdeclast %xmm2, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2     /* aesdeclast %xmm2, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xdf,0xca     /* aesdeclast %xmm2, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2     /* aesdeclast %xmm2, %xmm10 */
        movdqu  %xmm3, (%rsi, %rax)
        movdqu  %xmm4, 16(%rsi, %rax)
        movdqu  %xmm5, 32(%rsi, %rax)
        movdqu  %xmm6, 48(%rsi, %rax)
        movdqu  %xmm7, 64(%rsi, %rax)
        movdqu  %xmm8, 80(%rsi, %rax)
        movdqu  %xmm9, 96(%rsi, %rax)
        movdqu  %xmm10, 112(%rsi, %rax)
        addq    $128, %rax
        cmpq    %r11, %rax
        jbe     .Ldec_ecb128_x8                 /* unsigned: another 8 blocks still fit */

.Ldec_ecb128_tail:
        cmpq    %rax, %r9
        je      .Ldec_ecb128_done               /* nothing left */

        /* Keep round keys 1..9 in registers for the one-block loop. */
        movdqu  16(%rdi), %xmm3
        movdqu  32(%rdi), %xmm4
        movdqu  48(%rdi), %xmm5
        movdqu  64(%rdi), %xmm6
        movdqu  80(%rdi), %xmm7
        movdqu  96(%rdi), %xmm8
        movdqu  112(%rdi), %xmm9
        movdqu  128(%rdi), %xmm10
        movdqu  144(%rdi), %xmm11

.Ldec_ecb128_x1:
        movdqu  (%r8, %rax), %xmm1
        pxor    %xmm12, %xmm1                   /* initial AddRoundKey with key 10 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xcb     /* aesdec %xmm11, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xca     /* aesdec %xmm10, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xc9     /* aesdec %xmm9, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xc8     /* aesdec %xmm8, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xcf          /* aesdec %xmm7, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xce          /* aesdec %xmm6, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xcd          /* aesdec %xmm5, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xcc          /* aesdec %xmm4, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xcb          /* aesdec %xmm3, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdf,0xca          /* aesdeclast %xmm2, %xmm1 */
        movdqu  %xmm1, (%rsi, %rax)
        addq    $16, %rax
        cmpq    %rax, %r9
        jne     .Ldec_ecb128_x1

.Ldec_ecb128_done:
        xorl    %eax, %eax                      /* return 0 */
        ret
        .size   intel_aes_decrypt_ecb_128, .-intel_aes_decrypt_ecb_128
|
|
|
|
|
|
/* intel_aes_encrypt_cbc_128 -- AES-128 encryption, CBC mode.
 *
 *   in  %rdi : cx - context (IV at byte offset 16, round keys at offset 48)
 *   in  %rsi : output - pointer to output buffer
 *   in  %rdx : outputLen - pointer to variable for length of output
 *              (filled by caller; the register is reused here as IV pointer)
 *   in  %rcx : maxOutputLen - length of output buffer (not examined here)
 *   in  %r8  : input - pointer to input buffer
 *   in  %r9  : inputLen - length of input buffer in bytes
 *   on stack : blocksize - AES blocksize (always 16, unused)
 *   out %eax : always 0
 *
 * Blocks are processed strictly one after another because each one is XORed
 * with the previous ciphertext block.  After the last block, the final
 * ciphertext block is written back to the context as the new IV.  When
 * inputLen is 0 nothing is read or written.  The loop ends only when the
 * offset equals inputLen exactly, so inputLen is expected to be a multiple
 * of 16 (not checked here).
 *
 * Clobbers: %rax, %rdx, %rdi, %xmm0-%xmm12, flags.
 */
        .type   intel_aes_encrypt_cbc_128,@function
        .globl  intel_aes_encrypt_cbc_128
        .align  16
intel_aes_encrypt_cbc_128:
        testq   %r9, %r9
        je      .Lenc_cbc128_done               /* empty input: leave the IV alone */

        leaq    16(%rdi), %rdx                  /* 16 = IV_OFFSET; rdx -> IV */
        leaq    48(%rdi), %rdi                  /* 48 = EXPANDED_KEY_OFFSET; rdi -> round key 0 */

        movdqu  (%rdx), %xmm0                   /* xmm0 = chaining value (IV) */
        movdqu  (%rdi), %xmm2                   /* xmm2..xmm12 = round keys 0..10 */
        movdqu  16(%rdi), %xmm3
        movdqu  32(%rdi), %xmm4
        movdqu  48(%rdi), %xmm5
        movdqu  64(%rdi), %xmm6
        movdqu  80(%rdi), %xmm7
        movdqu  96(%rdi), %xmm8
        movdqu  112(%rdi), %xmm9
        movdqu  128(%rdi), %xmm10
        movdqu  144(%rdi), %xmm11
        movdqu  160(%rdi), %xmm12

        xorl    %eax, %eax                      /* rax = byte offset into input/output */
.Lenc_cbc128_block:
        movdqu  (%r8, %rax), %xmm1
        pxor    %xmm0, %xmm1                    /* CBC: XOR with previous ciphertext / IV */
        pxor    %xmm2, %xmm1                    /* initial AddRoundKey */
        .byte 0x66,0x0f,0x38,0xdc,0xcb          /* aesenc %xmm3, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xcc          /* aesenc %xmm4, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xcd          /* aesenc %xmm5, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xce          /* aesenc %xmm6, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xcf          /* aesenc %xmm7, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8     /* aesenc %xmm8, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9     /* aesenc %xmm9, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xca     /* aesenc %xmm10, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb     /* aesenc %xmm11, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xcc     /* aesenclast %xmm12, %xmm1 */
        movdqu  %xmm1, (%rsi, %rax)
        movdqa  %xmm1, %xmm0                    /* ciphertext becomes next chaining value */
        addq    $16, %rax
        cmpq    %rax, %r9
        jne     .Lenc_cbc128_block

        movdqu  %xmm0, (%rdx)                   /* save last ciphertext block as new IV */

.Lenc_cbc128_done:
        xorl    %eax, %eax                      /* return 0 */
        ret
        .size   intel_aes_encrypt_cbc_128, .-intel_aes_encrypt_cbc_128
|
|
|
|
|
|
/* intel_aes_decrypt_cbc_128 -- AES-128 decryption, CBC mode.
 *
 *   in  %rdi : cx - context (IV at byte offset 16, round keys at offset 48)
 *   in  %rsi : output - pointer to output buffer
 *   in  %rdx : outputLen - pointer to variable for length of output
 *              (filled by caller; the register is reused here as IV pointer)
 *   in  %rcx : maxOutputLen - length of output buffer (not examined here)
 *   in  %r8  : input - pointer to input buffer (no alignment required)
 *   in  %r9  : inputLen - length of input buffer in bytes
 *   on stack : blocksize - AES blocksize (always 16, unused)
 *   out %eax : always 0
 *
 * Round keys are applied in reverse order: key 10 is XORed in first, keys
 * 9..1 feed aesdec and key 0 feeds aesdeclast.  NOTE(review): this presumably
 * relies on the schedule written by intel_aes_decrypt_init_128 -- confirm
 * against the caller.  Eight blocks are decrypted per iteration while at
 * least 128 bytes remain, then one block at a time; inputLen is expected to
 * be a multiple of 16 (not checked here).  On exit the last ciphertext block
 * (or the unchanged IV when inputLen is 0) is written back to the context.
 *
 * The ciphertext blocks needed for chaining are fetched with movdqu into a
 * register before being XORed.  A pxor with a 128-bit memory operand would
 * fault on an input buffer that is not 16-byte aligned.  In both loops every
 * ciphertext block is read before the matching plaintext is stored.
 *
 * Clobbers: %rax, %rdx, %rdi, %r10, %r11, %xmm0-%xmm13, flags.
 */
        .type   intel_aes_decrypt_cbc_128,@function
        .globl  intel_aes_decrypt_cbc_128
        .align  16
intel_aes_decrypt_cbc_128:
        leaq    16(%rdi), %rdx                  /* 16 = IV_OFFSET; rdx -> IV */
        leaq    48(%rdi), %rdi                  /* 48 = EXPANDED_KEY_OFFSET; rdi -> round key 0 */

        movdqu  (%rdx), %xmm0                   /* xmm0  = chaining value (IV) */
        movdqu  (%rdi), %xmm2                   /* xmm2  = round key 0 (used last) */
        movdqu  160(%rdi), %xmm12               /* xmm12 = round key 10 (used first) */
        xorl    %eax, %eax                      /* rax = byte offset into input/output */
        cmpq    $128, %r9                       /* 128 = 8 blocks * 16 bytes */
        jb      .Ldec_cbc128_tail
        leaq    -128(%r9), %r11                 /* r11 = last offset with 8 blocks left */

.Ldec_cbc128_x8:
        movdqu  (%r8, %rax), %xmm3
        movdqu  16(%r8, %rax), %xmm4
        movdqu  32(%r8, %rax), %xmm5
        movdqu  48(%r8, %rax), %xmm6
        movdqu  64(%r8, %rax), %xmm7
        movdqu  80(%r8, %rax), %xmm8
        movdqu  96(%r8, %rax), %xmm9
        movdqu  112(%r8, %rax), %xmm10
        pxor    %xmm12, %xmm3                   /* initial AddRoundKey with key 10 */
        pxor    %xmm12, %xmm4
        pxor    %xmm12, %xmm5
        pxor    %xmm12, %xmm6
        pxor    %xmm12, %xmm7
        pxor    %xmm12, %xmm8
        pxor    %xmm12, %xmm9
        pxor    %xmm12, %xmm10
        movq    $144, %r10                      /* r10 = offset of round key 9 */
.Ldec_cbc128_round:
        movdqu  (%rdi, %r10), %xmm1             /* xmm1 = current round key */
        .byte 0x66,0x0f,0x38,0xde,0xd9          /* aesdec %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xde,0xe1          /* aesdec %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xde,0xe9          /* aesdec %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xde,0xf1          /* aesdec %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xde,0xf9          /* aesdec %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc1     /* aesdec %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc9     /* aesdec %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xd1     /* aesdec %xmm1, %xmm10 */
        subq    $16, %r10
        jne     .Ldec_cbc128_round              /* stop once offset reaches 0 (key 0) */
        .byte 0x66,0x0f,0x38,0xdf,0xda          /* aesdeclast %xmm2, %xmm3 */
        .byte 0x66,0x0f,0x38,0xdf,0xe2          /* aesdeclast %xmm2, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdf,0xea          /* aesdeclast %xmm2, %xmm5 */
        .byte 0x66,0x0f,0x38,0xdf,0xf2          /* aesdeclast %xmm2, %xmm6 */
        .byte 0x66,0x0f,0x38,0xdf,0xfa          /* aesdeclast %xmm2, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2     /* aesdeclast %xmm2, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xdf,0xca     /* aesdeclast %xmm2, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2     /* aesdeclast %xmm2, %xmm10 */

        /* CBC chaining: plaintext[i] = decrypted[i] ^ ciphertext[i-1].
           xmm0 is reused as the scratch register for each previous
           ciphertext block and ends up holding the last one of this batch. */
        pxor    %xmm0, %xmm3                    /* block 0 uses the incoming chain value */
        movdqu  (%r8, %rax), %xmm0
        pxor    %xmm0, %xmm4
        movdqu  16(%r8, %rax), %xmm0
        pxor    %xmm0, %xmm5
        movdqu  32(%r8, %rax), %xmm0
        pxor    %xmm0, %xmm6
        movdqu  48(%r8, %rax), %xmm0
        pxor    %xmm0, %xmm7
        movdqu  64(%r8, %rax), %xmm0
        pxor    %xmm0, %xmm8
        movdqu  80(%r8, %rax), %xmm0
        pxor    %xmm0, %xmm9
        movdqu  96(%r8, %rax), %xmm0
        pxor    %xmm0, %xmm10
        movdqu  112(%r8, %rax), %xmm0           /* chain value for the next batch */
        movdqu  %xmm3, (%rsi, %rax)
        movdqu  %xmm4, 16(%rsi, %rax)
        movdqu  %xmm5, 32(%rsi, %rax)
        movdqu  %xmm6, 48(%rsi, %rax)
        movdqu  %xmm7, 64(%rsi, %rax)
        movdqu  %xmm8, 80(%rsi, %rax)
        movdqu  %xmm9, 96(%rsi, %rax)
        movdqu  %xmm10, 112(%rsi, %rax)
        addq    $128, %rax
        cmpq    %r11, %rax
        jbe     .Ldec_cbc128_x8                 /* unsigned: another 8 blocks still fit */

.Ldec_cbc128_tail:
        cmpq    %rax, %r9
        je      .Ldec_cbc128_done               /* nothing left */

        /* Keep round keys 1..9 in registers for the one-block loop. */
        movdqu  16(%rdi), %xmm3
        movdqu  32(%rdi), %xmm4
        movdqu  48(%rdi), %xmm5
        movdqu  64(%rdi), %xmm6
        movdqu  80(%rdi), %xmm7
        movdqu  96(%rdi), %xmm8
        movdqu  112(%rdi), %xmm9
        movdqu  128(%rdi), %xmm10
        movdqu  144(%rdi), %xmm11

.Ldec_cbc128_x1:
        movdqu  (%r8, %rax), %xmm1
        movdqa  %xmm1, %xmm13                   /* keep the ciphertext for chaining */
        pxor    %xmm12, %xmm1                   /* initial AddRoundKey with key 10 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xcb     /* aesdec %xmm11, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xca     /* aesdec %xmm10, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xc9     /* aesdec %xmm9, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xc8     /* aesdec %xmm8, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xcf          /* aesdec %xmm7, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xce          /* aesdec %xmm6, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xcd          /* aesdec %xmm5, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xcc          /* aesdec %xmm4, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xcb          /* aesdec %xmm3, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdf,0xca          /* aesdeclast %xmm2, %xmm1 */
        pxor    %xmm0, %xmm1                    /* CBC: XOR with previous ciphertext / IV */
        movdqu  %xmm1, (%rsi, %rax)
        movdqa  %xmm13, %xmm0                   /* this ciphertext chains into the next block */
        addq    $16, %rax
        cmpq    %rax, %r9
        jne     .Ldec_cbc128_x1

.Ldec_cbc128_done:
        movdqu  %xmm0, (%rdx)                   /* save the chain value as the new IV */

        xorl    %eax, %eax                      /* return 0 */
        ret
        .size   intel_aes_decrypt_cbc_128, .-intel_aes_decrypt_cbc_128
|
|
|
|
|
|
/* intel_aes_encrypt_init_192 -- build the AES-192 encryption key schedule.
 *
 *   in  %rdi : the key (24 bytes: 16 read into %xmm1, 8 into %xmm3)
 *   in  %rsi : buffer for expanded key
 *
 * The 24 key bytes are copied to the start of the buffer.  Each of the eight
 * aeskeygenassist/key_expansion192 pairs then appends another 24 bytes, so
 * 24 + 8*24 = 216 bytes are written in total.
 *
 * Clobbers: %eax, %rsi, %xmm1-%xmm5, flags.
 */
        .type   intel_aes_encrypt_init_192,@function
        .globl  intel_aes_encrypt_init_192
        .align  16
intel_aes_encrypt_init_192:
        movdqu  (%rdi), %xmm1                   /* xmm1 = key bytes 0..15 */
        movq    16(%rdi), %xmm3                 /* xmm3 = key bytes 16..23, upper half zero */
        movdqu  %xmm1, (%rsi)
        movq    %xmm3, 16(%rsi)
        leaq    24(%rsi), %rsi                  /* rsi -> first byte after the raw key */

        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01     /* aeskeygenassist $0x01, %xmm3, %xmm2 */
        call    key_expansion192
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02     /* aeskeygenassist $0x02, %xmm3, %xmm2 */
        call    key_expansion192
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04     /* aeskeygenassist $0x04, %xmm3, %xmm2 */
        call    key_expansion192
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08     /* aeskeygenassist $0x08, %xmm3, %xmm2 */
        call    key_expansion192
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10     /* aeskeygenassist $0x10, %xmm3, %xmm2 */
        call    key_expansion192
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20     /* aeskeygenassist $0x20, %xmm3, %xmm2 */
        call    key_expansion192
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40     /* aeskeygenassist $0x40, %xmm3, %xmm2 */
        call    key_expansion192
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80     /* aeskeygenassist $0x80, %xmm3, %xmm2 */
        call    key_expansion192

        ret
        .size   intel_aes_encrypt_init_192, .-intel_aes_encrypt_init_192
|
|
|
|
|
|
/* intel_aes_decrypt_init_192 -- build the AES-192 decryption key schedule.
 *
 *   in  %rdi : the key (24 bytes: 16 read into %xmm1, 8 into %xmm3)
 *   in  %rsi : buffer for expanded key (216 bytes written)
 *
 * The schedule is generated as for encryption, 24 bytes per helper call, and
 * the 16-byte round keys 1..11 are rewritten in place with aesimc
 * (InvMixColumns).  Because 24 is not a multiple of 16, the rewrite
 * alternates between two patterns:
 *   - after an odd-numbered call two round keys have just become complete;
 *     both are reloaded from memory, transformed and stored back;
 *   - after an even-numbered call the first 16 freshly generated bytes
 *     (still in %xmm1) form exactly one round key; its transform is stored
 *     over it.  The 8 bytes that follow are picked up by the next odd call.
 * Round key 0 and round key 12 (offset 192) are left untransformed.
 *
 * Clobbers: %eax, %rsi, %xmm1-%xmm5, flags.
 */
        .type   intel_aes_decrypt_init_192,@function
        .globl  intel_aes_decrypt_init_192
        .align  16
intel_aes_decrypt_init_192:
        movdqu  (%rdi), %xmm1                   /* xmm1 = key bytes 0..15 */
        movq    16(%rdi), %xmm3                 /* xmm3 = key bytes 16..23, upper half zero */
        movdqu  %xmm1, (%rsi)
        movq    %xmm3, 16(%rsi)
        leaq    24(%rsi), %rsi                  /* rsi -> first byte after the raw key */

        /* call 1: completes round keys 1 and 2 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01     /* aeskeygenassist $0x01, %xmm3, %xmm2 */
        call    key_expansion192
        movups  -32(%rsi), %xmm2
        movups  -16(%rsi), %xmm4
        .byte 0x66,0x0f,0x38,0xdb,0xd2          /* aesimc %xmm2, %xmm2 */
        .byte 0x66,0x0f,0x38,0xdb,0xe4          /* aesimc %xmm4, %xmm4 */
        movups  %xmm2, -32(%rsi)
        movups  %xmm4, -16(%rsi)

        /* call 2: xmm1 is round key 3 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02     /* aeskeygenassist $0x02, %xmm3, %xmm2 */
        call    key_expansion192
        .byte 0x66,0x0f,0x38,0xdb,0xd1          /* aesimc %xmm1, %xmm2 */
        movups  %xmm2, -24(%rsi)

        /* call 3: completes round keys 4 and 5 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04     /* aeskeygenassist $0x04, %xmm3, %xmm2 */
        call    key_expansion192
        movups  -32(%rsi), %xmm2
        movups  -16(%rsi), %xmm4
        .byte 0x66,0x0f,0x38,0xdb,0xd2          /* aesimc %xmm2, %xmm2 */
        .byte 0x66,0x0f,0x38,0xdb,0xe4          /* aesimc %xmm4, %xmm4 */
        movups  %xmm2, -32(%rsi)
        movups  %xmm4, -16(%rsi)

        /* call 4: xmm1 is round key 6 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08     /* aeskeygenassist $0x08, %xmm3, %xmm2 */
        call    key_expansion192
        .byte 0x66,0x0f,0x38,0xdb,0xd1          /* aesimc %xmm1, %xmm2 */
        movups  %xmm2, -24(%rsi)

        /* call 5: completes round keys 7 and 8 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10     /* aeskeygenassist $0x10, %xmm3, %xmm2 */
        call    key_expansion192
        movups  -32(%rsi), %xmm2
        movups  -16(%rsi), %xmm4
        .byte 0x66,0x0f,0x38,0xdb,0xd2          /* aesimc %xmm2, %xmm2 */
        .byte 0x66,0x0f,0x38,0xdb,0xe4          /* aesimc %xmm4, %xmm4 */
        movups  %xmm2, -32(%rsi)
        movups  %xmm4, -16(%rsi)

        /* call 6: xmm1 is round key 9 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20     /* aeskeygenassist $0x20, %xmm3, %xmm2 */
        call    key_expansion192
        .byte 0x66,0x0f,0x38,0xdb,0xd1          /* aesimc %xmm1, %xmm2 */
        movups  %xmm2, -24(%rsi)

        /* call 7: completes round keys 10 and 11 */
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40     /* aeskeygenassist $0x40, %xmm3, %xmm2 */
        call    key_expansion192
        movups  -32(%rsi), %xmm2
        movups  -16(%rsi), %xmm4
        .byte 0x66,0x0f,0x38,0xdb,0xd2          /* aesimc %xmm2, %xmm2 */
        .byte 0x66,0x0f,0x38,0xdb,0xe4          /* aesimc %xmm4, %xmm4 */
        movups  %xmm2, -32(%rsi)
        movups  %xmm4, -16(%rsi)

        /* call 8: produces round key 12, stored as generated (no aesimc) */
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80     /* aeskeygenassist $0x80, %xmm3, %xmm2 */
        call    key_expansion192

        ret
        .size   intel_aes_decrypt_init_192, .-intel_aes_decrypt_init_192
|
|
|
|
|
|
/* key_expansion192 -- file-local helper: derive and store the next 24 bytes
 * (six 32-bit words) of an AES-192 key schedule.
 *
 * Not a C-callable function; it uses a private register contract:
 *   in     %xmm2 : aeskeygenassist result computed from %xmm3
 *   in/out %xmm1 : schedule words 0..3 of the previous 24-byte group on
 *                  entry, of the new group on exit
 *   in/out %xmm3 : schedule words 4..5 (low 8 bytes) of the previous group on
 *                  entry, of the new group on exit
 *   in/out %rsi  : store address; advanced by 24
 *
 * Clobbers: %eax (set to 0), %xmm2, %xmm4, %xmm5, flags.
 */
        .type   key_expansion192,@function
        .align  16
key_expansion192:
        /* Words 0..3: running XOR of the old words, combined with t, where
           t = dword 1 of the aeskeygenassist result. */
        pshufd  $0x55, %xmm2, %xmm2             /* broadcast dword 1 (t) to all lanes */
        xorl    %eax, %eax
        movd    %eax, %xmm4                     /* xmm4 = 0 */
        shufps  $0x10, %xmm1, %xmm4             /* xmm4 = { 0, 0, k1, k0 } */
        pxor    %xmm4, %xmm1                    /* xmm1 = { k0, k1, k2^k1, k3^k0 } */
        shufps  $0x8c, %xmm1, %xmm4             /* xmm4 = { 0, k0, k0, k1^k2 } */
        pxor    %xmm2, %xmm1                    /* fold in t */
        pxor    %xmm4, %xmm1                    /* completes the running XOR */
        movdqu  %xmm1, (%rsi)                   /* store new words 0..3 */
        addq    $16, %rsi

        /* Words 4..5: continue the running XOR from new word 3 (w). */
        pshufd  $0xff, %xmm1, %xmm4             /* broadcast new word 3 (w) */
        movd    %eax, %xmm5                     /* xmm5 = 0 (eax is still 0) */
        shufps  $0x00, %xmm3, %xmm5             /* xmm5 = { 0, 0, k4, k4 } */
        shufps  $0x08, %xmm3, %xmm5             /* xmm5 = { 0, k4, k4, k4 } */
        pxor    %xmm4, %xmm3
        pxor    %xmm5, %xmm3                    /* low lanes: { k4^w, k5^k4^w } */
        movq    %xmm3, (%rsi)                   /* store new words 4..5 (8 bytes) */
        addq    $8, %rsi
        ret
        .size   key_expansion192, .-key_expansion192
|
|
|
|
|
|
/* intel_aes_encrypt_ecb_192 -- AES-192 encryption, ECB mode.
 *
 *   in  %rdi : cx - context (round keys start at byte offset 48)
 *   in  %rsi : output - pointer to output buffer
 *   in  %rdx : outputLen - pointer to variable for length of output
 *              (filled by caller; not touched here)
 *   in  %rcx : maxOutputLen - length of output buffer (not examined here)
 *   in  %r8  : input - pointer to input buffer
 *   in  %r9  : inputLen - length of input buffer in bytes
 *   on stack : blocksize - AES blocksize (always 16, unused)
 *   out %eax : always 0
 *
 * Uses 13 round keys (offsets 0..192).  Eight blocks are processed per
 * iteration while at least 128 bytes remain; the remainder is handled one
 * block at a time.  The single-block loop ends only when the offset equals
 * inputLen exactly, so inputLen is expected to be a multiple of 16 (not
 * checked here).
 *
 * Clobbers: %rax, %rdi, %r10, %r11, %xmm1-%xmm14, flags.
 */
        .type   intel_aes_encrypt_ecb_192,@function
        .globl  intel_aes_encrypt_ecb_192
        .align  16
intel_aes_encrypt_ecb_192:
        leaq    48(%rdi), %rdi                  /* 48 = EXPANDED_KEY_OFFSET; rdi -> round key 0 */

        movdqu  (%rdi), %xmm2                   /* xmm2  = round key 0 */
        movdqu  192(%rdi), %xmm14               /* xmm14 = round key 12 (final round) */
        xorl    %eax, %eax                      /* rax = byte offset into input/output */
        cmpq    $128, %r9                       /* 128 = 8 blocks * 16 bytes */
        jb      .Lenc_ecb192_tail
        leaq    -128(%r9), %r11                 /* r11 = last offset with 8 blocks left */

.Lenc_ecb192_x8:
        movdqu  (%r8, %rax), %xmm3
        movdqu  16(%r8, %rax), %xmm4
        movdqu  32(%r8, %rax), %xmm5
        movdqu  48(%r8, %rax), %xmm6
        movdqu  64(%r8, %rax), %xmm7
        movdqu  80(%r8, %rax), %xmm8
        movdqu  96(%r8, %rax), %xmm9
        movdqu  112(%r8, %rax), %xmm10
        pxor    %xmm2, %xmm3                    /* initial AddRoundKey on all 8 blocks */
        pxor    %xmm2, %xmm4
        pxor    %xmm2, %xmm5
        pxor    %xmm2, %xmm6
        pxor    %xmm2, %xmm7
        pxor    %xmm2, %xmm8
        pxor    %xmm2, %xmm9
        pxor    %xmm2, %xmm10
        movq    $16, %r10                       /* r10 = offset of round key 1 */
.Lenc_ecb192_round:
        movdqu  (%rdi, %r10), %xmm1             /* xmm1 = current round key */
        .byte 0x66,0x0f,0x38,0xdc,0xd9          /* aesenc %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xdc,0xe1          /* aesenc %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdc,0xe9          /* aesenc %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xdc,0xf1          /* aesenc %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xdc,0xf9          /* aesenc %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1     /* aesenc %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9     /* aesenc %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1     /* aesenc %xmm1, %xmm10 */
        addq    $16, %r10
        cmpq    $192, %r10                      /* stop before round key 12 */
        jne     .Lenc_ecb192_round
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xde     /* aesenclast %xmm14, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xe6     /* aesenclast %xmm14, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xee     /* aesenclast %xmm14, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xf6     /* aesenclast %xmm14, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xfe     /* aesenclast %xmm14, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xdd,0xc6     /* aesenclast %xmm14, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xdd,0xce     /* aesenclast %xmm14, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xdd,0xd6     /* aesenclast %xmm14, %xmm10 */
        movdqu  %xmm3, (%rsi, %rax)
        movdqu  %xmm4, 16(%rsi, %rax)
        movdqu  %xmm5, 32(%rsi, %rax)
        movdqu  %xmm6, 48(%rsi, %rax)
        movdqu  %xmm7, 64(%rsi, %rax)
        movdqu  %xmm8, 80(%rsi, %rax)
        movdqu  %xmm9, 96(%rsi, %rax)
        movdqu  %xmm10, 112(%rsi, %rax)
        addq    $128, %rax
        cmpq    %r11, %rax
        jbe     .Lenc_ecb192_x8                 /* unsigned: another 8 blocks still fit */

.Lenc_ecb192_tail:
        cmpq    %rax, %r9
        je      .Lenc_ecb192_done               /* nothing left */

        /* Keep round keys 1..11 in registers for the one-block loop. */
        movdqu  16(%rdi), %xmm3
        movdqu  32(%rdi), %xmm4
        movdqu  48(%rdi), %xmm5
        movdqu  64(%rdi), %xmm6
        movdqu  80(%rdi), %xmm7
        movdqu  96(%rdi), %xmm8
        movdqu  112(%rdi), %xmm9
        movdqu  128(%rdi), %xmm10
        movdqu  144(%rdi), %xmm11
        movdqu  160(%rdi), %xmm12
        movdqu  176(%rdi), %xmm13

.Lenc_ecb192_x1:
        movdqu  (%r8, %rax), %xmm1
        pxor    %xmm2, %xmm1                    /* initial AddRoundKey */
        .byte 0x66,0x0f,0x38,0xdc,0xcb          /* aesenc %xmm3, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xcc          /* aesenc %xmm4, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xcd          /* aesenc %xmm5, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xce          /* aesenc %xmm6, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xcf          /* aesenc %xmm7, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8     /* aesenc %xmm8, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9     /* aesenc %xmm9, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xca     /* aesenc %xmm10, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb     /* aesenc %xmm11, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc     /* aesenc %xmm12, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd     /* aesenc %xmm13, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xce     /* aesenclast %xmm14, %xmm1 */
        movdqu  %xmm1, (%rsi, %rax)
        addq    $16, %rax
        cmpq    %rax, %r9
        jne     .Lenc_ecb192_x1

.Lenc_ecb192_done:
        xorl    %eax, %eax                      /* return 0 */
        ret
        .size   intel_aes_encrypt_ecb_192, .-intel_aes_encrypt_ecb_192
|
|
|
|
|
|
/* intel_aes_decrypt_ecb_192 -- AES-192 decryption, ECB mode.
 *
 *   in  %rdi : cx - context (round keys start at byte offset 48)
 *   in  %rsi : output - pointer to output buffer
 *   in  %rdx : outputLen - pointer to variable for length of output
 *              (filled by caller; not touched here)
 *   in  %rcx : maxOutputLen - length of output buffer (not examined here)
 *   in  %r8  : input - pointer to input buffer
 *   in  %r9  : inputLen - length of input buffer in bytes
 *   on stack : blocksize - AES blocksize (always 16, unused)
 *   out %eax : always 0
 *
 * Round keys are applied in reverse order: key 12 is XORed in first, keys
 * 11..1 feed aesdec and key 0 feeds aesdeclast.  NOTE(review): this
 * presumably relies on the schedule written by intel_aes_decrypt_init_192 --
 * confirm against the caller.  Eight blocks are processed per iteration while
 * at least 128 bytes remain, then one block at a time; inputLen is expected
 * to be a multiple of 16 (not checked here).
 *
 * Clobbers: %rax, %rdi, %r10, %r11, %xmm1-%xmm14, flags.
 */
        .type   intel_aes_decrypt_ecb_192,@function
        .globl  intel_aes_decrypt_ecb_192
        .align  16
intel_aes_decrypt_ecb_192:
        leaq    48(%rdi), %rdi                  /* 48 = EXPANDED_KEY_OFFSET; rdi -> round key 0 */

        movdqu  (%rdi), %xmm2                   /* xmm2  = round key 0 (used last) */
        movdqu  192(%rdi), %xmm14               /* xmm14 = round key 12 (used first) */
        xorl    %eax, %eax                      /* rax = byte offset into input/output */
        cmpq    $128, %r9                       /* 128 = 8 blocks * 16 bytes */
        jb      .Ldec_ecb192_tail
        leaq    -128(%r9), %r11                 /* r11 = last offset with 8 blocks left */

.Ldec_ecb192_x8:
        movdqu  (%r8, %rax), %xmm3
        movdqu  16(%r8, %rax), %xmm4
        movdqu  32(%r8, %rax), %xmm5
        movdqu  48(%r8, %rax), %xmm6
        movdqu  64(%r8, %rax), %xmm7
        movdqu  80(%r8, %rax), %xmm8
        movdqu  96(%r8, %rax), %xmm9
        movdqu  112(%r8, %rax), %xmm10
        pxor    %xmm14, %xmm3                   /* initial AddRoundKey with key 12 */
        pxor    %xmm14, %xmm4
        pxor    %xmm14, %xmm5
        pxor    %xmm14, %xmm6
        pxor    %xmm14, %xmm7
        pxor    %xmm14, %xmm8
        pxor    %xmm14, %xmm9
        pxor    %xmm14, %xmm10
        movq    $176, %r10                      /* r10 = offset of round key 11 */
.Ldec_ecb192_round:
        movdqu  (%rdi, %r10), %xmm1             /* xmm1 = current round key */
        .byte 0x66,0x0f,0x38,0xde,0xd9          /* aesdec %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xde,0xe1          /* aesdec %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xde,0xe9          /* aesdec %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xde,0xf1          /* aesdec %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xde,0xf9          /* aesdec %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc1     /* aesdec %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc9     /* aesdec %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xd1     /* aesdec %xmm1, %xmm10 */
        subq    $16, %r10
        jne     .Ldec_ecb192_round              /* stop once offset reaches 0 (key 0) */
        .byte 0x66,0x0f,0x38,0xdf,0xda          /* aesdeclast %xmm2, %xmm3 */
        .byte 0x66,0x0f,0x38,0xdf,0xe2          /* aesdeclast %xmm2, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdf,0xea          /* aesdeclast %xmm2, %xmm5 */
        .byte 0x66,0x0f,0x38,0xdf,0xf2          /* aesdeclast %xmm2, %xmm6 */
        .byte 0x66,0x0f,0x38,0xdf,0xfa          /* aesdeclast %xmm2, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2     /* aesdeclast %xmm2, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xdf,0xca     /* aesdeclast %xmm2, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2     /* aesdeclast %xmm2, %xmm10 */
        movdqu  %xmm3, (%rsi, %rax)
        movdqu  %xmm4, 16(%rsi, %rax)
        movdqu  %xmm5, 32(%rsi, %rax)
        movdqu  %xmm6, 48(%rsi, %rax)
        movdqu  %xmm7, 64(%rsi, %rax)
        movdqu  %xmm8, 80(%rsi, %rax)
        movdqu  %xmm9, 96(%rsi, %rax)
        movdqu  %xmm10, 112(%rsi, %rax)
        addq    $128, %rax
        cmpq    %r11, %rax
        jbe     .Ldec_ecb192_x8                 /* unsigned: another 8 blocks still fit */

.Ldec_ecb192_tail:
        cmpq    %rax, %r9
        je      .Ldec_ecb192_done               /* nothing left */

        /* Keep round keys 1..11 in registers for the one-block loop. */
        movdqu  16(%rdi), %xmm3
        movdqu  32(%rdi), %xmm4
        movdqu  48(%rdi), %xmm5
        movdqu  64(%rdi), %xmm6
        movdqu  80(%rdi), %xmm7
        movdqu  96(%rdi), %xmm8
        movdqu  112(%rdi), %xmm9
        movdqu  128(%rdi), %xmm10
        movdqu  144(%rdi), %xmm11
        movdqu  160(%rdi), %xmm12
        movdqu  176(%rdi), %xmm13

.Ldec_ecb192_x1:
        movdqu  (%r8, %rax), %xmm1
        pxor    %xmm14, %xmm1                   /* initial AddRoundKey with key 12 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xcd     /* aesdec %xmm13, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xcc     /* aesdec %xmm12, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xcb     /* aesdec %xmm11, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xca     /* aesdec %xmm10, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xc9     /* aesdec %xmm9, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xc8     /* aesdec %xmm8, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xcf          /* aesdec %xmm7, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xce          /* aesdec %xmm6, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xcd          /* aesdec %xmm5, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xcc          /* aesdec %xmm4, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xcb          /* aesdec %xmm3, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdf,0xca          /* aesdeclast %xmm2, %xmm1 */
        movdqu  %xmm1, (%rsi, %rax)
        addq    $16, %rax
        cmpq    %rax, %r9
        jne     .Ldec_ecb192_x1

.Ldec_ecb192_done:
        xorl    %eax, %eax                      /* return 0 */
        ret
        .size   intel_aes_decrypt_ecb_192, .-intel_aes_decrypt_ecb_192
|
|
|
|
|
|
/*
 * intel_aes_encrypt_cbc_192 -- AES-192 encryption in CBC mode (AES-NI).
 *
 * in %rdi : cx - context; the IV is read from offset 16 and the expanded
 *           key (13 round keys of 16 bytes) from offset 48
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (filled by caller; it is never dereferenced here, the register
 *           is reused as the pointer to the IV)
 * in %rcx : maxOutputLen - length of output buffer (not examined)
 * in %r8  : input - pointer to input buffer
 * in %r9  : inputLen - length of input buffer in bytes; the block loop
 *           only stops when the running offset equals this value, so it
 *           has to be a multiple of 16
 * on stack: blocksize - AES blocksize (always 16, unused)
 *
 * Returns 0 in %eax.  With inputLen == 0 nothing is read or written.
 * On exit the IV slot of the context holds the last ciphertext block.
 */
	.globl intel_aes_encrypt_cbc_192
	.type intel_aes_encrypt_cbc_192,@function
	.align 16
intel_aes_encrypt_cbc_192:
	testq	%r9, %r9
	jz	.Lenc_cbc192_done

	leaq	16(%rdi), %rdx			/* %rdx = &cx->iv */
	leaq	48(%rdi), %rdi			/* %rdi = expanded key */
	xorl	%eax, %eax			/* %rax = byte offset into input/output */

	/* Keep the whole key schedule in registers: %xmm2 = round key 0,
	   ..., %xmm14 = round key 12. */
	movdqu	(%rdi), %xmm2
	movdqu	16(%rdi), %xmm3
	movdqu	32(%rdi), %xmm4
	movdqu	48(%rdi), %xmm5
	movdqu	64(%rdi), %xmm6
	movdqu	80(%rdi), %xmm7
	movdqu	96(%rdi), %xmm8
	movdqu	112(%rdi), %xmm9
	movdqu	128(%rdi), %xmm10
	movdqu	144(%rdi), %xmm11
	movdqu	160(%rdi), %xmm12
	movdqu	176(%rdi), %xmm13
	movdqu	192(%rdi), %xmm14
	movdqu	(%rdx), %xmm0			/* %xmm0 = chaining value (IV) */

.Lenc_cbc192_block:
	movdqu	(%r8, %rax), %xmm1		/* plaintext block */
	pxor	%xmm2, %xmm1			/* initial AddRoundKey */
	pxor	%xmm0, %xmm1			/* CBC: xor with previous ciphertext */
	.byte 0x66,0x0f,0x38,0xdc,0xcb		/* aesenc %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcc		/* aesenc %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcd		/* aesenc %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xce		/* aesenc %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcf		/* aesenc %xmm7, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc %xmm8, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcc	/* aesenc %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcd	/* aesenc %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xce	/* aesenclast %xmm14, %xmm1 */
	movdqa	%xmm1, %xmm0			/* ciphertext chains into the next block */
	movdqu	%xmm1, (%rsi, %rax)
	addq	$16, %rax
	cmpq	%r9, %rax
	jne	.Lenc_cbc192_block

	movdqu	%xmm0, (%rdx)			/* save the chaining value in the context */

.Lenc_cbc192_done:
	xorl	%eax, %eax			/* return 0 */
	ret
	.size intel_aes_encrypt_cbc_192, .-intel_aes_encrypt_cbc_192
|
|
|
|
|
|
/*
 * intel_aes_decrypt_cbc_192 -- AES-192 decryption in CBC mode (AES-NI).
 *
 * in %rdi : cx - context; the IV is read from offset 16 and the expanded
 *           (decryption) key, 13 round keys of 16 bytes, from offset 48
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (filled by caller; never dereferenced here, the register is
 *           reused as the pointer to the IV)
 * in %rcx : maxOutputLen - length of output buffer (not examined)
 * in %r8  : input - pointer to input buffer
 * in %r9  : inputLen - length of input buffer in bytes; the single-block
 *           loop only stops when the running offset equals this value, so
 *           it has to be a multiple of 16
 * on stack: blocksize - AES blocksize (always 16, unused)
 *
 * Returns 0 in %eax.  On exit the IV slot of the context holds the last
 * ciphertext block that was processed (or the unchanged IV when
 * inputLen == 0).
 *
 * Blocks are handled eight at a time while at least 128 bytes remain and
 * one at a time afterwards.  Every ciphertext block is read from the
 * input before the corresponding plaintext is stored, so input and output
 * may be the same buffer.  All buffer accesses use movdqu: the
 * memory-operand form of pxor faults on addresses that are not 16-byte
 * aligned, and no alignment is required of the caller's buffers.
 */
	.type intel_aes_decrypt_cbc_192,@function
	.globl intel_aes_decrypt_cbc_192
	.align 16
intel_aes_decrypt_cbc_192:
	leaq	16(%rdi), %rdx			/* %rdx = &cx->iv */
	leaq	48(%rdi), %rdi			/* %rdi = expanded key */

	movdqu	(%rdx), %xmm0			/* %xmm0 = chaining value (IV) */
	movdqu	(%rdi), %xmm2			/* round key 0, used by aesdeclast */
	movdqu	192(%rdi), %xmm14		/* round key 12, xored in first */
	xorl	%eax, %eax			/* %rax = byte offset into input/output */
	cmpq	$128, %r9			/* fewer than 8 blocks? */
	jb	.Ldec_cbc192_tail_check
	leaq	-128(%r9), %r11			/* last offset with 8 full blocks left */

.Ldec_cbc192_bulk:
	movdqu	(%r8, %rax), %xmm3
	movdqu	16(%r8, %rax), %xmm4
	movdqu	32(%r8, %rax), %xmm5
	movdqu	48(%r8, %rax), %xmm6
	movdqu	64(%r8, %rax), %xmm7
	movdqu	80(%r8, %rax), %xmm8
	movdqu	96(%r8, %rax), %xmm9
	movdqu	112(%r8, %rax), %xmm10
	pxor	%xmm14, %xmm3
	pxor	%xmm14, %xmm4
	pxor	%xmm14, %xmm5
	pxor	%xmm14, %xmm6
	pxor	%xmm14, %xmm7
	pxor	%xmm14, %xmm8
	pxor	%xmm14, %xmm9
	pxor	%xmm14, %xmm10

	/* Round keys 11 down to 1; %r10 is the byte offset of the key. */
	movq	$176, %r10
.Ldec_cbc192_round:
	movdqu	(%rdi, %r10), %xmm1
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	subq	$16, %r10
	jne	.Ldec_cbc192_round

	.byte 0x66,0x0f,0x38,0xdf,0xda		/* aesdeclast %xmm2, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdf,0xe2		/* aesdeclast %xmm2, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdf,0xea		/* aesdeclast %xmm2, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdf,0xf2		/* aesdeclast %xmm2, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdf,0xfa		/* aesdeclast %xmm2, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2	/* aesdeclast %xmm2, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca	/* aesdeclast %xmm2, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2	/* aesdeclast %xmm2, %xmm10 */

	/* CBC chaining: block i is xored with ciphertext block i-1.  The
	   ciphertext is re-read with movdqu into %xmm11-%xmm13/%xmm15, which
	   are unused in this loop, because pxor with a memory operand would
	   fault on an unaligned input buffer.  All re-reads happen before
	   the stores below so that in-place operation keeps working. */
	pxor	%xmm0, %xmm3
	movdqu	(%r8, %rax), %xmm11
	movdqu	16(%r8, %rax), %xmm12
	movdqu	32(%r8, %rax), %xmm13
	movdqu	48(%r8, %rax), %xmm15
	pxor	%xmm11, %xmm4
	pxor	%xmm12, %xmm5
	pxor	%xmm13, %xmm6
	pxor	%xmm15, %xmm7
	movdqu	64(%r8, %rax), %xmm11
	movdqu	80(%r8, %rax), %xmm12
	movdqu	96(%r8, %rax), %xmm13
	movdqu	112(%r8, %rax), %xmm0		/* last ciphertext = next chaining value */
	pxor	%xmm11, %xmm8
	pxor	%xmm12, %xmm9
	pxor	%xmm13, %xmm10

	movdqu	%xmm3, (%rsi, %rax)
	movdqu	%xmm4, 16(%rsi, %rax)
	movdqu	%xmm5, 32(%rsi, %rax)
	movdqu	%xmm6, 48(%rsi, %rax)
	movdqu	%xmm7, 64(%rsi, %rax)
	movdqu	%xmm8, 80(%rsi, %rax)
	movdqu	%xmm9, 96(%rsi, %rax)
	movdqu	%xmm10, 112(%rsi, %rax)
	addq	$128, %rax
	cmpq	%r11, %rax
	jbe	.Ldec_cbc192_bulk

.Ldec_cbc192_tail_check:
	cmpq	%rax, %r9
	je	.Ldec_cbc192_done

	/* Fewer than 8 blocks left: keep round keys 1..11 in registers
	   (%xmm2 and %xmm14 still hold round keys 0 and 12). */
	movdqu	16(%rdi), %xmm3
	movdqu	32(%rdi), %xmm4
	movdqu	48(%rdi), %xmm5
	movdqu	64(%rdi), %xmm6
	movdqu	80(%rdi), %xmm7
	movdqu	96(%rdi), %xmm8
	movdqu	112(%rdi), %xmm9
	movdqu	128(%rdi), %xmm10
	movdqu	144(%rdi), %xmm11
	movdqu	160(%rdi), %xmm12
	movdqu	176(%rdi), %xmm13

.Ldec_cbc192_single:
	movdqu	(%r8, %rax), %xmm1
	movdqa	%xmm1, %xmm15			/* keep the ciphertext for chaining */
	pxor	%xmm14, %xmm1
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcd	/* aesdec %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcc	/* aesdec %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xca	/* aesdec %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9	/* aesdec %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8	/* aesdec %xmm8, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcf		/* aesdec %xmm7, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xce		/* aesdec %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcd		/* aesdec %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcc		/* aesdec %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcb		/* aesdec %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdf,0xca		/* aesdeclast %xmm2, %xmm1 */
	pxor	%xmm0, %xmm1			/* xor with previous ciphertext / IV */
	movdqu	%xmm1, (%rsi, %rax)
	movdqa	%xmm15, %xmm0
	addq	$16, %rax
	cmpq	%rax, %r9
	jne	.Ldec_cbc192_single

.Ldec_cbc192_done:
	movdqu	%xmm0, (%rdx)			/* save the chaining value in the context */

	xorl	%eax, %eax			/* return 0 */
	ret
	.size intel_aes_decrypt_cbc_192, .-intel_aes_decrypt_cbc_192
|
|
|
|
|
|
/*
 * intel_aes_encrypt_init_256 -- expand a 256-bit AES key for encryption.
 *
 * in %rdi : the key (32 bytes are read)
 * in %rsi : buffer for expanded key (15 round keys = 240 bytes are written)
 *
 * The two halves of the user key become round keys 0 and 1.  Each call to
 * key_expansion256 appends two more round keys and advances %rsi by 32;
 * the 15th round key is produced inline at the end because only the first
 * half of a key_expansion256 step is needed for it.
 * %eax is cleared before the first call because key_expansion256 takes
 * its initial %xmm6 value from it; it is still 0 on return.
 */
	.globl intel_aes_encrypt_init_256
	.type intel_aes_encrypt_init_256,@function
	.align 16
intel_aes_encrypt_init_256:
	xorl	%eax, %eax
	movdqu	(%rdi), %xmm1			/* key bytes 0..15  */
	movdqu	16(%rdi), %xmm3			/* key bytes 16..31 */
	movdqu	%xmm3, 16(%rsi)			/* round key 1 */
	movdqu	%xmm1, (%rsi)			/* round key 0 */
	addq	$32, %rsi			/* next round key goes here */

	/* Round keys 2..13, two per step; the immediate is the round
	   constant. */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01	/* aeskeygenassist $0x01, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02	/* aeskeygenassist $0x02, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04	/* aeskeygenassist $0x04, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08	/* aeskeygenassist $0x08, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10	/* aeskeygenassist $0x10, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20	/* aeskeygenassist $0x20, %xmm3, %xmm2 */
	call	key_expansion256

	/* Round key 14: same computation as the first half of
	   key_expansion256, without the store-and-advance that follows. */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40	/* aeskeygenassist $0x40, %xmm3, %xmm2 */
	pxor	%xmm6, %xmm6
	shufps	$0x10, %xmm1, %xmm6
	pshufd	$0xff, %xmm2, %xmm2
	pxor	%xmm6, %xmm1
	shufps	$0x8c, %xmm1, %xmm6
	pxor	%xmm6, %xmm1
	pxor	%xmm2, %xmm1
	movdqu	%xmm1, (%rsi)

	ret
	.size intel_aes_encrypt_init_256, .-intel_aes_encrypt_init_256
|
|
|
|
|
|
/*
 * intel_aes_decrypt_init_256 -- expand a 256-bit AES key for decryption.
 *
 * in %rdi : the key (32 bytes are read)
 * in %rsi : buffer for expanded key (15 round keys = 240 bytes are written)
 *
 * The schedule is generated exactly as for encryption, but round keys
 * 1..13 are passed through aesimc (InvMixColumns) before they reach the
 * buffer, which is the form the aesdec instruction expects.  Round keys 0
 * and 14 are stored unmodified.
 * key_expansion256 stores the plain round keys and advances %rsi by 32,
 * so after each call the transformed copies are written back over them at
 * -32(%rsi) and -16(%rsi); %xmm1/%xmm3 keep the untransformed keys for
 * the next step.
 * %eax is cleared before the first call because key_expansion256 takes
 * its initial %xmm6 value from it; it is still 0 on return.
 */
	.globl intel_aes_decrypt_init_256
	.type intel_aes_decrypt_init_256,@function
	.align 16
intel_aes_decrypt_init_256:
	xorl	%eax, %eax
	movdqu	(%rdi), %xmm1			/* key bytes 0..15  */
	movdqu	16(%rdi), %xmm3			/* key bytes 16..31 */
	.byte 0x66,0x0f,0x38,0xdb,0xe3		/* aesimc %xmm3, %xmm4 */
	movdqu	%xmm1, (%rsi)			/* round key 0, unmodified */
	movdqu	%xmm4, 16(%rsi)			/* round key 1, transformed */
	addq	$32, %rsi			/* next round key goes here */

	/* Round keys 2 and 3 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01	/* aeskeygenassist $0x01, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x38,0xdb,0xe1		/* aesimc %xmm1, %xmm4 */
	movdqu	%xmm4, -32(%rsi)
	.byte 0x66,0x0f,0x38,0xdb,0xeb		/* aesimc %xmm3, %xmm5 */
	movdqu	%xmm5, -16(%rsi)

	/* Round keys 4 and 5 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02	/* aeskeygenassist $0x02, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x38,0xdb,0xe1		/* aesimc %xmm1, %xmm4 */
	movdqu	%xmm4, -32(%rsi)
	.byte 0x66,0x0f,0x38,0xdb,0xeb		/* aesimc %xmm3, %xmm5 */
	movdqu	%xmm5, -16(%rsi)

	/* Round keys 6 and 7 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04	/* aeskeygenassist $0x04, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x38,0xdb,0xe1		/* aesimc %xmm1, %xmm4 */
	movdqu	%xmm4, -32(%rsi)
	.byte 0x66,0x0f,0x38,0xdb,0xeb		/* aesimc %xmm3, %xmm5 */
	movdqu	%xmm5, -16(%rsi)

	/* Round keys 8 and 9 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08	/* aeskeygenassist $0x08, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x38,0xdb,0xe1		/* aesimc %xmm1, %xmm4 */
	movdqu	%xmm4, -32(%rsi)
	.byte 0x66,0x0f,0x38,0xdb,0xeb		/* aesimc %xmm3, %xmm5 */
	movdqu	%xmm5, -16(%rsi)

	/* Round keys 10 and 11 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10	/* aeskeygenassist $0x10, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x38,0xdb,0xe1		/* aesimc %xmm1, %xmm4 */
	movdqu	%xmm4, -32(%rsi)
	.byte 0x66,0x0f,0x38,0xdb,0xeb		/* aesimc %xmm3, %xmm5 */
	movdqu	%xmm5, -16(%rsi)

	/* Round keys 12 and 13 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20	/* aeskeygenassist $0x20, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x38,0xdb,0xe1		/* aesimc %xmm1, %xmm4 */
	movdqu	%xmm4, -32(%rsi)
	.byte 0x66,0x0f,0x38,0xdb,0xeb		/* aesimc %xmm3, %xmm5 */
	movdqu	%xmm5, -16(%rsi)

	/* Round key 14: same computation as the first half of
	   key_expansion256; stored without aesimc. */
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40	/* aeskeygenassist $0x40, %xmm3, %xmm2 */
	pxor	%xmm6, %xmm6
	shufps	$0x10, %xmm1, %xmm6
	pshufd	$0xff, %xmm2, %xmm2
	pxor	%xmm6, %xmm1
	shufps	$0x8c, %xmm1, %xmm6
	pxor	%xmm6, %xmm1
	pxor	%xmm2, %xmm1
	movdqu	%xmm1, (%rsi)

	ret
	.size intel_aes_decrypt_init_256, .-intel_aes_decrypt_init_256
|
|
|
|
|
|
/*
 * key_expansion256 -- one step of the AES-256 key schedule; appends two
 * round keys to the buffer.  File-local helper with a private register
 * contract (it is not declared .globl and does not follow the C ABI).
 *
 * in     %xmm1 : the older of the two most recent round keys
 * in     %xmm3 : the newer of the two most recent round keys
 * in     %xmm2 : aeskeygenassist result for %xmm3 with the round constant
 * in     %eax  : copied into %xmm6 as its starting value; the 256-bit init
 *                routines that call this helper clear it beforehand
 * in/out %rsi  : where to store; advanced by 32 on return
 * out    %xmm1, %xmm3 : the two new round keys (also stored at the
 *                incoming (%rsi) and 16(%rsi))
 * clobbers %xmm2, %xmm4, %xmm6
 */
	.type key_expansion256,@function
	.align 16
key_expansion256:
	/* First new key, derived from %xmm1 and the assist value in %xmm2. */
	movd	%eax, %xmm6
	shufps	$0x10, %xmm1, %xmm6
	pshufd	$0xff, %xmm2, %xmm2		/* broadcast dword 3 of the assist value */
	pxor	%xmm6, %xmm1
	shufps	$0x8c, %xmm1, %xmm6
	pxor	%xmm6, %xmm1
	pxor	%xmm2, %xmm1
	movdqu	%xmm1, (%rsi)

	/* Second new key, derived from %xmm3 and the key just produced.
	   %xmm6 is carried over from the first half. */
	.byte 0x66,0x0f,0x3a,0xdf,0xe1,0x00	/* aeskeygenassist $0, %xmm1, %xmm4 */
	shufps	$0x10, %xmm3, %xmm6
	pshufd	$0xaa, %xmm4, %xmm4		/* broadcast dword 2 of the assist value */
	pxor	%xmm6, %xmm3
	shufps	$0x8c, %xmm3, %xmm6
	pxor	%xmm6, %xmm3
	pxor	%xmm4, %xmm3
	movdqu	%xmm3, 16(%rsi)

	addq	$32, %rsi			/* step past both keys */
	ret
	.size key_expansion256, .-key_expansion256
|
|
|
|
|
|
/*
 * intel_aes_encrypt_ecb_256 -- AES-256 encryption in ECB mode (AES-NI).
 *
 * in %rdi : cx - context; the expanded key (15 round keys of 16 bytes)
 *           is read from offset 48
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (filled by caller; not used here)
 * in %rcx : maxOutputLen - length of output buffer (not examined)
 * in %r8  : input - pointer to input buffer
 * in %r9  : inputLen - length of input buffer in bytes; the single-block
 *           loop only stops when the running offset equals this value, so
 *           it has to be a multiple of 16
 * on stack: blocksize - AES blocksize (always 16, unused)
 *
 * Returns 0 in %eax.
 *
 * Blocks are handled eight at a time while at least 128 bytes remain and
 * one at a time afterwards.  Round keys are only ever fetched with
 * movdqu, so the context does not have to be 16-byte aligned (pxor with
 * a memory operand would fault on an unaligned address).
 */
	.type intel_aes_encrypt_ecb_256,@function
	.globl intel_aes_encrypt_ecb_256
	.align 16
intel_aes_encrypt_ecb_256:
	leaq	48(%rdi), %rdi			/* %rdi = expanded key */

	movdqu	(%rdi), %xmm2			/* round key 0 */
	movdqu	224(%rdi), %xmm15		/* round key 14, used by aesenclast */
	xorl	%eax, %eax			/* %rax = byte offset into input/output */
	cmpq	$128, %r9			/* fewer than 8 blocks? */
	jb	.Lenc_ecb256_tail_check
	leaq	-128(%r9), %r11			/* last offset with 8 full blocks left */

.Lenc_ecb256_bulk:
	movdqu	(%r8, %rax), %xmm3
	movdqu	16(%r8, %rax), %xmm4
	movdqu	32(%r8, %rax), %xmm5
	movdqu	48(%r8, %rax), %xmm6
	movdqu	64(%r8, %rax), %xmm7
	movdqu	80(%r8, %rax), %xmm8
	movdqu	96(%r8, %rax), %xmm9
	movdqu	112(%r8, %rax), %xmm10
	pxor	%xmm2, %xmm3
	pxor	%xmm2, %xmm4
	pxor	%xmm2, %xmm5
	pxor	%xmm2, %xmm6
	pxor	%xmm2, %xmm7
	pxor	%xmm2, %xmm8
	pxor	%xmm2, %xmm9
	pxor	%xmm2, %xmm10

	/* Round keys 1 up to 13; %r10 is the byte offset of the key. */
	movq	$16, %r10
.Lenc_ecb256_round:
	movdqu	(%rdi, %r10), %xmm1
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	addq	$16, %r10
	cmpq	$224, %r10
	jne	.Lenc_ecb256_round

	.byte 0x66,0x41,0x0f,0x38,0xdd,0xdf	/* aesenclast %xmm15, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xe7	/* aesenclast %xmm15, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xef	/* aesenclast %xmm15, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xf7	/* aesenclast %xmm15, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xff	/* aesenclast %xmm15, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xc7	/* aesenclast %xmm15, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xcf	/* aesenclast %xmm15, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xd7	/* aesenclast %xmm15, %xmm10 */
	movdqu	%xmm3, (%rsi, %rax)
	movdqu	%xmm4, 16(%rsi, %rax)
	movdqu	%xmm5, 32(%rsi, %rax)
	movdqu	%xmm6, 48(%rsi, %rax)
	movdqu	%xmm7, 64(%rsi, %rax)
	movdqu	%xmm8, 80(%rsi, %rax)
	movdqu	%xmm9, 96(%rsi, %rax)
	movdqu	%xmm10, 112(%rsi, %rax)
	addq	$128, %rax
	cmpq	%r11, %rax
	jbe	.Lenc_ecb256_bulk

.Lenc_ecb256_tail_check:
	cmpq	%rax, %r9
	je	.Lenc_ecb256_done

	/* Fewer than 8 blocks left: hold the complete schedule in
	   registers.  %xmm2 still contains round key 0 at this point; it
	   moves to %xmm0 (otherwise unused in this function) so that
	   %xmm2..%xmm14 can take round keys 1..13. */
	movdqa	%xmm2, %xmm0
	movdqu	16(%rdi), %xmm2
	movdqu	32(%rdi), %xmm3
	movdqu	48(%rdi), %xmm4
	movdqu	64(%rdi), %xmm5
	movdqu	80(%rdi), %xmm6
	movdqu	96(%rdi), %xmm7
	movdqu	112(%rdi), %xmm8
	movdqu	128(%rdi), %xmm9
	movdqu	144(%rdi), %xmm10
	movdqu	160(%rdi), %xmm11
	movdqu	176(%rdi), %xmm12
	movdqu	192(%rdi), %xmm13
	movdqu	208(%rdi), %xmm14

.Lenc_ecb256_single:
	movdqu	(%r8, %rax), %xmm1
	pxor	%xmm0, %xmm1			/* initial AddRoundKey (round key 0) */
	.byte 0x66,0x0f,0x38,0xdc,0xca		/* aesenc %xmm2, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcb		/* aesenc %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcc		/* aesenc %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcd		/* aesenc %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xce		/* aesenc %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcf		/* aesenc %xmm7, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc %xmm8, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcc	/* aesenc %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcd	/* aesenc %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xce	/* aesenc %xmm14, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xcf	/* aesenclast %xmm15, %xmm1 */
	movdqu	%xmm1, (%rsi, %rax)
	addq	$16, %rax
	cmpq	%rax, %r9
	jne	.Lenc_ecb256_single

.Lenc_ecb256_done:
	xorl	%eax, %eax			/* return 0 */
	ret
	.size intel_aes_encrypt_ecb_256, .-intel_aes_encrypt_ecb_256
|
|
|
|
|
|
/*
 * intel_aes_decrypt_ecb_256 -- AES-256 decryption in ECB mode (AES-NI).
 *
 * in %rdi : cx - context; the expanded (decryption) key, 15 round keys
 *           of 16 bytes, is read from offset 48
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (filled by caller; not used here)
 * in %rcx : maxOutputLen - length of output buffer (not examined)
 * in %r8  : input - pointer to input buffer
 * in %r9  : inputLen - length of input buffer in bytes; the single-block
 *           loop only stops when the running offset equals this value, so
 *           it has to be a multiple of 16
 * on stack: blocksize - AES blocksize (always 16, unused)
 *
 * Returns 0 in %eax.
 *
 * Blocks are handled eight at a time while at least 128 bytes remain and
 * one at a time afterwards.  Round keys are only ever fetched with
 * movdqu, so the context does not have to be 16-byte aligned (aesdeclast
 * with a memory operand would fault on an unaligned address).
 */
	.type intel_aes_decrypt_ecb_256,@function
	.globl intel_aes_decrypt_ecb_256
	.align 16
intel_aes_decrypt_ecb_256:
	leaq	48(%rdi), %rdi			/* %rdi = expanded key */

	movdqu	(%rdi), %xmm2			/* round key 0, used by aesdeclast */
	movdqu	224(%rdi), %xmm15		/* round key 14, xored in first */
	xorl	%eax, %eax			/* %rax = byte offset into input/output */
	cmpq	$128, %r9			/* fewer than 8 blocks? */
	jb	.Ldec_ecb256_tail_check
	leaq	-128(%r9), %r11			/* last offset with 8 full blocks left */

.Ldec_ecb256_bulk:
	movdqu	(%r8, %rax), %xmm3
	movdqu	16(%r8, %rax), %xmm4
	movdqu	32(%r8, %rax), %xmm5
	movdqu	48(%r8, %rax), %xmm6
	movdqu	64(%r8, %rax), %xmm7
	movdqu	80(%r8, %rax), %xmm8
	movdqu	96(%r8, %rax), %xmm9
	movdqu	112(%r8, %rax), %xmm10
	pxor	%xmm15, %xmm3
	pxor	%xmm15, %xmm4
	pxor	%xmm15, %xmm5
	pxor	%xmm15, %xmm6
	pxor	%xmm15, %xmm7
	pxor	%xmm15, %xmm8
	pxor	%xmm15, %xmm9
	pxor	%xmm15, %xmm10

	/* Round keys 13 down to 1; %r10 is the byte offset of the key. */
	movq	$208, %r10
.Ldec_ecb256_round:
	movdqu	(%rdi, %r10), %xmm1
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	subq	$16, %r10
	jne	.Ldec_ecb256_round

	.byte 0x66,0x0f,0x38,0xdf,0xda		/* aesdeclast %xmm2, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdf,0xe2		/* aesdeclast %xmm2, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdf,0xea		/* aesdeclast %xmm2, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdf,0xf2		/* aesdeclast %xmm2, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdf,0xfa		/* aesdeclast %xmm2, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2	/* aesdeclast %xmm2, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca	/* aesdeclast %xmm2, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2	/* aesdeclast %xmm2, %xmm10 */
	movdqu	%xmm3, (%rsi, %rax)
	movdqu	%xmm4, 16(%rsi, %rax)
	movdqu	%xmm5, 32(%rsi, %rax)
	movdqu	%xmm6, 48(%rsi, %rax)
	movdqu	%xmm7, 64(%rsi, %rax)
	movdqu	%xmm8, 80(%rsi, %rax)
	movdqu	%xmm9, 96(%rsi, %rax)
	movdqu	%xmm10, 112(%rsi, %rax)
	addq	$128, %rax
	cmpq	%r11, %rax
	jbe	.Ldec_ecb256_bulk

.Ldec_ecb256_tail_check:
	cmpq	%rax, %r9
	je	.Ldec_ecb256_done

	/* Fewer than 8 blocks left: hold the complete schedule in
	   registers.  %xmm2 still contains round key 0 at this point; it
	   moves to %xmm0 (otherwise unused in this function) so that
	   %xmm2..%xmm14 can take round keys 1..13. */
	movdqa	%xmm2, %xmm0
	movdqu	16(%rdi), %xmm2
	movdqu	32(%rdi), %xmm3
	movdqu	48(%rdi), %xmm4
	movdqu	64(%rdi), %xmm5
	movdqu	80(%rdi), %xmm6
	movdqu	96(%rdi), %xmm7
	movdqu	112(%rdi), %xmm8
	movdqu	128(%rdi), %xmm9
	movdqu	144(%rdi), %xmm10
	movdqu	160(%rdi), %xmm11
	movdqu	176(%rdi), %xmm12
	movdqu	192(%rdi), %xmm13
	movdqu	208(%rdi), %xmm14

.Ldec_ecb256_single:
	movdqu	(%r8, %rax), %xmm1
	pxor	%xmm15, %xmm1			/* round key 14 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xce	/* aesdec %xmm14, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcd	/* aesdec %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcc	/* aesdec %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xca	/* aesdec %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9	/* aesdec %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8	/* aesdec %xmm8, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcf		/* aesdec %xmm7, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xce		/* aesdec %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcd		/* aesdec %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcc		/* aesdec %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcb		/* aesdec %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xca		/* aesdec %xmm2, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdf,0xc8		/* aesdeclast %xmm0, %xmm1 */
	movdqu	%xmm1, (%rsi, %rax)
	addq	$16, %rax
	cmpq	%rax, %r9
	jne	.Ldec_ecb256_single

.Ldec_ecb256_done:
	xorl	%eax, %eax			/* return 0 */
	ret
	.size intel_aes_decrypt_ecb_256, .-intel_aes_decrypt_ecb_256
|
|
|
|
|
|
/*
 * intel_aes_encrypt_cbc_256 -- AES-256 encryption in CBC mode (AES-NI).
 *
 * in %rdi : cx - context; the IV is read from offset 16 and the expanded
 *           key (15 round keys of 16 bytes) from offset 48
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (filled by caller; never dereferenced here, the register is
 *           reused as the pointer to the IV)
 * in %rcx : maxOutputLen - length of output buffer (not examined)
 * in %r8  : input - pointer to input buffer
 * in %r9  : inputLen - length of input buffer in bytes; the block loop
 *           only stops when the running offset equals this value, so it
 *           has to be a multiple of 16
 * on stack: blocksize - AES blocksize (always 16, unused)
 *
 * Returns 0 in %eax.  With inputLen == 0 nothing is read or written.
 * On exit the IV slot of the context holds the last ciphertext block.
 *
 * Register use in the loop: %xmm0 = chaining value, %xmm1 = block being
 * encrypted, %xmm2..%xmm15 = round keys 1..14.  That leaves no register
 * for round key 0, so it is fetched once per block with movdqu into
 * %xmm0 right after the chaining value has been xored into the block.
 * (pxor with a memory operand is avoided because it would fault if the
 * key schedule were not 16-byte aligned.)
 */
	.type intel_aes_encrypt_cbc_256,@function
	.globl intel_aes_encrypt_cbc_256
	.align 16
intel_aes_encrypt_cbc_256:
	testq	%r9, %r9
	je	.Lenc_cbc256_done

	leaq	16(%rdi), %rdx			/* %rdx = &cx->iv */
	leaq	48(%rdi), %rdi			/* %rdi = expanded key */

	movdqu	(%rdx), %xmm0			/* %xmm0 = chaining value (IV) */
	movdqu	16(%rdi), %xmm2
	movdqu	32(%rdi), %xmm3
	movdqu	48(%rdi), %xmm4
	movdqu	64(%rdi), %xmm5
	movdqu	80(%rdi), %xmm6
	movdqu	96(%rdi), %xmm7
	movdqu	112(%rdi), %xmm8
	movdqu	128(%rdi), %xmm9
	movdqu	144(%rdi), %xmm10
	movdqu	160(%rdi), %xmm11
	movdqu	176(%rdi), %xmm12
	movdqu	192(%rdi), %xmm13
	movdqu	208(%rdi), %xmm14
	movdqu	224(%rdi), %xmm15

	xorl	%eax, %eax			/* %rax = byte offset into input/output */

.Lenc_cbc256_block:
	movdqu	(%r8, %rax), %xmm1		/* plaintext block */
	pxor	%xmm0, %xmm1			/* CBC: xor with previous ciphertext */
	movdqu	(%rdi), %xmm0			/* round key 0; %xmm0 is free until the
						   end of this iteration */
	pxor	%xmm0, %xmm1			/* initial AddRoundKey */
	.byte 0x66,0x0f,0x38,0xdc,0xca		/* aesenc %xmm2, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcb		/* aesenc %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcc		/* aesenc %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcd		/* aesenc %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xce		/* aesenc %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcf		/* aesenc %xmm7, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc %xmm8, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcc	/* aesenc %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcd	/* aesenc %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xce	/* aesenc %xmm14, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xcf	/* aesenclast %xmm15, %xmm1 */
	movdqu	%xmm1, (%rsi, %rax)
	movdqa	%xmm1, %xmm0			/* ciphertext chains into the next block */
	addq	$16, %rax
	cmpq	%rax, %r9
	jne	.Lenc_cbc256_block

	movdqu	%xmm0, (%rdx)			/* save the chaining value in the context */

.Lenc_cbc256_done:
	xorl	%eax, %eax			/* return 0 */
	ret
	.size intel_aes_encrypt_cbc_256, .-intel_aes_encrypt_cbc_256
|
|
|
|
|
|
/*
 * intel_aes_decrypt_cbc_256 -- AES-256 decryption in CBC mode (AES-NI).
 *
 * in %rdi : cx - context; the IV is read from offset 16 and the expanded
 *           (decryption) key, 15 round keys of 16 bytes, from offset 48
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (filled by caller; never dereferenced here, the register is
 *           reused as the pointer to the IV)
 * in %rcx : maxOutputLen - length of output buffer (not examined)
 * in %r8  : input - pointer to input buffer
 * in %r9  : inputLen - length of input buffer in bytes; the single-block
 *           loop only stops when the running offset equals this value, so
 *           it has to be a multiple of 16
 * on stack: blocksize - AES blocksize (always 16, unused)
 *
 * Returns 0 in %eax.  On exit the IV slot of the context holds the last
 * ciphertext block that was processed (or the unchanged IV when
 * inputLen == 0).
 *
 * Blocks are handled eight at a time while at least 128 bytes remain and
 * one at a time afterwards.  Two properties are maintained throughout:
 *  - every ciphertext block is read from the input before the
 *    corresponding plaintext is stored, so input and output may be the
 *    same buffer;
 *  - buffers and round keys are only accessed with movdqu, because the
 *    memory-operand forms of pxor and aesdeclast fault on addresses that
 *    are not 16-byte aligned.
 */
	.type intel_aes_decrypt_cbc_256,@function
	.globl intel_aes_decrypt_cbc_256
	.align 16
intel_aes_decrypt_cbc_256:
	leaq	16(%rdi), %rdx			/* %rdx = &cx->iv */
	leaq	48(%rdi), %rdi			/* %rdi = expanded key */

	movdqu	(%rdx), %xmm0			/* %xmm0 = chaining value (IV) */
	movdqu	(%rdi), %xmm2			/* round key 0, used by aesdeclast */
	movdqu	224(%rdi), %xmm15		/* round key 14, xored in first */
	xorl	%eax, %eax			/* %rax = byte offset into input/output */
	cmpq	$128, %r9			/* fewer than 8 blocks? */
	jb	.Ldec_cbc256_tail_check
	leaq	-128(%r9), %r11			/* last offset with 8 full blocks left */

.Ldec_cbc256_bulk:
	movdqu	(%r8, %rax), %xmm3
	movdqu	16(%r8, %rax), %xmm4
	movdqu	32(%r8, %rax), %xmm5
	movdqu	48(%r8, %rax), %xmm6
	movdqu	64(%r8, %rax), %xmm7
	movdqu	80(%r8, %rax), %xmm8
	movdqu	96(%r8, %rax), %xmm9
	movdqu	112(%r8, %rax), %xmm10
	pxor	%xmm15, %xmm3
	pxor	%xmm15, %xmm4
	pxor	%xmm15, %xmm5
	pxor	%xmm15, %xmm6
	pxor	%xmm15, %xmm7
	pxor	%xmm15, %xmm8
	pxor	%xmm15, %xmm9
	pxor	%xmm15, %xmm10

	/* Round keys 13 down to 1; %r10 is the byte offset of the key. */
	movq	$208, %r10
.Ldec_cbc256_round:
	movdqu	(%rdi, %r10), %xmm1
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	subq	$16, %r10
	jne	.Ldec_cbc256_round

	.byte 0x66,0x0f,0x38,0xdf,0xda		/* aesdeclast %xmm2, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdf,0xe2		/* aesdeclast %xmm2, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdf,0xea		/* aesdeclast %xmm2, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdf,0xf2		/* aesdeclast %xmm2, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdf,0xfa		/* aesdeclast %xmm2, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2	/* aesdeclast %xmm2, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca	/* aesdeclast %xmm2, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2	/* aesdeclast %xmm2, %xmm10 */

	/* CBC chaining: block i is xored with ciphertext block i-1.  The
	   ciphertext is re-read with movdqu into %xmm11-%xmm14, which are
	   unused in this loop, and all re-reads precede the stores below. */
	pxor	%xmm0, %xmm3
	movdqu	(%r8, %rax), %xmm11
	movdqu	16(%r8, %rax), %xmm12
	movdqu	32(%r8, %rax), %xmm13
	movdqu	48(%r8, %rax), %xmm14
	pxor	%xmm11, %xmm4
	pxor	%xmm12, %xmm5
	pxor	%xmm13, %xmm6
	pxor	%xmm14, %xmm7
	movdqu	64(%r8, %rax), %xmm11
	movdqu	80(%r8, %rax), %xmm12
	movdqu	96(%r8, %rax), %xmm13
	movdqu	112(%r8, %rax), %xmm0		/* last ciphertext = next chaining value */
	pxor	%xmm11, %xmm8
	pxor	%xmm12, %xmm9
	pxor	%xmm13, %xmm10

	movdqu	%xmm3, (%rsi, %rax)
	movdqu	%xmm4, 16(%rsi, %rax)
	movdqu	%xmm5, 32(%rsi, %rax)
	movdqu	%xmm6, 48(%rsi, %rax)
	movdqu	%xmm7, 64(%rsi, %rax)
	movdqu	%xmm8, 80(%rsi, %rax)
	movdqu	%xmm9, 96(%rsi, %rax)
	movdqu	%xmm10, 112(%rsi, %rax)
	addq	$128, %rax
	cmpq	%r11, %rax
	jbe	.Ldec_cbc256_bulk

.Ldec_cbc256_tail_check:
	cmpq	%rax, %r9
	je	.Ldec_cbc256_done

	/* Fewer than 8 blocks left: %xmm2..%xmm14 take round keys 1..13,
	   %xmm15 still holds round key 14. */
	movdqu	16(%rdi), %xmm2
	movdqu	32(%rdi), %xmm3
	movdqu	48(%rdi), %xmm4
	movdqu	64(%rdi), %xmm5
	movdqu	80(%rdi), %xmm6
	movdqu	96(%rdi), %xmm7
	movdqu	112(%rdi), %xmm8
	movdqu	128(%rdi), %xmm9
	movdqu	144(%rdi), %xmm10
	movdqu	160(%rdi), %xmm11
	movdqu	176(%rdi), %xmm12
	movdqu	192(%rdi), %xmm13
	movdqu	208(%rdi), %xmm14

.Ldec_cbc256_single:
	movdqu	(%r8, %rax), %xmm1
	pxor	%xmm15, %xmm1			/* round key 14 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xce	/* aesdec %xmm14, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcd	/* aesdec %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcc	/* aesdec %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xca	/* aesdec %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9	/* aesdec %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8	/* aesdec %xmm8, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcf		/* aesdec %xmm7, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xce		/* aesdec %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcd		/* aesdec %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcc		/* aesdec %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcb		/* aesdec %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xca		/* aesdec %xmm2, %xmm1 */
	/* All sixteen registers are occupied, so round key 0 borrows
	   %xmm15 for the final round and round key 14 is reloaded
	   afterwards for the next iteration. */
	movdqu	(%rdi), %xmm15
	.byte 0x66,0x41,0x0f,0x38,0xdf,0xcf	/* aesdeclast %xmm15, %xmm1 */
	movdqu	224(%rdi), %xmm15
	pxor	%xmm0, %xmm1			/* xor with previous ciphertext / IV */
	movdqu	(%r8, %rax), %xmm0		/* next chaining value: fetch the
						   ciphertext before the store, in
						   case input == output */
	movdqu	%xmm1, (%rsi, %rax)
	addq	$16, %rax
	cmpq	%rax, %r9
	jne	.Ldec_cbc256_single

.Ldec_cbc256_done:
	movdqu	%xmm0, (%rdx)			/* save the chaining value in the context */

	xorl	%eax, %eax			/* return 0 */
	ret
	.size intel_aes_decrypt_cbc_256, .-intel_aes_decrypt_cbc_256
|