881 строка
16 KiB
ArmAsm
881 строка
16 KiB
ArmAsm
// Code generated by command: go run salsa20_amd64_asm.go -out ../salsa20_amd64.s -pkg salsa. DO NOT EDIT.
|
|
|
|
//go:build amd64 && !purego && gc
|
|
|
|
// func salsa2020XORKeyStream(out *byte, in *byte, n uint64, nonce *byte, key *byte)
|
|
// Requires: SSE2
//
// XORs n bytes read through `in` with generated keystream words and stores
// the result through `out`. Returns immediately when n == 0.
//
// Memory read through the pointer arguments:
//   key   - 32 bytes (dwords at offsets 0..28)
//   nonce - 16 bytes (dwords at offsets 0..12). The dwords at nonce+8 and
//           nonce+12 are used as the low/high halves of a 64-bit block
//           counter. The advanced counter lives only in the stack scratch
//           area; nothing is written back through `nonce`.
//
// Long-lived registers:
//   DI  = output cursor        SI = input cursor
//   R9  = bytes remaining (parked at 352(R12) while R9 is used as scratch)
//   R12 = 32-byte-aligned base of the scratch area inside the frame
//
// Scratch layout relative to R12:
//     0.. 63  sixteen 32-bit state words (key, nonce, counter, constants)
//    64..287  fourteen state words, each broadcast to a full 16-byte vector
//   288..303  counter low  dword for four consecutive blocks
//   304..319  counter high dword for four consecutive blocks
//   320..351  two 16-byte spill slots used by MAINLOOP1
//   352..359  saved byte count
//   360..423  64-byte bounce buffer for a final block shorter than 64 bytes
|
|
TEXT ·salsa2020XORKeyStream(SB), $456-40
|
|
// This needs up to 64 bytes at 360(R12); hence the non-obvious frame size.
// (R12 may sit up to 31 bytes above SP after alignment: 31 + 360 + 64 = 455.)
|
|
MOVQ out+0(FP), DI
|
|
MOVQ in+8(FP), SI
|
|
MOVQ n+16(FP), DX
|
|
MOVQ nonce+24(FP), CX
|
|
MOVQ key+32(FP), R8
|
|
// R12 = (SP + 31) &^ 31: round the stack pointer up to a 32-byte boundary so
// that every MOVOA/PADDL memory operand below (all at multiples of 16 from
// R12) is 16-byte aligned.
MOVQ SP, R12
|
|
ADDQ $0x1f, R12
|
|
ANDQ $-32, R12
|
|
// Re-home the arguments: R9 = n, DX = nonce, R10 = key.
MOVQ DX, R9
|
|
MOVQ CX, DX
|
|
MOVQ R8, R10
|
|
// Unsigned "<= 0" is simply "== 0": nothing to do for an empty input.
CMPQ R9, $0x00
|
|
JBE DONE
|
|
// Row at 0(R12): key[20:24], key[0:4], nonce[0:4], key[16:20].
MOVL 20(R10), CX
|
|
MOVL (R10), R8
|
|
MOVL (DX), AX
|
|
MOVL 16(R10), R11
|
|
MOVL CX, (R12)
|
|
MOVL R8, 4(R12)
|
|
MOVL AX, 8(R12)
|
|
MOVL R11, 12(R12)
|
|
// Row at 16(R12): nonce[8:12] (counter low), key[24:28], key[4:8], nonce[4:8].
MOVL 8(DX), CX
|
|
MOVL 24(R10), R8
|
|
MOVL 4(R10), AX
|
|
MOVL 4(DX), R11
|
|
MOVL CX, 16(R12)
|
|
MOVL R8, 20(R12)
|
|
MOVL AX, 24(R12)
|
|
MOVL R11, 28(R12)
|
|
// Row at 32(R12): key[12:16], nonce[12:16] (counter high), key[28:32], key[8:12].
// DX (the nonce pointer) is overwritten here, after its last use.
MOVL 12(DX), CX
|
|
MOVL 12(R10), DX
|
|
MOVL 28(R10), R8
|
|
MOVL 8(R10), AX
|
|
MOVL DX, 32(R12)
|
|
MOVL CX, 36(R12)
|
|
MOVL R8, 40(R12)
|
|
MOVL AX, 44(R12)
|
|
// Row at 48(R12): the four constant words. Read as little-endian ASCII they
// spell "expa", "nd 3", "2-by", "te k".
MOVQ $0x61707865, DX
|
|
MOVQ $0x3320646e, CX
|
|
MOVQ $0x79622d32, R8
|
|
MOVQ $0x6b206574, AX
|
|
MOVL DX, 48(R12)
|
|
MOVL CX, 52(R12)
|
|
MOVL R8, 56(R12)
|
|
MOVL AX, 60(R12)
|
|
// Fewer than 256 bytes: skip the four-block setup and go straight to the
// one-block-at-a-time path.
CMPQ R9, $0x00000100
|
|
JB BYTESBETWEEN1AND255
|
|
// Four-block setup: broadcast individual state words so that one XMM vector
// holds the same word for four blocks. PSHUFL immediates $0x00/$0x55/$0xaa/
// $0xff replicate lane 0/1/2/3 respectively.
// Constants row: lane1 -> 64, lane2 -> 80, lane3 -> 96, lane0 -> 112.
MOVOA 48(R12), X0
|
|
PSHUFL $0x55, X0, X1
|
|
PSHUFL $0xaa, X0, X2
|
|
PSHUFL $0xff, X0, X3
|
|
PSHUFL $0x00, X0, X0
|
|
MOVOA X1, 64(R12)
|
|
MOVOA X2, 80(R12)
|
|
MOVOA X3, 96(R12)
|
|
MOVOA X0, 112(R12)
|
|
// Row 0(R12): lane2 -> 128, lane3 -> 144, lane0 -> 160, lane1 -> 176.
MOVOA (R12), X0
|
|
PSHUFL $0xaa, X0, X1
|
|
PSHUFL $0xff, X0, X2
|
|
PSHUFL $0x00, X0, X3
|
|
PSHUFL $0x55, X0, X0
|
|
MOVOA X1, 128(R12)
|
|
MOVOA X2, 144(R12)
|
|
MOVOA X3, 160(R12)
|
|
MOVOA X0, 176(R12)
|
|
// Row 16(R12): lane3 -> 192, lane1 -> 208, lane2 -> 224. Lane 0 (counter low)
// is not broadcast; it differs per block and is filled in at 288(R12).
MOVOA 16(R12), X0
|
|
PSHUFL $0xff, X0, X1
|
|
PSHUFL $0x55, X0, X2
|
|
PSHUFL $0xaa, X0, X0
|
|
MOVOA X1, 192(R12)
|
|
MOVOA X2, 208(R12)
|
|
MOVOA X0, 224(R12)
|
|
// Row 32(R12): lane0 -> 240, lane2 -> 256, lane3 -> 272. Lane 1 (counter high)
// is not broadcast; it differs per block and is filled in at 304(R12).
MOVOA 32(R12), X0
|
|
PSHUFL $0x00, X0, X1
|
|
PSHUFL $0xaa, X0, X2
|
|
PSHUFL $0xff, X0, X0
|
|
MOVOA X1, 240(R12)
|
|
MOVOA X2, 256(R12)
|
|
MOVOA X0, 272(R12)
|
|
|
|
// BYTESATLEAST256: entered with R9 >= 256 bytes remaining. Prepares four
// consecutive block counters and loads the sixteen working vectors used by
// MAINLOOP1 (each vector holds one state word for four blocks).
BYTESATLEAST256:
|
|
// DX = counter low dword, CX = counter high dword (both zero-extended).
MOVL 16(R12), DX
|
|
MOVL 36(R12), CX
|
|
// Lane 0 of the per-block counter vectors gets the current counter.
MOVL DX, 288(R12)
|
|
MOVL CX, 304(R12)
|
|
// DX = (high << 32) + low: the full 64-bit counter.
SHLQ $0x20, CX
|
|
ADDQ CX, DX
|
|
// Lane 1 = counter + 1 (low half to 292, high half to 308).
ADDQ $0x01, DX
|
|
MOVQ DX, CX
|
|
SHRQ $0x20, CX
|
|
MOVL DX, 292(R12)
|
|
MOVL CX, 308(R12)
|
|
// Lane 2 = counter + 2.
ADDQ $0x01, DX
|
|
MOVQ DX, CX
|
|
SHRQ $0x20, CX
|
|
MOVL DX, 296(R12)
|
|
MOVL CX, 312(R12)
|
|
// Lane 3 = counter + 3.
ADDQ $0x01, DX
|
|
MOVQ DX, CX
|
|
SHRQ $0x20, CX
|
|
MOVL DX, 300(R12)
|
|
MOVL CX, 316(R12)
|
|
// Store counter + 4 back into the scalar state for whatever runs next.
ADDQ $0x01, DX
|
|
MOVQ DX, CX
|
|
SHRQ $0x20, CX
|
|
MOVL DX, 16(R12)
|
|
MOVL CX, 36(R12)
|
|
// Park the remaining byte count: R9 is used as a scratch register when the
// output is written after MAINLOOP1.
MOVQ R9, 352(R12)
|
|
// DX = 20, the loop counter for MAINLOOP1 (decremented by 2 per iteration).
MOVQ $0x00000014, DX
|
|
// Load the sixteen working vectors from the broadcast copies and the two
// per-block counter vectors (304 -> X11, 288 -> X15).
MOVOA 64(R12), X0
|
|
MOVOA 80(R12), X1
|
|
MOVOA 96(R12), X2
|
|
MOVOA 256(R12), X3
|
|
MOVOA 272(R12), X4
|
|
MOVOA 128(R12), X5
|
|
MOVOA 144(R12), X6
|
|
MOVOA 176(R12), X7
|
|
MOVOA 192(R12), X8
|
|
MOVOA 208(R12), X9
|
|
MOVOA 224(R12), X10
|
|
MOVOA 304(R12), X11
|
|
MOVOA 112(R12), X12
|
|
MOVOA 160(R12), X13
|
|
MOVOA 240(R12), X14
|
|
MOVOA 288(R12), X15
|
|
|
|
// MAINLOOP1: round loop for four blocks in parallel. DX starts at 20 and is
// decremented by 2 per pass, so the body runs 10 times.
//
// The body is 32 repetitions of one 7-instruction step:
//     t    = a + b                 (MOVOA, PADDL)
//     dst ^= t << k                (PSLLL, PXOR)
//     dst ^= t >> (32 - k)         (PSRLL, PXOR)
// Together the two XORs apply a 32-bit left-rotate of t by k, with k taken
// from {7, 9, 13, 18}.
//
// All sixteen XMM registers hold state, but each step needs two temporaries,
// so two state vectors are always parked in the spill slots at 320(R12) and
// 336(R12) and swapped in and out as the steps progress.
MAINLOOP1:
|
|
// Spill X1 and X2 so they can serve as the first pair of temporaries.
MOVOA X1, 320(R12)
|
|
MOVOA X2, 336(R12)
|
|
// X14 ^= rotl(X13 + X12, 7)
MOVOA X13, X1
|
|
PADDL X12, X1
|
|
MOVOA X1, X2
|
|
PSLLL $0x07, X1
|
|
PXOR X1, X14
|
|
PSRLL $0x19, X2
|
|
PXOR X2, X14
|
|
// X11 ^= rotl(X7 + X0, 7)
MOVOA X7, X1
|
|
PADDL X0, X1
|
|
MOVOA X1, X2
|
|
PSLLL $0x07, X1
|
|
PXOR X1, X11
|
|
PSRLL $0x19, X2
|
|
PXOR X2, X11
|
|
// X15 ^= rotl(X12 + X14, 9)
MOVOA X12, X1
|
|
PADDL X14, X1
|
|
MOVOA X1, X2
|
|
PSLLL $0x09, X1
|
|
PXOR X1, X15
|
|
PSRLL $0x17, X2
|
|
PXOR X2, X15
|
|
// X9 ^= rotl(X0 + X11, 9)
MOVOA X0, X1
|
|
PADDL X11, X1
|
|
MOVOA X1, X2
|
|
PSLLL $0x09, X1
|
|
PXOR X1, X9
|
|
PSRLL $0x17, X2
|
|
PXOR X2, X9
|
|
// X13 ^= rotl(X14 + X15, 13)
MOVOA X14, X1
|
|
PADDL X15, X1
|
|
MOVOA X1, X2
|
|
PSLLL $0x0d, X1
|
|
PXOR X1, X13
|
|
PSRLL $0x13, X2
|
|
PXOR X2, X13
|
|
// X7 ^= rotl(X11 + X9, 13)
MOVOA X11, X1
|
|
PADDL X9, X1
|
|
MOVOA X1, X2
|
|
PSLLL $0x0d, X1
|
|
PXOR X1, X7
|
|
PSRLL $0x13, X2
|
|
PXOR X2, X7
|
|
// X12 ^= rotl(X15 + X13, 18)
MOVOA X15, X1
|
|
PADDL X13, X1
|
|
MOVOA X1, X2
|
|
PSLLL $0x12, X1
|
|
PXOR X1, X12
|
|
PSRLL $0x0e, X2
|
|
PXOR X2, X12
|
|
// Swap: reload the vector spilled at 320 into X1 and park X12 there; X12
// becomes a temporary.
MOVOA 320(R12), X1
|
|
MOVOA X12, 320(R12)
|
|
// X0 ^= rotl(X9 + X7, 18)
MOVOA X9, X2
|
|
PADDL X7, X2
|
|
MOVOA X2, X12
|
|
PSLLL $0x12, X2
|
|
PXOR X2, X0
|
|
PSRLL $0x0e, X12
|
|
PXOR X12, X0
|
|
// X3 ^= rotl(X5 + X1, 7)
MOVOA X5, X2
|
|
PADDL X1, X2
|
|
MOVOA X2, X12
|
|
PSLLL $0x07, X2
|
|
PXOR X2, X3
|
|
PSRLL $0x19, X12
|
|
PXOR X12, X3
|
|
// Swap: reload the vector spilled at 336 into X2 and park X0 there; X0
// becomes a temporary.
MOVOA 336(R12), X2
|
|
MOVOA X0, 336(R12)
|
|
// X4 ^= rotl(X6 + X2, 7)
MOVOA X6, X0
|
|
PADDL X2, X0
|
|
MOVOA X0, X12
|
|
PSLLL $0x07, X0
|
|
PXOR X0, X4
|
|
PSRLL $0x19, X12
|
|
PXOR X12, X4
|
|
// X10 ^= rotl(X1 + X3, 9)
MOVOA X1, X0
|
|
PADDL X3, X0
|
|
MOVOA X0, X12
|
|
PSLLL $0x09, X0
|
|
PXOR X0, X10
|
|
PSRLL $0x17, X12
|
|
PXOR X12, X10
|
|
// X8 ^= rotl(X2 + X4, 9)
MOVOA X2, X0
|
|
PADDL X4, X0
|
|
MOVOA X0, X12
|
|
PSLLL $0x09, X0
|
|
PXOR X0, X8
|
|
PSRLL $0x17, X12
|
|
PXOR X12, X8
|
|
// X5 ^= rotl(X3 + X10, 13)
MOVOA X3, X0
|
|
PADDL X10, X0
|
|
MOVOA X0, X12
|
|
PSLLL $0x0d, X0
|
|
PXOR X0, X5
|
|
PSRLL $0x13, X12
|
|
PXOR X12, X5
|
|
// X6 ^= rotl(X4 + X8, 13)
MOVOA X4, X0
|
|
PADDL X8, X0
|
|
MOVOA X0, X12
|
|
PSLLL $0x0d, X0
|
|
PXOR X0, X6
|
|
PSRLL $0x13, X12
|
|
PXOR X12, X6
|
|
// X1 ^= rotl(X10 + X5, 18)
MOVOA X10, X0
|
|
PADDL X5, X0
|
|
MOVOA X0, X12
|
|
PSLLL $0x12, X0
|
|
PXOR X0, X1
|
|
PSRLL $0x0e, X12
|
|
PXOR X12, X1
|
|
// Swap: bring the vector parked at 320 back into X0 and park X1 there.
MOVOA 320(R12), X0
|
|
MOVOA X1, 320(R12)
|
|
// X7 ^= rotl(X4 + X0, 7)
MOVOA X4, X1
|
|
PADDL X0, X1
|
|
MOVOA X1, X12
|
|
PSLLL $0x07, X1
|
|
PXOR X1, X7
|
|
PSRLL $0x19, X12
|
|
PXOR X12, X7
|
|
// X2 ^= rotl(X8 + X6, 18)
MOVOA X8, X1
|
|
PADDL X6, X1
|
|
MOVOA X1, X12
|
|
PSLLL $0x12, X1
|
|
PXOR X1, X2
|
|
PSRLL $0x0e, X12
|
|
PXOR X12, X2
|
|
// Swap: bring the vector parked at 336 back into X12 and park X2 there.
MOVOA 336(R12), X12
|
|
MOVOA X2, 336(R12)
|
|
// X5 ^= rotl(X14 + X12, 7)
MOVOA X14, X1
|
|
PADDL X12, X1
|
|
MOVOA X1, X2
|
|
PSLLL $0x07, X1
|
|
PXOR X1, X5
|
|
PSRLL $0x19, X2
|
|
PXOR X2, X5
|
|
// X10 ^= rotl(X0 + X7, 9)
MOVOA X0, X1
|
|
PADDL X7, X1
|
|
MOVOA X1, X2
|
|
PSLLL $0x09, X1
|
|
PXOR X1, X10
|
|
PSRLL $0x17, X2
|
|
PXOR X2, X10
|
|
// X8 ^= rotl(X12 + X5, 9)
MOVOA X12, X1
|
|
PADDL X5, X1
|
|
MOVOA X1, X2
|
|
PSLLL $0x09, X1
|
|
PXOR X1, X8
|
|
PSRLL $0x17, X2
|
|
PXOR X2, X8
|
|
// X4 ^= rotl(X7 + X10, 13)
MOVOA X7, X1
|
|
PADDL X10, X1
|
|
MOVOA X1, X2
|
|
PSLLL $0x0d, X1
|
|
PXOR X1, X4
|
|
PSRLL $0x13, X2
|
|
PXOR X2, X4
|
|
// X14 ^= rotl(X5 + X8, 13)
MOVOA X5, X1
|
|
PADDL X8, X1
|
|
MOVOA X1, X2
|
|
PSLLL $0x0d, X1
|
|
PXOR X1, X14
|
|
PSRLL $0x13, X2
|
|
PXOR X2, X14
|
|
// X0 ^= rotl(X10 + X4, 18)
MOVOA X10, X1
|
|
PADDL X4, X1
|
|
MOVOA X1, X2
|
|
PSLLL $0x12, X1
|
|
PXOR X1, X0
|
|
PSRLL $0x0e, X2
|
|
PXOR X2, X0
|
|
// Swap: bring the vector parked at 320 back into X1 and park X0 there.
MOVOA 320(R12), X1
|
|
MOVOA X0, 320(R12)
|
|
// X12 ^= rotl(X8 + X14, 18)
MOVOA X8, X0
|
|
PADDL X14, X0
|
|
MOVOA X0, X2
|
|
PSLLL $0x12, X0
|
|
PXOR X0, X12
|
|
PSRLL $0x0e, X2
|
|
PXOR X2, X12
|
|
// X6 ^= rotl(X11 + X1, 7)
MOVOA X11, X0
|
|
PADDL X1, X0
|
|
MOVOA X0, X2
|
|
PSLLL $0x07, X0
|
|
PXOR X0, X6
|
|
PSRLL $0x19, X2
|
|
PXOR X2, X6
|
|
// Swap: bring the vector parked at 336 back into X2 and park X12 there.
MOVOA 336(R12), X2
|
|
MOVOA X12, 336(R12)
|
|
// X13 ^= rotl(X3 + X2, 7)
MOVOA X3, X0
|
|
PADDL X2, X0
|
|
MOVOA X0, X12
|
|
PSLLL $0x07, X0
|
|
PXOR X0, X13
|
|
PSRLL $0x19, X12
|
|
PXOR X12, X13
|
|
// X15 ^= rotl(X1 + X6, 9)
MOVOA X1, X0
|
|
PADDL X6, X0
|
|
MOVOA X0, X12
|
|
PSLLL $0x09, X0
|
|
PXOR X0, X15
|
|
PSRLL $0x17, X12
|
|
PXOR X12, X15
|
|
// X9 ^= rotl(X2 + X13, 9)
MOVOA X2, X0
|
|
PADDL X13, X0
|
|
MOVOA X0, X12
|
|
PSLLL $0x09, X0
|
|
PXOR X0, X9
|
|
PSRLL $0x17, X12
|
|
PXOR X12, X9
|
|
// X11 ^= rotl(X6 + X15, 13)
MOVOA X6, X0
|
|
PADDL X15, X0
|
|
MOVOA X0, X12
|
|
PSLLL $0x0d, X0
|
|
PXOR X0, X11
|
|
PSRLL $0x13, X12
|
|
PXOR X12, X11
|
|
// X3 ^= rotl(X13 + X9, 13)
MOVOA X13, X0
|
|
PADDL X9, X0
|
|
MOVOA X0, X12
|
|
PSLLL $0x0d, X0
|
|
PXOR X0, X3
|
|
PSRLL $0x13, X12
|
|
PXOR X12, X3
|
|
// X1 ^= rotl(X15 + X11, 18)
MOVOA X15, X0
|
|
PADDL X11, X0
|
|
MOVOA X0, X12
|
|
PSLLL $0x12, X0
|
|
PXOR X0, X1
|
|
PSRLL $0x0e, X12
|
|
PXOR X12, X1
|
|
// X2 ^= rotl(X9 + X3, 18)
MOVOA X9, X0
|
|
PADDL X3, X0
|
|
MOVOA X0, X12
|
|
PSLLL $0x12, X0
|
|
PXOR X0, X2
|
|
PSRLL $0x0e, X12
|
|
PXOR X12, X2
|
|
// Restore the two parked vectors so X0..X15 hold the same state words as at
// loop entry.
MOVOA 320(R12), X12
|
|
MOVOA 336(R12), X0
|
|
// Two rounds consumed; loop while the counter is still above zero.
SUBQ $0x02, DX
|
|
JA MAINLOOP1
|
|
// Output stage for the four-block path (falls through from MAINLOOP1).
//
// For each group of four working vectors:
//   1. PADDL adds back the vector it was loaded from (feed-forward).
//   2. MOVD extracts lane 0 of each vector; lane j belongs to the block at
//      byte offset 64*j, so successive extractions target +0, +64, +128, +192.
//   3. PSHUFL $0x39 rotates lanes down by one so the next block's word moves
//      into lane 0 (omitted after the fourth extraction).
//   4. The word is XORed with the input at SI and stored to the output at DI.
// R9 is used as scratch here; the byte count is reloaded from 352(R12) below.

// Group 1: X12, X7, X10, X4 -> bytes 0..15 of each 64-byte block.
PADDL 112(R12), X12
|
|
PADDL 176(R12), X7
|
|
PADDL 224(R12), X10
|
|
PADDL 272(R12), X4
|
|
MOVD X12, DX
|
|
MOVD X7, CX
|
|
MOVD X10, R8
|
|
MOVD X4, R9
|
|
PSHUFL $0x39, X12, X12
|
|
PSHUFL $0x39, X7, X7
|
|
PSHUFL $0x39, X10, X10
|
|
PSHUFL $0x39, X4, X4
|
|
XORL (SI), DX
|
|
XORL 4(SI), CX
|
|
XORL 8(SI), R8
|
|
XORL 12(SI), R9
|
|
MOVL DX, (DI)
|
|
MOVL CX, 4(DI)
|
|
MOVL R8, 8(DI)
|
|
MOVL R9, 12(DI)
|
|
// Second block (+64).
MOVD X12, DX
|
|
MOVD X7, CX
|
|
MOVD X10, R8
|
|
MOVD X4, R9
|
|
PSHUFL $0x39, X12, X12
|
|
PSHUFL $0x39, X7, X7
|
|
PSHUFL $0x39, X10, X10
|
|
PSHUFL $0x39, X4, X4
|
|
XORL 64(SI), DX
|
|
XORL 68(SI), CX
|
|
XORL 72(SI), R8
|
|
XORL 76(SI), R9
|
|
MOVL DX, 64(DI)
|
|
MOVL CX, 68(DI)
|
|
MOVL R8, 72(DI)
|
|
MOVL R9, 76(DI)
|
|
// Third block (+128).
MOVD X12, DX
|
|
MOVD X7, CX
|
|
MOVD X10, R8
|
|
MOVD X4, R9
|
|
PSHUFL $0x39, X12, X12
|
|
PSHUFL $0x39, X7, X7
|
|
PSHUFL $0x39, X10, X10
|
|
PSHUFL $0x39, X4, X4
|
|
XORL 128(SI), DX
|
|
XORL 132(SI), CX
|
|
XORL 136(SI), R8
|
|
XORL 140(SI), R9
|
|
MOVL DX, 128(DI)
|
|
MOVL CX, 132(DI)
|
|
MOVL R8, 136(DI)
|
|
MOVL R9, 140(DI)
|
|
// Fourth block (+192).
MOVD X12, DX
|
|
MOVD X7, CX
|
|
MOVD X10, R8
|
|
MOVD X4, R9
|
|
XORL 192(SI), DX
|
|
XORL 196(SI), CX
|
|
XORL 200(SI), R8
|
|
XORL 204(SI), R9
|
|
MOVL DX, 192(DI)
|
|
MOVL CX, 196(DI)
|
|
MOVL R8, 200(DI)
|
|
MOVL R9, 204(DI)
|
|
// Group 2: X14, X0, X5, X8 -> bytes 16..31 of each 64-byte block.
PADDL 240(R12), X14
|
|
PADDL 64(R12), X0
|
|
PADDL 128(R12), X5
|
|
PADDL 192(R12), X8
|
|
MOVD X14, DX
|
|
MOVD X0, CX
|
|
MOVD X5, R8
|
|
MOVD X8, R9
|
|
PSHUFL $0x39, X14, X14
|
|
PSHUFL $0x39, X0, X0
|
|
PSHUFL $0x39, X5, X5
|
|
PSHUFL $0x39, X8, X8
|
|
XORL 16(SI), DX
|
|
XORL 20(SI), CX
|
|
XORL 24(SI), R8
|
|
XORL 28(SI), R9
|
|
MOVL DX, 16(DI)
|
|
MOVL CX, 20(DI)
|
|
MOVL R8, 24(DI)
|
|
MOVL R9, 28(DI)
|
|
// Second block (+64).
MOVD X14, DX
|
|
MOVD X0, CX
|
|
MOVD X5, R8
|
|
MOVD X8, R9
|
|
PSHUFL $0x39, X14, X14
|
|
PSHUFL $0x39, X0, X0
|
|
PSHUFL $0x39, X5, X5
|
|
PSHUFL $0x39, X8, X8
|
|
XORL 80(SI), DX
|
|
XORL 84(SI), CX
|
|
XORL 88(SI), R8
|
|
XORL 92(SI), R9
|
|
MOVL DX, 80(DI)
|
|
MOVL CX, 84(DI)
|
|
MOVL R8, 88(DI)
|
|
MOVL R9, 92(DI)
|
|
// Third block (+128).
MOVD X14, DX
|
|
MOVD X0, CX
|
|
MOVD X5, R8
|
|
MOVD X8, R9
|
|
PSHUFL $0x39, X14, X14
|
|
PSHUFL $0x39, X0, X0
|
|
PSHUFL $0x39, X5, X5
|
|
PSHUFL $0x39, X8, X8
|
|
XORL 144(SI), DX
|
|
XORL 148(SI), CX
|
|
XORL 152(SI), R8
|
|
XORL 156(SI), R9
|
|
MOVL DX, 144(DI)
|
|
MOVL CX, 148(DI)
|
|
MOVL R8, 152(DI)
|
|
MOVL R9, 156(DI)
|
|
// Fourth block (+192).
MOVD X14, DX
|
|
MOVD X0, CX
|
|
MOVD X5, R8
|
|
MOVD X8, R9
|
|
XORL 208(SI), DX
|
|
XORL 212(SI), CX
|
|
XORL 216(SI), R8
|
|
XORL 220(SI), R9
|
|
MOVL DX, 208(DI)
|
|
MOVL CX, 212(DI)
|
|
MOVL R8, 216(DI)
|
|
MOVL R9, 220(DI)
|
|
// Group 3: X15, X11, X1, X6 -> bytes 32..47 of each 64-byte block.
// X15 and X11 are fed forward with the per-block counter vectors.
PADDL 288(R12), X15
|
|
PADDL 304(R12), X11
|
|
PADDL 80(R12), X1
|
|
PADDL 144(R12), X6
|
|
MOVD X15, DX
|
|
MOVD X11, CX
|
|
MOVD X1, R8
|
|
MOVD X6, R9
|
|
PSHUFL $0x39, X15, X15
|
|
PSHUFL $0x39, X11, X11
|
|
PSHUFL $0x39, X1, X1
|
|
PSHUFL $0x39, X6, X6
|
|
XORL 32(SI), DX
|
|
XORL 36(SI), CX
|
|
XORL 40(SI), R8
|
|
XORL 44(SI), R9
|
|
MOVL DX, 32(DI)
|
|
MOVL CX, 36(DI)
|
|
MOVL R8, 40(DI)
|
|
MOVL R9, 44(DI)
|
|
// Second block (+64).
MOVD X15, DX
|
|
MOVD X11, CX
|
|
MOVD X1, R8
|
|
MOVD X6, R9
|
|
PSHUFL $0x39, X15, X15
|
|
PSHUFL $0x39, X11, X11
|
|
PSHUFL $0x39, X1, X1
|
|
PSHUFL $0x39, X6, X6
|
|
XORL 96(SI), DX
|
|
XORL 100(SI), CX
|
|
XORL 104(SI), R8
|
|
XORL 108(SI), R9
|
|
MOVL DX, 96(DI)
|
|
MOVL CX, 100(DI)
|
|
MOVL R8, 104(DI)
|
|
MOVL R9, 108(DI)
|
|
// Third block (+128).
MOVD X15, DX
|
|
MOVD X11, CX
|
|
MOVD X1, R8
|
|
MOVD X6, R9
|
|
PSHUFL $0x39, X15, X15
|
|
PSHUFL $0x39, X11, X11
|
|
PSHUFL $0x39, X1, X1
|
|
PSHUFL $0x39, X6, X6
|
|
XORL 160(SI), DX
|
|
XORL 164(SI), CX
|
|
XORL 168(SI), R8
|
|
XORL 172(SI), R9
|
|
MOVL DX, 160(DI)
|
|
MOVL CX, 164(DI)
|
|
MOVL R8, 168(DI)
|
|
MOVL R9, 172(DI)
|
|
// Fourth block (+192).
MOVD X15, DX
|
|
MOVD X11, CX
|
|
MOVD X1, R8
|
|
MOVD X6, R9
|
|
XORL 224(SI), DX
|
|
XORL 228(SI), CX
|
|
XORL 232(SI), R8
|
|
XORL 236(SI), R9
|
|
MOVL DX, 224(DI)
|
|
MOVL CX, 228(DI)
|
|
MOVL R8, 232(DI)
|
|
MOVL R9, 236(DI)
|
|
// Group 4: X13, X9, X3, X2 -> bytes 48..63 of each 64-byte block.
PADDL 160(R12), X13
|
|
PADDL 208(R12), X9
|
|
PADDL 256(R12), X3
|
|
PADDL 96(R12), X2
|
|
MOVD X13, DX
|
|
MOVD X9, CX
|
|
MOVD X3, R8
|
|
MOVD X2, R9
|
|
PSHUFL $0x39, X13, X13
|
|
PSHUFL $0x39, X9, X9
|
|
PSHUFL $0x39, X3, X3
|
|
PSHUFL $0x39, X2, X2
|
|
XORL 48(SI), DX
|
|
XORL 52(SI), CX
|
|
XORL 56(SI), R8
|
|
XORL 60(SI), R9
|
|
MOVL DX, 48(DI)
|
|
MOVL CX, 52(DI)
|
|
MOVL R8, 56(DI)
|
|
MOVL R9, 60(DI)
|
|
// Second block (+64).
MOVD X13, DX
|
|
MOVD X9, CX
|
|
MOVD X3, R8
|
|
MOVD X2, R9
|
|
PSHUFL $0x39, X13, X13
|
|
PSHUFL $0x39, X9, X9
|
|
PSHUFL $0x39, X3, X3
|
|
PSHUFL $0x39, X2, X2
|
|
XORL 112(SI), DX
|
|
XORL 116(SI), CX
|
|
XORL 120(SI), R8
|
|
XORL 124(SI), R9
|
|
MOVL DX, 112(DI)
|
|
MOVL CX, 116(DI)
|
|
MOVL R8, 120(DI)
|
|
MOVL R9, 124(DI)
|
|
// Third block (+128).
MOVD X13, DX
|
|
MOVD X9, CX
|
|
MOVD X3, R8
|
|
MOVD X2, R9
|
|
PSHUFL $0x39, X13, X13
|
|
PSHUFL $0x39, X9, X9
|
|
PSHUFL $0x39, X3, X3
|
|
PSHUFL $0x39, X2, X2
|
|
XORL 176(SI), DX
|
|
XORL 180(SI), CX
|
|
XORL 184(SI), R8
|
|
XORL 188(SI), R9
|
|
MOVL DX, 176(DI)
|
|
MOVL CX, 180(DI)
|
|
MOVL R8, 184(DI)
|
|
MOVL R9, 188(DI)
|
|
// Fourth block (+192).
MOVD X13, DX
|
|
MOVD X9, CX
|
|
MOVD X3, R8
|
|
MOVD X2, R9
|
|
XORL 240(SI), DX
|
|
XORL 244(SI), CX
|
|
XORL 248(SI), R8
|
|
XORL 252(SI), R9
|
|
MOVL DX, 240(DI)
|
|
MOVL CX, 244(DI)
|
|
MOVL R8, 248(DI)
|
|
MOVL R9, 252(DI)
|
|
// 256 bytes done: reload the byte count, subtract 256 and advance both cursors.
MOVQ 352(R12), R9
|
|
SUBQ $0x00000100, R9
|
|
ADDQ $0x00000100, SI
|
|
ADDQ $0x00000100, DI
|
|
// Another full 256-byte batch available? Go round again.
CMPQ R9, $0x00000100
|
|
JAE BYTESATLEAST256
|
|
// Nothing left at all? Finished. Otherwise fall through with 1..255 bytes.
CMPQ R9, $0x00
|
|
JBE DONE
|
|
|
|
// BYTESBETWEEN1AND255: one-block-at-a-time path; R9 = bytes remaining (> 0).
// With at least 64 bytes left the block is processed directly on the caller's
// buffers (NOCOPY). With fewer, the remaining input is first copied into the
// 64-byte bounce buffer at 360(R12) and the block is processed in place there.
BYTESBETWEEN1AND255:
|
|
CMPQ R9, $0x40
|
|
JAE NOCOPY
|
|
// Remember the real output pointer in DX; it is restored for the final
// copy-out after the block has been processed.
MOVQ DI, DX
|
|
// REP MOVSB copies CX (= R9) bytes from (SI) to (DI): input -> bounce buffer.
LEAQ 360(R12), DI
|
|
MOVQ R9, CX
|
|
REP; MOVSB
|
|
// Point both cursors at the bounce buffer so the block is XORed in place.
LEAQ 360(R12), DI
|
|
LEAQ 360(R12), SI
|
|
|
|
// NOCOPY: set up a single 64-byte block. SI/DI already point at the data to
// process (caller buffers, or the bounce buffer for a short final block).
NOCOPY:
|
|
// Park the byte count; R9 is used as scratch when the block is written out.
MOVQ R9, 352(R12)
|
|
// Load the four 16-byte state rows: X0 = constants row, X1..X3 = rows at
// 0, 16 and 32 (the row at 16/32 carries the current counter halves).
MOVOA 48(R12), X0
|
|
MOVOA (R12), X1
|
|
MOVOA 16(R12), X2
|
|
MOVOA 32(R12), X3
|
|
// X4 starts as a copy of X1: MAINLOOP2 expects its first addend in X4.
MOVOA X1, X4
|
|
// CX = 20, the loop counter for MAINLOOP2 (decremented by 4 per iteration).
MOVQ $0x00000014, CX
|
|
|
|
// MAINLOOP2: round loop for a single block held as four rows in X0..X3.
// CX starts at 20 and drops by 4 per pass, so the body runs 5 times; each pass
// contains four rounds.
//
// Every round is four add / rotate-left / xor steps with rotation amounts
// 7, 9, 13 and 18 (PSLLL by k plus PSRLL by 32-k, both XORed into the target
// row). X4, X5 and X6 are temporaries. Between steps the rows are re-aligned
// with lane rotations: PSHUFL $0x93 rotates lanes up by one, $0x4e swaps the
// two halves, $0x39 rotates lanes down by one. Odd rounds update X3, X2, X1,
// X0 in that order; even rounds update X1, X2, X3, X0.
MAINLOOP2:
|
|
// Round 1. On entry X4 holds a copy of X1.
// X3 ^= rotl(X1 + X0, 7)
PADDL X0, X4
|
|
MOVOA X0, X5
|
|
MOVOA X4, X6
|
|
PSLLL $0x07, X4
|
|
PSRLL $0x19, X6
|
|
PXOR X4, X3
|
|
PXOR X6, X3
|
|
// X2 ^= rotl(X0 + X3, 9)
PADDL X3, X5
|
|
MOVOA X3, X4
|
|
MOVOA X5, X6
|
|
PSLLL $0x09, X5
|
|
PSRLL $0x17, X6
|
|
PXOR X5, X2
|
|
PSHUFL $0x93, X3, X3
|
|
PXOR X6, X2
|
|
// X1 ^= rotl(X3 + X2, 13)   (X3 value taken before its lane rotation)
PADDL X2, X4
|
|
MOVOA X2, X5
|
|
MOVOA X4, X6
|
|
PSLLL $0x0d, X4
|
|
PSRLL $0x13, X6
|
|
PXOR X4, X1
|
|
PSHUFL $0x4e, X2, X2
|
|
PXOR X6, X1
|
|
// X0 ^= rotl(X2 + X1, 18)   (X2 value taken before its lane rotation)
PADDL X1, X5
|
|
MOVOA X3, X4
|
|
MOVOA X5, X6
|
|
PSLLL $0x12, X5
|
|
PSRLL $0x0e, X6
|
|
PXOR X5, X0
|
|
PSHUFL $0x39, X1, X1
|
|
PXOR X6, X0
|
|
// Round 2: same steps with the roles of X1 and X3 exchanged.
PADDL X0, X4
|
|
MOVOA X0, X5
|
|
MOVOA X4, X6
|
|
PSLLL $0x07, X4
|
|
PSRLL $0x19, X6
|
|
PXOR X4, X1
|
|
PXOR X6, X1
|
|
PADDL X1, X5
|
|
MOVOA X1, X4
|
|
MOVOA X5, X6
|
|
PSLLL $0x09, X5
|
|
PSRLL $0x17, X6
|
|
PXOR X5, X2
|
|
PSHUFL $0x93, X1, X1
|
|
PXOR X6, X2
|
|
PADDL X2, X4
|
|
MOVOA X2, X5
|
|
MOVOA X4, X6
|
|
PSLLL $0x0d, X4
|
|
PSRLL $0x13, X6
|
|
PXOR X4, X3
|
|
PSHUFL $0x4e, X2, X2
|
|
PXOR X6, X3
|
|
PADDL X3, X5
|
|
MOVOA X1, X4
|
|
MOVOA X5, X6
|
|
PSLLL $0x12, X5
|
|
PSRLL $0x0e, X6
|
|
PXOR X5, X0
|
|
PSHUFL $0x39, X3, X3
|
|
PXOR X6, X0
|
|
// Round 3: identical instruction sequence to round 1.
PADDL X0, X4
|
|
MOVOA X0, X5
|
|
MOVOA X4, X6
|
|
PSLLL $0x07, X4
|
|
PSRLL $0x19, X6
|
|
PXOR X4, X3
|
|
PXOR X6, X3
|
|
PADDL X3, X5
|
|
MOVOA X3, X4
|
|
MOVOA X5, X6
|
|
PSLLL $0x09, X5
|
|
PSRLL $0x17, X6
|
|
PXOR X5, X2
|
|
PSHUFL $0x93, X3, X3
|
|
PXOR X6, X2
|
|
PADDL X2, X4
|
|
MOVOA X2, X5
|
|
MOVOA X4, X6
|
|
PSLLL $0x0d, X4
|
|
PSRLL $0x13, X6
|
|
PXOR X4, X1
|
|
PSHUFL $0x4e, X2, X2
|
|
PXOR X6, X1
|
|
PADDL X1, X5
|
|
MOVOA X3, X4
|
|
MOVOA X5, X6
|
|
PSLLL $0x12, X5
|
|
PSRLL $0x0e, X6
|
|
PXOR X5, X0
|
|
PSHUFL $0x39, X1, X1
|
|
PXOR X6, X0
|
|
// Round 4: same as round 2, with the loop-counter update interleaved.
PADDL X0, X4
|
|
MOVOA X0, X5
|
|
MOVOA X4, X6
|
|
PSLLL $0x07, X4
|
|
PSRLL $0x19, X6
|
|
PXOR X4, X1
|
|
PXOR X6, X1
|
|
PADDL X1, X5
|
|
MOVOA X1, X4
|
|
MOVOA X5, X6
|
|
PSLLL $0x09, X5
|
|
PSRLL $0x17, X6
|
|
PXOR X5, X2
|
|
PSHUFL $0x93, X1, X1
|
|
PXOR X6, X2
|
|
PADDL X2, X4
|
|
MOVOA X2, X5
|
|
MOVOA X4, X6
|
|
PSLLL $0x0d, X4
|
|
PSRLL $0x13, X6
|
|
PXOR X4, X3
|
|
PSHUFL $0x4e, X2, X2
|
|
PXOR X6, X3
|
|
// Four rounds consumed. The flags set here are tested by the JA at the end of
// the body; the SSE instructions in between do not modify the flags.
SUBQ $0x04, CX
|
|
PADDL X3, X5
|
|
// X4 = copy of X1, the first addend for the next pass.
MOVOA X1, X4
|
|
MOVOA X5, X6
|
|
PSLLL $0x12, X5
|
|
// Zeroes X7. NOTE(review): nothing in the visible single-block path reads X7
// afterwards; kept exactly as generated.
PXOR X7, X7
|
|
PSRLL $0x0e, X6
|
|
PXOR X5, X0
|
|
PSHUFL $0x39, X3, X3
|
|
PXOR X6, X0
|
|
// Loop while the counter (from the SUBQ above) is still above zero.
JA MAINLOOP2
|
|
// Output stage for the single-block path (falls through from MAINLOOP2).

// Feed-forward: add the original state rows to the working rows.
PADDL 48(R12), X0
|
|
PADDL (R12), X1
|
|
PADDL 16(R12), X2
|
|
PADDL 32(R12), X3
|
|
// The rows are held in a diagonal arrangement, so each extraction pass
// (MOVD lane 0, then PSHUFL $0x39 to rotate the next lane down) writes four
// words at non-contiguous offsets:
//   X0 lanes -> offsets  0, 20, 40, 60
//   X1 lanes -> offsets 48,  4, 24, 44
//   X2 lanes -> offsets 32, 52,  8, 28
//   X3 lanes -> offsets 16, 36, 56, 12
// Each word is XORed with the input at SI and stored to the output at DI.
// R9 is scratch here; the byte count is reloaded from 352(R12) below.

// Pass 1: offsets 0, 48, 32, 16.
MOVD X0, CX
|
|
MOVD X1, R8
|
|
MOVD X2, R9
|
|
MOVD X3, AX
|
|
PSHUFL $0x39, X0, X0
|
|
PSHUFL $0x39, X1, X1
|
|
PSHUFL $0x39, X2, X2
|
|
PSHUFL $0x39, X3, X3
|
|
XORL (SI), CX
|
|
XORL 48(SI), R8
|
|
XORL 32(SI), R9
|
|
XORL 16(SI), AX
|
|
MOVL CX, (DI)
|
|
MOVL R8, 48(DI)
|
|
MOVL R9, 32(DI)
|
|
MOVL AX, 16(DI)
|
|
// Pass 2: offsets 20, 4, 52, 36.
MOVD X0, CX
|
|
MOVD X1, R8
|
|
MOVD X2, R9
|
|
MOVD X3, AX
|
|
PSHUFL $0x39, X0, X0
|
|
PSHUFL $0x39, X1, X1
|
|
PSHUFL $0x39, X2, X2
|
|
PSHUFL $0x39, X3, X3
|
|
XORL 20(SI), CX
|
|
XORL 4(SI), R8
|
|
XORL 52(SI), R9
|
|
XORL 36(SI), AX
|
|
MOVL CX, 20(DI)
|
|
MOVL R8, 4(DI)
|
|
MOVL R9, 52(DI)
|
|
MOVL AX, 36(DI)
|
|
// Pass 3: offsets 40, 24, 8, 56.
MOVD X0, CX
|
|
MOVD X1, R8
|
|
MOVD X2, R9
|
|
MOVD X3, AX
|
|
PSHUFL $0x39, X0, X0
|
|
PSHUFL $0x39, X1, X1
|
|
PSHUFL $0x39, X2, X2
|
|
PSHUFL $0x39, X3, X3
|
|
XORL 40(SI), CX
|
|
XORL 24(SI), R8
|
|
XORL 8(SI), R9
|
|
XORL 56(SI), AX
|
|
MOVL CX, 40(DI)
|
|
MOVL R8, 24(DI)
|
|
MOVL R9, 8(DI)
|
|
MOVL AX, 56(DI)
|
|
// Pass 4: offsets 60, 44, 28, 12 (no further lane rotation needed).
MOVD X0, CX
|
|
MOVD X1, R8
|
|
MOVD X2, R9
|
|
MOVD X3, AX
|
|
XORL 60(SI), CX
|
|
XORL 44(SI), R8
|
|
XORL 28(SI), R9
|
|
XORL 12(SI), AX
|
|
MOVL CX, 60(DI)
|
|
MOVL R8, 44(DI)
|
|
MOVL R9, 28(DI)
|
|
MOVL AX, 12(DI)
|
|
// Reload the byte count that was parked before the rounds.
MOVQ 352(R12), R9
|
|
// Advance the 64-bit block counter by one: low dword at 16(R12), high dword
// at 36(R12). The +1 is applied to the zero-extended low half first, so a
// carry out of bit 31 lands in bit 32 and is folded into the high half.
MOVL 16(R12), CX
|
|
MOVL 36(R12), R8
|
|
ADDQ $0x01, CX
|
|
SHLQ $0x20, R8
|
|
ADDQ R8, CX
|
|
MOVQ CX, R8
|
|
SHRQ $0x20, R8
|
|
MOVL CX, 16(R12)
|
|
MOVL R8, 36(R12)
|
|
// Three-way dispatch on the byte count this block started with:
//   > 64  -> more data follows (BYTESATLEAST65)
//   == 64 -> exactly one full block, finished (BYTESATLEAST64)
//   < 64  -> the block was processed in the bounce buffer; copy it out.
CMPQ R9, $0x40
|
|
JA BYTESATLEAST65
|
|
JAE BYTESATLEAST64
|
|
// Short final block: DI still points at the bounce buffer and DX holds the
// real output pointer saved before the copy-in. Copy R9 bytes to the output.
MOVQ DI, SI
|
|
MOVQ DX, DI
|
|
MOVQ R9, CX
|
|
REP; MOVSB
|
|
|
|
// BYTESATLEAST64: reached when the last block was exactly 64 bytes and was
// written directly to the output, so no copy-out is needed.
BYTESATLEAST64:
|
|
// DONE: common exit, also used when n == 0 and after the final copy-out.
DONE:
|
|
RET
|
|
|
|
// BYTESATLEAST65: a full 64-byte block was just written and more input
// remains. Consume 64 bytes from the count, advance both cursors and process
// the next block.
BYTESATLEAST65:
|
|
SUBQ $0x40, R9
|
|
ADDQ $0x40, DI
|
|
ADDQ $0x40, SI
|
|
JMP BYTESBETWEEN1AND255
|