salsa20/salsa: Port salsa20_amd64.s to Avo
This implementation utilizes the same registers found in the reference implementation, aiming to produce a minimal semantic diff between the Avo-generated output and the original hand-written assembly. To verify the Avo implementation, the reference and Avo-generated assembly files are fed to `go tool asm`, capturing the debug output into corresponding temp files. The debug output contains supplementary metadata (line numbers, instruction offsets, and source file references) that must be removed in order to obtain a semantic diff of the two files. This is accomplished via a small utility script written in awk. Commands used to verify Avo output: GOROOT=$(go env GOROOT) REFERENCE="b2d3a6a4b4d36521cd7f653879cf6981e7c5c340" go tool asm -o /dev/null -I "$GOROOT"/src/runtime -debug \ <(git cat-file -p "$REFERENCE":salsa20/salsa/salsa20_amd64.s) \ > /tmp/reference.s go tool asm -o /dev/null -I "$GOROOT"/src/runtime -debug \ salsa20/salsa/salsa20_amd64.s \ > /tmp/avo.s normalize(){ awk '{ $1=$2=$3=""; print substr($0,4) }' } diff <(normalize < /tmp/reference.s) <(normalize < /tmp/avo.s) Change-Id: Ica0bb06f8b074ad566a979d33ddc81d8a38491b1 Reviewed-on: https://go-review.googlesource.com/c/crypto/+/601217 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Filippo Valsorda <filippo@golang.org> Reviewed-by: Roland Shoemaker <roland@golang.org> Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
This commit is contained in:
Родитель
82942cf1d8
Коммит
620dfbc770
|
@ -0,0 +1,14 @@
|
|||
module salsa20/salsa/_asm
|
||||
|
||||
go 1.23
|
||||
|
||||
require (
|
||||
github.com/mmcloughlin/avo v0.6.0
|
||||
golang.org/x/crypto v0.26.0
|
||||
)
|
||||
|
||||
require (
|
||||
golang.org/x/mod v0.20.0 // indirect
|
||||
golang.org/x/sync v0.8.0 // indirect
|
||||
golang.org/x/tools v0.24.0 // indirect
|
||||
)
|
|
@ -0,0 +1,10 @@
|
|||
github.com/mmcloughlin/avo v0.6.0 h1:QH6FU8SKoTLaVs80GA8TJuLNkUYl4VokHKlPhVDg4YY=
|
||||
github.com/mmcloughlin/avo v0.6.0/go.mod h1:8CoAGaCSYXtCPR+8y18Y9aB/kxb8JSS6FRI7mSkvD+8=
|
||||
golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw=
|
||||
golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54=
|
||||
golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0=
|
||||
golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
|
||||
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24=
|
||||
golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ=
|
|
@ -0,0 +1,932 @@
|
|||
// Copyright 2024 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// This code was translated into a form compatible with 6a from the public
|
||||
// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
. "github.com/mmcloughlin/avo/build"
|
||||
"github.com/mmcloughlin/avo/ir"
|
||||
. "github.com/mmcloughlin/avo/operand"
|
||||
. "github.com/mmcloughlin/avo/reg"
|
||||
_ "golang.org/x/crypto/salsa20/salsa"
|
||||
)
|
||||
|
||||
//go:generate go run . -out ../salsa20_amd64.s -pkg salsa
|
||||
|
||||
// main drives the Avo generator: it names the target package, sets the build
// constraint for the generated file, emits the single assembly function and
// then asks Avo to write the output (see the go:generate line for the flags).
func main() {
|
||||
Package("golang.org/x/crypto/salsa20/salsa")
|
||||
// Build constraint placed on the generated assembly file.
ConstraintExpr("amd64,!purego,gc")
|
||||
salsa2020XORKeyStream()
|
||||
Generate()
|
||||
}
|
||||
|
||||
// salsa2020XORKeyStream emits the body of the assembly function of the same
// name. It loads the five parameters (out, in, n, nonce, key), derives a
// 32-byte aligned scratch pointer in R12, and then emits the code sections in
// the fixed order START, BYTESATLEAST256, MAINLOOP1, BYTESBETWEEN1AND255,
// NOCOPY, MAINLOOP2, followed by the shared exit and the BYTESATLEAST65 stub.
// The sections fall through into one another, so the emission order matters.
//
// Register roles after the prologue: RDI = out, RSI = in, R9 = n (bytes
// remaining), RDX = nonce pointer, R10 = key pointer, R12 = aligned scratch.
func salsa2020XORKeyStream() {
|
||||
Implement("salsa2020XORKeyStream")
|
||||
Attributes(0)
|
||||
AllocLocal(456) // frame = 424 + 32 byte alignment
|
||||
Comment("This needs up to 64 bytes at 360(R12); hence the non-obvious frame size.")
|
||||
|
||||
Load(Param("out"), RDI)
|
||||
Load(Param("in"), RSI)
|
||||
Load(Param("n"), RDX)
|
||||
Load(Param("nonce"), RCX)
|
||||
Load(Param("key"), R8)
|
||||
|
||||
// R12 = (RSP + 31) &^ 31: round the stack pointer up to a 32-byte boundary.
MOVQ(RSP, R12)
|
||||
ADDQ(Imm(31), R12)
|
||||
ANDQ(I32(^31), R12)
|
||||
|
||||
// Shuffle the arguments into the registers the rest of the code expects.
MOVQ(RDX, R9)
|
||||
MOVQ(RCX, RDX)
|
||||
MOVQ(R8, R10)
|
||||
// Unsigned "below or equal" against zero: exit immediately when n == 0.
CMPQ(R9, Imm(0))
|
||||
JBE(LabelRef("DONE"))
|
||||
|
||||
START()
|
||||
BYTESATLEAST256()
|
||||
MAINLOOP1()
|
||||
BYTESBETWEEN1AND255()
|
||||
NOCOPY()
|
||||
MAINLOOP2()
|
||||
|
||||
// MAINLOOP2 either jumps here or falls through to here; both labels share
// the single RET.
Label("BYTESATLEAST64")
|
||||
Label("DONE")
|
||||
RET()
|
||||
// More than 64 bytes were left: consume one 64-byte block and go round again.
Label("BYTESATLEAST65")
|
||||
SUBQ(Imm(64), R9)
|
||||
ADDQ(Imm(64), RDI)
|
||||
ADDQ(Imm(64), RSI)
|
||||
JMP(LabelRef("BYTESBETWEEN1AND255"))
|
||||
}
|
||||
|
||||
// START emits the state set-up. It copies twelve 32-bit words from the key
// (R10) and the nonce (RDX) into 0..47(R12), writes four constants to
// 48..63(R12), and branches to BYTESBETWEEN1AND255 when fewer than 256 bytes
// remain. Otherwise it broadcasts fourteen of the sixteen state words across
// 16-byte vectors stored at 64..287(R12) for the four-block loop. The two
// words at 16(R12) and 36(R12) are not broadcast here; BYTESATLEAST256 fills
// their vectors at 288(R12) and 304(R12).
func START() {
|
||||
Label("START")
|
||||
// NOTE(review): the nonce pointer was moved into RDX by the caller but is
// addressed through EDX below; presumably this renders as (DX) in the
// generated assembly, matching the reference — confirm against Avo output.
MOVL(Mem{Base: R10}.Offset(20), ECX)
|
||||
MOVL(Mem{Base: R10}.Offset(0), R8L)
|
||||
MOVL(Mem{Base: EDX}.Offset(0), EAX)
|
||||
MOVL(Mem{Base: R10}.Offset(16), R11L)
|
||||
MOVL(ECX, Mem{Base: R12}.Offset(0))
|
||||
MOVL(R8L, Mem{Base: R12}.Offset(4))
|
||||
MOVL(EAX, Mem{Base: R12}.Offset(8))
|
||||
MOVL(R11L, Mem{Base: R12}.Offset(12))
|
||||
MOVL(Mem{Base: EDX}.Offset(8), ECX)
|
||||
MOVL(Mem{Base: R10}.Offset(24), R8L)
|
||||
MOVL(Mem{Base: R10}.Offset(4), EAX)
|
||||
MOVL(Mem{Base: EDX}.Offset(4), R11L)
|
||||
MOVL(ECX, Mem{Base: R12}.Offset(16))
|
||||
MOVL(R8L, Mem{Base: R12}.Offset(20))
|
||||
MOVL(EAX, Mem{Base: R12}.Offset(24))
|
||||
MOVL(R11L, Mem{Base: R12}.Offset(28))
|
||||
MOVL(Mem{Base: EDX}.Offset(12), ECX)
|
||||
// From here on EDX is reused as a data register; the nonce pointer is gone.
MOVL(Mem{Base: R10}.Offset(12), EDX)
|
||||
MOVL(Mem{Base: R10}.Offset(28), R8L)
|
||||
MOVL(Mem{Base: R10}.Offset(8), EAX)
|
||||
MOVL(EDX, Mem{Base: R12}.Offset(32))
|
||||
MOVL(ECX, Mem{Base: R12}.Offset(36))
|
||||
MOVL(R8L, Mem{Base: R12}.Offset(40))
|
||||
MOVL(EAX, Mem{Base: R12}.Offset(44))
|
||||
// The four constants are the little-endian words of the ASCII string
// "expand 32-byte k" (0x61707865, 0x3320646e, 0x79622d32, 0x6b206574).
MOVQ(Imm(1634760805), RDX)
|
||||
MOVQ(Imm(857760878), RCX)
|
||||
MOVQ(Imm(2036477234), R8)
|
||||
MOVQ(Imm(1797285236), RAX)
|
||||
MOVL(EDX, Mem{Base: R12}.Offset(48))
|
||||
MOVL(ECX, Mem{Base: R12}.Offset(52))
|
||||
MOVL(R8L, Mem{Base: R12}.Offset(56))
|
||||
MOVL(EAX, Mem{Base: R12}.Offset(60))
|
||||
// Fewer than 256 bytes: skip the vector set-up and use the one-block path.
CMPQ(R9, U32(256))
|
||||
JB(LabelRef("BYTESBETWEEN1AND255"))
|
||||
// Each PSHUFL immediate (0x00, 0x55, 0xAA, 0xFF) replicates one 32-bit lane
// of the source vector into all four lanes of the destination.
MOVOA(Mem{Base: R12}.Offset(48), X0)
|
||||
PSHUFL(Imm(0x55), X0, X1)
|
||||
PSHUFL(Imm(0xAA), X0, X2)
|
||||
PSHUFL(Imm(0xFF), X0, X3)
|
||||
PSHUFL(Imm(0x00), X0, X0)
|
||||
MOVOA(X1, Mem{Base: R12}.Offset(64))
|
||||
MOVOA(X2, Mem{Base: R12}.Offset(80))
|
||||
MOVOA(X3, Mem{Base: R12}.Offset(96))
|
||||
MOVOA(X0, Mem{Base: R12}.Offset(112))
|
||||
MOVOA(Mem{Base: R12}.Offset(0), X0)
|
||||
PSHUFL(Imm(0xAA), X0, X1)
|
||||
PSHUFL(Imm(0xFF), X0, X2)
|
||||
PSHUFL(Imm(0x00), X0, X3)
|
||||
PSHUFL(Imm(0x55), X0, X0)
|
||||
MOVOA(X1, Mem{Base: R12}.Offset(128))
|
||||
MOVOA(X2, Mem{Base: R12}.Offset(144))
|
||||
MOVOA(X3, Mem{Base: R12}.Offset(160))
|
||||
MOVOA(X0, Mem{Base: R12}.Offset(176))
|
||||
// Lane 0 of this vector (the word at 16(R12)) is deliberately not broadcast.
MOVOA(Mem{Base: R12}.Offset(16), X0)
|
||||
PSHUFL(Imm(0xFF), X0, X1)
|
||||
PSHUFL(Imm(0x55), X0, X2)
|
||||
PSHUFL(Imm(0xAA), X0, X0)
|
||||
MOVOA(X1, Mem{Base: R12}.Offset(192))
|
||||
MOVOA(X2, Mem{Base: R12}.Offset(208))
|
||||
MOVOA(X0, Mem{Base: R12}.Offset(224))
|
||||
// Lane 1 of this vector (the word at 36(R12)) is deliberately not broadcast.
MOVOA(Mem{Base: R12}.Offset(32), X0)
|
||||
PSHUFL(Imm(0x00), X0, X1)
|
||||
PSHUFL(Imm(0xAA), X0, X2)
|
||||
PSHUFL(Imm(0xFF), X0, X0)
|
||||
MOVOA(X1, Mem{Base: R12}.Offset(240))
|
||||
MOVOA(X2, Mem{Base: R12}.Offset(256))
|
||||
MOVOA(X0, Mem{Base: R12}.Offset(272))
|
||||
|
||||
}
|
||||
|
||||
// BYTESATLEAST256 emits the per-iteration set-up for the four-block path.
// It treats the words at 16(R12) (low half) and 36(R12) (high half) as one
// 64-bit counter, writes the counter values c, c+1, c+2, c+3 into the four
// lanes at 288(R12) (low halves) and 304(R12) (high halves), stores c+4 back
// to 16(R12)/36(R12), saves the remaining byte count at 352(R12), sets the
// round counter RDX to 20 and loads all sixteen state vectors into X0..X15.
func BYTESATLEAST256() {
|
||||
Label("BYTESATLEAST256")
|
||||
MOVL(Mem{Base: R12}.Offset(16), EDX)
|
||||
MOVL(Mem{Base: R12}.Offset(36), ECX)
|
||||
MOVL(EDX, Mem{Base: R12}.Offset(288))
|
||||
MOVL(ECX, Mem{Base: R12}.Offset(304))
|
||||
// RDX = (high << 32) + low, so the increments below carry into the high half.
SHLQ(Imm(32), RCX)
|
||||
ADDQ(RCX, RDX)
|
||||
ADDQ(Imm(1), RDX)
|
||||
MOVQ(RDX, RCX)
|
||||
SHRQ(Imm(32), RCX)
|
||||
MOVL(EDX, Mem{Base: R12}.Offset(292))
|
||||
MOVL(ECX, Mem{Base: R12}.Offset(308))
|
||||
ADDQ(Imm(1), RDX)
|
||||
MOVQ(RDX, RCX)
|
||||
SHRQ(Imm(32), RCX)
|
||||
MOVL(EDX, Mem{Base: R12}.Offset(296))
|
||||
MOVL(ECX, Mem{Base: R12}.Offset(312))
|
||||
ADDQ(Imm(1), RDX)
|
||||
MOVQ(RDX, RCX)
|
||||
SHRQ(Imm(32), RCX)
|
||||
MOVL(EDX, Mem{Base: R12}.Offset(300))
|
||||
MOVL(ECX, Mem{Base: R12}.Offset(316))
|
||||
// Counter for the next group of blocks (original value + 4).
ADDQ(Imm(1), RDX)
|
||||
MOVQ(RDX, RCX)
|
||||
SHRQ(Imm(32), RCX)
|
||||
MOVL(EDX, Mem{Base: R12}.Offset(16))
|
||||
MOVL(ECX, Mem{Base: R12}.Offset(36))
|
||||
// R9 is reused as a scratch register in MAINLOOP1, so park the byte count.
MOVQ(R9, Mem{Base: R12}.Offset(352))
|
||||
MOVQ(U32(20), RDX)
|
||||
MOVOA(Mem{Base: R12}.Offset(64), X0)
|
||||
MOVOA(Mem{Base: R12}.Offset(80), X1)
|
||||
MOVOA(Mem{Base: R12}.Offset(96), X2)
|
||||
MOVOA(Mem{Base: R12}.Offset(256), X3)
|
||||
MOVOA(Mem{Base: R12}.Offset(272), X4)
|
||||
MOVOA(Mem{Base: R12}.Offset(128), X5)
|
||||
MOVOA(Mem{Base: R12}.Offset(144), X6)
|
||||
MOVOA(Mem{Base: R12}.Offset(176), X7)
|
||||
MOVOA(Mem{Base: R12}.Offset(192), X8)
|
||||
MOVOA(Mem{Base: R12}.Offset(208), X9)
|
||||
MOVOA(Mem{Base: R12}.Offset(224), X10)
|
||||
MOVOA(Mem{Base: R12}.Offset(304), X11)
|
||||
MOVOA(Mem{Base: R12}.Offset(112), X12)
|
||||
MOVOA(Mem{Base: R12}.Offset(160), X13)
|
||||
MOVOA(Mem{Base: R12}.Offset(240), X14)
|
||||
MOVOA(Mem{Base: R12}.Offset(288), X15)
|
||||
}
|
||||
|
||||
// MAINLOOP1 emits the four-blocks-at-a-time round loop and its output stage.
//
// Each of X0..X15 holds one state word for four blocks (one block per 32-bit
// lane). All sixteen XMM registers are live, so 320(R12) and 336(R12) are
// used as spill slots and the values they hold rotate during a pass. Every
// rotate-left by k is built from PSLLL k / PSRLL 32-k, with both halves XORed
// into the destination. The round counter in RDX (set to 20 by
// BYTESATLEAST256) is decremented by 2 per pass.
//
// After the loop the saved input vectors are added back, and each vector is
// peeled lane by lane: lane 0 belongs to the block at offset 0, lane 1 to the
// block at offset 64, then 128 and 192. Each word is XORed with the input at
// (SI) and stored to the same offset at (DI). Finally the byte count is
// reloaded, reduced by 256, and control either loops to BYTESATLEAST256,
// exits through DONE, or falls through to the code emitted next.
func MAINLOOP1() {
|
||||
Label("MAINLOOP1")
|
||||
// Spill X1 and X2 so they can serve as temporaries.
MOVOA(X1, Mem{Base: R12}.Offset(320))
|
||||
MOVOA(X2, Mem{Base: R12}.Offset(336))
|
||||
MOVOA(X13, X1)
|
||||
PADDL(X12, X1)
|
||||
MOVOA(X1, X2)
|
||||
PSLLL(Imm(7), X1)
|
||||
PXOR(X1, X14)
|
||||
PSRLL(Imm(25), X2)
|
||||
PXOR(X2, X14)
|
||||
MOVOA(X7, X1)
|
||||
PADDL(X0, X1)
|
||||
MOVOA(X1, X2)
|
||||
PSLLL(Imm(7), X1)
|
||||
PXOR(X1, X11)
|
||||
PSRLL(Imm(25), X2)
|
||||
PXOR(X2, X11)
|
||||
MOVOA(X12, X1)
|
||||
PADDL(X14, X1)
|
||||
MOVOA(X1, X2)
|
||||
PSLLL(Imm(9), X1)
|
||||
PXOR(X1, X15)
|
||||
PSRLL(Imm(23), X2)
|
||||
PXOR(X2, X15)
|
||||
MOVOA(X0, X1)
|
||||
PADDL(X11, X1)
|
||||
MOVOA(X1, X2)
|
||||
PSLLL(Imm(9), X1)
|
||||
PXOR(X1, X9)
|
||||
PSRLL(Imm(23), X2)
|
||||
PXOR(X2, X9)
|
||||
MOVOA(X14, X1)
|
||||
PADDL(X15, X1)
|
||||
MOVOA(X1, X2)
|
||||
PSLLL(Imm(13), X1)
|
||||
PXOR(X1, X13)
|
||||
PSRLL(Imm(19), X2)
|
||||
PXOR(X2, X13)
|
||||
MOVOA(X11, X1)
|
||||
PADDL(X9, X1)
|
||||
MOVOA(X1, X2)
|
||||
PSLLL(Imm(13), X1)
|
||||
PXOR(X1, X7)
|
||||
PSRLL(Imm(19), X2)
|
||||
PXOR(X2, X7)
|
||||
MOVOA(X15, X1)
|
||||
PADDL(X13, X1)
|
||||
MOVOA(X1, X2)
|
||||
PSLLL(Imm(18), X1)
|
||||
PXOR(X1, X12)
|
||||
PSRLL(Imm(14), X2)
|
||||
PXOR(X2, X12)
|
||||
// Swap: reload the spilled X1 and park X12 in its slot.
MOVOA(Mem{Base: R12}.Offset(320), X1)
|
||||
MOVOA(X12, Mem{Base: R12}.Offset(320))
|
||||
MOVOA(X9, X2)
|
||||
PADDL(X7, X2)
|
||||
MOVOA(X2, X12)
|
||||
PSLLL(Imm(18), X2)
|
||||
PXOR(X2, X0)
|
||||
PSRLL(Imm(14), X12)
|
||||
PXOR(X12, X0)
|
||||
MOVOA(X5, X2)
|
||||
PADDL(X1, X2)
|
||||
MOVOA(X2, X12)
|
||||
PSLLL(Imm(7), X2)
|
||||
PXOR(X2, X3)
|
||||
PSRLL(Imm(25), X12)
|
||||
PXOR(X12, X3)
|
||||
// Swap: reload the spilled X2 and park X0 in its slot.
MOVOA(Mem{Base: R12}.Offset(336), X2)
|
||||
MOVOA(X0, Mem{Base: R12}.Offset(336))
|
||||
MOVOA(X6, X0)
|
||||
PADDL(X2, X0)
|
||||
MOVOA(X0, X12)
|
||||
PSLLL(Imm(7), X0)
|
||||
PXOR(X0, X4)
|
||||
PSRLL(Imm(25), X12)
|
||||
PXOR(X12, X4)
|
||||
MOVOA(X1, X0)
|
||||
PADDL(X3, X0)
|
||||
MOVOA(X0, X12)
|
||||
PSLLL(Imm(9), X0)
|
||||
PXOR(X0, X10)
|
||||
PSRLL(Imm(23), X12)
|
||||
PXOR(X12, X10)
|
||||
MOVOA(X2, X0)
|
||||
PADDL(X4, X0)
|
||||
MOVOA(X0, X12)
|
||||
PSLLL(Imm(9), X0)
|
||||
PXOR(X0, X8)
|
||||
PSRLL(Imm(23), X12)
|
||||
PXOR(X12, X8)
|
||||
MOVOA(X3, X0)
|
||||
PADDL(X10, X0)
|
||||
MOVOA(X0, X12)
|
||||
PSLLL(Imm(13), X0)
|
||||
PXOR(X0, X5)
|
||||
PSRLL(Imm(19), X12)
|
||||
PXOR(X12, X5)
|
||||
MOVOA(X4, X0)
|
||||
PADDL(X8, X0)
|
||||
MOVOA(X0, X12)
|
||||
PSLLL(Imm(13), X0)
|
||||
PXOR(X0, X6)
|
||||
PSRLL(Imm(19), X12)
|
||||
PXOR(X12, X6)
|
||||
MOVOA(X10, X0)
|
||||
PADDL(X5, X0)
|
||||
MOVOA(X0, X12)
|
||||
PSLLL(Imm(18), X0)
|
||||
PXOR(X0, X1)
|
||||
PSRLL(Imm(14), X12)
|
||||
PXOR(X12, X1)
|
||||
// Swap through the 320(R12) slot again: bring back the parked value, park X1.
MOVOA(Mem{Base: R12}.Offset(320), X0)
|
||||
MOVOA(X1, Mem{Base: R12}.Offset(320))
|
||||
MOVOA(X4, X1)
|
||||
PADDL(X0, X1)
|
||||
MOVOA(X1, X12)
|
||||
PSLLL(Imm(7), X1)
|
||||
PXOR(X1, X7)
|
||||
PSRLL(Imm(25), X12)
|
||||
PXOR(X12, X7)
|
||||
MOVOA(X8, X1)
|
||||
PADDL(X6, X1)
|
||||
MOVOA(X1, X12)
|
||||
PSLLL(Imm(18), X1)
|
||||
PXOR(X1, X2)
|
||||
PSRLL(Imm(14), X12)
|
||||
PXOR(X12, X2)
|
||||
// Swap through the 336(R12) slot: bring back the parked value, park X2.
MOVOA(Mem{Base: R12}.Offset(336), X12)
|
||||
MOVOA(X2, Mem{Base: R12}.Offset(336))
|
||||
MOVOA(X14, X1)
|
||||
PADDL(X12, X1)
|
||||
MOVOA(X1, X2)
|
||||
PSLLL(Imm(7), X1)
|
||||
PXOR(X1, X5)
|
||||
PSRLL(Imm(25), X2)
|
||||
PXOR(X2, X5)
|
||||
MOVOA(X0, X1)
|
||||
PADDL(X7, X1)
|
||||
MOVOA(X1, X2)
|
||||
PSLLL(Imm(9), X1)
|
||||
PXOR(X1, X10)
|
||||
PSRLL(Imm(23), X2)
|
||||
PXOR(X2, X10)
|
||||
MOVOA(X12, X1)
|
||||
PADDL(X5, X1)
|
||||
MOVOA(X1, X2)
|
||||
PSLLL(Imm(9), X1)
|
||||
PXOR(X1, X8)
|
||||
PSRLL(Imm(23), X2)
|
||||
PXOR(X2, X8)
|
||||
MOVOA(X7, X1)
|
||||
PADDL(X10, X1)
|
||||
MOVOA(X1, X2)
|
||||
PSLLL(Imm(13), X1)
|
||||
PXOR(X1, X4)
|
||||
PSRLL(Imm(19), X2)
|
||||
PXOR(X2, X4)
|
||||
MOVOA(X5, X1)
|
||||
PADDL(X8, X1)
|
||||
MOVOA(X1, X2)
|
||||
PSLLL(Imm(13), X1)
|
||||
PXOR(X1, X14)
|
||||
PSRLL(Imm(19), X2)
|
||||
PXOR(X2, X14)
|
||||
MOVOA(X10, X1)
|
||||
PADDL(X4, X1)
|
||||
MOVOA(X1, X2)
|
||||
PSLLL(Imm(18), X1)
|
||||
PXOR(X1, X0)
|
||||
PSRLL(Imm(14), X2)
|
||||
PXOR(X2, X0)
|
||||
// Swap through the 320(R12) slot: bring back the parked value, park X0.
MOVOA(Mem{Base: R12}.Offset(320), X1)
|
||||
MOVOA(X0, Mem{Base: R12}.Offset(320))
|
||||
MOVOA(X8, X0)
|
||||
PADDL(X14, X0)
|
||||
MOVOA(X0, X2)
|
||||
PSLLL(Imm(18), X0)
|
||||
PXOR(X0, X12)
|
||||
PSRLL(Imm(14), X2)
|
||||
PXOR(X2, X12)
|
||||
MOVOA(X11, X0)
|
||||
PADDL(X1, X0)
|
||||
MOVOA(X0, X2)
|
||||
PSLLL(Imm(7), X0)
|
||||
PXOR(X0, X6)
|
||||
PSRLL(Imm(25), X2)
|
||||
PXOR(X2, X6)
|
||||
// Swap through the 336(R12) slot: bring back the parked value, park X12.
MOVOA(Mem{Base: R12}.Offset(336), X2)
|
||||
MOVOA(X12, Mem{Base: R12}.Offset(336))
|
||||
MOVOA(X3, X0)
|
||||
PADDL(X2, X0)
|
||||
MOVOA(X0, X12)
|
||||
PSLLL(Imm(7), X0)
|
||||
PXOR(X0, X13)
|
||||
PSRLL(Imm(25), X12)
|
||||
PXOR(X12, X13)
|
||||
MOVOA(X1, X0)
|
||||
PADDL(X6, X0)
|
||||
MOVOA(X0, X12)
|
||||
PSLLL(Imm(9), X0)
|
||||
PXOR(X0, X15)
|
||||
PSRLL(Imm(23), X12)
|
||||
PXOR(X12, X15)
|
||||
MOVOA(X2, X0)
|
||||
PADDL(X13, X0)
|
||||
MOVOA(X0, X12)
|
||||
PSLLL(Imm(9), X0)
|
||||
PXOR(X0, X9)
|
||||
PSRLL(Imm(23), X12)
|
||||
PXOR(X12, X9)
|
||||
MOVOA(X6, X0)
|
||||
PADDL(X15, X0)
|
||||
MOVOA(X0, X12)
|
||||
PSLLL(Imm(13), X0)
|
||||
PXOR(X0, X11)
|
||||
PSRLL(Imm(19), X12)
|
||||
PXOR(X12, X11)
|
||||
MOVOA(X13, X0)
|
||||
PADDL(X9, X0)
|
||||
MOVOA(X0, X12)
|
||||
PSLLL(Imm(13), X0)
|
||||
PXOR(X0, X3)
|
||||
PSRLL(Imm(19), X12)
|
||||
PXOR(X12, X3)
|
||||
MOVOA(X15, X0)
|
||||
PADDL(X11, X0)
|
||||
MOVOA(X0, X12)
|
||||
PSLLL(Imm(18), X0)
|
||||
PXOR(X0, X1)
|
||||
PSRLL(Imm(14), X12)
|
||||
PXOR(X12, X1)
|
||||
MOVOA(X9, X0)
|
||||
PADDL(X3, X0)
|
||||
MOVOA(X0, X12)
|
||||
PSLLL(Imm(18), X0)
|
||||
PXOR(X0, X2)
|
||||
PSRLL(Imm(14), X12)
|
||||
PXOR(X12, X2)
|
||||
// Restore X12 and X0 from the spill slots; X1 and X2 are spilled again at
// the top of the loop if another pass follows.
MOVOA(Mem{Base: R12}.Offset(320), X12)
|
||||
MOVOA(Mem{Base: R12}.Offset(336), X0)
|
||||
// Two rounds done; loop while the counter is still above zero.
SUBQ(Imm(2), RDX)
|
||||
JA(LabelRef("MAINLOOP1"))
|
||||
// Output words at byte offsets 0..12 of each of the four blocks.
// MOVD extracts lane 0; PSHUFL 0x39 rotates the lanes so the next block's
// word moves into lane 0.
PADDL(Mem{Base: R12}.Offset(112), X12)
|
||||
PADDL(Mem{Base: R12}.Offset(176), X7)
|
||||
PADDL(Mem{Base: R12}.Offset(224), X10)
|
||||
PADDL(Mem{Base: R12}.Offset(272), X4)
|
||||
MOVD(X12, EDX)
|
||||
MOVD(X7, ECX)
|
||||
MOVD(X10, R8)
|
||||
MOVD(X4, R9)
|
||||
PSHUFL(Imm(0x39), X12, X12)
|
||||
PSHUFL(Imm(0x39), X7, X7)
|
||||
PSHUFL(Imm(0x39), X10, X10)
|
||||
PSHUFL(Imm(0x39), X4, X4)
|
||||
XORL(Mem{Base: SI}.Offset(0), EDX)
|
||||
XORL(Mem{Base: SI}.Offset(4), ECX)
|
||||
XORL(Mem{Base: SI}.Offset(8), R8L)
|
||||
XORL(Mem{Base: SI}.Offset(12), R9L)
|
||||
MOVL(EDX, Mem{Base: DI}.Offset(0))
|
||||
MOVL(ECX, Mem{Base: DI}.Offset(4))
|
||||
MOVL(R8L, Mem{Base: DI}.Offset(8))
|
||||
MOVL(R9L, Mem{Base: DI}.Offset(12))
|
||||
MOVD(X12, EDX)
|
||||
MOVD(X7, ECX)
|
||||
MOVD(X10, R8)
|
||||
MOVD(X4, R9)
|
||||
PSHUFL(Imm(0x39), X12, X12)
|
||||
PSHUFL(Imm(0x39), X7, X7)
|
||||
PSHUFL(Imm(0x39), X10, X10)
|
||||
PSHUFL(Imm(0x39), X4, X4)
|
||||
XORL(Mem{Base: SI}.Offset(64), EDX)
|
||||
XORL(Mem{Base: SI}.Offset(68), ECX)
|
||||
XORL(Mem{Base: SI}.Offset(72), R8L)
|
||||
XORL(Mem{Base: SI}.Offset(76), R9L)
|
||||
MOVL(EDX, Mem{Base: DI}.Offset(64))
|
||||
MOVL(ECX, Mem{Base: DI}.Offset(68))
|
||||
MOVL(R8L, Mem{Base: DI}.Offset(72))
|
||||
MOVL(R9L, Mem{Base: DI}.Offset(76))
|
||||
MOVD(X12, EDX)
|
||||
MOVD(X7, ECX)
|
||||
MOVD(X10, R8)
|
||||
MOVD(X4, R9)
|
||||
PSHUFL(Imm(0x39), X12, X12)
|
||||
PSHUFL(Imm(0x39), X7, X7)
|
||||
PSHUFL(Imm(0x39), X10, X10)
|
||||
PSHUFL(Imm(0x39), X4, X4)
|
||||
XORL(Mem{Base: SI}.Offset(128), EDX)
|
||||
XORL(Mem{Base: SI}.Offset(132), ECX)
|
||||
XORL(Mem{Base: SI}.Offset(136), R8L)
|
||||
XORL(Mem{Base: SI}.Offset(140), R9L)
|
||||
MOVL(EDX, Mem{Base: DI}.Offset(128))
|
||||
MOVL(ECX, Mem{Base: DI}.Offset(132))
|
||||
MOVL(R8L, Mem{Base: DI}.Offset(136))
|
||||
MOVL(R9L, Mem{Base: DI}.Offset(140))
|
||||
MOVD(X12, EDX)
|
||||
MOVD(X7, ECX)
|
||||
MOVD(X10, R8)
|
||||
MOVD(X4, R9)
|
||||
XORL(Mem{Base: SI}.Offset(192), EDX)
|
||||
XORL(Mem{Base: SI}.Offset(196), ECX)
|
||||
XORL(Mem{Base: SI}.Offset(200), R8L)
|
||||
XORL(Mem{Base: SI}.Offset(204), R9L)
|
||||
MOVL(EDX, Mem{Base: DI}.Offset(192))
|
||||
MOVL(ECX, Mem{Base: DI}.Offset(196))
|
||||
MOVL(R8L, Mem{Base: DI}.Offset(200))
|
||||
MOVL(R9L, Mem{Base: DI}.Offset(204))
|
||||
// Output words at byte offsets 16..28 of each block.
PADDL(Mem{Base: R12}.Offset(240), X14)
|
||||
PADDL(Mem{Base: R12}.Offset(64), X0)
|
||||
PADDL(Mem{Base: R12}.Offset(128), X5)
|
||||
PADDL(Mem{Base: R12}.Offset(192), X8)
|
||||
MOVD(X14, EDX)
|
||||
MOVD(X0, ECX)
|
||||
MOVD(X5, R8)
|
||||
MOVD(X8, R9)
|
||||
PSHUFL(Imm(0x39), X14, X14)
|
||||
PSHUFL(Imm(0x39), X0, X0)
|
||||
PSHUFL(Imm(0x39), X5, X5)
|
||||
PSHUFL(Imm(0x39), X8, X8)
|
||||
XORL(Mem{Base: SI}.Offset(16), EDX)
|
||||
XORL(Mem{Base: SI}.Offset(20), ECX)
|
||||
XORL(Mem{Base: SI}.Offset(24), R8L)
|
||||
XORL(Mem{Base: SI}.Offset(28), R9L)
|
||||
MOVL(EDX, Mem{Base: DI}.Offset(16))
|
||||
MOVL(ECX, Mem{Base: DI}.Offset(20))
|
||||
MOVL(R8L, Mem{Base: DI}.Offset(24))
|
||||
MOVL(R9L, Mem{Base: DI}.Offset(28))
|
||||
MOVD(X14, EDX)
|
||||
MOVD(X0, ECX)
|
||||
MOVD(X5, R8)
|
||||
MOVD(X8, R9)
|
||||
PSHUFL(Imm(0x39), X14, X14)
|
||||
PSHUFL(Imm(0x39), X0, X0)
|
||||
PSHUFL(Imm(0x39), X5, X5)
|
||||
PSHUFL(Imm(0x39), X8, X8)
|
||||
XORL(Mem{Base: SI}.Offset(80), EDX)
|
||||
XORL(Mem{Base: SI}.Offset(84), ECX)
|
||||
XORL(Mem{Base: SI}.Offset(88), R8L)
|
||||
XORL(Mem{Base: SI}.Offset(92), R9L)
|
||||
MOVL(EDX, Mem{Base: DI}.Offset(80))
|
||||
MOVL(ECX, Mem{Base: DI}.Offset(84))
|
||||
MOVL(R8L, Mem{Base: DI}.Offset(88))
|
||||
MOVL(R9L, Mem{Base: DI}.Offset(92))
|
||||
MOVD(X14, EDX)
|
||||
MOVD(X0, ECX)
|
||||
MOVD(X5, R8)
|
||||
MOVD(X8, R9)
|
||||
PSHUFL(Imm(0x39), X14, X14)
|
||||
PSHUFL(Imm(0x39), X0, X0)
|
||||
PSHUFL(Imm(0x39), X5, X5)
|
||||
PSHUFL(Imm(0x39), X8, X8)
|
||||
XORL(Mem{Base: SI}.Offset(144), EDX)
|
||||
XORL(Mem{Base: SI}.Offset(148), ECX)
|
||||
XORL(Mem{Base: SI}.Offset(152), R8L)
|
||||
XORL(Mem{Base: SI}.Offset(156), R9L)
|
||||
MOVL(EDX, Mem{Base: DI}.Offset(144))
|
||||
MOVL(ECX, Mem{Base: DI}.Offset(148))
|
||||
MOVL(R8L, Mem{Base: DI}.Offset(152))
|
||||
MOVL(R9L, Mem{Base: DI}.Offset(156))
|
||||
MOVD(X14, EDX)
|
||||
MOVD(X0, ECX)
|
||||
MOVD(X5, R8)
|
||||
MOVD(X8, R9)
|
||||
XORL(Mem{Base: SI}.Offset(208), EDX)
|
||||
XORL(Mem{Base: SI}.Offset(212), ECX)
|
||||
XORL(Mem{Base: SI}.Offset(216), R8L)
|
||||
XORL(Mem{Base: SI}.Offset(220), R9L)
|
||||
MOVL(EDX, Mem{Base: DI}.Offset(208))
|
||||
MOVL(ECX, Mem{Base: DI}.Offset(212))
|
||||
MOVL(R8L, Mem{Base: DI}.Offset(216))
|
||||
MOVL(R9L, Mem{Base: DI}.Offset(220))
|
||||
// Output words at byte offsets 32..44 of each block. 288(R12) and 304(R12)
// hold the per-block counter halves written by BYTESATLEAST256.
PADDL(Mem{Base: R12}.Offset(288), X15)
|
||||
PADDL(Mem{Base: R12}.Offset(304), X11)
|
||||
PADDL(Mem{Base: R12}.Offset(80), X1)
|
||||
PADDL(Mem{Base: R12}.Offset(144), X6)
|
||||
MOVD(X15, EDX)
|
||||
MOVD(X11, ECX)
|
||||
MOVD(X1, R8)
|
||||
MOVD(X6, R9)
|
||||
PSHUFL(Imm(0x39), X15, X15)
|
||||
PSHUFL(Imm(0x39), X11, X11)
|
||||
PSHUFL(Imm(0x39), X1, X1)
|
||||
PSHUFL(Imm(0x39), X6, X6)
|
||||
XORL(Mem{Base: SI}.Offset(32), EDX)
|
||||
XORL(Mem{Base: SI}.Offset(36), ECX)
|
||||
XORL(Mem{Base: SI}.Offset(40), R8L)
|
||||
XORL(Mem{Base: SI}.Offset(44), R9L)
|
||||
MOVL(EDX, Mem{Base: DI}.Offset(32))
|
||||
MOVL(ECX, Mem{Base: DI}.Offset(36))
|
||||
MOVL(R8L, Mem{Base: DI}.Offset(40))
|
||||
MOVL(R9L, Mem{Base: DI}.Offset(44))
|
||||
MOVD(X15, EDX)
|
||||
MOVD(X11, ECX)
|
||||
MOVD(X1, R8)
|
||||
MOVD(X6, R9)
|
||||
PSHUFL(Imm(0x39), X15, X15)
|
||||
PSHUFL(Imm(0x39), X11, X11)
|
||||
PSHUFL(Imm(0x39), X1, X1)
|
||||
PSHUFL(Imm(0x39), X6, X6)
|
||||
XORL(Mem{Base: SI}.Offset(96), EDX)
|
||||
XORL(Mem{Base: SI}.Offset(100), ECX)
|
||||
XORL(Mem{Base: SI}.Offset(104), R8L)
|
||||
XORL(Mem{Base: SI}.Offset(108), R9L)
|
||||
MOVL(EDX, Mem{Base: DI}.Offset(96))
|
||||
MOVL(ECX, Mem{Base: DI}.Offset(100))
|
||||
MOVL(R8L, Mem{Base: DI}.Offset(104))
|
||||
MOVL(R9L, Mem{Base: DI}.Offset(108))
|
||||
MOVD(X15, EDX)
|
||||
MOVD(X11, ECX)
|
||||
MOVD(X1, R8)
|
||||
MOVD(X6, R9)
|
||||
PSHUFL(Imm(0x39), X15, X15)
|
||||
PSHUFL(Imm(0x39), X11, X11)
|
||||
PSHUFL(Imm(0x39), X1, X1)
|
||||
PSHUFL(Imm(0x39), X6, X6)
|
||||
XORL(Mem{Base: SI}.Offset(160), EDX)
|
||||
XORL(Mem{Base: SI}.Offset(164), ECX)
|
||||
XORL(Mem{Base: SI}.Offset(168), R8L)
|
||||
XORL(Mem{Base: SI}.Offset(172), R9L)
|
||||
MOVL(EDX, Mem{Base: DI}.Offset(160))
|
||||
MOVL(ECX, Mem{Base: DI}.Offset(164))
|
||||
MOVL(R8L, Mem{Base: DI}.Offset(168))
|
||||
MOVL(R9L, Mem{Base: DI}.Offset(172))
|
||||
MOVD(X15, EDX)
|
||||
MOVD(X11, ECX)
|
||||
MOVD(X1, R8)
|
||||
MOVD(X6, R9)
|
||||
XORL(Mem{Base: SI}.Offset(224), EDX)
|
||||
XORL(Mem{Base: SI}.Offset(228), ECX)
|
||||
XORL(Mem{Base: SI}.Offset(232), R8L)
|
||||
XORL(Mem{Base: SI}.Offset(236), R9L)
|
||||
MOVL(EDX, Mem{Base: DI}.Offset(224))
|
||||
MOVL(ECX, Mem{Base: DI}.Offset(228))
|
||||
MOVL(R8L, Mem{Base: DI}.Offset(232))
|
||||
MOVL(R9L, Mem{Base: DI}.Offset(236))
|
||||
// Output words at byte offsets 48..60 of each block.
PADDL(Mem{Base: R12}.Offset(160), X13)
|
||||
PADDL(Mem{Base: R12}.Offset(208), X9)
|
||||
PADDL(Mem{Base: R12}.Offset(256), X3)
|
||||
PADDL(Mem{Base: R12}.Offset(96), X2)
|
||||
MOVD(X13, EDX)
|
||||
MOVD(X9, ECX)
|
||||
MOVD(X3, R8)
|
||||
MOVD(X2, R9)
|
||||
PSHUFL(Imm(0x39), X13, X13)
|
||||
PSHUFL(Imm(0x39), X9, X9)
|
||||
PSHUFL(Imm(0x39), X3, X3)
|
||||
PSHUFL(Imm(0x39), X2, X2)
|
||||
XORL(Mem{Base: SI}.Offset(48), EDX)
|
||||
XORL(Mem{Base: SI}.Offset(52), ECX)
|
||||
XORL(Mem{Base: SI}.Offset(56), R8L)
|
||||
XORL(Mem{Base: SI}.Offset(60), R9L)
|
||||
MOVL(EDX, Mem{Base: DI}.Offset(48))
|
||||
MOVL(ECX, Mem{Base: DI}.Offset(52))
|
||||
MOVL(R8L, Mem{Base: DI}.Offset(56))
|
||||
MOVL(R9L, Mem{Base: DI}.Offset(60))
|
||||
MOVD(X13, EDX)
|
||||
MOVD(X9, ECX)
|
||||
MOVD(X3, R8)
|
||||
MOVD(X2, R9)
|
||||
PSHUFL(Imm(0x39), X13, X13)
|
||||
PSHUFL(Imm(0x39), X9, X9)
|
||||
PSHUFL(Imm(0x39), X3, X3)
|
||||
PSHUFL(Imm(0x39), X2, X2)
|
||||
XORL(Mem{Base: SI}.Offset(112), EDX)
|
||||
XORL(Mem{Base: SI}.Offset(116), ECX)
|
||||
XORL(Mem{Base: SI}.Offset(120), R8L)
|
||||
XORL(Mem{Base: SI}.Offset(124), R9L)
|
||||
MOVL(EDX, Mem{Base: DI}.Offset(112))
|
||||
MOVL(ECX, Mem{Base: DI}.Offset(116))
|
||||
MOVL(R8L, Mem{Base: DI}.Offset(120))
|
||||
MOVL(R9L, Mem{Base: DI}.Offset(124))
|
||||
MOVD(X13, EDX)
|
||||
MOVD(X9, ECX)
|
||||
MOVD(X3, R8)
|
||||
MOVD(X2, R9)
|
||||
PSHUFL(Imm(0x39), X13, X13)
|
||||
PSHUFL(Imm(0x39), X9, X9)
|
||||
PSHUFL(Imm(0x39), X3, X3)
|
||||
PSHUFL(Imm(0x39), X2, X2)
|
||||
XORL(Mem{Base: SI}.Offset(176), EDX)
|
||||
XORL(Mem{Base: SI}.Offset(180), ECX)
|
||||
XORL(Mem{Base: SI}.Offset(184), R8L)
|
||||
XORL(Mem{Base: SI}.Offset(188), R9L)
|
||||
MOVL(EDX, Mem{Base: DI}.Offset(176))
|
||||
MOVL(ECX, Mem{Base: DI}.Offset(180))
|
||||
MOVL(R8L, Mem{Base: DI}.Offset(184))
|
||||
MOVL(R9L, Mem{Base: DI}.Offset(188))
|
||||
MOVD(X13, EDX)
|
||||
MOVD(X9, ECX)
|
||||
MOVD(X3, R8)
|
||||
MOVD(X2, R9)
|
||||
XORL(Mem{Base: SI}.Offset(240), EDX)
|
||||
XORL(Mem{Base: SI}.Offset(244), ECX)
|
||||
XORL(Mem{Base: SI}.Offset(248), R8L)
|
||||
XORL(Mem{Base: SI}.Offset(252), R9L)
|
||||
MOVL(EDX, Mem{Base: DI}.Offset(240))
|
||||
MOVL(ECX, Mem{Base: DI}.Offset(244))
|
||||
MOVL(R8L, Mem{Base: DI}.Offset(248))
|
||||
MOVL(R9L, Mem{Base: DI}.Offset(252))
|
||||
// Reload the byte count saved by BYTESATLEAST256 and account for the 256
// bytes just produced.
MOVQ(Mem{Base: R12}.Offset(352), R9)
|
||||
SUBQ(U32(256), R9)
|
||||
ADDQ(U32(256), RSI)
|
||||
ADDQ(U32(256), RDI)
|
||||
CMPQ(R9, U32(256))
|
||||
JAE(LabelRef("BYTESATLEAST256"))
|
||||
// Nothing left: exit. Otherwise fall through to the code emitted after this.
CMPQ(R9, Imm(0))
|
||||
JBE(LabelRef("DONE"))
|
||||
}
|
||||
|
||||
// BYTESBETWEEN1AND255 emits the entry to the one-block path. With at least
// 64 bytes remaining it jumps straight to NOCOPY. With fewer, it saves the
// real output pointer in RDX, copies the R9 remaining input bytes into the
// scratch buffer at 360(R12), and points both RSI and RDI at that buffer so
// the block is processed in place there.
func BYTESBETWEEN1AND255() {
|
||||
Label("BYTESBETWEEN1AND255")
|
||||
CMPQ(R9, Imm(64))
|
||||
JAE(LabelRef("NOCOPY"))
|
||||
// Keep the caller's output pointer; MAINLOOP2 copies the result back to it.
MOVQ(RDI, RDX)
|
||||
LEAQ(Mem{Base: R12}.Offset(360), RDI)
|
||||
MOVQ(R9, RCX)
|
||||
// Hack to get Avo to emit:
|
||||
// REP; MOVSB
|
||||
Instruction(&ir.Instruction{Opcode: "REP; MOVSB"})
|
||||
// REP MOVSB advanced RDI, so recompute it; RSI now reads from scratch too.
LEAQ(Mem{Base: R12}.Offset(360), RDI)
|
||||
LEAQ(Mem{Base: R12}.Offset(360), RSI)
|
||||
}
|
||||
|
||||
// NOCOPY emits the set-up for the one-block loop: it saves the remaining
// byte count at 352(R12), loads the four 16-byte state rows from 48, 0, 16
// and 32(R12) into X0..X3, seeds X4 with a copy of X1 and sets the round
// counter RCX to 20.
func NOCOPY() {
|
||||
Label("NOCOPY")
|
||||
// R9 is used as a scratch register by MAINLOOP2's output stage.
MOVQ(R9, Mem{Base: R12}.Offset(352))
|
||||
MOVOA(Mem{Base: R12}.Offset(48), X0)
|
||||
MOVOA(Mem{Base: R12}.Offset(0), X1)
|
||||
MOVOA(Mem{Base: R12}.Offset(16), X2)
|
||||
MOVOA(Mem{Base: R12}.Offset(32), X3)
|
||||
MOVOA(X1, X4)
|
||||
MOVQ(U32(20), RCX)
|
||||
}
|
||||
|
||||
// MAINLOOP2 emits the one-block round loop and its output stage.
//
// The whole state lives in X0..X3 with X4..X6 as temporaries. Rotate-left by
// k is built from PSLLL k / PSRLL 32-k XORed into the destination, and PSHUFL
// with 0x93 / 0x4E / 0x39 rotates the lanes of a row between steps. The round
// counter in RCX (set to 20 by NOCOPY) is decremented by 4 per pass.
//
// After the loop the saved state is added back, and the sixteen words are
// XORed with the input at (SI) and stored at (DI); the lanes are in a permuted
// order, hence the scattered offsets. The 64-bit counter held in the words at
// 16(R12) and 36(R12) is then incremented by one. If more than 64 bytes were
// pending, control goes to BYTESATLEAST65; exactly 64 goes to BYTESATLEAST64;
// otherwise the R9 remaining bytes are copied from the buffer at RDI to the
// output pointer saved in RDX and control falls through to whatever the
// caller emits next.
func MAINLOOP2() {
|
||||
Label("MAINLOOP2")
|
||||
PADDL(X0, X4)
|
||||
MOVOA(X0, X5)
|
||||
MOVOA(X4, X6)
|
||||
PSLLL(Imm(7), X4)
|
||||
PSRLL(Imm(25), X6)
|
||||
PXOR(X4, X3)
|
||||
PXOR(X6, X3)
|
||||
PADDL(X3, X5)
|
||||
MOVOA(X3, X4)
|
||||
MOVOA(X5, X6)
|
||||
PSLLL(Imm(9), X5)
|
||||
PSRLL(Imm(23), X6)
|
||||
PXOR(X5, X2)
|
||||
PSHUFL(Imm(0x93), X3, X3)
|
||||
PXOR(X6, X2)
|
||||
PADDL(X2, X4)
|
||||
MOVOA(X2, X5)
|
||||
MOVOA(X4, X6)
|
||||
PSLLL(Imm(13), X4)
|
||||
PSRLL(Imm(19), X6)
|
||||
PXOR(X4, X1)
|
||||
PSHUFL(Imm(0x4E), X2, X2)
|
||||
PXOR(X6, X1)
|
||||
PADDL(X1, X5)
|
||||
MOVOA(X3, X4)
|
||||
MOVOA(X5, X6)
|
||||
PSLLL(Imm(18), X5)
|
||||
PSRLL(Imm(14), X6)
|
||||
PXOR(X5, X0)
|
||||
PSHUFL(Imm(0x39), X1, X1)
|
||||
PXOR(X6, X0)
|
||||
// Second round of the pass: same pattern with the roles of X1 and X3 swapped.
PADDL(X0, X4)
|
||||
MOVOA(X0, X5)
|
||||
MOVOA(X4, X6)
|
||||
PSLLL(Imm(7), X4)
|
||||
PSRLL(Imm(25), X6)
|
||||
PXOR(X4, X1)
|
||||
PXOR(X6, X1)
|
||||
PADDL(X1, X5)
|
||||
MOVOA(X1, X4)
|
||||
MOVOA(X5, X6)
|
||||
PSLLL(Imm(9), X5)
|
||||
PSRLL(Imm(23), X6)
|
||||
PXOR(X5, X2)
|
||||
PSHUFL(Imm(0x93), X1, X1)
|
||||
PXOR(X6, X2)
|
||||
PADDL(X2, X4)
|
||||
MOVOA(X2, X5)
|
||||
MOVOA(X4, X6)
|
||||
PSLLL(Imm(13), X4)
|
||||
PSRLL(Imm(19), X6)
|
||||
PXOR(X4, X3)
|
||||
PSHUFL(Imm(0x4E), X2, X2)
|
||||
PXOR(X6, X3)
|
||||
PADDL(X3, X5)
|
||||
MOVOA(X1, X4)
|
||||
MOVOA(X5, X6)
|
||||
PSLLL(Imm(18), X5)
|
||||
PSRLL(Imm(14), X6)
|
||||
PXOR(X5, X0)
|
||||
PSHUFL(Imm(0x39), X3, X3)
|
||||
PXOR(X6, X0)
|
||||
// Third round: repeats the first round's pattern.
PADDL(X0, X4)
|
||||
MOVOA(X0, X5)
|
||||
MOVOA(X4, X6)
|
||||
PSLLL(Imm(7), X4)
|
||||
PSRLL(Imm(25), X6)
|
||||
PXOR(X4, X3)
|
||||
PXOR(X6, X3)
|
||||
PADDL(X3, X5)
|
||||
MOVOA(X3, X4)
|
||||
MOVOA(X5, X6)
|
||||
PSLLL(Imm(9), X5)
|
||||
PSRLL(Imm(23), X6)
|
||||
PXOR(X5, X2)
|
||||
PSHUFL(Imm(0x93), X3, X3)
|
||||
PXOR(X6, X2)
|
||||
PADDL(X2, X4)
|
||||
MOVOA(X2, X5)
|
||||
MOVOA(X4, X6)
|
||||
PSLLL(Imm(13), X4)
|
||||
PSRLL(Imm(19), X6)
|
||||
PXOR(X4, X1)
|
||||
PSHUFL(Imm(0x4E), X2, X2)
|
||||
PXOR(X6, X1)
|
||||
PADDL(X1, X5)
|
||||
MOVOA(X3, X4)
|
||||
MOVOA(X5, X6)
|
||||
PSLLL(Imm(18), X5)
|
||||
PSRLL(Imm(14), X6)
|
||||
PXOR(X5, X0)
|
||||
PSHUFL(Imm(0x39), X1, X1)
|
||||
PXOR(X6, X0)
|
||||
// Fourth round: repeats the second round's pattern, with the loop-counter
// update interleaved.
PADDL(X0, X4)
|
||||
MOVOA(X0, X5)
|
||||
MOVOA(X4, X6)
|
||||
PSLLL(Imm(7), X4)
|
||||
PSRLL(Imm(25), X6)
|
||||
PXOR(X4, X1)
|
||||
PXOR(X6, X1)
|
||||
PADDL(X1, X5)
|
||||
MOVOA(X1, X4)
|
||||
MOVOA(X5, X6)
|
||||
PSLLL(Imm(9), X5)
|
||||
PSRLL(Imm(23), X6)
|
||||
PXOR(X5, X2)
|
||||
PSHUFL(Imm(0x93), X1, X1)
|
||||
PXOR(X6, X2)
|
||||
PADDL(X2, X4)
|
||||
MOVOA(X2, X5)
|
||||
MOVOA(X4, X6)
|
||||
PSLLL(Imm(13), X4)
|
||||
PSRLL(Imm(19), X6)
|
||||
PXOR(X4, X3)
|
||||
PSHUFL(Imm(0x4E), X2, X2)
|
||||
PXOR(X6, X3)
|
||||
// The flags set here are consumed by the JA below; the SSE instructions in
// between do not modify the flags.
SUBQ(Imm(4), RCX)
|
||||
PADDL(X3, X5)
|
||||
MOVOA(X1, X4)
|
||||
MOVOA(X5, X6)
|
||||
PSLLL(Imm(18), X5)
|
||||
// NOTE(review): X7 is cleared here but is not read again anywhere in this
// function; presumably kept only to match the reference assembly — confirm.
PXOR(X7, X7)
|
||||
PSRLL(Imm(14), X6)
|
||||
PXOR(X5, X0)
|
||||
PSHUFL(Imm(0x39), X3, X3)
|
||||
PXOR(X6, X0)
|
||||
JA(LabelRef("MAINLOOP2"))
|
||||
// Add the saved input state back (same slots NOCOPY loaded from).
PADDL(Mem{Base: R12}.Offset(48), X0)
|
||||
PADDL(Mem{Base: R12}.Offset(0), X1)
|
||||
PADDL(Mem{Base: R12}.Offset(16), X2)
|
||||
PADDL(Mem{Base: R12}.Offset(32), X3)
|
||||
// Four passes of: extract lane 0 of each row, rotate lanes, XOR with input,
// store. The last pass skips the lane rotation.
MOVD(X0, ECX)
|
||||
MOVD(X1, R8)
|
||||
MOVD(X2, R9)
|
||||
MOVD(X3, EAX)
|
||||
PSHUFL(Imm(0x39), X0, X0)
|
||||
PSHUFL(Imm(0x39), X1, X1)
|
||||
PSHUFL(Imm(0x39), X2, X2)
|
||||
PSHUFL(Imm(0x39), X3, X3)
|
||||
XORL(Mem{Base: SI}.Offset(0), ECX)
|
||||
XORL(Mem{Base: SI}.Offset(48), R8L)
|
||||
XORL(Mem{Base: SI}.Offset(32), R9L)
|
||||
XORL(Mem{Base: SI}.Offset(16), EAX)
|
||||
MOVL(ECX, Mem{Base: DI}.Offset(0))
|
||||
MOVL(R8L, Mem{Base: DI}.Offset(48))
|
||||
MOVL(R9L, Mem{Base: DI}.Offset(32))
|
||||
MOVL(EAX, Mem{Base: DI}.Offset(16))
|
||||
MOVD(X0, ECX)
|
||||
MOVD(X1, R8)
|
||||
MOVD(X2, R9)
|
||||
MOVD(X3, EAX)
|
||||
PSHUFL(Imm(0x39), X0, X0)
|
||||
PSHUFL(Imm(0x39), X1, X1)
|
||||
PSHUFL(Imm(0x39), X2, X2)
|
||||
PSHUFL(Imm(0x39), X3, X3)
|
||||
XORL(Mem{Base: SI}.Offset(20), ECX)
|
||||
XORL(Mem{Base: SI}.Offset(4), R8L)
|
||||
XORL(Mem{Base: SI}.Offset(52), R9L)
|
||||
XORL(Mem{Base: SI}.Offset(36), EAX)
|
||||
MOVL(ECX, Mem{Base: DI}.Offset(20))
|
||||
MOVL(R8L, Mem{Base: DI}.Offset(4))
|
||||
MOVL(R9L, Mem{Base: DI}.Offset(52))
|
||||
MOVL(EAX, Mem{Base: DI}.Offset(36))
|
||||
MOVD(X0, ECX)
|
||||
MOVD(X1, R8)
|
||||
MOVD(X2, R9)
|
||||
MOVD(X3, EAX)
|
||||
PSHUFL(Imm(0x39), X0, X0)
|
||||
PSHUFL(Imm(0x39), X1, X1)
|
||||
PSHUFL(Imm(0x39), X2, X2)
|
||||
PSHUFL(Imm(0x39), X3, X3)
|
||||
XORL(Mem{Base: SI}.Offset(40), ECX)
|
||||
XORL(Mem{Base: SI}.Offset(24), R8L)
|
||||
XORL(Mem{Base: SI}.Offset(8), R9L)
|
||||
XORL(Mem{Base: SI}.Offset(56), EAX)
|
||||
MOVL(ECX, Mem{Base: DI}.Offset(40))
|
||||
MOVL(R8L, Mem{Base: DI}.Offset(24))
|
||||
MOVL(R9L, Mem{Base: DI}.Offset(8))
|
||||
MOVL(EAX, Mem{Base: DI}.Offset(56))
|
||||
MOVD(X0, ECX)
|
||||
MOVD(X1, R8)
|
||||
MOVD(X2, R9)
|
||||
MOVD(X3, EAX)
|
||||
XORL(Mem{Base: SI}.Offset(60), ECX)
|
||||
XORL(Mem{Base: SI}.Offset(44), R8L)
|
||||
XORL(Mem{Base: SI}.Offset(28), R9L)
|
||||
XORL(Mem{Base: SI}.Offset(12), EAX)
|
||||
MOVL(ECX, Mem{Base: DI}.Offset(60))
|
||||
MOVL(R8L, Mem{Base: DI}.Offset(44))
|
||||
MOVL(R9L, Mem{Base: DI}.Offset(28))
|
||||
MOVL(EAX, Mem{Base: DI}.Offset(12))
|
||||
// Restore the byte count and bump the 64-bit counter (low word at 16(R12),
// high word at 36(R12)) by one.
MOVQ(Mem{Base: R12}.Offset(352), R9)
|
||||
MOVL(Mem{Base: R12}.Offset(16), ECX)
|
||||
MOVL(Mem{Base: R12}.Offset(36), R8L)
|
||||
ADDQ(Imm(1), RCX)
|
||||
SHLQ(Imm(32), R8)
|
||||
ADDQ(R8, RCX)
|
||||
MOVQ(RCX, R8)
|
||||
SHRQ(Imm(32), R8)
|
||||
MOVL(ECX, Mem{Base: R12}.Offset(16))
|
||||
MOVL(R8L, Mem{Base: R12}.Offset(36))
|
||||
// One compare, two branches: above 64, then (not above but) equal to 64.
CMPQ(R9, Imm(64))
|
||||
JA(LabelRef("BYTESATLEAST65"))
|
||||
JAE(LabelRef("BYTESATLEAST64"))
|
||||
// Fewer than 64 bytes: the block was produced in the scratch buffer, so copy
// the R9 valid bytes to the output pointer saved in RDX.
MOVQ(RDI, RSI)
|
||||
MOVQ(RDX, RDI)
|
||||
MOVQ(R9, RCX)
|
||||
// Hack to get Avo to emit:
|
||||
// REP; MOVSB
|
||||
Instruction(&ir.Instruction{Opcode: "REP; MOVSB"})
|
||||
}
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Загрузка…
Ссылка в новой задаче