x/crypto/poly1305: fix memory alignment fault in ARM
The current ARM implementation assumes that the input message is memory aligned and so it can cause alignment fault when it is not enabled. Also it may generate incorrect outputs in ARMv5. This change fixes this issue by temporarily copying the input to a local aligned space. Although there may be a better way to handle unaligned access, this would be a safe way in all ARM versions. This change also added a test and benchmarks with unaligned data. The benchmark result on RasberryPI 2 is Benchmark64 2000000 812 ns/op 78.81 MB/s Benchmark1K 200000 7809 ns/op 131.12 MB/s Benchmark64Unaligned 2000000 967 ns/op 66.13 MB/s Benchmark1KUnaligned 200000 10316 ns/op 99.26 MB/s Change-Id: I189cc1b7bb6c67a04c9877271fb27326f2896e82 Reviewed-on: https://go-review.googlesource.com/12797 Reviewed-by: Adam Langley <agl@golang.org>
This commit is contained in:
Родитель
c169681727
Коммит
81bf7719a6
|
@ -47,6 +47,16 @@ TEXT poly1305_init_ext_armv6<>(SB),4,$-4
|
|||
MOVM.IA.W (R13), [R4-R11]
|
||||
RET
|
||||
|
||||
#define MOVW_UNALIGNED(Rsrc, Rdst, Rtmp, offset) \
|
||||
MOVBU (offset+0)(Rsrc), Rtmp; \
|
||||
MOVBU Rtmp, (offset+0)(Rdst); \
|
||||
MOVBU (offset+1)(Rsrc), Rtmp; \
|
||||
MOVBU Rtmp, (offset+1)(Rdst); \
|
||||
MOVBU (offset+2)(Rsrc), Rtmp; \
|
||||
MOVBU Rtmp, (offset+2)(Rdst); \
|
||||
MOVBU (offset+3)(Rsrc), Rtmp; \
|
||||
MOVBU Rtmp, (offset+3)(Rdst)
|
||||
|
||||
TEXT poly1305_blocks_armv6<>(SB),4,$-4
|
||||
MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13)
|
||||
SUB $128, R13
|
||||
|
@ -66,7 +76,19 @@ TEXT poly1305_blocks_armv6<>(SB),4,$-4
|
|||
CMP $16, R12
|
||||
BLO poly1305_blocks_armv6_done
|
||||
poly1305_blocks_armv6_mainloop:
|
||||
WORD $0xe31e0003 // TST R14, #3 not working see issue 5921
|
||||
BEQ poly1305_blocks_armv6_mainloop_aligned
|
||||
ADD $48, R13, g
|
||||
MOVW_UNALIGNED(R14, g, R0, 0)
|
||||
MOVW_UNALIGNED(R14, g, R0, 4)
|
||||
MOVW_UNALIGNED(R14, g, R0, 8)
|
||||
MOVW_UNALIGNED(R14, g, R0, 12)
|
||||
MOVM.IA (g), [R0-R3]
|
||||
ADD $16, R14
|
||||
B poly1305_blocks_armv6_mainloop_loaded
|
||||
poly1305_blocks_armv6_mainloop_aligned:
|
||||
MOVM.IA.W (R14), [R0-R3]
|
||||
poly1305_blocks_armv6_mainloop_loaded:
|
||||
MOVW R0>>26, g
|
||||
MOVW R1>>20, R11
|
||||
MOVW R2>>14, R12
|
||||
|
@ -174,6 +196,16 @@ poly1305_blocks_armv6_done:
|
|||
MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, g, R11, R14]
|
||||
RET
|
||||
|
||||
#define MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) \
|
||||
MOVBU.P 1(Rsrc), Rtmp; \
|
||||
MOVBU.P Rtmp, 1(Rdst); \
|
||||
MOVBU.P 1(Rsrc), Rtmp; \
|
||||
MOVBU.P Rtmp, 1(Rdst)
|
||||
|
||||
#define MOVWP_UNALIGNED(Rsrc, Rdst, Rtmp) \
|
||||
MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp); \
|
||||
MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp)
|
||||
|
||||
TEXT poly1305_finish_ext_armv6<>(SB),4,$-4
|
||||
MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13)
|
||||
SUB $16, R13, R13
|
||||
|
@ -189,16 +221,32 @@ TEXT poly1305_finish_ext_armv6<>(SB),4,$-4
|
|||
MOVW R0, 4(R13)
|
||||
MOVW R0, 8(R13)
|
||||
MOVW R0, 12(R13)
|
||||
WORD $0xe3110003 // TST R1, #3 not working see issue 5921
|
||||
BEQ poly1305_finish_ext_armv6_aligned
|
||||
WORD $0xe3120008 // TST R2, #8 not working see issue 5921
|
||||
BEQ poly1305_finish_ext_armv6_skip8
|
||||
MOVM.IA.W (R1), [g-R11]
|
||||
MOVM.IA.W [g-R11], (R9)
|
||||
MOVWP_UNALIGNED(R1, R9, g)
|
||||
MOVWP_UNALIGNED(R1, R9, g)
|
||||
poly1305_finish_ext_armv6_skip8:
|
||||
WORD $0xe3120004 // TST $4, R2 not working see issue 5921
|
||||
BEQ poly1305_finish_ext_armv6_skip4
|
||||
MOVWP_UNALIGNED(R1, R9, g)
|
||||
poly1305_finish_ext_armv6_skip4:
|
||||
WORD $0xe3120002 // TST $2, R2 not working see issue 5921
|
||||
BEQ poly1305_finish_ext_armv6_skip2
|
||||
MOVHUP_UNALIGNED(R1, R9, g)
|
||||
B poly1305_finish_ext_armv6_skip2
|
||||
poly1305_finish_ext_armv6_aligned:
|
||||
WORD $0xe3120008 // TST R2, #8 not working see issue 5921
|
||||
BEQ poly1305_finish_ext_armv6_skip8_aligned
|
||||
MOVM.IA.W (R1), [g-R11]
|
||||
MOVM.IA.W [g-R11], (R9)
|
||||
poly1305_finish_ext_armv6_skip8_aligned:
|
||||
WORD $0xe3120004 // TST $4, R2 not working see issue 5921
|
||||
BEQ poly1305_finish_ext_armv6_skip4_aligned
|
||||
MOVW.P 4(R1), g
|
||||
MOVW.P g, 4(R9)
|
||||
poly1305_finish_ext_armv6_skip4:
|
||||
poly1305_finish_ext_armv6_skip4_aligned:
|
||||
WORD $0xe3120002 // TST $2, R2 not working see issue 5921
|
||||
BEQ poly1305_finish_ext_armv6_skip2
|
||||
MOVHU.P 2(R1), g
|
||||
|
|
|
@ -7,6 +7,7 @@ package poly1305
|
|||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
var testData = []struct {
|
||||
|
@ -34,41 +35,52 @@ var testData = []struct {
|
|||
},
|
||||
}
|
||||
|
||||
func TestSum(t *testing.T) {
|
||||
func testSum(t *testing.T, unaligned bool) {
|
||||
var out [16]byte
|
||||
var key [32]byte
|
||||
|
||||
for i, v := range testData {
|
||||
in := v.in
|
||||
if unaligned {
|
||||
in = unalignBytes(in)
|
||||
}
|
||||
copy(key[:], v.k)
|
||||
Sum(&out, v.in, &key)
|
||||
Sum(&out, in, &key)
|
||||
if !bytes.Equal(out[:], v.correct) {
|
||||
t.Errorf("%d: expected %x, got %x", i, v.correct, out[:])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func Benchmark1K(b *testing.B) {
|
||||
b.StopTimer()
|
||||
func TestSum(t *testing.T) { testSum(t, false) }
|
||||
func TestSumUnaligned(t *testing.T) { testSum(t, true) }
|
||||
|
||||
func benchmark(b *testing.B, size int, unaligned bool) {
|
||||
var out [16]byte
|
||||
var key [32]byte
|
||||
in := make([]byte, 1024)
|
||||
in := make([]byte, size)
|
||||
if unaligned {
|
||||
in = unalignBytes(in)
|
||||
}
|
||||
b.SetBytes(int64(len(in)))
|
||||
b.StartTimer()
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
Sum(&out, in, &key)
|
||||
}
|
||||
}
|
||||
|
||||
func Benchmark64(b *testing.B) {
|
||||
b.StopTimer()
|
||||
var out [16]byte
|
||||
var key [32]byte
|
||||
in := make([]byte, 64)
|
||||
b.SetBytes(int64(len(in)))
|
||||
b.StartTimer()
|
||||
func Benchmark64(b *testing.B) { benchmark(b, 64, false) }
|
||||
func Benchmark1K(b *testing.B) { benchmark(b, 1024, false) }
|
||||
func Benchmark64Unaligned(b *testing.B) { benchmark(b, 64, true) }
|
||||
func Benchmark1KUnaligned(b *testing.B) { benchmark(b, 1024, true) }
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
Sum(&out, in, &key)
|
||||
func unalignBytes(in []byte) []byte {
|
||||
out := make([]byte, len(in)+1)
|
||||
if uintptr(unsafe.Pointer(&out[0]))&(unsafe.Alignof(uint32(0))-1) == 0 {
|
||||
out = out[1:]
|
||||
} else {
|
||||
out = out[:len(in)]
|
||||
}
|
||||
copy(out, in)
|
||||
return out
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче