x/crypto/poly1305: fix memory alignment fault in ARM

The current ARM implementation assumes that the input message
  is aligned in memory, so it can cause an alignment fault when
  unaligned access is not enabled. It may also generate incorrect
  output on ARMv5.

  This change fixes this issue by temporarily copying the input
  to a local aligned space. Although there may be a better way
  to handle unaligned access, this would be a safe way in all
  ARM versions.

  This change also adds a test and benchmarks with unaligned
  data. The benchmark results on a Raspberry Pi 2 are:

  Benchmark64  2000000         812 ns/op    78.81 MB/s
  Benchmark1K   200000        7809 ns/op   131.12 MB/s
  Benchmark64Unaligned   2000000         967 ns/op    66.13 MB/s
  Benchmark1KUnaligned    200000       10316 ns/op    99.26 MB/s

Change-Id: I189cc1b7bb6c67a04c9877271fb27326f2896e82
Reviewed-on: https://go-review.googlesource.com/12797
Reviewed-by: Adam Langley <agl@golang.org>
This commit is contained in:
Jungho Ahn 2015-07-28 14:18:45 -07:00 коммит произвёл Adam Langley
Родитель c169681727
Коммит 81bf7719a6
2 изменённых файлов: 79 добавлений и 19 удалений

Просмотреть файл

@ -47,6 +47,16 @@ TEXT poly1305_init_ext_armv6<>(SB),4,$-4
MOVM.IA.W (R13), [R4-R11]
RET
// MOVW_UNALIGNED copies the four bytes at offset+0..offset+3 from Rsrc to the
// same offsets at Rdst, one byte at a time through Rtmp. Only byte-sized
// loads and stores are used, so neither address has to be word-aligned.
// Rsrc and Rdst are not modified; Rtmp is clobbered.
#define MOVW_UNALIGNED(Rsrc, Rdst, Rtmp, offset) \
MOVBU (offset+0)(Rsrc), Rtmp; \
MOVBU Rtmp, (offset+0)(Rdst); \
MOVBU (offset+1)(Rsrc), Rtmp; \
MOVBU Rtmp, (offset+1)(Rdst); \
MOVBU (offset+2)(Rsrc), Rtmp; \
MOVBU Rtmp, (offset+2)(Rdst); \
MOVBU (offset+3)(Rsrc), Rtmp; \
MOVBU Rtmp, (offset+3)(Rdst)
TEXT poly1305_blocks_armv6<>(SB),4,$-4
MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13)
SUB $128, R13
@ -66,7 +76,19 @@ TEXT poly1305_blocks_armv6<>(SB),4,$-4
CMP $16, R12
BLO poly1305_blocks_armv6_done
poly1305_blocks_armv6_mainloop:
WORD $0xe31e0003 // TST R14, #3 not working see issue 5921
BEQ poly1305_blocks_armv6_mainloop_aligned
ADD $48, R13, g
MOVW_UNALIGNED(R14, g, R0, 0)
MOVW_UNALIGNED(R14, g, R0, 4)
MOVW_UNALIGNED(R14, g, R0, 8)
MOVW_UNALIGNED(R14, g, R0, 12)
MOVM.IA (g), [R0-R3]
ADD $16, R14
B poly1305_blocks_armv6_mainloop_loaded
poly1305_blocks_armv6_mainloop_aligned:
MOVM.IA.W (R14), [R0-R3]
poly1305_blocks_armv6_mainloop_loaded:
MOVW R0>>26, g
MOVW R1>>20, R11
MOVW R2>>14, R12
@ -174,6 +196,16 @@ poly1305_blocks_armv6_done:
MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, g, R11, R14]
RET
// MOVHUP_UNALIGNED copies two bytes from (Rsrc) to (Rdst), one byte at a time
// through Rtmp. Every access uses the post-increment (.P) form with a step of
// 1, so both Rsrc and Rdst end up advanced by 2. Rtmp is clobbered.
#define MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) \
MOVBU.P 1(Rsrc), Rtmp; \
MOVBU.P Rtmp, 1(Rdst); \
MOVBU.P 1(Rsrc), Rtmp; \
MOVBU.P Rtmp, 1(Rdst)
// MOVWP_UNALIGNED copies four bytes from (Rsrc) to (Rdst) by expanding
// MOVHUP_UNALIGNED twice; both pointers end up advanced by 4 and Rtmp is
// clobbered.
#define MOVWP_UNALIGNED(Rsrc, Rdst, Rtmp) \
MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp); \
MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp)
TEXT poly1305_finish_ext_armv6<>(SB),4,$-4
MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13)
SUB $16, R13, R13
@ -189,16 +221,32 @@ TEXT poly1305_finish_ext_armv6<>(SB),4,$-4
MOVW R0, 4(R13)
MOVW R0, 8(R13)
MOVW R0, 12(R13)
WORD $0xe3110003 // TST R1, #3 not working see issue 5921
BEQ poly1305_finish_ext_armv6_aligned
WORD $0xe3120008 // TST R2, #8 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip8
MOVM.IA.W (R1), [g-R11]
MOVM.IA.W [g-R11], (R9)
MOVWP_UNALIGNED(R1, R9, g)
MOVWP_UNALIGNED(R1, R9, g)
poly1305_finish_ext_armv6_skip8:
WORD $0xe3120004 // TST $4, R2 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip4
MOVWP_UNALIGNED(R1, R9, g)
poly1305_finish_ext_armv6_skip4:
WORD $0xe3120002 // TST $2, R2 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip2
MOVHUP_UNALIGNED(R1, R9, g)
B poly1305_finish_ext_armv6_skip2
poly1305_finish_ext_armv6_aligned:
WORD $0xe3120008 // TST R2, #8 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip8_aligned
MOVM.IA.W (R1), [g-R11]
MOVM.IA.W [g-R11], (R9)
poly1305_finish_ext_armv6_skip8_aligned:
WORD $0xe3120004 // TST $4, R2 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip4_aligned
MOVW.P 4(R1), g
MOVW.P g, 4(R9)
poly1305_finish_ext_armv6_skip4:
poly1305_finish_ext_armv6_skip4_aligned:
WORD $0xe3120002 // TST $2, R2 not working see issue 5921
BEQ poly1305_finish_ext_armv6_skip2
MOVHU.P 2(R1), g

Просмотреть файл

@ -7,6 +7,7 @@ package poly1305
import (
"bytes"
"testing"
"unsafe"
)
var testData = []struct {
@ -34,41 +35,52 @@ var testData = []struct {
},
}
func TestSum(t *testing.T) {
func testSum(t *testing.T, unaligned bool) {
var out [16]byte
var key [32]byte
for i, v := range testData {
in := v.in
if unaligned {
in = unalignBytes(in)
}
copy(key[:], v.k)
Sum(&out, v.in, &key)
Sum(&out, in, &key)
if !bytes.Equal(out[:], v.correct) {
t.Errorf("%d: expected %x, got %x", i, v.correct, out[:])
}
}
}
func Benchmark1K(b *testing.B) {
b.StopTimer()
// TestSum runs the shared testSum check with the test inputs passed to Sum
// as they are stored (unaligned=false).
func TestSum(t *testing.T) {
	testSum(t, false)
}
// TestSumUnaligned runs the shared testSum check with unaligned=true, so each
// input is first copied through unalignBytes before being passed to Sum.
func TestSumUnaligned(t *testing.T) {
	testSum(t, true)
}
func benchmark(b *testing.B, size int, unaligned bool) {
var out [16]byte
var key [32]byte
in := make([]byte, 1024)
in := make([]byte, size)
if unaligned {
in = unalignBytes(in)
}
b.SetBytes(int64(len(in)))
b.StartTimer()
b.ResetTimer()
for i := 0; i < b.N; i++ {
Sum(&out, in, &key)
}
}
func Benchmark64(b *testing.B) {
b.StopTimer()
var out [16]byte
var key [32]byte
in := make([]byte, 64)
b.SetBytes(int64(len(in)))
b.StartTimer()
// Benchmark64 measures Sum on a 64-byte input used as allocated.
func Benchmark64(b *testing.B) {
	benchmark(b, 64, false)
}
// Benchmark1K measures Sum on a 1024-byte input used as allocated.
func Benchmark1K(b *testing.B) {
	benchmark(b, 1024, false)
}
// Benchmark64Unaligned measures Sum on a 64-byte input that has been passed
// through unalignBytes.
func Benchmark64Unaligned(b *testing.B) {
	benchmark(b, 64, true)
}
// Benchmark1KUnaligned measures Sum on a 1024-byte input that has been passed
// through unalignBytes.
func Benchmark1KUnaligned(b *testing.B) {
	benchmark(b, 1024, true)
}
for i := 0; i < b.N; i++ {
Sum(&out, in, &key)
// unalignBytes returns a copy of in placed so that its first byte does not
// sit on a uint32 alignment boundary.
//
// It allocates one spare byte and checks the address of the allocation: if
// the allocation starts on a uint32 boundary the copy begins one byte in,
// otherwise it begins at the start. The result has the same length and
// contents as in and does not share memory with it.
func unalignBytes(in []byte) []byte {
	buf := make([]byte, len(in)+1)
	mask := unsafe.Alignof(uint32(0)) - 1

	// Skip the first byte only when the allocation itself is aligned.
	start := 0
	if uintptr(unsafe.Pointer(&buf[0]))&mask == 0 {
		start = 1
	}

	shifted := buf[start : start+len(in)]
	copy(shifted, in)
	return shifted
}