185 строки
3.4 KiB
ArmAsm
185 строки
3.4 KiB
ArmAsm
|
/*
|
||
|
* arch/ia64/lib/xor.S
|
||
|
*
|
||
|
* Optimized RAID-5 checksumming functions for IA-64.
|
||
|
*
|
||
|
* This program is free software; you can redistribute it and/or modify
|
||
|
* it under the terms of the GNU General Public License as published by
|
||
|
* the Free Software Foundation; either version 2, or (at your option)
|
||
|
* any later version.
|
||
|
*
|
||
|
* You should have received a copy of the GNU General Public License
|
||
|
* (for example /usr/src/linux/COPYING); if not, write to the Free
|
||
|
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||
|
*/
|
||
|
|
||
|
#include <asm/asmmacro.h>
|
||
|
|
||
|
/*
 * xor_ia64_2 - XOR two buffers together, 8 bytes per loop iteration.
 *
 * Inputs (stacked input registers):
 *   in0 = length; shifted right by 3 to obtain the number of 8-byte words
 *   in1 = first source and also the destination (r8, the store pointer,
 *         starts out as a copy of in1)
 *   in2 = second source
 *
 * Effect: for every 8-byte word i, in1[i] = in1[i] ^ in2[i].
 *
 * Static registers written: r8 (store pointer), r16/r17 (load pointers),
 * r29 (saved pr), r30 (saved ar.lc), r31 (copy of ar.pfs).  ar.lc and pr
 * are restored before returning; ar.ec is written and not restored.
 *
 * The loop is modulo-scheduled with br.ctop in 6 + 2 stages:
 *   stage 0      p[0]    issue both loads
 *   stage 6      p[6]    XOR the words loaded six iterations earlier
 *   stage 6 + 1  p[6+1]  store the XOR result of the previous iteration
 *
 * The rotating region (16 registers) spans the whole register frame
 * (3 in + 0 local + 13 out), so in0-in2 share registers with the rotating
 * names declared by .rotr.  That is why the pointers are copied to static
 * registers before the loop is entered.
 *
 * NOTE(review): there is no guard for in0 < 8.  In that case in0 >> 3 is 0
 * and ar.lc is loaded with -1.  Callers presumably always pass a non-zero
 * multiple of 8 - confirm against the call sites.
 */
GLOBAL_ENTRY(xor_ia64_2)
	.prologue
	.fframe 0				// no memory stack frame is used
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 3, 0, 13, 16	// 3 in, 0 local, 13 out, 16 rotating
	.save ar.lc, r30
	mov r30 = ar.lc				// keep the caller loop count for the exit path
	.save pr, r29
	mov r29 = pr				// keep the caller predicates for the exit path
	;;
	.body
	mov r8 = in1				// r8 = store pointer (destination is in1)
	mov ar.ec = 6 + 2			// epilog count = number of pipeline stages
	shr in0 = in0, 3			// length -> number of 8-byte words
	;;
	adds in0 = -1, in0			// ar.lc takes the trip count minus one
	mov r16 = in1				// r16 = load pointer, first source
	mov r17 = in2				// r17 = load pointer, second source
	;;
	mov ar.lc = in0
	mov pr.rot = 1 << 16			// only p16, i.e. p[0], starts out set
	;;
	.rotr s1[6+1], s2[6+1], d[2]		// 7 + 7 + 2 = 16 rotating registers
	.rotp p[6+2]				// one stage predicate per pipeline stage
0:
(p[0])	ld8.nta s1[0] = [r16], 8		// stage 0: load and post-increment by 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]			// stage 6: combine the words loaded 6 iterations ago
(p[6+1])st8.nta [r8] = d[1], 8			// stage 7: d[1] is the d[0] of the previous iteration
	nop.f 0					// explicit filler for the F slot
	br.ctop.dptk.few 0b			// rotate registers and predicates, loop
	;;
	mov ar.lc = r30				// restore the caller loop count
	mov pr = r29, -1			// restore every predicate register
	br.ret.sptk.few rp
END(xor_ia64_2)
|
||
|
|
||
|
/*
 * xor_ia64_3 - XOR three buffers together, 8 bytes per loop iteration.
 *
 * Inputs (stacked input registers):
 *   in0 = length; shifted right by 3 to obtain the number of 8-byte words
 *   in1 = first source and also the destination (r8, the store pointer,
 *         starts out as a copy of in1)
 *   in2 = second source
 *   in3 = third source
 *
 * Effect: for every 8-byte word i, in1[i] = in1[i] ^ in2[i] ^ in3[i].
 *
 * Static registers written: r8 (store pointer), r16-r18 (load pointers),
 * r29 (saved pr), r30 (saved ar.lc), r31 (copy of ar.pfs).  ar.lc and pr
 * are restored before returning; ar.ec is written and not restored.
 *
 * The loop is modulo-scheduled with br.ctop in 6 + 2 stages:
 *   stage 0      p[0]    issue the three loads
 *   stage 6      p[6]    XOR the words loaded six iterations earlier
 *   stage 6 + 1  p[6+1]  store the XOR result of the previous iteration
 * Each iteration is split into two instruction groups by a stop, because
 * the second XOR consumes the d[0] produced by the first one.
 *
 * The rotating region (24 registers) spans the whole register frame
 * (4 in + 0 local + 20 out), so in0-in3 share registers with the rotating
 * names declared by .rotr.  That is why the pointers are copied to static
 * registers before the loop is entered.
 *
 * NOTE(review): there is no guard for in0 < 8.  In that case in0 >> 3 is 0
 * and ar.lc is loaded with -1.  Callers presumably always pass a non-zero
 * multiple of 8 - confirm against the call sites.
 */
GLOBAL_ENTRY(xor_ia64_3)
	.prologue
	.fframe 0				// no memory stack frame is used
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 4, 0, 20, 24	// 4 in, 0 local, 20 out, 24 rotating
	.save ar.lc, r30
	mov r30 = ar.lc				// keep the caller loop count for the exit path
	.save pr, r29
	mov r29 = pr				// keep the caller predicates for the exit path
	;;
	.body
	mov r8 = in1				// r8 = store pointer (destination is in1)
	mov ar.ec = 6 + 2			// epilog count = number of pipeline stages
	shr in0 = in0, 3			// length -> number of 8-byte words
	;;
	adds in0 = -1, in0			// ar.lc takes the trip count minus one
	mov r16 = in1				// r16 = load pointer, first source
	mov r17 = in2				// r17 = load pointer, second source
	;;
	mov r18 = in3				// r18 = load pointer, third source
	mov ar.lc = in0
	mov pr.rot = 1 << 16			// only p16, i.e. p[0], starts out set
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], d[2]	// 3 * 7 + 2 = 23 of the 24 rotating registers
	.rotp p[6+2]				// one stage predicate per pipeline stage
0:
(p[0])	ld8.nta s1[0] = [r16], 8		// stage 0: load and post-increment by 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]			// stage 6: first partial result
	;;
(p[0])	ld8.nta s3[0] = [r18], 8
(p[6+1])st8.nta [r8] = d[1], 8			// stage 7: d[1] is the d[0] of the previous iteration
(p[6])	xor d[0] = d[0], s3[6]			// stage 6: fold in the third source
	br.ctop.dptk.few 0b			// rotate registers and predicates, loop
	;;
	mov ar.lc = r30				// restore the caller loop count
	mov pr = r29, -1			// restore every predicate register
	br.ret.sptk.few rp
END(xor_ia64_3)
|
||
|
|
||
|
/*
 * xor_ia64_4 - XOR four buffers together, 8 bytes per loop iteration.
 *
 * Inputs (stacked input registers):
 *   in0 = length; shifted right by 3 to obtain the number of 8-byte words
 *   in1 = first source and also the destination (r8, the store pointer,
 *         starts out as a copy of in1)
 *   in2 = second source
 *   in3 = third source
 *   in4 = fourth source
 *
 * Effect: for every 8-byte word i,
 *   in1[i] = in1[i] ^ in2[i] ^ in3[i] ^ in4[i].
 *
 * Static registers written: r8 (store pointer), r16-r19 (load pointers),
 * r20 (scratch for s3 ^ s4), r29 (saved pr), r30 (saved ar.lc), r31 (copy
 * of ar.pfs).  ar.lc and pr are restored before returning; ar.ec is
 * written and not restored.
 *
 * The loop is modulo-scheduled with br.ctop in 6 + 2 stages:
 *   stage 0      p[0]    issue the four loads
 *   stage 6      p[6]    XOR the words loaded six iterations earlier
 *   stage 6 + 1  p[6+1]  store the XOR result of the previous iteration
 * Each iteration is split into two instruction groups by a stop: the two
 * pairwise XORs come first, then the XOR that merges them.
 *
 * The rotating region (32 registers) spans the whole register frame
 * (5 in + 0 local + 27 out), so in0-in4 share registers with the rotating
 * names declared by .rotr.  That is why the pointers are copied to static
 * registers before the loop is entered.
 *
 * NOTE(review): there is no guard for in0 < 8.  In that case in0 >> 3 is 0
 * and ar.lc is loaded with -1.  Callers presumably always pass a non-zero
 * multiple of 8 - confirm against the call sites.
 */
GLOBAL_ENTRY(xor_ia64_4)
	.prologue
	.fframe 0				// no memory stack frame is used
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 5, 0, 27, 32	// 5 in, 0 local, 27 out, 32 rotating
	.save ar.lc, r30
	mov r30 = ar.lc				// keep the caller loop count for the exit path
	.save pr, r29
	mov r29 = pr				// keep the caller predicates for the exit path
	;;
	.body
	mov r8 = in1				// r8 = store pointer (destination is in1)
	mov ar.ec = 6 + 2			// epilog count = number of pipeline stages
	shr in0 = in0, 3			// length -> number of 8-byte words
	;;
	adds in0 = -1, in0			// ar.lc takes the trip count minus one
	mov r16 = in1				// r16 = load pointer, first source
	mov r17 = in2				// r17 = load pointer, second source
	;;
	mov r18 = in3				// r18 = load pointer, third source
	mov ar.lc = in0
	mov pr.rot = 1 << 16			// only p16, i.e. p[0], starts out set
	mov r19 = in4				// r19 = load pointer, fourth source
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]	// 4 * 7 + 2 = 30 of the 32 rotating registers
	.rotp p[6+2]				// one stage predicate per pipeline stage
0:
(p[0])	ld8.nta s1[0] = [r16], 8		// stage 0: load and post-increment by 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]			// stage 6: first pair
(p[0])	ld8.nta s3[0] = [r18], 8
(p[0])	ld8.nta s4[0] = [r19], 8
(p[6])	xor r20 = s3[6], s4[6]			// stage 6: second pair, held in static r20
	;;
(p[6+1])st8.nta [r8] = d[1], 8			// stage 7: d[1] is the d[0] of the previous iteration
(p[6])	xor d[0] = d[0], r20			// stage 6: merge both pairs
	br.ctop.dptk.few 0b			// rotate registers and predicates, loop
	;;
	mov ar.lc = r30				// restore the caller loop count
	mov pr = r29, -1			// restore every predicate register
	br.ret.sptk.few rp
END(xor_ia64_4)
|
||
|
|
||
|
/*
 * xor_ia64_5 - XOR five buffers together, 8 bytes per loop iteration.
 *
 * Inputs (stacked input registers):
 *   in0 = length; shifted right by 3 to obtain the number of 8-byte words
 *   in1 = first source and also the destination (r8, the store pointer,
 *         starts out as a copy of in1)
 *   in2 = second source
 *   in3 = third source
 *   in4 = fourth source
 *   in5 = fifth source
 *
 * Effect: for every 8-byte word i,
 *   in1[i] = in1[i] ^ in2[i] ^ in3[i] ^ in4[i] ^ in5[i].
 *
 * Static registers written: r8 (store pointer), r16-r20 (load pointers),
 * r21 (scratch for s3 ^ s4), r29 (saved pr), r30 (saved ar.lc), r31 (copy
 * of ar.pfs).  ar.lc and pr are restored before returning; ar.ec is
 * written and not restored.
 *
 * The loop is modulo-scheduled with br.ctop in 6 + 2 stages:
 *   stage 0      p[0]    issue the five loads
 *   stage 6      p[6]    XOR the words loaded six iterations earlier
 *   stage 6 + 1  p[6+1]  store the XOR result of the previous iteration
 * Each iteration is split into three instruction groups by stops, because
 * the XOR chain on d[0] is three operations deep.
 *
 * The rotating region (40 registers) spans the whole register frame
 * (6 in + 0 local + 34 out), so in0-in5 share registers with the rotating
 * names declared by .rotr.  That is why the pointers are copied to static
 * registers before the loop is entered.
 *
 * NOTE(review): there is no guard for in0 < 8.  In that case in0 >> 3 is 0
 * and ar.lc is loaded with -1.  Callers presumably always pass a non-zero
 * multiple of 8 - confirm against the call sites.
 */
GLOBAL_ENTRY(xor_ia64_5)
	.prologue
	.fframe 0				// no memory stack frame is used
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 6, 0, 34, 40	// 6 in, 0 local, 34 out, 40 rotating
	.save ar.lc, r30
	mov r30 = ar.lc				// keep the caller loop count for the exit path
	.save pr, r29
	mov r29 = pr				// keep the caller predicates for the exit path
	;;
	.body
	mov r8 = in1				// r8 = store pointer (destination is in1)
	mov ar.ec = 6 + 2			// epilog count = number of pipeline stages
	shr in0 = in0, 3			// length -> number of 8-byte words
	;;
	adds in0 = -1, in0			// ar.lc takes the trip count minus one
	mov r16 = in1				// r16 = load pointer, first source
	mov r17 = in2				// r17 = load pointer, second source
	;;
	mov r18 = in3				// r18 = load pointer, third source
	mov ar.lc = in0
	mov pr.rot = 1 << 16			// only p16, i.e. p[0], starts out set
	mov r19 = in4				// r19 = load pointer, fourth source
	mov r20 = in5				// r20 = load pointer, fifth source
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]	// 5 * 7 + 2 = 37 of the 40 rotating registers
	.rotp p[6+2]				// one stage predicate per pipeline stage
0:
(p[0])	ld8.nta s1[0] = [r16], 8		// stage 0: load and post-increment by 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]			// stage 6: first pair
(p[0])	ld8.nta s3[0] = [r18], 8
(p[0])	ld8.nta s4[0] = [r19], 8
(p[6])	xor r21 = s3[6], s4[6]			// stage 6: second pair, held in static r21
	;;
(p[0])	ld8.nta s5[0] = [r20], 8
(p[6+1])st8.nta [r8] = d[1], 8			// stage 7: d[1] is the d[0] of the previous iteration
(p[6])	xor d[0] = d[0], r21			// stage 6: merge both pairs
	;;
(p[6])	xor d[0] = d[0], s5[6]			// stage 6: fold in the fifth source
	nop.f 0					// explicit filler for the F slot
	br.ctop.dptk.few 0b			// rotate registers and predicates, loop
	;;
	mov ar.lc = r30				// restore the caller loop count
	mov pr = r29, -1			// restore every predicate register
	br.ret.sptk.few rp
END(xor_ia64_5)
|