x86: Clean up csum-copy_64.S a bit
The many stray whitespace characters and other untidiness made this code almost unreadable to me, so fix those. No functional changes to the code. Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Родитель
0d2eb44f63
Коммит
2c76397bdd
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright 2002,2003 Andi Kleen, SuSE Labs.
|
||||
*
|
||||
* Copyright 2002, 2003 Andi Kleen, SuSE Labs.
|
||||
*
|
||||
* This file is subject to the terms and conditions of the GNU General Public
|
||||
* License. See the file COPYING in the main directory of this archive
|
||||
* for more details. No warranty for anything given at all.
|
||||
|
@ -11,82 +11,82 @@
|
|||
|
||||
/*
|
||||
* Checksum copy with exception handling.
|
||||
* On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
|
||||
* On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
|
||||
* destination is zeroed.
|
||||
*
|
||||
*
|
||||
* Input
|
||||
* rdi source
|
||||
* rsi destination
|
||||
* edx len (32bit)
|
||||
* ecx sum (32bit)
|
||||
* ecx sum (32bit)
|
||||
* r8 src_err_ptr (int)
|
||||
* r9 dst_err_ptr (int)
|
||||
*
|
||||
* Output
|
||||
* eax 32bit sum (folded from the 64bit accumulator). undefined in case of exception.
|
||||
*
|
||||
* Wrappers need to take care of valid exception sum and zeroing.
|
||||
*
|
||||
* Wrappers need to take care of valid exception sum and zeroing.
|
||||
* They also should align source or destination to 8 bytes.
|
||||
*/
|
||||
|
||||
.macro source
|
||||
10:
|
||||
.section __ex_table,"a"
|
||||
.section __ex_table, "a"
|
||||
.align 8
|
||||
.quad 10b,.Lbad_source
|
||||
.quad 10b, .Lbad_source
|
||||
.previous
|
||||
.endm
|
||||
|
||||
|
||||
.macro dest
|
||||
20:
|
||||
.section __ex_table,"a"
|
||||
.section __ex_table, "a"
|
||||
.align 8
|
||||
.quad 20b,.Lbad_dest
|
||||
.quad 20b, .Lbad_dest
|
||||
.previous
|
||||
.endm
|
||||
|
||||
|
||||
.macro ignore L=.Lignore
|
||||
30:
|
||||
.section __ex_table,"a"
|
||||
.section __ex_table, "a"
|
||||
.align 8
|
||||
.quad 30b,\L
|
||||
.quad 30b, \L
|
||||
.previous
|
||||
.endm
|
||||
|
||||
|
||||
|
||||
|
||||
ENTRY(csum_partial_copy_generic)
|
||||
CFI_STARTPROC
|
||||
cmpl $3*64,%edx
|
||||
jle .Lignore
|
||||
cmpl $3*64, %edx
|
||||
jle .Lignore
|
||||
|
||||
.Lignore:
|
||||
subq $7*8,%rsp
|
||||
.Lignore:
|
||||
subq $7*8, %rsp
|
||||
CFI_ADJUST_CFA_OFFSET 7*8
|
||||
movq %rbx,2*8(%rsp)
|
||||
movq %rbx, 2*8(%rsp)
|
||||
CFI_REL_OFFSET rbx, 2*8
|
||||
movq %r12,3*8(%rsp)
|
||||
movq %r12, 3*8(%rsp)
|
||||
CFI_REL_OFFSET r12, 3*8
|
||||
movq %r14,4*8(%rsp)
|
||||
movq %r14, 4*8(%rsp)
|
||||
CFI_REL_OFFSET r14, 4*8
|
||||
movq %r13,5*8(%rsp)
|
||||
movq %r13, 5*8(%rsp)
|
||||
CFI_REL_OFFSET r13, 5*8
|
||||
movq %rbp,6*8(%rsp)
|
||||
movq %rbp, 6*8(%rsp)
|
||||
CFI_REL_OFFSET rbp, 6*8
|
||||
|
||||
movq %r8,(%rsp)
|
||||
movq %r9,1*8(%rsp)
|
||||
|
||||
movl %ecx,%eax
|
||||
movl %edx,%ecx
|
||||
movq %r8, (%rsp)
|
||||
movq %r9, 1*8(%rsp)
|
||||
|
||||
xorl %r9d,%r9d
|
||||
movq %rcx,%r12
|
||||
movl %ecx, %eax
|
||||
movl %edx, %ecx
|
||||
|
||||
shrq $6,%r12
|
||||
jz .Lhandle_tail /* < 64 */
|
||||
xorl %r9d, %r9d
|
||||
movq %rcx, %r12
|
||||
|
||||
shrq $6, %r12
|
||||
jz .Lhandle_tail /* < 64 */
|
||||
|
||||
clc
|
||||
|
||||
|
||||
/* main loop. checksum and copy in 64 byte blocks */
|
||||
/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
|
||||
/* r11: temp3, rdx: temp4, r12 loopcnt */
|
||||
|
@ -94,156 +94,156 @@ ENTRY(csum_partial_copy_generic)
|
|||
.p2align 4
|
||||
.Lloop:
|
||||
source
|
||||
movq (%rdi),%rbx
|
||||
movq (%rdi), %rbx
|
||||
source
|
||||
movq 8(%rdi),%r8
|
||||
movq 8(%rdi), %r8
|
||||
source
|
||||
movq 16(%rdi),%r11
|
||||
movq 16(%rdi), %r11
|
||||
source
|
||||
movq 24(%rdi),%rdx
|
||||
movq 24(%rdi), %rdx
|
||||
|
||||
source
|
||||
movq 32(%rdi),%r10
|
||||
movq 32(%rdi), %r10
|
||||
source
|
||||
movq 40(%rdi),%rbp
|
||||
movq 40(%rdi), %rbp
|
||||
source
|
||||
movq 48(%rdi),%r14
|
||||
movq 48(%rdi), %r14
|
||||
source
|
||||
movq 56(%rdi),%r13
|
||||
|
||||
movq 56(%rdi), %r13
|
||||
|
||||
ignore 2f
|
||||
prefetcht0 5*64(%rdi)
|
||||
2:
|
||||
adcq %rbx,%rax
|
||||
adcq %r8,%rax
|
||||
adcq %r11,%rax
|
||||
adcq %rdx,%rax
|
||||
adcq %r10,%rax
|
||||
adcq %rbp,%rax
|
||||
adcq %r14,%rax
|
||||
adcq %r13,%rax
|
||||
2:
|
||||
adcq %rbx, %rax
|
||||
adcq %r8, %rax
|
||||
adcq %r11, %rax
|
||||
adcq %rdx, %rax
|
||||
adcq %r10, %rax
|
||||
adcq %rbp, %rax
|
||||
adcq %r14, %rax
|
||||
adcq %r13, %rax
|
||||
|
||||
decl %r12d
|
||||
|
||||
dest
|
||||
movq %rbx,(%rsi)
|
||||
dest
|
||||
movq %r8,8(%rsi)
|
||||
dest
|
||||
movq %r11,16(%rsi)
|
||||
dest
|
||||
movq %rdx,24(%rsi)
|
||||
|
||||
dest
|
||||
movq %r10,32(%rsi)
|
||||
movq %rbx, (%rsi)
|
||||
dest
|
||||
movq %rbp,40(%rsi)
|
||||
movq %r8, 8(%rsi)
|
||||
dest
|
||||
movq %r14,48(%rsi)
|
||||
movq %r11, 16(%rsi)
|
||||
dest
|
||||
movq %r13,56(%rsi)
|
||||
|
||||
movq %rdx, 24(%rsi)
|
||||
|
||||
dest
|
||||
movq %r10, 32(%rsi)
|
||||
dest
|
||||
movq %rbp, 40(%rsi)
|
||||
dest
|
||||
movq %r14, 48(%rsi)
|
||||
dest
|
||||
movq %r13, 56(%rsi)
|
||||
|
||||
3:
|
||||
|
||||
leaq 64(%rdi),%rdi
|
||||
leaq 64(%rsi),%rsi
|
||||
|
||||
jnz .Lloop
|
||||
leaq 64(%rdi), %rdi
|
||||
leaq 64(%rsi), %rsi
|
||||
|
||||
adcq %r9,%rax
|
||||
jnz .Lloop
|
||||
|
||||
adcq %r9, %rax
|
||||
|
||||
/* do last up to 56 bytes */
|
||||
.Lhandle_tail:
|
||||
/* ecx: count */
|
||||
movl %ecx,%r10d
|
||||
andl $63,%ecx
|
||||
shrl $3,%ecx
|
||||
jz .Lfold
|
||||
movl %ecx, %r10d
|
||||
andl $63, %ecx
|
||||
shrl $3, %ecx
|
||||
jz .Lfold
|
||||
clc
|
||||
.p2align 4
|
||||
.Lloop_8:
|
||||
.Lloop_8:
|
||||
source
|
||||
movq (%rdi),%rbx
|
||||
adcq %rbx,%rax
|
||||
movq (%rdi), %rbx
|
||||
adcq %rbx, %rax
|
||||
decl %ecx
|
||||
dest
|
||||
movq %rbx,(%rsi)
|
||||
leaq 8(%rsi),%rsi /* preserve carry */
|
||||
leaq 8(%rdi),%rdi
|
||||
movq %rbx, (%rsi)
|
||||
leaq 8(%rsi), %rsi /* preserve carry */
|
||||
leaq 8(%rdi), %rdi
|
||||
jnz .Lloop_8
|
||||
adcq %r9,%rax /* add in carry */
|
||||
adcq %r9, %rax /* add in carry */
|
||||
|
||||
.Lfold:
|
||||
/* reduce checksum to 32bits */
|
||||
movl %eax,%ebx
|
||||
shrq $32,%rax
|
||||
addl %ebx,%eax
|
||||
adcl %r9d,%eax
|
||||
movl %eax, %ebx
|
||||
shrq $32, %rax
|
||||
addl %ebx, %eax
|
||||
adcl %r9d, %eax
|
||||
|
||||
/* do last up to 6 bytes */
|
||||
/* do last up to 6 bytes */
|
||||
.Lhandle_7:
|
||||
movl %r10d,%ecx
|
||||
andl $7,%ecx
|
||||
shrl $1,%ecx
|
||||
movl %r10d, %ecx
|
||||
andl $7, %ecx
|
||||
shrl $1, %ecx
|
||||
jz .Lhandle_1
|
||||
movl $2,%edx
|
||||
xorl %ebx,%ebx
|
||||
clc
|
||||
movl $2, %edx
|
||||
xorl %ebx, %ebx
|
||||
clc
|
||||
.p2align 4
|
||||
.Lloop_1:
|
||||
.Lloop_1:
|
||||
source
|
||||
movw (%rdi),%bx
|
||||
adcl %ebx,%eax
|
||||
movw (%rdi), %bx
|
||||
adcl %ebx, %eax
|
||||
decl %ecx
|
||||
dest
|
||||
movw %bx,(%rsi)
|
||||
leaq 2(%rdi),%rdi
|
||||
leaq 2(%rsi),%rsi
|
||||
movw %bx, (%rsi)
|
||||
leaq 2(%rdi), %rdi
|
||||
leaq 2(%rsi), %rsi
|
||||
jnz .Lloop_1
|
||||
adcl %r9d,%eax /* add in carry */
|
||||
|
||||
adcl %r9d, %eax /* add in carry */
|
||||
|
||||
/* handle last odd byte */
|
||||
.Lhandle_1:
|
||||
testl $1,%r10d
|
||||
testl $1, %r10d
|
||||
jz .Lende
|
||||
xorl %ebx,%ebx
|
||||
xorl %ebx, %ebx
|
||||
source
|
||||
movb (%rdi),%bl
|
||||
movb (%rdi), %bl
|
||||
dest
|
||||
movb %bl,(%rsi)
|
||||
addl %ebx,%eax
|
||||
adcl %r9d,%eax /* carry */
|
||||
|
||||
movb %bl, (%rsi)
|
||||
addl %ebx, %eax
|
||||
adcl %r9d, %eax /* carry */
|
||||
|
||||
CFI_REMEMBER_STATE
|
||||
.Lende:
|
||||
movq 2*8(%rsp),%rbx
|
||||
movq 2*8(%rsp), %rbx
|
||||
CFI_RESTORE rbx
|
||||
movq 3*8(%rsp),%r12
|
||||
movq 3*8(%rsp), %r12
|
||||
CFI_RESTORE r12
|
||||
movq 4*8(%rsp),%r14
|
||||
movq 4*8(%rsp), %r14
|
||||
CFI_RESTORE r14
|
||||
movq 5*8(%rsp),%r13
|
||||
movq 5*8(%rsp), %r13
|
||||
CFI_RESTORE r13
|
||||
movq 6*8(%rsp),%rbp
|
||||
movq 6*8(%rsp), %rbp
|
||||
CFI_RESTORE rbp
|
||||
addq $7*8,%rsp
|
||||
addq $7*8, %rsp
|
||||
CFI_ADJUST_CFA_OFFSET -7*8
|
||||
ret
|
||||
CFI_RESTORE_STATE
|
||||
|
||||
/* Exception handlers. Very simple, zeroing is done in the wrappers */
|
||||
.Lbad_source:
|
||||
movq (%rsp),%rax
|
||||
testq %rax,%rax
|
||||
movq (%rsp), %rax
|
||||
testq %rax, %rax
|
||||
jz .Lende
|
||||
movl $-EFAULT,(%rax)
|
||||
movl $-EFAULT, (%rax)
|
||||
jmp .Lende
|
||||
|
||||
|
||||
.Lbad_dest:
|
||||
movq 8(%rsp),%rax
|
||||
testq %rax,%rax
|
||||
jz .Lende
|
||||
movl $-EFAULT,(%rax)
|
||||
movq 8(%rsp), %rax
|
||||
testq %rax, %rax
|
||||
jz .Lende
|
||||
movl $-EFAULT, (%rax)
|
||||
jmp .Lende
|
||||
CFI_ENDPROC
|
||||
ENDPROC(csum_partial_copy_generic)
|
||||
|
|
Загрузка…
Ссылка в новой задаче