x86-64: Fix memcpy() to support sizes of 4GB and above

While currently there doesn't appear to be any reachable in-tree
case where such large memory blocks may be passed to memcpy(),
we have already hit the problem in our Xen kernels. Just like
done recently for memset(), rather than working around it,
prevent others from falling into the same trap by fixing this
long-standing limitation.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/4F21846F020000780006F3FA@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Jan Beulich 2012-01-26 15:50:55 +00:00 коммит произвёл Ingo Molnar
Родитель 5d7244e7c9
Коммит 2ab560911a
1 изменённых файлов: 10 добавлений и 15 удалений

Просмотреть файл

@ -27,9 +27,8 @@
.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c:
movq %rdi, %rax
movl %edx, %ecx
shrl $3, %ecx
movq %rdx, %rcx
shrq $3, %rcx
andl $7, %edx
rep movsq
movl %edx, %ecx
@ -48,8 +47,7 @@
.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c_e:
movq %rdi, %rax
movl %edx, %ecx
movq %rdx, %rcx
rep movsb
ret
.Lmemcpy_e_e:
@ -60,10 +58,7 @@ ENTRY(memcpy)
CFI_STARTPROC
movq %rdi, %rax
/*
* Use 32bit CMP here to avoid long NOP padding.
*/
cmp $0x20, %edx
cmpq $0x20, %rdx
jb .Lhandle_tail
/*
@ -72,7 +67,7 @@ ENTRY(memcpy)
*/
cmp %dil, %sil
jl .Lcopy_backward
subl $0x20, %edx
subq $0x20, %rdx
.Lcopy_forward_loop:
subq $0x20, %rdx
@ -91,7 +86,7 @@ ENTRY(memcpy)
movq %r11, 3*8(%rdi)
leaq 4*8(%rdi), %rdi
jae .Lcopy_forward_loop
addq $0x20, %rdx
addl $0x20, %edx
jmp .Lhandle_tail
.Lcopy_backward:
@ -123,11 +118,11 @@ ENTRY(memcpy)
/*
* Calculate copy position to head.
*/
addq $0x20, %rdx
addl $0x20, %edx
subq %rdx, %rsi
subq %rdx, %rdi
.Lhandle_tail:
cmpq $16, %rdx
cmpl $16, %edx
jb .Lless_16bytes
/*
@ -144,7 +139,7 @@ ENTRY(memcpy)
retq
.p2align 4
.Lless_16bytes:
cmpq $8, %rdx
cmpl $8, %edx
jb .Lless_8bytes
/*
* Move data from 8 bytes to 15 bytes.
@ -156,7 +151,7 @@ ENTRY(memcpy)
retq
.p2align 4
.Lless_8bytes:
cmpq $4, %rdx
cmpl $4, %edx
jb .Lless_3bytes
/*