2019-06-04 11:11:33 +03:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
2005-11-01 22:52:24 +03:00
|
|
|
/*
|
|
|
|
* linux/arch/arm/lib/copy_to_user.S
|
|
|
|
*
|
|
|
|
* Author: Nicolas Pitre
|
|
|
|
* Created: Sep 29, 2005
|
|
|
|
* Copyright: MontaVista Software, Inc.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/linkage.h>
|
|
|
|
#include <asm/assembler.h>
|
2014-11-26 16:38:33 +03:00
|
|
|
#include <asm/unwind.h>
|
2005-11-01 22:52:24 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Prototype:
|
|
|
|
*
|
2015-08-19 13:02:28 +03:00
|
|
|
* size_t arm_copy_to_user(void *to, const void *from, size_t n)
|
2005-11-01 22:52:24 +03:00
|
|
|
*
|
|
|
|
* Purpose:
|
|
|
|
*
|
|
|
|
* copy a block to user memory from kernel memory
|
|
|
|
*
|
|
|
|
* Params:
|
|
|
|
*
|
|
|
|
* to = user memory
|
|
|
|
* from = kernel memory
|
|
|
|
* n = number of bytes to copy
|
|
|
|
*
|
|
|
|
* Return value:
|
|
|
|
*
|
|
|
|
* Number of bytes NOT copied.
|
|
|
|
*/
|
|
|
|
|
2009-07-24 15:32:57 +04:00
|
|
|
/*
 * Shift constant paired with the ldr1w macro below. It is not referenced
 * anywhere in this file; presumably the included copy_template.S consumes
 * it - TODO confirm against the template.
 */
#define LDR1W_SHIFT 0
|
|
|
|
|
2005-11-01 22:52:24 +03:00
|
|
|
/*
 * ldr1w - load one 32-bit word from [\ptr] into \reg, then advance \ptr
 * by 4 (post-indexed addressing).
 *
 * \abort is accepted but never used in the body: the load carries no fault
 * fixup. Per the header of this file the source buffer is kernel memory,
 * which is presumably why none is needed.
 * NOTE(review): W() is defined outside this file; presumably it selects the
 * wide encoding on Thumb-2 builds - confirm in <asm/assembler.h>.
 */
.macro ldr1w ptr reg abort
|
2009-07-24 15:32:57 +04:00
|
|
|
W(ldr) \reg, [\ptr], #4
|
2005-11-01 22:52:24 +03:00
|
|
|
.endm
|
|
|
|
|
|
|
|
/*
 * ldr4w - load four words starting at [\ptr] into \reg1..\reg4 with a
 * single ldmia, writing the advanced address back to \ptr.
 *
 * \abort is accepted but unused; no fault fixup is attached to the load.
 */
.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
|
|
|
|
ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}
|
|
|
|
.endm
|
|
|
|
|
|
|
|
/*
 * ldr8w - load eight words starting at [\ptr] into \reg1..\reg8 with a
 * single ldmia, writing the advanced address back to \ptr.
 *
 * \abort is accepted but unused; no fault fixup is attached to the load.
 */
.macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
|
|
|
|
ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
|
|
|
|
.endm
|
|
|
|
|
|
|
|
/*
 * ldr1b - load one byte from [\ptr] into \reg, then advance \ptr by 1.
 *
 * \cond is pasted onto the mnemonic as a condition code and defaults to
 * "al" (always). \abort is accepted but unused; no fault fixup is attached.
 */
.macro ldr1b ptr reg cond=al abort
|
2019-02-18 02:54:36 +03:00
|
|
|
ldrb\cond \reg, [\ptr], #1
|
2005-11-01 22:52:24 +03:00
|
|
|
.endm
|
|
|
|
|
ARM: 8812/1: Optimise copy_{from/to}_user for !CPU_USE_DOMAINS
ARMv6+ processors do not use CONFIG_CPU_USE_DOMAINS and use privileged
ldr/str instructions in copy_{from/to}_user. They are currently
unnecessarily using single ldr/str instructions and can use ldm/stm
instructions instead like memcpy does (but with appropriate fixup
tables).
This speeds up a "dd if=foo of=bar bs=32k" on a tmpfs filesystem by
about 4% on my Cortex-A9.
before:134217728 bytes (128.0MB) copied, 0.543848 seconds, 235.4MB/s
before:134217728 bytes (128.0MB) copied, 0.538610 seconds, 237.6MB/s
before:134217728 bytes (128.0MB) copied, 0.544356 seconds, 235.1MB/s
before:134217728 bytes (128.0MB) copied, 0.544364 seconds, 235.1MB/s
before:134217728 bytes (128.0MB) copied, 0.537130 seconds, 238.3MB/s
before:134217728 bytes (128.0MB) copied, 0.533443 seconds, 240.0MB/s
before:134217728 bytes (128.0MB) copied, 0.545691 seconds, 234.6MB/s
before:134217728 bytes (128.0MB) copied, 0.534695 seconds, 239.4MB/s
before:134217728 bytes (128.0MB) copied, 0.540561 seconds, 236.8MB/s
before:134217728 bytes (128.0MB) copied, 0.541025 seconds, 236.6MB/s
after:134217728 bytes (128.0MB) copied, 0.520445 seconds, 245.9MB/s
after:134217728 bytes (128.0MB) copied, 0.527846 seconds, 242.5MB/s
after:134217728 bytes (128.0MB) copied, 0.519510 seconds, 246.4MB/s
after:134217728 bytes (128.0MB) copied, 0.527231 seconds, 242.8MB/s
after:134217728 bytes (128.0MB) copied, 0.525030 seconds, 243.8MB/s
after:134217728 bytes (128.0MB) copied, 0.524236 seconds, 244.2MB/s
after:134217728 bytes (128.0MB) copied, 0.523659 seconds, 244.4MB/s
after:134217728 bytes (128.0MB) copied, 0.525018 seconds, 243.8MB/s
after:134217728 bytes (128.0MB) copied, 0.519249 seconds, 246.5MB/s
after:134217728 bytes (128.0MB) copied, 0.518527 seconds, 246.9MB/s
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
2018-11-09 12:09:48 +03:00
|
|
|
#ifdef CONFIG_CPU_USE_DOMAINS
|
|
|
|
|
|
|
|
/*
 * Shift constant paired with the strusr-based str1w macro used when
 * CONFIG_CPU_USE_DOMAINS is set: 0 for ARM kernels, 1 for Thumb-2 kernels.
 * It is not referenced in this file; presumably copy_template.S consumes it
 * and the Thumb-2 value reflects a different str1w expansion size - TODO
 * confirm against the template and the strusr definition.
 */
#ifndef CONFIG_THUMB2_KERNEL
|
|
|
|
#define STR1W_SHIFT 0
|
|
|
|
#else
|
|
|
|
#define STR1W_SHIFT 1
|
|
|
|
#endif
|
|
|
|
|
2005-11-01 22:52:24 +03:00
|
|
|
/*
 * str1w (CONFIG_CPU_USE_DOMAINS variant) - store the word in \reg through
 * \ptr by delegating to strusr with a size/increment argument of 4 and
 * \abort forwarded as the abort label.
 *
 * NOTE(review): strusr is defined outside this file; presumably it emits a
 * user-mode store with post-increment plus an exception-table entry that
 * branches to \abort on a fault - confirm in <asm/assembler.h>.
 */
.macro str1w ptr reg abort
|
2009-07-24 15:32:57 +04:00
|
|
|
strusr \reg, \ptr, 4, abort=\abort
|
2005-11-01 22:52:24 +03:00
|
|
|
.endm
|
|
|
|
|
|
|
|
/*
 * str8w (CONFIG_CPU_USE_DOMAINS variant) - store \reg1..\reg8 through \ptr
 * as eight consecutive str1w expansions, in register-argument order, each
 * one passing the same \abort label.
 */
.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
|
|
|
|
str1w \ptr, \reg1, \abort
|
|
|
|
str1w \ptr, \reg2, \abort
|
|
|
|
str1w \ptr, \reg3, \abort
|
|
|
|
str1w \ptr, \reg4, \abort
|
|
|
|
str1w \ptr, \reg5, \abort
|
|
|
|
str1w \ptr, \reg6, \abort
|
|
|
|
str1w \ptr, \reg7, \abort
|
|
|
|
str1w \ptr, \reg8, \abort
|
|
|
|
.endm
|
|
|
|
|
ARM: 8812/1: Optimise copy_{from/to}_user for !CPU_USE_DOMAINS
ARMv6+ processors do not use CONFIG_CPU_USE_DOMAINS and use privileged
ldr/str instructions in copy_{from/to}_user. They are currently
unnecessarily using single ldr/str instructions and can use ldm/stm
instructions instead like memcpy does (but with appropriate fixup
tables).
This speeds up a "dd if=foo of=bar bs=32k" on a tmpfs filesystem by
about 4% on my Cortex-A9.
before:134217728 bytes (128.0MB) copied, 0.543848 seconds, 235.4MB/s
before:134217728 bytes (128.0MB) copied, 0.538610 seconds, 237.6MB/s
before:134217728 bytes (128.0MB) copied, 0.544356 seconds, 235.1MB/s
before:134217728 bytes (128.0MB) copied, 0.544364 seconds, 235.1MB/s
before:134217728 bytes (128.0MB) copied, 0.537130 seconds, 238.3MB/s
before:134217728 bytes (128.0MB) copied, 0.533443 seconds, 240.0MB/s
before:134217728 bytes (128.0MB) copied, 0.545691 seconds, 234.6MB/s
before:134217728 bytes (128.0MB) copied, 0.534695 seconds, 239.4MB/s
before:134217728 bytes (128.0MB) copied, 0.540561 seconds, 236.8MB/s
before:134217728 bytes (128.0MB) copied, 0.541025 seconds, 236.6MB/s
after:134217728 bytes (128.0MB) copied, 0.520445 seconds, 245.9MB/s
after:134217728 bytes (128.0MB) copied, 0.527846 seconds, 242.5MB/s
after:134217728 bytes (128.0MB) copied, 0.519510 seconds, 246.4MB/s
after:134217728 bytes (128.0MB) copied, 0.527231 seconds, 242.8MB/s
after:134217728 bytes (128.0MB) copied, 0.525030 seconds, 243.8MB/s
after:134217728 bytes (128.0MB) copied, 0.524236 seconds, 244.2MB/s
after:134217728 bytes (128.0MB) copied, 0.523659 seconds, 244.4MB/s
after:134217728 bytes (128.0MB) copied, 0.525018 seconds, 243.8MB/s
after:134217728 bytes (128.0MB) copied, 0.519249 seconds, 246.5MB/s
after:134217728 bytes (128.0MB) copied, 0.518527 seconds, 246.9MB/s
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
2018-11-09 12:09:48 +03:00
|
|
|
#else
|
|
|
|
|
|
|
|
/*
 * Shift constant paired with the USERL-based str1w macro used when
 * CONFIG_CPU_USE_DOMAINS is not set. Not referenced in this file;
 * presumably consumed by copy_template.S - TODO confirm.
 */
#define STR1W_SHIFT 0
|
|
|
|
|
|
|
|
/*
 * str1w (!CONFIG_CPU_USE_DOMAINS variant) - store the word in \reg to
 * [\ptr] and advance \ptr by 4, with the store instruction wrapped in
 * USERL() together with the \abort label.
 *
 * NOTE(review): USERL and W() are defined outside this file; presumably
 * USERL records an exception-table entry so a fault on the store branches
 * to \abort - confirm in <asm/assembler.h>.
 */
.macro str1w ptr reg abort
|
|
|
|
USERL(\abort, W(str) \reg, [\ptr], #4)
|
|
|
|
.endm
|
|
|
|
|
|
|
|
/*
 * str8w (!CONFIG_CPU_USE_DOMAINS variant) - store \reg1..\reg8 starting at
 * [\ptr] with a single stmia, writing the advanced address back to \ptr.
 * The whole stmia is wrapped in USERL() together with the \abort label.
 *
 * NOTE(review): USERL is defined outside this file; presumably a fault
 * anywhere in the multi-word store is redirected to \abort - confirm.
 */
.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
|
|
|
|
USERL(\abort, stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8})
|
|
|
|
.endm
|
|
|
|
|
|
|
|
#endif /* CONFIG_CPU_USE_DOMAINS */
|
|
|
|
|
2005-11-01 22:52:24 +03:00
|
|
|
/*
 * str1b - store the byte in \reg through \ptr by delegating to strusr with
 * a size/increment argument of 1, the condition code \cond (default "al")
 * and \abort forwarded as the abort label.
 *
 * Unlike str1w/str8w, this macro is defined once and is used regardless of
 * CONFIG_CPU_USE_DOMAINS.
 * NOTE(review): strusr is defined outside this file - confirm its exact
 * expansion in <asm/assembler.h>.
 */
.macro str1b ptr reg cond=al abort
|
2009-07-24 15:32:57 +04:00
|
|
|
strusr \reg, \ptr, 1, \cond, abort=\abort
|
2005-11-01 22:52:24 +03:00
|
|
|
.endm
|
|
|
|
|
ARM: memcpy: use frame pointer as unwind anchor
The memcpy template is a bit unusual in the way it manages the stack
pointer: depending on the execution path through the function, the SP
assumes different values as different subsets of the register file are
preserved and restored again. This is problematic when it comes to EHABI
unwind info, as it is not instruction accurate, and does not allow
tracking the SP value as it changes.
Commit 279f487e0b471 ("ARM: 8225/1: Add unwinding support for memory
copy functions") addressed this by carving up the function in different
chunks as far as the unwinder is concerned, and keeping a set of unwind
directives for each of them, each corresponding with the state of the
stack pointer during execution of the chunk in question. This not only
duplicates unwind info unnecessarily, but it also complicates unwinding
the stack upon overflow.
Instead, let's do what the compiler does when the SP is updated halfway
through a function, which is to use a frame pointer and emit the
appropriate unwind directives to communicate this to the unwinder.
Note that Thumb-2 uses R7 for this, while ARM uses R11 aka FP. So let's
avoid touching R7 in the body of the template, so that Thumb-2 can use
it as the frame pointer. R11 was not modified in the first place.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Keith Packard <keithpac@amazon.com>
Tested-by: Marc Zyngier <maz@kernel.org>
Tested-by: Vladimir Murzin <vladimir.murzin@arm.com> # ARMv7M
2021-10-03 20:05:53 +03:00
|
|
|
/*
 * enter - push the bookkeeping frame plus the caller-supplied registers.
 *
 * r3 is cleared first, then r0, r2, r3 and \regs are pushed with one stmdb
 * (full-descending stack). Register lists are stored in ascending register
 * order, so - provided every register in \regs is numbered above r3 - the
 * three lowest words of the frame are:
 *   [sp, #0] = r0, [sp, #4] = r2, [sp, #8] = r3 (zero)
 * Given the prototype in the file header and the ARM calling convention,
 * r0 is the destination pointer and r2 the byte count on entry. The "exit"
 * macro and the fixup code at the end of this file both rely on this
 * layout. The UNWIND() annotation describes the same register list to the
 * unwinder.
 */
.macro enter regs:vararg
|
2005-11-01 22:52:24 +03:00
|
|
|
mov r3, #0
|
ARM: memcpy: use frame pointer as unwind anchor
The memcpy template is a bit unusual in the way it manages the stack
pointer: depending on the execution path through the function, the SP
assumes different values as different subsets of the register file are
preserved and restored again. This is problematic when it comes to EHABI
unwind info, as it is not instruction accurate, and does not allow
tracking the SP value as it changes.
Commit 279f487e0b471 ("ARM: 8225/1: Add unwinding support for memory
copy functions") addressed this by carving up the function in different
chunks as far as the unwinder is concerned, and keeping a set of unwind
directives for each of them, each corresponding with the state of the
stack pointer during execution of the chunk in question. This not only
duplicates unwind info unnecessarily, but it also complicates unwinding
the stack upon overflow.
Instead, let's do what the compiler does when the SP is updated halfway
through a function, which is to use a frame pointer and emit the
appropriate unwind directives to communicate this to the unwinder.
Note that Thumb-2 uses R7 for this, while ARM uses R11 aka FP. So let's
avoid touching R7 in the body of the template, so that Thumb-2 can use
it as the frame pointer. R11 was not modified in the first place.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Keith Packard <keithpac@amazon.com>
Tested-by: Marc Zyngier <maz@kernel.org>
Tested-by: Vladimir Murzin <vladimir.murzin@arm.com> # ARMv7M
2021-10-03 20:05:53 +03:00
|
|
|
UNWIND( .save {r0, r2, r3, \regs} )
|
|
|
|
stmdb sp!, {r0, r2, r3, \regs}
|
2005-11-01 22:52:24 +03:00
|
|
|
.endm
|
|
|
|
|
ARM: memcpy: use frame pointer as unwind anchor
The memcpy template is a bit unusual in the way it manages the stack
pointer: depending on the execution path through the function, the SP
assumes different values as different subsets of the register file are
preserved and restored again. This is problematic when it comes to EHABI
unwind info, as it is not instruction accurate, and does not allow
tracking the SP value as it changes.
Commit 279f487e0b471 ("ARM: 8225/1: Add unwinding support for memory
copy functions") addressed this by carving up the function in different
chunks as far as the unwinder is concerned, and keeping a set of unwind
directives for each of them, each corresponding with the state of the
stack pointer during execution of the chunk in question. This not only
duplicates unwind info unnecessarily, but it also complicates unwinding
the stack upon overflow.
Instead, let's do what the compiler does when the SP is updated halfway
through a function, which is to use a frame pointer and emit the
appropriate unwind directives to communicate this to the unwinder.
Note that Thumb-2 uses R7 for this, while ARM uses R11 aka FP. So let's
avoid touching R7 in the body of the template, so that Thumb-2 can use
it as the frame pointer. R11 was not modified in the first place.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Keith Packard <keithpac@amazon.com>
Tested-by: Marc Zyngier <maz@kernel.org>
Tested-by: Vladimir Murzin <vladimir.murzin@arm.com> # ARMv7M
2021-10-03 20:05:53 +03:00
|
|
|
/*
 * exit - tear down the frame built by "enter" and restore \regs.
 *
 * The add skips the two lowest words of the frame (the saved r0 and r2 in
 * the layout produced by "enter"). The following ldmfd therefore pops the
 * third word - the zero that "enter" stored from r3 - into r0, and then
 * restores \regs. On this path r0 thus ends up as 0, matching the
 * "number of bytes NOT copied" return value described in the file header
 * for a fully successful copy.
 */
.macro exit regs:vararg
|
2005-11-01 22:52:24 +03:00
|
|
|
add sp, sp, #8
|
ARM: memcpy: use frame pointer as unwind anchor
The memcpy template is a bit unusual in the way it manages the stack
pointer: depending on the execution path through the function, the SP
assumes different values as different subsets of the register file are
preserved and restored again. This is problematic when it comes to EHABI
unwind info, as it is not instruction accurate, and does not allow
tracking the SP value as it changes.
Commit 279f487e0b471 ("ARM: 8225/1: Add unwinding support for memory
copy functions") addressed this by carving up the function in different
chunks as far as the unwinder is concerned, and keeping a set of unwind
directives for each of them, each corresponding with the state of the
stack pointer during execution of the chunk in question. This not only
duplicates unwind info unnecessarily, but it also complicates unwinding
the stack upon overflow.
Instead, let's do what the compiler does when the SP is updated halfway
through a function, which is to use a frame pointer and emit the
appropriate unwind directives to communicate this to the unwinder.
Note that Thumb-2 uses R7 for this, while ARM uses R11 aka FP. So let's
avoid touching R7 in the body of the template, so that Thumb-2 can use
it as the frame pointer. R11 was not modified in the first place.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Keith Packard <keithpac@amazon.com>
Tested-by: Marc Zyngier <maz@kernel.org>
Tested-by: Vladimir Murzin <vladimir.murzin@arm.com> # ARMv7M
2021-10-03 20:05:53 +03:00
|
|
|
ldmfd sp!, {r0, \regs}
|
2005-11-01 22:52:24 +03:00
|
|
|
.endm
|
|
|
|
|
|
|
|
.text
|
|
|
|
|
2009-03-09 05:34:45 +03:00
|
|
|
/*
 * __copy_to_user_std / arm_copy_to_user
 *
 * Both symbols label the same entry point; arm_copy_to_user is declared
 * through WEAK(). The function body itself is the expansion of
 * copy_template.S, which is expected to be built from the load, store,
 * enter and exit macros defined earlier in this file.
 *
 * With CONFIG_CPU_SPECTRE enabled, r3 is first loaded with TASK_SIZE and
 * passed, together with r0 (destination) and r2 (byte count), to
 * uaccess_mask_range_ptr, with ip as a further operand.
 * NOTE(review): uaccess_mask_range_ptr is defined outside this file;
 * presumably it sanitises the user pointer/length against speculative
 * out-of-range accesses and uses ip as scratch - confirm.
 */
ENTRY(__copy_to_user_std)
|
2015-08-19 13:02:28 +03:00
|
|
|
WEAK(arm_copy_to_user)
|
2018-09-11 12:15:12 +03:00
|
|
|
#ifdef CONFIG_CPU_SPECTRE
|
2021-08-11 10:30:26 +03:00
|
|
|
ldr r3, =TASK_SIZE
|
2018-09-11 12:15:12 +03:00
|
|
|
uaccess_mask_range_ptr r0, r2, r3, ip
|
|
|
|
#endif
|
2005-11-01 22:52:24 +03:00
|
|
|
|
|
|
|
#include "copy_template.S"
|
|
|
|
|
2015-08-19 13:02:28 +03:00
|
|
|
ENDPROC(arm_copy_to_user)
|
2010-05-07 13:52:32 +04:00
|
|
|
ENDPROC(__copy_to_user_std)
|
2008-08-28 14:22:32 +04:00
|
|
|
|
2015-03-24 12:41:09 +03:00
|
|
|
/*
 * Fault fixup code, emitted into the .text.fixup section.
 *
 * copy_abort_preamble and copy_abort_end are macros defined outside this
 * file; presumably the preamble provides the abort labels referenced by
 * the store macros and leaves sp pointing at the frame pushed by "enter" -
 * TODO confirm.
 *
 * Under that assumption the ldmfd pops the three lowest words of that
 * frame: r1 = destination pointer at entry, r2 = byte count at entry,
 * r3 = the zero word. The two arithmetic instructions then compute
 *   r0 = r2 - (r0 - r1)
 * i.e. the original count minus the distance r0 has advanced from the
 * original destination. If r0 holds the current/faulting destination
 * address at this point (to be confirmed against the preamble), that is
 * the "number of bytes NOT copied" return value from the file header.
 */
.pushsection .text.fixup,"ax"
|
2005-11-01 22:52:24 +03:00
|
|
|
.align 0
|
|
|
|
copy_abort_preamble
|
|
|
|
ldmfd sp!, {r1, r2, r3}
|
|
|
|
sub r0, r0, r1
|
|
|
|
rsb r0, r0, r2
|
|
|
|
copy_abort_end
|
2010-04-19 13:15:03 +04:00
|
|
|
.popsection
|