powerpc/pseries: Use jump labels for hcall tracepoints
hcall tracepoints add quite a few instructions to our hcall path:

plpar_hcall:
        mr      r2,r2
        mfcr    r0
        stw     r0,8(r1)
        b       164             <---- start
        ld      r12,0(r2)
        std     r12,32(r1)
        cmpdi   r12,0
        beq     164             <---- end
        ...

We have an unconditional branch that gets nopped out during boot and
a load/compare/branch. We also store the tracepoint value to the
stack for the hcall_exit path to use.

By using jump labels we can simplify this to just a single nop that
gets replaced with a branch when the tracepoint is enabled:

plpar_hcall:
        mr      r2,r2
        mfcr    r0
        stw     r0,8(r1)
        nop                     <----
        ...

If jump labels are not enabled, we fall back to the old method.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
This commit is contained in:
Родитель
8fa5d4547e
Коммит
cc1adb5f32
|
@ -10,6 +10,7 @@
|
|||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <asm/feature-fixups.h>
|
||||
|
@ -42,4 +43,12 @@ struct jump_entry {
|
|||
jump_label_t key;
|
||||
};
|
||||
|
||||
#else
|
||||
/*
 * Assembly-side static branch site.
 *
 * Emits a single nop at local label 1098 and records an entry in the
 * __jump_table section (via FTR_ENTRY_LONG) holding the address of that
 * nop, the branch target LABEL and the controlling KEY.
 *
 * NOTE(review): the nop is presumably rewritten into a branch to LABEL by
 * the jump label patching code when KEY is enabled -- confirm against the
 * C-side jump_entry handling.
 */
#define ARCH_STATIC_BRANCH(LABEL, KEY) \
|
||||
1098: nop; \
|
||||
.pushsection __jump_table, "aw"; \
|
||||
FTR_ENTRY_LONG 1098b, LABEL, KEY; \
|
||||
.popsection
|
||||
#endif
|
||||
|
||||
#endif /* _ASM_POWERPC_JUMP_LABEL_H */
|
||||
|
|
|
@ -12,9 +12,13 @@
|
|||
#include <asm/ppc_asm.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/jump_label.h>
|
||||
|
||||
.section ".text"
|
||||
|
||||
#ifdef CONFIG_TRACEPOINTS
|
||||
|
||||
#ifndef CONFIG_JUMP_LABEL
|
||||
.section ".toc","aw"
|
||||
|
||||
.globl hcall_tracepoint_refcount
|
||||
|
@ -22,21 +26,13 @@ hcall_tracepoint_refcount:
|
|||
.llong 0
|
||||
|
||||
.section ".text"
|
||||
#endif
|
||||
|
||||
/*
|
||||
* precall must preserve all registers. use unused STK_PARAM()
|
||||
* areas to save snapshots and opcode. We branch around this
|
||||
* in early init (eg when populating the MMU hashtable) by using an
|
||||
* unconditional cpu feature.
|
||||
* areas to save snapshots and opcode.
|
||||
*/
|
||||
#define HCALL_INST_PRECALL(FIRST_REG) \
|
||||
BEGIN_FTR_SECTION; \
|
||||
b 1f; \
|
||||
END_FTR_SECTION(0, 1); \
|
||||
ld r12,hcall_tracepoint_refcount@toc(r2); \
|
||||
std r12,32(r1); \
|
||||
cmpdi r12,0; \
|
||||
beq+ 1f; \
|
||||
mflr r0; \
|
||||
std r3,STK_PARAM(R3)(r1); \
|
||||
std r4,STK_PARAM(R4)(r1); \
|
||||
|
@ -60,22 +56,13 @@ END_FTR_SECTION(0, 1); \
|
|||
ld r8,STK_PARAM(R8)(r1); \
|
||||
ld r9,STK_PARAM(R9)(r1); \
|
||||
ld r10,STK_PARAM(R10)(r1); \
|
||||
mtlr r0; \
|
||||
1:
|
||||
mtlr r0
|
||||
|
||||
/*
|
||||
* postcall is performed immediately before function return which
|
||||
* allows liberal use of volatile registers. We branch around this
|
||||
* in early init (eg when populating the MMU hashtable) by using an
|
||||
* unconditional cpu feature.
|
||||
* allows liberal use of volatile registers.
|
||||
*/
|
||||
#define __HCALL_INST_POSTCALL \
|
||||
BEGIN_FTR_SECTION; \
|
||||
b 1f; \
|
||||
END_FTR_SECTION(0, 1); \
|
||||
ld r12,32(r1); \
|
||||
cmpdi r12,0; \
|
||||
beq+ 1f; \
|
||||
mflr r0; \
|
||||
ld r6,STK_PARAM(R3)(r1); \
|
||||
std r3,STK_PARAM(R3)(r1); \
|
||||
|
@ -87,8 +74,7 @@ END_FTR_SECTION(0, 1); \
|
|||
addi r1,r1,STACK_FRAME_OVERHEAD; \
|
||||
ld r0,16(r1); \
|
||||
ld r3,STK_PARAM(R3)(r1); \
|
||||
mtlr r0; \
|
||||
1:
|
||||
mtlr r0
|
||||
|
||||
#define HCALL_INST_POSTCALL_NORETS \
|
||||
li r5,0; \
|
||||
|
@ -98,37 +84,62 @@ END_FTR_SECTION(0, 1); \
|
|||
mr r5,BUFREG; \
|
||||
__HCALL_INST_POSTCALL
|
||||
|
||||
#ifdef CONFIG_JUMP_LABEL
|
||||
/*
 * CONFIG_JUMP_LABEL variant: the tracepoint check is just the static
 * branch site produced by ARCH_STATIC_BRANCH, keyed on
 * hcall_tracepoint_key and targeting LABEL.
 */
#define HCALL_BRANCH(LABEL) \
|
||||
ARCH_STATIC_BRANCH(LABEL, hcall_tracepoint_key)
|
||||
#else
|
||||
|
||||
/*
|
||||
* We branch around this in early init (eg when populating the MMU
|
||||
* hashtable) by using an unconditional cpu feature.
|
||||
*/
|
||||
/*
 * Variant used when CONFIG_JUMP_LABEL is not set: load
 * hcall_tracepoint_refcount from the TOC into r12, save that value at
 * 32(r1), and branch to LABEL when it is non-zero. The feature section at
 * the top holds a branch to local label 1 that skips the whole check.
 */
#define HCALL_BRANCH(LABEL) \
|
||||
BEGIN_FTR_SECTION; \
|
||||
b 1f; \
|
||||
END_FTR_SECTION(0, 1); \
|
||||
ld r12,hcall_tracepoint_refcount@toc(r2); \
|
||||
std r12,32(r1); \
|
||||
cmpdi r12,0; \
|
||||
bne- LABEL; \
|
||||
1:
|
||||
#endif
|
||||
|
||||
#else
|
||||
/*
 * Stub definitions: every hcall instrumentation macro expands to nothing.
 * NOTE(review): this appears to be the !CONFIG_TRACEPOINTS branch of the
 * enclosing conditional -- confirm against the full file.
 */
#define HCALL_INST_PRECALL(FIRST_ARG)
|
||||
#define HCALL_INST_POSTCALL_NORETS
|
||||
#define HCALL_INST_POSTCALL(BUFREG)
|
||||
#define HCALL_BRANCH(LABEL)
|
||||
#endif
|
||||
|
||||
.text
|
||||
|
||||
/*
 * plpar_hcall_norets: hcall entry point with no return buffer.
 *
 * Saves CR at 8(r1), lets HCALL_BRANCH divert to plpar_hcall_norets_trace,
 * otherwise invokes the hypervisor, restores CR and returns the status
 * in r3.
 *
 * NOTE(review): HCALL_INST_PRECALL/HCALL_INST_POSTCALL_NORETS are listed
 * here next to HCALL_BRANCH; this listing seems to come from a diff
 * rendering without +/- markers, so they may be removed lines -- confirm
 * against the applied file.
 */
_GLOBAL_TOC(plpar_hcall_norets)
|
||||
HMT_MEDIUM
|
||||
|
||||
mfcr r0
|
||||
stw r0,8(r1)
|
||||
|
||||
HCALL_INST_PRECALL(R4)
|
||||
|
||||
HCALL_BRANCH(plpar_hcall_norets_trace)
|
||||
HVSC /* invoke the hypervisor */
|
||||
|
||||
HCALL_INST_POSTCALL_NORETS
|
||||
|
||||
lwz r0,8(r1)
|
||||
mtcrf 0xff,r0
|
||||
blr /* return r3 = status */
|
||||
|
||||
#ifdef CONFIG_TRACEPOINTS
|
||||
/*
 * Traced path for plpar_hcall_norets, reached through HCALL_BRANCH.
 * Wraps the hypervisor call in the precall/postcall instrumentation
 * macros, then restores CR from 8(r1) and returns.
 */
plpar_hcall_norets_trace:
|
||||
HCALL_INST_PRECALL(R4)
|
||||
HVSC
|
||||
HCALL_INST_POSTCALL_NORETS
|
||||
lwz r0,8(r1) /* CR value saved on entry */
|
||||
mtcrf 0xff,r0
|
||||
blr
|
||||
#endif
|
||||
|
||||
_GLOBAL_TOC(plpar_hcall)
|
||||
HMT_MEDIUM
|
||||
|
||||
mfcr r0
|
||||
stw r0,8(r1)
|
||||
|
||||
HCALL_INST_PRECALL(R5)
|
||||
HCALL_BRANCH(plpar_hcall_trace)
|
||||
|
||||
std r4,STK_PARAM(R4)(r1) /* Save ret buffer */
|
||||
|
||||
|
@ -147,12 +158,40 @@ _GLOBAL_TOC(plpar_hcall)
|
|||
std r6, 16(r12)
|
||||
std r7, 24(r12)
|
||||
|
||||
lwz r0,8(r1)
|
||||
mtcrf 0xff,r0
|
||||
|
||||
blr /* return r3 = status */
|
||||
|
||||
#ifdef CONFIG_TRACEPOINTS
|
||||
/*
 * Traced path for plpar_hcall, reached through HCALL_BRANCH.
 *
 * Runs the precall instrumentation, stashes the return-buffer pointer
 * (r4) in its stack parameter slot, shifts r5..r10 down into r4..r9,
 * invokes the hypervisor, copies r4..r7 into the return buffer, runs the
 * postcall instrumentation, restores CR and returns.
 *
 * NOTE(review): two consecutive blr lines end this listing, so the second
 * is unreachable; one may be a removed line from a diff rendering --
 * confirm against the applied file.
 */
plpar_hcall_trace:
|
||||
HCALL_INST_PRECALL(R5)
|
||||
|
||||
std r4,STK_PARAM(R4)(r1) /* save ret buffer pointer */
|
||||
mr r0,r4
|
||||
|
||||
mr r4,r5
|
||||
mr r5,r6
|
||||
mr r6,r7
|
||||
mr r7,r8
|
||||
mr r8,r9
|
||||
mr r9,r10
|
||||
|
||||
HVSC
|
||||
|
||||
ld r12,STK_PARAM(R4)(r1) /* reload ret buffer pointer */
|
||||
std r4,0(r12)
|
||||
std r5,8(r12)
|
||||
std r6,16(r12)
|
||||
std r7,24(r12)
|
||||
|
||||
HCALL_INST_POSTCALL(r12)
|
||||
|
||||
lwz r0,8(r1)
|
||||
mtcrf 0xff,r0
|
||||
|
||||
blr /* return r3 = status */
|
||||
blr
|
||||
#endif
|
||||
|
||||
/*
|
||||
* plpar_hcall_raw can be called in real mode. kexec/kdump need some
|
||||
|
@ -194,7 +233,7 @@ _GLOBAL_TOC(plpar_hcall9)
|
|||
mfcr r0
|
||||
stw r0,8(r1)
|
||||
|
||||
HCALL_INST_PRECALL(R5)
|
||||
HCALL_BRANCH(plpar_hcall9_trace)
|
||||
|
||||
std r4,STK_PARAM(R4)(r1) /* Save ret buffer */
|
||||
|
||||
|
@ -222,12 +261,49 @@ _GLOBAL_TOC(plpar_hcall9)
|
|||
std r11,56(r12)
|
||||
std r0, 64(r12)
|
||||
|
||||
lwz r0,8(r1)
|
||||
mtcrf 0xff,r0
|
||||
|
||||
blr /* return r3 = status */
|
||||
|
||||
#ifdef CONFIG_TRACEPOINTS
|
||||
/*
 * Traced path for plpar_hcall9, reached through HCALL_BRANCH.
 *
 * Same shape as the plpar_hcall traced path, but three further arguments
 * are loaded from the stack parameter area into r10..r12 before the
 * hypervisor call, and nine values (r4..r11 plus the post-call r12,
 * carried in r0) are stored to the return buffer at offsets 0..64.
 *
 * NOTE(review): two consecutive blr lines end this listing, so the second
 * is unreachable; one may be a removed line from a diff rendering --
 * confirm against the applied file.
 */
plpar_hcall9_trace:
|
||||
HCALL_INST_PRECALL(R5)
|
||||
|
||||
std r4,STK_PARAM(R4)(r1) /* save ret buffer pointer */
|
||||
mr r0,r4
|
||||
|
||||
mr r4,r5
|
||||
mr r5,r6
|
||||
mr r6,r7
|
||||
mr r7,r8
|
||||
mr r8,r9
|
||||
mr r9,r10
|
||||
ld r10,STK_PARAM(R11)(r1)
|
||||
ld r11,STK_PARAM(R12)(r1)
|
||||
ld r12,STK_PARAM(R13)(r1)
|
||||
|
||||
HVSC
|
||||
|
||||
mr r0,r12 /* keep r12 result before r12 is reused as buffer pointer */
|
||||
ld r12,STK_PARAM(R4)(r1)
|
||||
std r4,0(r12)
|
||||
std r5,8(r12)
|
||||
std r6,16(r12)
|
||||
std r7,24(r12)
|
||||
std r8,32(r12)
|
||||
std r9,40(r12)
|
||||
std r10,48(r12)
|
||||
std r11,56(r12)
|
||||
std r0,64(r12)
|
||||
|
||||
HCALL_INST_POSTCALL(r12)
|
||||
|
||||
lwz r0,8(r1)
|
||||
mtcrf 0xff,r0
|
||||
|
||||
blr /* return r3 = status */
|
||||
blr
|
||||
#endif
|
||||
|
||||
/* See plpar_hcall_raw to see why this is needed */
|
||||
_GLOBAL(plpar_hcall9_raw)
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include <linux/dma-mapping.h>
|
||||
#include <linux/console.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/static_key.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/page.h>
|
||||
|
@ -649,6 +650,19 @@ EXPORT_SYMBOL(arch_free_page);
|
|||
#endif
|
||||
|
||||
#ifdef CONFIG_TRACEPOINTS
|
||||
#ifdef CONFIG_JUMP_LABEL
|
||||
/*
 * Static key used for the hcall tracepoint branch when CONFIG_JUMP_LABEL
 * is set; raised and lowered by hcall_tracepoint_regfunc() and
 * hcall_tracepoint_unregfunc().
 */
struct static_key hcall_tracepoint_key = STATIC_KEY_INIT;
|
||||
|
||||
/*
 * Takes one reference on hcall_tracepoint_key.
 * NOTE(review): presumably called when an hcall tracepoint is
 * registered -- confirm against the tracepoint definition.
 */
void hcall_tracepoint_regfunc(void)
|
||||
{
|
||||
static_key_slow_inc(&hcall_tracepoint_key);
|
||||
}
|
||||
|
||||
/*
 * Drops one reference on hcall_tracepoint_key.
 * NOTE(review): presumably called when an hcall tracepoint is
 * unregistered -- confirm against the tracepoint definition.
 */
void hcall_tracepoint_unregfunc(void)
|
||||
{
|
||||
static_key_slow_dec(&hcall_tracepoint_key);
|
||||
}
|
||||
#else
|
||||
/*
|
||||
* We optimise our hcall path by placing hcall_tracepoint_refcount
|
||||
* directly in the TOC so we can check if the hcall tracepoints are
|
||||
|
@ -658,13 +672,6 @@ EXPORT_SYMBOL(arch_free_page);
|
|||
/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
|
||||
extern long hcall_tracepoint_refcount;
|
||||
|
||||
/*
|
||||
* Since the tracing code might execute hcalls we need to guard against
|
||||
* recursion. One example of this are spinlocks calling H_YIELD on
|
||||
* shared processor partitions.
|
||||
*/
|
||||
static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
|
||||
|
||||
void hcall_tracepoint_regfunc(void)
|
||||
{
|
||||
hcall_tracepoint_refcount++;
|
||||
|
@ -674,6 +681,15 @@ void hcall_tracepoint_unregfunc(void)
|
|||
{
|
||||
hcall_tracepoint_refcount--;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Since the tracing code might execute hcalls we need to guard against
|
||||
* recursion. One example of this are spinlocks calling H_YIELD on
|
||||
* shared processor partitions.
|
||||
*/
|
||||
static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
|
||||
|
||||
|
||||
void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
|
||||
{
|
||||
|
|
Загрузка…
Ссылка в новой задаче