powerpc/8xx: Only perform perf counting when perf is in use.

In TLB miss handlers, updating the perf counter is only useful
when performing a perf analysis. As it has a noticeable overhead,
let's only do it when needed.

In order to do so, the exit of the miss handlers will be patched
when starting/stopping 'perf': the first register restore
instruction of each exit point will be replaced by a jump to
the counting code.

Once this is done, CONFIG_PPC_8xx_PERF_EVENT becomes pointless: the
feature no longer adds any overhead when perf is not in use, so the
option is removed.
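
For illustration, a minimal C sketch of that toggling, modelled on the
8xx-pmu.c changes below (itlb_perf_enable()/itlb_perf_disable() are
made-up names for this sketch; the real logic sits in mpc8xx_pmu_add()
and mpc8xx_pmu_del()):

	/* Sketch only: symbols as exported by head_8xx.S / 8xx-pmu.c */
	extern unsigned int itlb_miss_exit_1, itlb_miss_perf;
	static atomic_t itlb_miss_ref;

	static void itlb_perf_enable(void)
	{
		/* First user: branch the exit point to the counting code */
		if (atomic_inc_return(&itlb_miss_ref) == 1)
			patch_branch(&itlb_miss_exit_1,
				     (unsigned long)&itlb_miss_perf, 0);
	}

	static void itlb_perf_disable(void)
	{
		/* Last user: restore the original 'mfspr r10, SPRN_SPRG_SCRATCH0' */
		unsigned int insn = PPC_INST_MFSPR | __PPC_RS(R10) |
				    __PPC_SPR(SPRN_SPRG_SCRATCH0);

		if (atomic_dec_return(&itlb_miss_ref) == 0)
			patch_instruction(&itlb_miss_exit_1, insn);
	}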

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Christophe Leroy 2018-01-12 13:45:23 +01:00, committed by Michael Ellerman
Parent bb9b5a8332
Commit cd99ddbea2
6 changed files with 88 additions and 32 deletions

arch/powerpc/include/asm/ppc-opcode.h

@@ -236,6 +236,7 @@
#define PPC_INST_RFCI 0x4c000066
#define PPC_INST_RFDI 0x4c00004e
#define PPC_INST_RFMCI 0x4c00004c
#define PPC_INST_MFSPR 0x7c0002a6
#define PPC_INST_MFSPR_DSCR 0x7c1102a6
#define PPC_INST_MFSPR_DSCR_MASK 0xfc1ffffe
#define PPC_INST_MTSPR_DSCR 0x7c1103a6
@@ -383,6 +384,7 @@
#define __PPC_ME64(s) __PPC_MB64(s)
#define __PPC_BI(s) (((s) & 0x1f) << 16)
#define __PPC_CT(t) (((t) & 0x0f) << 21)
#define __PPC_SPR(r) ((((r) & 0x1f) << 16) | ((((r) >> 5) & 0x1f) << 11))
/*
* Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a

arch/powerpc/kernel/entry_32.S

@@ -211,7 +211,7 @@ transfer_to_handler_cont:
mflr r9
lwz r11,0(r9) /* virtual address of handler */
lwz r9,4(r9) /* where to go when done */
#ifdef CONFIG_PPC_8xx_PERF_EVENT
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
@@ -301,7 +301,7 @@ stack_ovf:
lis r9,StackOverflow@ha
addi r9,r9,StackOverflow@l
LOAD_MSR_KERNEL(r10,MSR_KERNEL)
#ifdef CONFIG_PPC_8xx_PERF_EVENT
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
mtspr SPRN_SRR0,r9
@@ -430,7 +430,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
lwz r7,_NIP(r1)
lwz r2,GPR2(r1)
lwz r1,GPR1(r1)
#ifdef CONFIG_PPC_8xx_PERF_EVENT
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
mtspr SPRN_SRR0,r7
@@ -727,7 +727,7 @@ fast_exception_return:
lwz r10,_LINK(r11)
mtlr r10
REST_GPR(10, r11)
#ifdef CONFIG_PPC_8xx_PERF_EVENT
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
mtspr SPRN_SRR1,r9
@@ -978,7 +978,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
.globl exc_exit_restart
exc_exit_restart:
lwz r12,_NIP(r1)
#ifdef CONFIG_PPC_8xx_PERF_EVENT
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
mtspr SPRN_SRR0,r12

arch/powerpc/kernel/head_8xx.S

@@ -304,12 +304,6 @@ InstructionTLBMiss:
#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
mtspr SPRN_SPRG_SCRATCH2, r12
#endif
#ifdef CONFIG_PPC_8xx_PERF_EVENT
lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha
lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
addi r11, r11, 1
stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
#endif
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
@@ -392,6 +386,20 @@ _ENTRY(ITLBMiss_cmp)
mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
/* Restore registers */
_ENTRY(itlb_miss_exit_1)
mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
mfspr r12, SPRN_SPRG_SCRATCH2
#endif
rfi
#ifdef CONFIG_PERF_EVENTS
_ENTRY(itlb_miss_perf)
lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha
lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
addi r11, r11, 1
stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
#endif
mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
@@ -429,12 +437,6 @@ DataStoreTLBMiss:
mtspr SPRN_SPRG_SCRATCH0, r10
mtspr SPRN_SPRG_SCRATCH1, r11
mtspr SPRN_SPRG_SCRATCH2, r12
#ifdef CONFIG_PPC_8xx_PERF_EVENT
lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
addi r11, r11, 1
stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
#endif
mfcr r12
/* If we are faulting a kernel address, we have to use the
@@ -526,6 +528,18 @@ _ENTRY(DTLBMiss_jmp)
/* Restore registers */
mtspr SPRN_DAR, r11 /* Tag DAR */
_ENTRY(dtlb_miss_exit_1)
mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2
rfi
#ifdef CONFIG_PERF_EVENTS
_ENTRY(dtlb_miss_perf)
lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
addi r11, r11, 1
stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
#endif
mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2
@@ -635,7 +649,7 @@ DataBreakpoint:
mfspr r11, SPRN_SPRG_SCRATCH1
rfi
#ifdef CONFIG_PPC_8xx_PERF_EVENT
#ifdef CONFIG_PERF_EVENTS
. = 0x1d00
InstructionBreakpoint:
mtspr SPRN_SPRG_SCRATCH0, r10
@@ -675,6 +689,7 @@ DTLBMissIMMR:
li r11, RPN_PATTERN
mtspr SPRN_DAR, r11 /* Tag DAR */
_ENTRY(dtlb_miss_exit_2)
mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2
@@ -692,6 +707,7 @@ DTLBMissLinear:
li r11, RPN_PATTERN
mtspr SPRN_DAR, r11 /* Tag DAR */
_ENTRY(dtlb_miss_exit_3)
mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2
@@ -708,6 +724,7 @@ ITLBMissLinear:
_PAGE_PRESENT
mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
_ENTRY(itlb_miss_exit_2)
mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2
@@ -1039,7 +1056,7 @@ initial_mmu:
#endif
/* Disable debug mode entry on breakpoints */
mfspr r8, SPRN_DER
#ifdef CONFIG_PPC_8xx_PERF_EVENT
#ifdef CONFIG_PERF_EVENTS
rlwinm r8, r8, 0, ~0xc
#else
rlwinm r8, r8, 0, ~0x8
@@ -1072,7 +1089,7 @@ swapper_pg_dir:
abatron_pteptrs:
.space 8
#ifdef CONFIG_PPC_8xx_PERF_EVENT
#ifdef CONFIG_PERF_EVENTS
.globl itlb_miss_counter
itlb_miss_counter:
.space 4

arch/powerpc/perf/8xx-pmu.c

@@ -18,6 +18,7 @@
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/ptrace.h>
#include <asm/code-patching.h>
#define PERF_8xx_ID_CPU_CYCLES 1
#define PERF_8xx_ID_HW_INSTRUCTIONS 2
@@ -30,8 +31,13 @@
extern unsigned long itlb_miss_counter, dtlb_miss_counter;
extern atomic_t instruction_counter;
extern unsigned int itlb_miss_perf, dtlb_miss_perf;
extern unsigned int itlb_miss_exit_1, itlb_miss_exit_2;
extern unsigned int dtlb_miss_exit_1, dtlb_miss_exit_2, dtlb_miss_exit_3;
static atomic_t insn_ctr_ref;
static atomic_t itlb_miss_ref;
static atomic_t dtlb_miss_ref;
static s64 get_insn_ctr(void)
{
@@ -96,9 +102,24 @@ static int mpc8xx_pmu_add(struct perf_event *event, int flags)
val = get_insn_ctr();
break;
case PERF_8xx_ID_ITLB_LOAD_MISS:
if (atomic_inc_return(&itlb_miss_ref) == 1) {
unsigned long target = (unsigned long)&itlb_miss_perf;
patch_branch(&itlb_miss_exit_1, target, 0);
#ifndef CONFIG_PIN_TLB_TEXT
patch_branch(&itlb_miss_exit_2, target, 0);
#endif
}
val = itlb_miss_counter;
break;
case PERF_8xx_ID_DTLB_LOAD_MISS:
if (atomic_inc_return(&dtlb_miss_ref) == 1) {
unsigned long target = (unsigned long)&dtlb_miss_perf;
patch_branch(&dtlb_miss_exit_1, target, 0);
patch_branch(&dtlb_miss_exit_2, target, 0);
patch_branch(&dtlb_miss_exit_3, target, 0);
}
val = dtlb_miss_counter;
break;
}
@@ -143,13 +164,36 @@ static void mpc8xx_pmu_read(struct perf_event *event)
static void mpc8xx_pmu_del(struct perf_event *event, int flags)
{
/* mfspr r10, SPRN_SPRG_SCRATCH0 */
unsigned int insn = PPC_INST_MFSPR | __PPC_RS(R10) |
__PPC_SPR(SPRN_SPRG_SCRATCH0);
mpc8xx_pmu_read(event);
if (event_type(event) != PERF_8xx_ID_HW_INSTRUCTIONS)
return;
/* If it was the last user, stop counting to avoid useless overhead */
if (atomic_dec_return(&insn_ctr_ref) == 0)
mtspr(SPRN_ICTRL, 7);
switch (event_type(event)) {
case PERF_8xx_ID_CPU_CYCLES:
break;
case PERF_8xx_ID_HW_INSTRUCTIONS:
if (atomic_dec_return(&insn_ctr_ref) == 0)
mtspr(SPRN_ICTRL, 7);
break;
case PERF_8xx_ID_ITLB_LOAD_MISS:
if (atomic_dec_return(&itlb_miss_ref) == 0) {
patch_instruction(&itlb_miss_exit_1, insn);
#ifndef CONFIG_PIN_TLB_TEXT
patch_instruction(&itlb_miss_exit_2, insn);
#endif
}
break;
case PERF_8xx_ID_DTLB_LOAD_MISS:
if (atomic_dec_return(&dtlb_miss_ref) == 0) {
patch_instruction(&dtlb_miss_exit_1, insn);
patch_instruction(&dtlb_miss_exit_2, insn);
patch_instruction(&dtlb_miss_exit_3, insn);
}
break;
}
}
static struct pmu mpc8xx_pmu = {

arch/powerpc/perf/Makefile

@@ -15,7 +15,7 @@ obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o
obj-$(CONFIG_PPC_8xx_PERF_EVENT) += 8xx-pmu.o
obj-$(CONFIG_PPC_8xx) += 8xx-pmu.o
obj-$(CONFIG_PPC64) += $(obj64-y)
obj-$(CONFIG_PPC32) += $(obj32-y)

arch/powerpc/platforms/Kconfig.cputype

@@ -167,13 +167,6 @@ config PPC_FPU
bool
default y if PPC64
config PPC_8xx_PERF_EVENT
bool "PPC 8xx perf events"
depends on PPC_8xx && PERF_EVENTS
help
This is Performance Events support for PPC 8xx. The 8xx doesn't
have a PMU but some events are emulated using 8xx features.
config FSL_EMB_PERFMON
bool "Freescale Embedded Perfmon"
depends on E500 || PPC_83xx