@@ -0,0 +1,613 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Zhaoxin PMU; like Intel Architectural PerfMon-v2
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/nmi.h>

#include <asm/cpufeature.h>
#include <asm/hardirq.h>
#include <asm/apic.h>

#include "../perf_event.h"
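
/*
 * Note: generic perf hardware events are translated to Zhaoxin
 * event-select codes (event in bits 7:0, umask in bits 15:8) via
 * zx_pmon_event_map below; a zero entry marks a generic event as
 * unsupported on the running model, and such events are rejected
 * at creation time by the x86 perf core.
 */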
/*
 * Zhaoxin PerfMon, used on zxc and later.
 */
static u64 zx_pmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = {

	[PERF_COUNT_HW_CPU_CYCLES]        = 0x0082,
	[PERF_COUNT_HW_INSTRUCTIONS]      = 0x00c0,
	[PERF_COUNT_HW_CACHE_REFERENCES]  = 0x0515,
	[PERF_COUNT_HW_CACHE_MISSES]      = 0x051a,
	[PERF_COUNT_HW_BUS_CYCLES]        = 0x0083,
};
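
/*
 * FIXED_EVENT_CONSTRAINT(code, n) pins architectural event <code> to
 * fixed counter <n>: zxc exposes only the core-cycles fixed counter,
 * while zxd/zxe also have instructions and bus-cycles fixed counters.
 */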
static struct event_constraint zxc_event_constraints[] __read_mostly = {

	FIXED_EVENT_CONSTRAINT(0x0082, 1), /* unhalted core clock cycles */
	EVENT_CONSTRAINT_END
};

static struct event_constraint zxd_event_constraints[] __read_mostly = {

	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* retired instructions */
	FIXED_EVENT_CONSTRAINT(0x0082, 1), /* unhalted core clock cycles */
	FIXED_EVENT_CONSTRAINT(0x0083, 2), /* unhalted bus clock cycles */
	EVENT_CONSTRAINT_END
};
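
/*
 * Cache event tables are indexed [cache][op][result]; an entry of -1
 * marks an op/result combination the hardware cannot count, while 0
 * simply means no event code has been assigned.
 */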
static __initconst const u64 zxd_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0042,
		[C(RESULT_MISS)] = 0x0538,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = 0x0043,
		[C(RESULT_MISS)] = 0x0562,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0300,
		[C(RESULT_MISS)] = 0x0301,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0x030a,
		[C(RESULT_MISS)] = 0x030b,
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0042,
		[C(RESULT_MISS)] = 0x052c,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = 0x0043,
		[C(RESULT_MISS)] = 0x0530,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0x0564,
		[C(RESULT_MISS)] = 0x0565,
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x00c0,
		[C(RESULT_MISS)] = 0x0534,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0700,
		[C(RESULT_MISS)] = 0x0709,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
[C(NODE)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
};

static __initconst const u64 zxe_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0568,
		[C(RESULT_MISS)] = 0x054b,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = 0x0669,
		[C(RESULT_MISS)] = 0x0562,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0300,
		[C(RESULT_MISS)] = 0x0301,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0x030a,
		[C(RESULT_MISS)] = 0x030b,
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0,
		[C(RESULT_MISS)] = 0x0,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = 0x0,
		[C(RESULT_MISS)] = 0x0,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0x0,
		[C(RESULT_MISS)] = 0x0,
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0568,
		[C(RESULT_MISS)] = 0x052c,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = 0x0669,
		[C(RESULT_MISS)] = 0x0530,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0x0564,
		[C(RESULT_MISS)] = 0x0565,
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x00c0,
		[C(RESULT_MISS)] = 0x0534,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0028,
		[C(RESULT_MISS)] = 0x0029,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
[C(NODE)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
};
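
/*
 * Like Intel PerfMon v2, the PMU has a single global-control MSR
 * (MSR_CORE_PERF_GLOBAL_CTRL): writing 0 freezes every counter at
 * once, and writing back x86_pmu.intel_ctrl re-enables all counters
 * configured at init time.
 */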
static void zhaoxin_pmu_disable_all(void)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
}

static void zhaoxin_pmu_enable_all(int added)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
}

static inline u64 zhaoxin_pmu_get_status(void)
{
	u64 status;

	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);

	return status;
}

static inline void zhaoxin_pmu_ack_status(u64 ack)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}

static inline void zxc_pmu_ack_status(u64 ack)
{
	/*
	 * ZXC needs global control enabled in order to clear status bits.
	 */
	zhaoxin_pmu_enable_all(0);
	zhaoxin_pmu_ack_status(ack);
	zhaoxin_pmu_disable_all();
}
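
/*
 * Fixed counters are controlled through MSR_ARCH_PERFMON_FIXED_CTR_CTRL,
 * which allocates a 4-bit field per counter: bit 0 enables ring-0
 * counting, bit 1 ring-3 counting, and bit 3 PMI on overflow; hence
 * the 0xf mask shifted by (idx * 4) in the helpers below.
 */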
static void zhaoxin_pmu_disable_fixed(struct hw_perf_event *hwc)
{
	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
	u64 ctrl_val, mask;

	mask = 0xfULL << (idx * 4);

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
	wrmsrl(hwc->config_base, ctrl_val);
}

static void zhaoxin_pmu_disable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
		zhaoxin_pmu_disable_fixed(hwc);
		return;
	}

	x86_pmu_disable_event(event);
}

static void zhaoxin_pmu_enable_fixed(struct hw_perf_event *hwc)
{
	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
	u64 ctrl_val, bits, mask;

	/*
	 * Enable IRQ generation (0x8),
	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
	 * if requested:
	 */
	bits = 0x8ULL;
	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
		bits |= 0x2;
	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
		bits |= 0x1;

	bits <<= (idx * 4);
	mask = 0xfULL << (idx * 4);

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
	ctrl_val |= bits;
	wrmsrl(hwc->config_base, ctrl_val);
}

static void zhaoxin_pmu_enable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
		zhaoxin_pmu_enable_fixed(hwc);
		return;
	}

	__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
}
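
/*
 * Overflow handling follows the Intel PerfMon v2 scheme: freeze all
 * counters, read GLOBAL_STATUS, ack the overflow bits (through the
 * zxc workaround when enabled_ack is set), fold each overflowed
 * counter back into its perf_event, and loop until the status
 * register reads clean.
 */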
/*
 * This handler is triggered by the local APIC, so the APIC IRQ handling
 * rules apply:
 */
static int zhaoxin_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	int handled = 0;
	u64 status;
	int bit;

	cpuc = this_cpu_ptr(&cpu_hw_events);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	zhaoxin_pmu_disable_all();
	status = zhaoxin_pmu_get_status();
	if (!status)
		goto done;

again:
	if (x86_pmu.enabled_ack)
		zxc_pmu_ack_status(status);
	else
		zhaoxin_pmu_ack_status(status);

	inc_irq_stat(apic_perf_irqs);

	/*
	 * CondChgd bit 63 doesn't mean any overflow status. Ignore
	 * and clear the bit.
	 */
	if (__test_and_clear_bit(63, (unsigned long *)&status)) {
		if (!status)
			goto done;
	}

	for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
		struct perf_event *event = cpuc->events[bit];

		handled++;

		if (!test_bit(bit, cpuc->active_mask))
			continue;

		x86_perf_event_update(event);
		perf_sample_data_init(&data, 0, event->hw.last_period);

		if (!x86_perf_event_set_period(event))
			continue;

		if (perf_event_overflow(event, &data, regs))
			x86_pmu_stop(event, 0);
	}

	/*
	 * Repeat if there is more work to be done:
	 */
	status = zhaoxin_pmu_get_status();
	if (status)
		goto again;

done:
	zhaoxin_pmu_enable_all(0);
	return handled;
}

static u64 zhaoxin_pmu_event_map(int hw_event)
{
	return zx_pmon_event_map[hw_event];
}
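
/*
 * Match an event against the model's constraint table (fixed-counter
 * constraints only, on these parts); anything without a specific
 * constraint may schedule on any general-purpose counter.
 */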
static struct event_constraint *
zhaoxin_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			struct perf_event *event)
{
	struct event_constraint *c;

	if (x86_pmu.event_constraints) {
		for_each_event_constraint(c, x86_pmu.event_constraints) {
			if ((event->hw.config & c->cmask) == c->code)
				return c;
		}
	}

	return &unconstrained;
}
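
/*
 * Describe the raw event-config bit layout through sysfs format
 * attributes (event select in config:0-7, unit mask in config:8-15,
 * and so on) so that perf tooling can parse symbolic raw events.
 */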
PMU_FORMAT_ATTR(event, "config:0-7");
PMU_FORMAT_ATTR(umask, "config:8-15");
PMU_FORMAT_ATTR(edge, "config:18");
PMU_FORMAT_ATTR(inv, "config:23");
PMU_FORMAT_ATTR(cmask, "config:24-31");

static struct attribute *zx_arch_formats_attr[] = {
	&format_attr_event.attr,
	&format_attr_umask.attr,
	&format_attr_edge.attr,
	&format_attr_inv.attr,
	&format_attr_cmask.attr,
	NULL,
};

static ssize_t zhaoxin_event_sysfs_show(char *page, u64 config)
{
	u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);

	return x86_event_sysfs_show(page, config, event);
}

static const struct x86_pmu zhaoxin_pmu __initconst = {
	.name = "zhaoxin",
	.handle_irq = zhaoxin_pmu_handle_irq,
	.disable_all = zhaoxin_pmu_disable_all,
	.enable_all = zhaoxin_pmu_enable_all,
	.enable = zhaoxin_pmu_enable_event,
	.disable = zhaoxin_pmu_disable_event,
	.hw_config = x86_pmu_hw_config,
	.schedule_events = x86_schedule_events,
	.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
	.event_map = zhaoxin_pmu_event_map,
	.max_events = ARRAY_SIZE(zx_pmon_event_map),
	.apic = 1,
	/*
	 * For zxd/zxe, the PMCx MSRs are read and written as 48-bit
	 * values, so cap the sampling period at 47 bits.
	 */
	.max_period = (1ULL << 47) - 1,
	.get_event_constraints = zhaoxin_get_event_constraints,

	.format_attrs = zx_arch_formats_attr,
	.events_sysfs_show = zhaoxin_event_sysfs_show,
};

static const struct { int id; char *name; } zx_arch_events_map[] __initconst = {
	{ PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
	{ PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
	{ PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
	{ PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
	{ PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
	{ PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
	{ PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
};

static __init void zhaoxin_arch_events_quirk(void)
{
	int bit;

	/* disable events that cpuid reports as not present */
	for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(zx_arch_events_map)) {
		zx_pmon_event_map[zx_arch_events_map[bit].id] = 0;
		pr_warn("CPUID marked event: \'%s\' unavailable\n",
			zx_arch_events_map[bit].name);
	}
}

__init int zhaoxin_pmu_init(void)
{
	union cpuid10_edx edx;
	union cpuid10_eax eax;
	union cpuid10_ebx ebx;
	struct event_constraint *c;
	unsigned int unused;
	int version;

	pr_info("Welcome to zhaoxin pmu!\n");

	/*
	 * Check whether the Architectural PerfMon supports
	 * hw_event or not.
	 */
	cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);

	if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT - 1)
		return -ENODEV;

	version = eax.split.version_id;
	if (version != 2)
		return -ENODEV;

	x86_pmu = zhaoxin_pmu;
	pr_info("Version check pass!\n");

	x86_pmu.version = version;
	x86_pmu.num_counters = eax.split.num_counters;
	x86_pmu.cntval_bits = eax.split.bit_width;
	x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
	x86_pmu.events_maskl = ebx.full;
	x86_pmu.events_mask_len = eax.split.mask_length;

	x86_pmu.num_counters_fixed = edx.split.num_counters_fixed;
	x86_add_quirk(zhaoxin_arch_events_quirk);
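
	/*
	 * Model-specific setup: family 6 parts (zxc) cannot count most
	 * generic events and need status clearing while globally
	 * enabled, while family 7 parts (zxd/zxe) gain cache and
	 * branch event tables.
	 */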
	switch (boot_cpu_data.x86) {
	case 0x06:
		if (boot_cpu_data.x86_model == 0x0f || boot_cpu_data.x86_model == 0x19) {

			x86_pmu.max_period = x86_pmu.cntval_mask >> 1;

			/* Clearing status works only if the global control is enabled on zxc. */
			x86_pmu.enabled_ack = 1;

			x86_pmu.event_constraints = zxc_event_constraints;
			zx_pmon_event_map[PERF_COUNT_HW_INSTRUCTIONS] = 0;
			zx_pmon_event_map[PERF_COUNT_HW_CACHE_REFERENCES] = 0;
			zx_pmon_event_map[PERF_COUNT_HW_CACHE_MISSES] = 0;
			zx_pmon_event_map[PERF_COUNT_HW_BUS_CYCLES] = 0;

			pr_cont("ZXC events, ");
			break;
		}
		return -ENODEV;

	case 0x07:
		zx_pmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
			X86_CONFIG(.event = 0x01, .umask = 0x01, .inv = 0x01, .cmask = 0x01);

		zx_pmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
			X86_CONFIG(.event = 0x0f, .umask = 0x04, .inv = 0, .cmask = 0);

		switch (boot_cpu_data.x86_model) {
		case 0x1b:
			memcpy(hw_cache_event_ids, zxd_hw_cache_event_ids,
			       sizeof(hw_cache_event_ids));

			x86_pmu.event_constraints = zxd_event_constraints;

			zx_pmon_event_map[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0700;
			zx_pmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x0709;

			pr_cont("ZXD events, ");
			break;
		case 0x3b:
			memcpy(hw_cache_event_ids, zxe_hw_cache_event_ids,
			       sizeof(hw_cache_event_ids));

			x86_pmu.event_constraints = zxd_event_constraints;

			zx_pmon_event_map[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0028;
			zx_pmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x0029;

			pr_cont("ZXE events, ");
			break;
		default:
			return -ENODEV;
		}
		break;

	default:
		return -ENODEV;
	}
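
	/*
	 * Build the global-enable mask: one bit per general-purpose
	 * counter starting at bit 0 and one bit per fixed counter
	 * starting at bit INTEL_PMC_IDX_FIXED (32), matching the
	 * layout of MSR_CORE_PERF_GLOBAL_CTRL.
	 */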
	x86_pmu.intel_ctrl = (1 << (x86_pmu.num_counters)) - 1;
	x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
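
	/*
	 * An event pinned to a fixed counter can also run on any
	 * general-purpose counter, so widen each constraint mask and
	 * weight accordingly (the same fixup intel_pmu_init() applies
	 * to its fixed-counter constraints).
	 */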
	if (x86_pmu.event_constraints) {
		for_each_event_constraint(c, x86_pmu.event_constraints) {
			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
			c->weight += x86_pmu.num_counters;
		}
	}

	return 0;
}