2019-06-03 08:44:50 +03:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2015-12-08 10:29:06 +03:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2015 Linaro Ltd.
|
|
|
|
* Author: Shannon Zhao <shannon.zhao@linaro.org>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/cpu.h>
|
|
|
|
#include <linux/kvm.h>
|
|
|
|
#include <linux/kvm_host.h>
|
2022-01-27 19:17:57 +03:00
|
|
|
#include <linux/list.h>
|
2015-12-08 10:29:06 +03:00
|
|
|
#include <linux/perf_event.h>
|
2019-10-06 12:28:50 +03:00
|
|
|
#include <linux/perf/arm_pmu.h>
|
2016-01-11 16:35:32 +03:00
|
|
|
#include <linux/uaccess.h>
|
2015-12-08 10:29:06 +03:00
|
|
|
#include <asm/kvm_emulate.h>
|
|
|
|
#include <kvm/arm_pmu.h>
|
2016-02-26 14:29:19 +03:00
|
|
|
#include <kvm/arm_vgic.h>
|
2015-12-08 10:29:06 +03:00
|
|
|
|
2022-11-13 19:38:18 +03:00
|
|
|
#define PERF_ATTR_CFG1_COUNTER_64BIT BIT(0)
|
|
|
|
|
2021-11-11 05:07:36 +03:00
|
|
|
DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
|
|
|
|
|
2022-01-27 19:17:57 +03:00
|
|
|
static LIST_HEAD(arm_pmus);
|
|
|
|
static DEFINE_MUTEX(arm_pmus_lock);
|
|
|
|
|
2022-11-13 19:38:32 +03:00
|
|
|
static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc);
|
2022-11-13 19:38:24 +03:00
|
|
|
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc);
|
2019-06-17 22:01:05 +03:00
|
|
|
|
2022-11-13 19:38:32 +03:00
|
|
|
static struct kvm_vcpu *kvm_pmc_to_vcpu(const struct kvm_pmc *pmc)
|
|
|
|
{
|
|
|
|
return container_of(pmc, struct kvm_vcpu, arch.pmu.pmc[pmc->idx]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx)
|
|
|
|
{
|
|
|
|
return &vcpu->arch.pmu.pmc[cnt_idx];
|
|
|
|
}
|
|
|
|
|
2020-03-17 14:11:56 +03:00
|
|
|
static u32 kvm_pmu_event_mask(struct kvm *kvm)
|
|
|
|
{
|
2022-01-27 19:17:56 +03:00
|
|
|
unsigned int pmuver;
|
|
|
|
|
|
|
|
pmuver = kvm->arch.arm_pmu->pmuver;
|
|
|
|
|
|
|
|
switch (pmuver) {
|
2022-09-10 19:33:51 +03:00
|
|
|
case ID_AA64DFR0_EL1_PMUVer_IMP:
|
2020-03-17 14:11:56 +03:00
|
|
|
return GENMASK(9, 0);
|
2022-09-10 19:33:51 +03:00
|
|
|
case ID_AA64DFR0_EL1_PMUVer_V3P1:
|
|
|
|
case ID_AA64DFR0_EL1_PMUVer_V3P4:
|
|
|
|
case ID_AA64DFR0_EL1_PMUVer_V3P5:
|
|
|
|
case ID_AA64DFR0_EL1_PMUVer_V3P7:
|
2020-03-17 14:11:56 +03:00
|
|
|
return GENMASK(15, 0);
|
|
|
|
default: /* Shouldn't be here, just for sanity */
|
2022-01-27 19:17:56 +03:00
|
|
|
WARN_ONCE(1, "Unknown PMU version %d\n", pmuver);
|
2020-03-17 14:11:56 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-06-17 22:01:04 +03:00
|
|
|
/**
|
2022-11-13 19:38:32 +03:00
|
|
|
* kvm_pmc_is_64bit - determine if counter is 64bit
|
|
|
|
* @pmc: counter context
|
2019-06-17 22:01:04 +03:00
|
|
|
*/
|
2022-11-13 19:38:32 +03:00
|
|
|
static bool kvm_pmc_is_64bit(struct kvm_pmc *pmc)
|
2022-11-13 19:38:20 +03:00
|
|
|
{
|
2022-11-13 19:38:32 +03:00
|
|
|
return (pmc->idx == ARMV8_PMU_CYCLE_IDX ||
|
|
|
|
kvm_pmu_is_3p5(kvm_pmc_to_vcpu(pmc)));
|
2022-11-13 19:38:20 +03:00
|
|
|
}
|
|
|
|
|
2022-11-13 19:38:32 +03:00
|
|
|
static bool kvm_pmc_has_64bit_overflow(struct kvm_pmc *pmc)
|
2019-06-17 22:01:04 +03:00
|
|
|
{
|
2022-11-13 19:38:32 +03:00
|
|
|
u64 val = __vcpu_sys_reg(kvm_pmc_to_vcpu(pmc), PMCR_EL0);
|
2022-11-13 19:38:29 +03:00
|
|
|
|
2022-11-13 19:38:32 +03:00
|
|
|
return (pmc->idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) ||
|
|
|
|
(pmc->idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC));
|
2019-06-17 22:01:04 +03:00
|
|
|
}
|
|
|
|
|
2022-11-13 19:38:32 +03:00
|
|
|
static bool kvm_pmu_counter_can_chain(struct kvm_pmc *pmc)
|
2022-11-13 19:38:18 +03:00
|
|
|
{
|
2022-11-13 19:38:32 +03:00
|
|
|
return (!(pmc->idx & 1) && (pmc->idx + 1) < ARMV8_PMU_CYCLE_IDX &&
|
|
|
|
!kvm_pmc_has_64bit_overflow(pmc));
|
2019-06-17 22:01:05 +03:00
|
|
|
}
|
|
|
|
|
2022-11-13 19:38:23 +03:00
|
|
|
/*
 * Translate a counter index into the index of its count register:
 * PMCCNTR_EL0 for the cycle counter, PMEVCNTR0_EL0 + @idx otherwise.
 */
static u32 counter_index_to_reg(u64 idx)
{
	if (idx == ARMV8_PMU_CYCLE_IDX)
		return PMCCNTR_EL0;

	return PMEVCNTR0_EL0 + idx;
}
|
|
|
|
|
|
|
|
/*
 * Translate a counter index into the index of its event type register:
 * PMCCFILTR_EL0 for the cycle counter, PMEVTYPER0_EL0 + @idx otherwise.
 */
static u32 counter_index_to_evtreg(u64 idx)
{
	if (idx == ARMV8_PMU_CYCLE_IDX)
		return PMCCFILTR_EL0;

	return PMEVTYPER0_EL0 + idx;
}
|
|
|
|
|
2022-11-13 19:38:32 +03:00
|
|
|
static u64 kvm_pmu_get_pmc_value(struct kvm_pmc *pmc)
|
2019-06-17 22:01:05 +03:00
|
|
|
{
|
2022-11-13 19:38:32 +03:00
|
|
|
struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
|
2022-11-13 19:38:18 +03:00
|
|
|
u64 counter, reg, enabled, running;
|
2019-06-17 22:01:05 +03:00
|
|
|
|
2022-11-13 19:38:32 +03:00
|
|
|
reg = counter_index_to_reg(pmc->idx);
|
2022-11-13 19:38:18 +03:00
|
|
|
counter = __vcpu_sys_reg(vcpu, reg);
|
2019-06-17 22:01:05 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* The real counter value is equal to the value of counter register plus
|
2015-12-08 10:29:06 +03:00
|
|
|
* the value perf event counts.
|
|
|
|
*/
|
|
|
|
if (pmc->perf_event)
|
|
|
|
counter += perf_event_read_value(pmc->perf_event, &enabled,
|
|
|
|
&running);
|
|
|
|
|
2022-11-13 19:38:32 +03:00
|
|
|
if (!kvm_pmc_is_64bit(pmc))
|
2019-06-17 22:01:04 +03:00
|
|
|
counter = lower_32_bits(counter);
|
|
|
|
|
|
|
|
return counter;
|
2015-12-08 10:29:06 +03:00
|
|
|
}
|
|
|
|
|
2022-11-13 19:38:32 +03:00
|
|
|
/**
|
|
|
|
* kvm_pmu_get_counter_value - get PMU counter value
|
|
|
|
* @vcpu: The vcpu pointer
|
|
|
|
* @select_idx: The counter index
|
|
|
|
*/
|
|
|
|
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
|
2015-12-08 10:29:06 +03:00
|
|
|
{
|
KVM/arm64: Don't emulate a PMU for 32-bit guests if feature not set
kvm->arch.arm_pmu is set when userspace attempts to set the first PMU
attribute. As certain attributes are mandatory, arm_pmu ends up always
being set to a valid arm_pmu, otherwise KVM will refuse to run the VCPU.
However, this only happens if the VCPU has the PMU feature. If the VCPU
doesn't have the feature bit set, kvm->arch.arm_pmu will be left
uninitialized and equal to NULL.
KVM doesn't do ID register emulation for 32-bit guests and accesses to the
PMU registers aren't gated by the pmu_visibility() function. This is done
to prevent injecting unexpected undefined exceptions in guests which have
detected the presence of a hardware PMU. But even though the VCPU feature
is missing, KVM still attempts to emulate certain aspects of the PMU when
PMU registers are accessed. This leads to a NULL pointer dereference like
this one, which happens on an odroid-c4 board when running the
kvm-unit-tests pmu-cycle-counter test with kvmtool and without the PMU
feature being set:
[ 454.402699] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000150
[ 454.405865] Mem abort info:
[ 454.408596] ESR = 0x96000004
[ 454.411638] EC = 0x25: DABT (current EL), IL = 32 bits
[ 454.416901] SET = 0, FnV = 0
[ 454.419909] EA = 0, S1PTW = 0
[ 454.423010] FSC = 0x04: level 0 translation fault
[ 454.427841] Data abort info:
[ 454.430687] ISV = 0, ISS = 0x00000004
[ 454.434484] CM = 0, WnR = 0
[ 454.437404] user pgtable: 4k pages, 48-bit VAs, pgdp=000000000c924000
[ 454.443800] [0000000000000150] pgd=0000000000000000, p4d=0000000000000000
[ 454.450528] Internal error: Oops: 96000004 [#1] PREEMPT SMP
[ 454.456036] Modules linked in:
[ 454.459053] CPU: 1 PID: 267 Comm: kvm-vcpu-0 Not tainted 5.18.0-rc4 #113
[ 454.465697] Hardware name: Hardkernel ODROID-C4 (DT)
[ 454.470612] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 454.477512] pc : kvm_pmu_event_mask.isra.0+0x14/0x74
[ 454.482427] lr : kvm_pmu_set_counter_event_type+0x2c/0x80
[ 454.487775] sp : ffff80000a9839c0
[ 454.491050] x29: ffff80000a9839c0 x28: ffff000000a83a00 x27: 0000000000000000
[ 454.498127] x26: 0000000000000000 x25: 0000000000000000 x24: ffff00000a510000
[ 454.505198] x23: ffff000000a83a00 x22: ffff000003b01000 x21: 0000000000000000
[ 454.512271] x20: 000000000000001f x19: 00000000000003ff x18: 0000000000000000
[ 454.519343] x17: 000000008003fe98 x16: 0000000000000000 x15: 0000000000000000
[ 454.526416] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000
[ 454.533489] x11: 000000008003fdbc x10: 0000000000009d20 x9 : 000000000000001b
[ 454.540561] x8 : 0000000000000000 x7 : 0000000000000d00 x6 : 0000000000009d00
[ 454.547633] x5 : 0000000000000037 x4 : 0000000000009d00 x3 : 0d09000000000000
[ 454.554705] x2 : 000000000000001f x1 : 0000000000000000 x0 : 0000000000000000
[ 454.561779] Call trace:
[ 454.564191] kvm_pmu_event_mask.isra.0+0x14/0x74
[ 454.568764] kvm_pmu_set_counter_event_type+0x2c/0x80
[ 454.573766] access_pmu_evtyper+0x128/0x170
[ 454.577905] perform_access+0x34/0x80
[ 454.581527] kvm_handle_cp_32+0x13c/0x160
[ 454.585495] kvm_handle_cp15_32+0x1c/0x30
[ 454.589462] handle_exit+0x70/0x180
[ 454.592912] kvm_arch_vcpu_ioctl_run+0x1c4/0x5e0
[ 454.597485] kvm_vcpu_ioctl+0x23c/0x940
[ 454.601280] __arm64_sys_ioctl+0xa8/0xf0
[ 454.605160] invoke_syscall+0x48/0x114
[ 454.608869] el0_svc_common.constprop.0+0xd4/0xfc
[ 454.613527] do_el0_svc+0x28/0x90
[ 454.616803] el0_svc+0x34/0xb0
[ 454.619822] el0t_64_sync_handler+0xa4/0x130
[ 454.624049] el0t_64_sync+0x18c/0x190
[ 454.627675] Code: a9be7bfd 910003fd f9000bf3 52807ff3 (b9415001)
[ 454.633714] ---[ end trace 0000000000000000 ]---
In this particular case, Linux hasn't detected the presence of a hardware
PMU because the PMU node is missing from the DTB, so userspace would have
been unable to set the VCPU PMU feature even if it attempted it. What
happens is that the 32-bit guest reads ID_DFR0, which advertises the
presence of the PMU, and when it tries to program a counter, it triggers
the NULL pointer dereference because kvm->arch.arm_pmu is NULL.
kvm-arch.arm_pmu was introduced by commit 46b187821472 ("KVM: arm64:
Keep a per-VM pointer to the default PMU"). Until that commit, this
error would be triggered instead:
[ 73.388140] ------------[ cut here ]------------
[ 73.388189] Unknown PMU version 0
[ 73.390420] WARNING: CPU: 1 PID: 264 at arch/arm64/kvm/pmu-emul.c:36 kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.399821] Modules linked in:
[ 73.402835] CPU: 1 PID: 264 Comm: kvm-vcpu-0 Not tainted 5.17.0 #114
[ 73.409132] Hardware name: Hardkernel ODROID-C4 (DT)
[ 73.414048] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 73.420948] pc : kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.425863] lr : kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.430779] sp : ffff80000a8db9b0
[ 73.434055] x29: ffff80000a8db9b0 x28: ffff000000dbaac0 x27: 0000000000000000
[ 73.441131] x26: ffff000000dbaac0 x25: 00000000c600000d x24: 0000000000180720
[ 73.448203] x23: ffff800009ffbe10 x22: ffff00000b612000 x21: 0000000000000000
[ 73.455276] x20: 000000000000001f x19: 0000000000000000 x18: ffffffffffffffff
[ 73.462348] x17: 000000008003fe98 x16: 0000000000000000 x15: 0720072007200720
[ 73.469420] x14: 0720072007200720 x13: ffff800009d32488 x12: 00000000000004e6
[ 73.476493] x11: 00000000000001a2 x10: ffff800009d32488 x9 : ffff800009d32488
[ 73.483565] x8 : 00000000ffffefff x7 : ffff800009d8a488 x6 : ffff800009d8a488
[ 73.490638] x5 : ffff0000f461a9d8 x4 : 0000000000000000 x3 : 0000000000000001
[ 73.497710] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000000dbaac0
[ 73.504784] Call trace:
[ 73.507195] kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.511768] kvm_pmu_set_counter_event_type+0x2c/0x80
[ 73.516770] access_pmu_evtyper+0x128/0x16c
[ 73.520910] perform_access+0x34/0x80
[ 73.524532] kvm_handle_cp_32+0x13c/0x160
[ 73.528500] kvm_handle_cp15_32+0x1c/0x30
[ 73.532467] handle_exit+0x70/0x180
[ 73.535917] kvm_arch_vcpu_ioctl_run+0x20c/0x6e0
[ 73.540489] kvm_vcpu_ioctl+0x2b8/0x9e0
[ 73.544283] __arm64_sys_ioctl+0xa8/0xf0
[ 73.548165] invoke_syscall+0x48/0x114
[ 73.551874] el0_svc_common.constprop.0+0xd4/0xfc
[ 73.556531] do_el0_svc+0x28/0x90
[ 73.559808] el0_svc+0x28/0x80
[ 73.562826] el0t_64_sync_handler+0xa4/0x130
[ 73.567054] el0t_64_sync+0x1a0/0x1a4
[ 73.570676] ---[ end trace 0000000000000000 ]---
[ 73.575382] kvm: pmu event creation failed -2
The root cause remains the same: kvm->arch.pmuver was never set to
something sensible because the VCPU feature itself was never set.
The odroid-c4 is somewhat of a special case, because Linux doesn't probe
the PMU. But the above errors can easily be reproduced on any hardware,
with or without a PMU driver, as long as userspace doesn't set the PMU
feature.
Work around the fact that KVM advertises a PMU even when the VCPU feature
is not set by gating all PMU emulation on the feature. The guest can still
access the registers without KVM injecting an undefined exception.
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220425145530.723858-1-alexandru.elisei@arm.com
2022-04-25 17:55:30 +03:00
|
|
|
if (!kvm_vcpu_has_pmu(vcpu))
|
2022-11-13 19:38:32 +03:00
|
|
|
return 0;
|
KVM/arm64: Don't emulate a PMU for 32-bit guests if feature not set
kvm->arch.arm_pmu is set when userspace attempts to set the first PMU
attribute. As certain attributes are mandatory, arm_pmu ends up always
being set to a valid arm_pmu, otherwise KVM will refuse to run the VCPU.
However, this only happens if the VCPU has the PMU feature. If the VCPU
doesn't have the feature bit set, kvm->arch.arm_pmu will be left
uninitialized and equal to NULL.
KVM doesn't do ID register emulation for 32-bit guests and accesses to the
PMU registers aren't gated by the pmu_visibility() function. This is done
to prevent injecting unexpected undefined exceptions in guests which have
detected the presence of a hardware PMU. But even though the VCPU feature
is missing, KVM still attempts to emulate certain aspects of the PMU when
PMU registers are accessed. This leads to a NULL pointer dereference like
this one, which happens on an odroid-c4 board when running the
kvm-unit-tests pmu-cycle-counter test with kvmtool and without the PMU
feature being set:
[ 454.402699] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000150
[ 454.405865] Mem abort info:
[ 454.408596] ESR = 0x96000004
[ 454.411638] EC = 0x25: DABT (current EL), IL = 32 bits
[ 454.416901] SET = 0, FnV = 0
[ 454.419909] EA = 0, S1PTW = 0
[ 454.423010] FSC = 0x04: level 0 translation fault
[ 454.427841] Data abort info:
[ 454.430687] ISV = 0, ISS = 0x00000004
[ 454.434484] CM = 0, WnR = 0
[ 454.437404] user pgtable: 4k pages, 48-bit VAs, pgdp=000000000c924000
[ 454.443800] [0000000000000150] pgd=0000000000000000, p4d=0000000000000000
[ 454.450528] Internal error: Oops: 96000004 [#1] PREEMPT SMP
[ 454.456036] Modules linked in:
[ 454.459053] CPU: 1 PID: 267 Comm: kvm-vcpu-0 Not tainted 5.18.0-rc4 #113
[ 454.465697] Hardware name: Hardkernel ODROID-C4 (DT)
[ 454.470612] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 454.477512] pc : kvm_pmu_event_mask.isra.0+0x14/0x74
[ 454.482427] lr : kvm_pmu_set_counter_event_type+0x2c/0x80
[ 454.487775] sp : ffff80000a9839c0
[ 454.491050] x29: ffff80000a9839c0 x28: ffff000000a83a00 x27: 0000000000000000
[ 454.498127] x26: 0000000000000000 x25: 0000000000000000 x24: ffff00000a510000
[ 454.505198] x23: ffff000000a83a00 x22: ffff000003b01000 x21: 0000000000000000
[ 454.512271] x20: 000000000000001f x19: 00000000000003ff x18: 0000000000000000
[ 454.519343] x17: 000000008003fe98 x16: 0000000000000000 x15: 0000000000000000
[ 454.526416] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000
[ 454.533489] x11: 000000008003fdbc x10: 0000000000009d20 x9 : 000000000000001b
[ 454.540561] x8 : 0000000000000000 x7 : 0000000000000d00 x6 : 0000000000009d00
[ 454.547633] x5 : 0000000000000037 x4 : 0000000000009d00 x3 : 0d09000000000000
[ 454.554705] x2 : 000000000000001f x1 : 0000000000000000 x0 : 0000000000000000
[ 454.561779] Call trace:
[ 454.564191] kvm_pmu_event_mask.isra.0+0x14/0x74
[ 454.568764] kvm_pmu_set_counter_event_type+0x2c/0x80
[ 454.573766] access_pmu_evtyper+0x128/0x170
[ 454.577905] perform_access+0x34/0x80
[ 454.581527] kvm_handle_cp_32+0x13c/0x160
[ 454.585495] kvm_handle_cp15_32+0x1c/0x30
[ 454.589462] handle_exit+0x70/0x180
[ 454.592912] kvm_arch_vcpu_ioctl_run+0x1c4/0x5e0
[ 454.597485] kvm_vcpu_ioctl+0x23c/0x940
[ 454.601280] __arm64_sys_ioctl+0xa8/0xf0
[ 454.605160] invoke_syscall+0x48/0x114
[ 454.608869] el0_svc_common.constprop.0+0xd4/0xfc
[ 454.613527] do_el0_svc+0x28/0x90
[ 454.616803] el0_svc+0x34/0xb0
[ 454.619822] el0t_64_sync_handler+0xa4/0x130
[ 454.624049] el0t_64_sync+0x18c/0x190
[ 454.627675] Code: a9be7bfd 910003fd f9000bf3 52807ff3 (b9415001)
[ 454.633714] ---[ end trace 0000000000000000 ]---
In this particular case, Linux hasn't detected the presence of a hardware
PMU because the PMU node is missing from the DTB, so userspace would have
been unable to set the VCPU PMU feature even if it attempted it. What
happens is that the 32-bit guest reads ID_DFR0, which advertises the
presence of the PMU, and when it tries to program a counter, it triggers
the NULL pointer dereference because kvm->arch.arm_pmu is NULL.
kvm-arch.arm_pmu was introduced by commit 46b187821472 ("KVM: arm64:
Keep a per-VM pointer to the default PMU"). Until that commit, this
error would be triggered instead:
[ 73.388140] ------------[ cut here ]------------
[ 73.388189] Unknown PMU version 0
[ 73.390420] WARNING: CPU: 1 PID: 264 at arch/arm64/kvm/pmu-emul.c:36 kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.399821] Modules linked in:
[ 73.402835] CPU: 1 PID: 264 Comm: kvm-vcpu-0 Not tainted 5.17.0 #114
[ 73.409132] Hardware name: Hardkernel ODROID-C4 (DT)
[ 73.414048] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 73.420948] pc : kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.425863] lr : kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.430779] sp : ffff80000a8db9b0
[ 73.434055] x29: ffff80000a8db9b0 x28: ffff000000dbaac0 x27: 0000000000000000
[ 73.441131] x26: ffff000000dbaac0 x25: 00000000c600000d x24: 0000000000180720
[ 73.448203] x23: ffff800009ffbe10 x22: ffff00000b612000 x21: 0000000000000000
[ 73.455276] x20: 000000000000001f x19: 0000000000000000 x18: ffffffffffffffff
[ 73.462348] x17: 000000008003fe98 x16: 0000000000000000 x15: 0720072007200720
[ 73.469420] x14: 0720072007200720 x13: ffff800009d32488 x12: 00000000000004e6
[ 73.476493] x11: 00000000000001a2 x10: ffff800009d32488 x9 : ffff800009d32488
[ 73.483565] x8 : 00000000ffffefff x7 : ffff800009d8a488 x6 : ffff800009d8a488
[ 73.490638] x5 : ffff0000f461a9d8 x4 : 0000000000000000 x3 : 0000000000000001
[ 73.497710] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000000dbaac0
[ 73.504784] Call trace:
[ 73.507195] kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.511768] kvm_pmu_set_counter_event_type+0x2c/0x80
[ 73.516770] access_pmu_evtyper+0x128/0x16c
[ 73.520910] perform_access+0x34/0x80
[ 73.524532] kvm_handle_cp_32+0x13c/0x160
[ 73.528500] kvm_handle_cp15_32+0x1c/0x30
[ 73.532467] handle_exit+0x70/0x180
[ 73.535917] kvm_arch_vcpu_ioctl_run+0x20c/0x6e0
[ 73.540489] kvm_vcpu_ioctl+0x2b8/0x9e0
[ 73.544283] __arm64_sys_ioctl+0xa8/0xf0
[ 73.548165] invoke_syscall+0x48/0x114
[ 73.551874] el0_svc_common.constprop.0+0xd4/0xfc
[ 73.556531] do_el0_svc+0x28/0x90
[ 73.559808] el0_svc+0x28/0x80
[ 73.562826] el0t_64_sync_handler+0xa4/0x130
[ 73.567054] el0t_64_sync+0x1a0/0x1a4
[ 73.570676] ---[ end trace 0000000000000000 ]---
[ 73.575382] kvm: pmu event creation failed -2
The root cause remains the same: kvm->arch.pmuver was never set to
something sensible because the VCPU feature itself was never set.
The odroid-c4 is somewhat of a special case, because Linux doesn't probe
the PMU. But the above errors can easily be reproduced on any hardware,
with or without a PMU driver, as long as userspace doesn't set the PMU
feature.
Work around the fact that KVM advertises a PMU even when the VCPU feature
is not set by gating all PMU emulation on the feature. The guest can still
access the registers without KVM injecting an undefined exception.
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220425145530.723858-1-alexandru.elisei@arm.com
2022-04-25 17:55:30 +03:00
|
|
|
|
2022-11-13 19:38:32 +03:00
|
|
|
return kvm_pmu_get_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx));
|
|
|
|
}
|
2022-11-13 19:38:24 +03:00
|
|
|
|
2022-11-13 19:38:32 +03:00
|
|
|
/*
 * Set the value of @pmc.
 *
 * @pmc:   counter context
 * @val:   value to store in the counter register
 * @force: when true, store @val as-is even for an AArch32 vcpu
 *
 * Any perf event backing the counter is released before the register is
 * updated, and created again afterwards.
 */
static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 reg;

	kvm_pmu_release_perf_event(pmc);

	reg = counter_index_to_reg(pmc->idx);

	if (vcpu_mode_is_32bit(vcpu) && pmc->idx != ARMV8_PMU_CYCLE_IDX &&
	    !force) {
		/*
		 * Even with PMUv3p5, AArch32 cannot write to the top
		 * 32bit of the counters. The only possible course of
		 * action is to use PMCR.P, which will reset them to
		 * 0 (the only use of the 'force' parameter).
		 *
		 * Keep the current top half and only take the bottom
		 * half from the value being written. The top half must
		 * be sampled into its own variable: reusing 'val' for it
		 * would discard the written value before its bottom half
		 * is extracted.
		 */
		u64 hi = __vcpu_sys_reg(vcpu, reg) & GENMASK(63, 32);

		val = hi | lower_32_bits(val);
	}

	__vcpu_sys_reg(vcpu, reg) = val;

	/* Recreate the perf event to reflect the updated sample_period */
	kvm_pmu_create_perf_event(pmc);
}
|
2015-09-08 07:26:13 +03:00
|
|
|
|
2022-11-13 19:38:25 +03:00
|
|
|
/**
|
|
|
|
* kvm_pmu_set_counter_value - set PMU counter value
|
|
|
|
* @vcpu: The vcpu pointer
|
|
|
|
* @select_idx: The counter index
|
|
|
|
* @val: The counter value
|
|
|
|
*/
|
|
|
|
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
|
|
|
|
{
|
2022-11-13 19:38:32 +03:00
|
|
|
if (!kvm_vcpu_has_pmu(vcpu))
|
|
|
|
return;
|
|
|
|
|
|
|
|
kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx), val, false);
|
2022-11-13 19:38:25 +03:00
|
|
|
}
|
|
|
|
|
2019-06-17 22:01:02 +03:00
|
|
|
/**
|
|
|
|
* kvm_pmu_release_perf_event - remove the perf event
|
|
|
|
* @pmc: The PMU counter pointer
|
|
|
|
*/
|
|
|
|
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
|
|
|
|
{
|
|
|
|
if (pmc->perf_event) {
|
|
|
|
perf_event_disable(pmc->perf_event);
|
|
|
|
perf_event_release_kernel(pmc->perf_event);
|
|
|
|
pmc->perf_event = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-03 09:27:25 +03:00
|
|
|
/**
|
|
|
|
* kvm_pmu_stop_counter - stop PMU counter
|
|
|
|
* @pmc: The PMU counter pointer
|
|
|
|
*
|
|
|
|
* If this counter has been configured to monitor some event, release it here.
|
|
|
|
*/
|
2022-11-13 19:38:32 +03:00
|
|
|
static void kvm_pmu_stop_counter(struct kvm_pmc *pmc)
|
2015-07-03 09:27:25 +03:00
|
|
|
{
|
2022-11-13 19:38:32 +03:00
|
|
|
struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
|
2022-11-13 19:38:22 +03:00
|
|
|
u64 reg, val;
|
2015-07-03 09:27:25 +03:00
|
|
|
|
2019-06-17 22:01:05 +03:00
|
|
|
if (!pmc->perf_event)
|
|
|
|
return;
|
|
|
|
|
2022-11-13 19:38:32 +03:00
|
|
|
val = kvm_pmu_get_pmc_value(pmc);
|
2019-06-17 22:01:05 +03:00
|
|
|
|
2022-11-13 19:38:23 +03:00
|
|
|
reg = counter_index_to_reg(pmc->idx);
|
2019-06-17 22:01:05 +03:00
|
|
|
|
2019-10-03 20:02:08 +03:00
|
|
|
__vcpu_sys_reg(vcpu, reg) = val;
|
|
|
|
|
2019-06-17 22:01:05 +03:00
|
|
|
kvm_pmu_release_perf_event(pmc);
|
2015-07-03 09:27:25 +03:00
|
|
|
}
|
|
|
|
|
2019-07-18 11:15:10 +03:00
|
|
|
/**
|
|
|
|
* kvm_pmu_vcpu_init - assign pmu counter idx for cpu
|
|
|
|
* @vcpu: The vcpu pointer
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
|
|
|
|
|
|
|
for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
|
|
|
|
pmu->pmc[i].idx = i;
|
|
|
|
}
|
|
|
|
|
2015-09-11 06:30:22 +03:00
|
|
|
/**
|
|
|
|
* kvm_pmu_vcpu_reset - reset pmu state for cpu
|
|
|
|
* @vcpu: The vcpu pointer
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
2020-01-24 17:25:35 +03:00
|
|
|
unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
|
|
|
|
int i;
|
2015-09-11 06:30:22 +03:00
|
|
|
|
2020-01-24 17:25:35 +03:00
|
|
|
for_each_set_bit(i, &mask, 32)
|
2022-11-13 19:38:32 +03:00
|
|
|
kvm_pmu_stop_counter(kvm_vcpu_idx_to_pmc(vcpu, i));
|
2015-09-11 06:30:22 +03:00
|
|
|
}
|
|
|
|
|
2015-09-11 10:18:05 +03:00
|
|
|
/**
|
|
|
|
* kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
|
|
|
|
* @vcpu: The vcpu pointer
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2019-06-17 22:01:02 +03:00
|
|
|
for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
|
2022-11-13 19:38:32 +03:00
|
|
|
kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, i));
|
2020-09-24 14:07:04 +03:00
|
|
|
irq_work_sync(&vcpu->arch.pmu.overflow_work);
|
2015-09-11 10:18:05 +03:00
|
|
|
}
|
|
|
|
|
2015-09-08 07:26:13 +03:00
|
|
|
u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
2016-03-16 17:38:53 +03:00
|
|
|
u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;
|
2015-09-08 07:26:13 +03:00
|
|
|
|
|
|
|
val &= ARMV8_PMU_PMCR_N_MASK;
|
|
|
|
if (val == 0)
|
|
|
|
return BIT(ARMV8_PMU_CYCLE_IDX);
|
|
|
|
else
|
|
|
|
return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2019-06-17 22:01:01 +03:00
|
|
|
* kvm_pmu_enable_counter_mask - enable selected PMU counters
|
2015-09-08 07:26:13 +03:00
|
|
|
* @vcpu: The vcpu pointer
|
|
|
|
* @val: the value guest writes to PMCNTENSET register
|
|
|
|
*
|
|
|
|
* Call perf_event_enable to start counting the perf event
|
|
|
|
*/
|
2019-06-17 22:01:01 +03:00
|
|
|
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
|
2015-09-08 07:26:13 +03:00
|
|
|
{
|
|
|
|
int i;
|
KVM/arm64: Don't emulate a PMU for 32-bit guests if feature not set
kvm->arch.arm_pmu is set when userspace attempts to set the first PMU
attribute. As certain attributes are mandatory, arm_pmu ends up always
being set to a valid arm_pmu, otherwise KVM will refuse to run the VCPU.
However, this only happens if the VCPU has the PMU feature. If the VCPU
doesn't have the feature bit set, kvm->arch.arm_pmu will be left
uninitialized and equal to NULL.
KVM doesn't do ID register emulation for 32-bit guests and accesses to the
PMU registers aren't gated by the pmu_visibility() function. This is done
to prevent injecting unexpected undefined exceptions in guests which have
detected the presence of a hardware PMU. But even though the VCPU feature
is missing, KVM still attempts to emulate certain aspects of the PMU when
PMU registers are accessed. This leads to a NULL pointer dereference like
this one, which happens on an odroid-c4 board when running the
kvm-unit-tests pmu-cycle-counter test with kvmtool and without the PMU
feature being set:
[ 454.402699] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000150
[ 454.405865] Mem abort info:
[ 454.408596] ESR = 0x96000004
[ 454.411638] EC = 0x25: DABT (current EL), IL = 32 bits
[ 454.416901] SET = 0, FnV = 0
[ 454.419909] EA = 0, S1PTW = 0
[ 454.423010] FSC = 0x04: level 0 translation fault
[ 454.427841] Data abort info:
[ 454.430687] ISV = 0, ISS = 0x00000004
[ 454.434484] CM = 0, WnR = 0
[ 454.437404] user pgtable: 4k pages, 48-bit VAs, pgdp=000000000c924000
[ 454.443800] [0000000000000150] pgd=0000000000000000, p4d=0000000000000000
[ 454.450528] Internal error: Oops: 96000004 [#1] PREEMPT SMP
[ 454.456036] Modules linked in:
[ 454.459053] CPU: 1 PID: 267 Comm: kvm-vcpu-0 Not tainted 5.18.0-rc4 #113
[ 454.465697] Hardware name: Hardkernel ODROID-C4 (DT)
[ 454.470612] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 454.477512] pc : kvm_pmu_event_mask.isra.0+0x14/0x74
[ 454.482427] lr : kvm_pmu_set_counter_event_type+0x2c/0x80
[ 454.487775] sp : ffff80000a9839c0
[ 454.491050] x29: ffff80000a9839c0 x28: ffff000000a83a00 x27: 0000000000000000
[ 454.498127] x26: 0000000000000000 x25: 0000000000000000 x24: ffff00000a510000
[ 454.505198] x23: ffff000000a83a00 x22: ffff000003b01000 x21: 0000000000000000
[ 454.512271] x20: 000000000000001f x19: 00000000000003ff x18: 0000000000000000
[ 454.519343] x17: 000000008003fe98 x16: 0000000000000000 x15: 0000000000000000
[ 454.526416] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000
[ 454.533489] x11: 000000008003fdbc x10: 0000000000009d20 x9 : 000000000000001b
[ 454.540561] x8 : 0000000000000000 x7 : 0000000000000d00 x6 : 0000000000009d00
[ 454.547633] x5 : 0000000000000037 x4 : 0000000000009d00 x3 : 0d09000000000000
[ 454.554705] x2 : 000000000000001f x1 : 0000000000000000 x0 : 0000000000000000
[ 454.561779] Call trace:
[ 454.564191] kvm_pmu_event_mask.isra.0+0x14/0x74
[ 454.568764] kvm_pmu_set_counter_event_type+0x2c/0x80
[ 454.573766] access_pmu_evtyper+0x128/0x170
[ 454.577905] perform_access+0x34/0x80
[ 454.581527] kvm_handle_cp_32+0x13c/0x160
[ 454.585495] kvm_handle_cp15_32+0x1c/0x30
[ 454.589462] handle_exit+0x70/0x180
[ 454.592912] kvm_arch_vcpu_ioctl_run+0x1c4/0x5e0
[ 454.597485] kvm_vcpu_ioctl+0x23c/0x940
[ 454.601280] __arm64_sys_ioctl+0xa8/0xf0
[ 454.605160] invoke_syscall+0x48/0x114
[ 454.608869] el0_svc_common.constprop.0+0xd4/0xfc
[ 454.613527] do_el0_svc+0x28/0x90
[ 454.616803] el0_svc+0x34/0xb0
[ 454.619822] el0t_64_sync_handler+0xa4/0x130
[ 454.624049] el0t_64_sync+0x18c/0x190
[ 454.627675] Code: a9be7bfd 910003fd f9000bf3 52807ff3 (b9415001)
[ 454.633714] ---[ end trace 0000000000000000 ]---
In this particular case, Linux hasn't detected the presence of a hardware
PMU because the PMU node is missing from the DTB, so userspace would have
been unable to set the VCPU PMU feature even if it attempted it. What
happens is that the 32-bit guest reads ID_DFR0, which advertises the
presence of the PMU, and when it tries to program a counter, it triggers
the NULL pointer dereference because kvm->arch.arm_pmu is NULL.
kvm-arch.arm_pmu was introduced by commit 46b187821472 ("KVM: arm64:
Keep a per-VM pointer to the default PMU"). Until that commit, this
error would be triggered instead:
[ 73.388140] ------------[ cut here ]------------
[ 73.388189] Unknown PMU version 0
[ 73.390420] WARNING: CPU: 1 PID: 264 at arch/arm64/kvm/pmu-emul.c:36 kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.399821] Modules linked in:
[ 73.402835] CPU: 1 PID: 264 Comm: kvm-vcpu-0 Not tainted 5.17.0 #114
[ 73.409132] Hardware name: Hardkernel ODROID-C4 (DT)
[ 73.414048] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 73.420948] pc : kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.425863] lr : kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.430779] sp : ffff80000a8db9b0
[ 73.434055] x29: ffff80000a8db9b0 x28: ffff000000dbaac0 x27: 0000000000000000
[ 73.441131] x26: ffff000000dbaac0 x25: 00000000c600000d x24: 0000000000180720
[ 73.448203] x23: ffff800009ffbe10 x22: ffff00000b612000 x21: 0000000000000000
[ 73.455276] x20: 000000000000001f x19: 0000000000000000 x18: ffffffffffffffff
[ 73.462348] x17: 000000008003fe98 x16: 0000000000000000 x15: 0720072007200720
[ 73.469420] x14: 0720072007200720 x13: ffff800009d32488 x12: 00000000000004e6
[ 73.476493] x11: 00000000000001a2 x10: ffff800009d32488 x9 : ffff800009d32488
[ 73.483565] x8 : 00000000ffffefff x7 : ffff800009d8a488 x6 : ffff800009d8a488
[ 73.490638] x5 : ffff0000f461a9d8 x4 : 0000000000000000 x3 : 0000000000000001
[ 73.497710] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000000dbaac0
[ 73.504784] Call trace:
[ 73.507195] kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.511768] kvm_pmu_set_counter_event_type+0x2c/0x80
[ 73.516770] access_pmu_evtyper+0x128/0x16c
[ 73.520910] perform_access+0x34/0x80
[ 73.524532] kvm_handle_cp_32+0x13c/0x160
[ 73.528500] kvm_handle_cp15_32+0x1c/0x30
[ 73.532467] handle_exit+0x70/0x180
[ 73.535917] kvm_arch_vcpu_ioctl_run+0x20c/0x6e0
[ 73.540489] kvm_vcpu_ioctl+0x2b8/0x9e0
[ 73.544283] __arm64_sys_ioctl+0xa8/0xf0
[ 73.548165] invoke_syscall+0x48/0x114
[ 73.551874] el0_svc_common.constprop.0+0xd4/0xfc
[ 73.556531] do_el0_svc+0x28/0x90
[ 73.559808] el0_svc+0x28/0x80
[ 73.562826] el0t_64_sync_handler+0xa4/0x130
[ 73.567054] el0t_64_sync+0x1a0/0x1a4
[ 73.570676] ---[ end trace 0000000000000000 ]---
[ 73.575382] kvm: pmu event creation failed -2
The root cause remains the same: kvm->arch.pmuver was never set to
something sensible because the VCPU feature itself was never set.
The odroid-c4 is somewhat of a special case, because Linux doesn't probe
the PMU. But the above errors can easily be reproduced on any hardware,
with or without a PMU driver, as long as userspace doesn't set the PMU
feature.
Work around the fact that KVM advertises a PMU even when the VCPU feature
is not set by gating all PMU emulation on the feature. The guest can still
access the registers without KVM injecting an undefined exception.
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220425145530.723858-1-alexandru.elisei@arm.com
2022-04-25 17:55:30 +03:00
|
|
|
if (!kvm_vcpu_has_pmu(vcpu))
|
|
|
|
return;
|
|
|
|
|
2016-03-16 17:38:53 +03:00
|
|
|
if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
|
2015-09-08 07:26:13 +03:00
|
|
|
return;
|
|
|
|
|
|
|
|
for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
|
2022-11-13 19:38:32 +03:00
|
|
|
struct kvm_pmc *pmc;
|
|
|
|
|
2015-09-08 07:26:13 +03:00
|
|
|
if (!(val & BIT(i)))
|
|
|
|
continue;
|
|
|
|
|
2022-11-13 19:38:32 +03:00
|
|
|
pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
|
2019-06-17 22:01:05 +03:00
|
|
|
|
2022-11-13 19:38:18 +03:00
|
|
|
if (!pmc->perf_event) {
|
2022-11-13 19:38:32 +03:00
|
|
|
kvm_pmu_create_perf_event(pmc);
|
2022-11-13 19:38:18 +03:00
|
|
|
} else {
|
2015-09-08 07:26:13 +03:00
|
|
|
perf_event_enable(pmc->perf_event);
|
|
|
|
if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
|
|
|
|
kvm_debug("fail to enable perf event\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2019-06-17 22:01:01 +03:00
|
|
|
* kvm_pmu_disable_counter_mask - disable selected PMU counters
|
2015-09-08 07:26:13 +03:00
|
|
|
* @vcpu: The vcpu pointer
|
|
|
|
* @val: the value guest writes to PMCNTENCLR register
|
|
|
|
*
|
|
|
|
* Call perf_event_disable to stop counting the perf event
|
|
|
|
*/
|
2019-06-17 22:01:01 +03:00
|
|
|
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
|
2015-09-08 07:26:13 +03:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
KVM/arm64: Don't emulate a PMU for 32-bit guests if feature not set
kvm->arch.arm_pmu is set when userspace attempts to set the first PMU
attribute. As certain attributes are mandatory, arm_pmu ends up always
being set to a valid arm_pmu, otherwise KVM will refuse to run the VCPU.
However, this only happens if the VCPU has the PMU feature. If the VCPU
doesn't have the feature bit set, kvm->arch.arm_pmu will be left
uninitialized and equal to NULL.
KVM doesn't do ID register emulation for 32-bit guests and accesses to the
PMU registers aren't gated by the pmu_visibility() function. This is done
to prevent injecting unexpected undefined exceptions in guests which have
detected the presence of a hardware PMU. But even though the VCPU feature
is missing, KVM still attempts to emulate certain aspects of the PMU when
PMU registers are accessed. This leads to a NULL pointer dereference like
this one, which happens on an odroid-c4 board when running the
kvm-unit-tests pmu-cycle-counter test with kvmtool and without the PMU
feature being set:
[ 454.402699] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000150
[ 454.405865] Mem abort info:
[ 454.408596] ESR = 0x96000004
[ 454.411638] EC = 0x25: DABT (current EL), IL = 32 bits
[ 454.416901] SET = 0, FnV = 0
[ 454.419909] EA = 0, S1PTW = 0
[ 454.423010] FSC = 0x04: level 0 translation fault
[ 454.427841] Data abort info:
[ 454.430687] ISV = 0, ISS = 0x00000004
[ 454.434484] CM = 0, WnR = 0
[ 454.437404] user pgtable: 4k pages, 48-bit VAs, pgdp=000000000c924000
[ 454.443800] [0000000000000150] pgd=0000000000000000, p4d=0000000000000000
[ 454.450528] Internal error: Oops: 96000004 [#1] PREEMPT SMP
[ 454.456036] Modules linked in:
[ 454.459053] CPU: 1 PID: 267 Comm: kvm-vcpu-0 Not tainted 5.18.0-rc4 #113
[ 454.465697] Hardware name: Hardkernel ODROID-C4 (DT)
[ 454.470612] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 454.477512] pc : kvm_pmu_event_mask.isra.0+0x14/0x74
[ 454.482427] lr : kvm_pmu_set_counter_event_type+0x2c/0x80
[ 454.487775] sp : ffff80000a9839c0
[ 454.491050] x29: ffff80000a9839c0 x28: ffff000000a83a00 x27: 0000000000000000
[ 454.498127] x26: 0000000000000000 x25: 0000000000000000 x24: ffff00000a510000
[ 454.505198] x23: ffff000000a83a00 x22: ffff000003b01000 x21: 0000000000000000
[ 454.512271] x20: 000000000000001f x19: 00000000000003ff x18: 0000000000000000
[ 454.519343] x17: 000000008003fe98 x16: 0000000000000000 x15: 0000000000000000
[ 454.526416] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000
[ 454.533489] x11: 000000008003fdbc x10: 0000000000009d20 x9 : 000000000000001b
[ 454.540561] x8 : 0000000000000000 x7 : 0000000000000d00 x6 : 0000000000009d00
[ 454.547633] x5 : 0000000000000037 x4 : 0000000000009d00 x3 : 0d09000000000000
[ 454.554705] x2 : 000000000000001f x1 : 0000000000000000 x0 : 0000000000000000
[ 454.561779] Call trace:
[ 454.564191] kvm_pmu_event_mask.isra.0+0x14/0x74
[ 454.568764] kvm_pmu_set_counter_event_type+0x2c/0x80
[ 454.573766] access_pmu_evtyper+0x128/0x170
[ 454.577905] perform_access+0x34/0x80
[ 454.581527] kvm_handle_cp_32+0x13c/0x160
[ 454.585495] kvm_handle_cp15_32+0x1c/0x30
[ 454.589462] handle_exit+0x70/0x180
[ 454.592912] kvm_arch_vcpu_ioctl_run+0x1c4/0x5e0
[ 454.597485] kvm_vcpu_ioctl+0x23c/0x940
[ 454.601280] __arm64_sys_ioctl+0xa8/0xf0
[ 454.605160] invoke_syscall+0x48/0x114
[ 454.608869] el0_svc_common.constprop.0+0xd4/0xfc
[ 454.613527] do_el0_svc+0x28/0x90
[ 454.616803] el0_svc+0x34/0xb0
[ 454.619822] el0t_64_sync_handler+0xa4/0x130
[ 454.624049] el0t_64_sync+0x18c/0x190
[ 454.627675] Code: a9be7bfd 910003fd f9000bf3 52807ff3 (b9415001)
[ 454.633714] ---[ end trace 0000000000000000 ]---
In this particular case, Linux hasn't detected the presence of a hardware
PMU because the PMU node is missing from the DTB, so userspace would have
been unable to set the VCPU PMU feature even if it attempted it. What
happens is that the 32-bit guest reads ID_DFR0, which advertises the
presence of the PMU, and when it tries to program a counter, it triggers
the NULL pointer dereference because kvm->arch.arm_pmu is NULL.
kvm-arch.arm_pmu was introduced by commit 46b187821472 ("KVM: arm64:
Keep a per-VM pointer to the default PMU"). Until that commit, this
error would be triggered instead:
[ 73.388140] ------------[ cut here ]------------
[ 73.388189] Unknown PMU version 0
[ 73.390420] WARNING: CPU: 1 PID: 264 at arch/arm64/kvm/pmu-emul.c:36 kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.399821] Modules linked in:
[ 73.402835] CPU: 1 PID: 264 Comm: kvm-vcpu-0 Not tainted 5.17.0 #114
[ 73.409132] Hardware name: Hardkernel ODROID-C4 (DT)
[ 73.414048] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 73.420948] pc : kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.425863] lr : kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.430779] sp : ffff80000a8db9b0
[ 73.434055] x29: ffff80000a8db9b0 x28: ffff000000dbaac0 x27: 0000000000000000
[ 73.441131] x26: ffff000000dbaac0 x25: 00000000c600000d x24: 0000000000180720
[ 73.448203] x23: ffff800009ffbe10 x22: ffff00000b612000 x21: 0000000000000000
[ 73.455276] x20: 000000000000001f x19: 0000000000000000 x18: ffffffffffffffff
[ 73.462348] x17: 000000008003fe98 x16: 0000000000000000 x15: 0720072007200720
[ 73.469420] x14: 0720072007200720 x13: ffff800009d32488 x12: 00000000000004e6
[ 73.476493] x11: 00000000000001a2 x10: ffff800009d32488 x9 : ffff800009d32488
[ 73.483565] x8 : 00000000ffffefff x7 : ffff800009d8a488 x6 : ffff800009d8a488
[ 73.490638] x5 : ffff0000f461a9d8 x4 : 0000000000000000 x3 : 0000000000000001
[ 73.497710] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000000dbaac0
[ 73.504784] Call trace:
[ 73.507195] kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.511768] kvm_pmu_set_counter_event_type+0x2c/0x80
[ 73.516770] access_pmu_evtyper+0x128/0x16c
[ 73.520910] perform_access+0x34/0x80
[ 73.524532] kvm_handle_cp_32+0x13c/0x160
[ 73.528500] kvm_handle_cp15_32+0x1c/0x30
[ 73.532467] handle_exit+0x70/0x180
[ 73.535917] kvm_arch_vcpu_ioctl_run+0x20c/0x6e0
[ 73.540489] kvm_vcpu_ioctl+0x2b8/0x9e0
[ 73.544283] __arm64_sys_ioctl+0xa8/0xf0
[ 73.548165] invoke_syscall+0x48/0x114
[ 73.551874] el0_svc_common.constprop.0+0xd4/0xfc
[ 73.556531] do_el0_svc+0x28/0x90
[ 73.559808] el0_svc+0x28/0x80
[ 73.562826] el0t_64_sync_handler+0xa4/0x130
[ 73.567054] el0t_64_sync+0x1a0/0x1a4
[ 73.570676] ---[ end trace 0000000000000000 ]---
[ 73.575382] kvm: pmu event creation failed -2
The root cause remains the same: kvm->arch.pmuver was never set to
something sensible because the VCPU feature itself was never set.
The odroid-c4 is somewhat of a special case, because Linux doesn't probe
the PMU. But the above errors can easily be reproduced on any hardware,
with or without a PMU driver, as long as userspace doesn't set the PMU
feature.
Work around the fact that KVM advertises a PMU even when the VCPU feature
is not set by gating all PMU emulation on the feature. The guest can still
access the registers without KVM injecting an undefined exception.
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220425145530.723858-1-alexandru.elisei@arm.com
2022-04-25 17:55:30 +03:00
|
|
|
if (!kvm_vcpu_has_pmu(vcpu) || !val)
|
2015-09-08 07:26:13 +03:00
|
|
|
return;
|
|
|
|
|
|
|
|
for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
|
2022-11-13 19:38:32 +03:00
|
|
|
struct kvm_pmc *pmc;
|
|
|
|
|
2015-09-08 07:26:13 +03:00
|
|
|
if (!(val & BIT(i)))
|
|
|
|
continue;
|
|
|
|
|
2022-11-13 19:38:32 +03:00
|
|
|
pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
|
2019-06-17 22:01:05 +03:00
|
|
|
|
2015-09-08 07:26:13 +03:00
|
|
|
if (pmc->perf_event)
|
|
|
|
perf_event_disable(pmc->perf_event);
|
|
|
|
}
|
|
|
|
}
|
2015-07-03 09:27:25 +03:00
|
|
|
|
2015-09-08 10:03:26 +03:00
|
|
|
static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
u64 reg = 0;
|
|
|
|
|
2016-03-16 17:38:53 +03:00
|
|
|
if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
|
|
|
|
reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
|
|
|
|
reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
|
|
|
|
reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
|
2016-04-01 14:12:22 +03:00
|
|
|
}
|
2015-09-08 10:03:26 +03:00
|
|
|
|
|
|
|
return reg;
|
|
|
|
}
|
|
|
|
|
2017-07-01 19:26:54 +03:00
|
|
|
static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
|
2017-06-04 15:44:00 +03:00
|
|
|
{
|
|
|
|
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
2017-07-01 19:26:54 +03:00
|
|
|
bool overflow;
|
|
|
|
|
2020-11-13 19:41:40 +03:00
|
|
|
if (!kvm_vcpu_has_pmu(vcpu))
|
2017-07-01 19:26:54 +03:00
|
|
|
return;
|
2017-06-04 15:44:00 +03:00
|
|
|
|
2017-07-01 19:26:54 +03:00
|
|
|
overflow = !!kvm_pmu_overflow_status(vcpu);
|
2017-06-04 15:44:00 +03:00
|
|
|
if (pmu->irq_level == overflow)
|
|
|
|
return;
|
|
|
|
|
|
|
|
pmu->irq_level = overflow;
|
|
|
|
|
|
|
|
if (likely(irqchip_in_kernel(vcpu->kvm))) {
|
|
|
|
int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
|
2017-07-01 19:26:54 +03:00
|
|
|
pmu->irq_num, overflow, pmu);
|
2017-06-04 15:44:00 +03:00
|
|
|
WARN_ON(ret);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-02-01 14:51:52 +03:00
|
|
|
bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
struct kvm_pmu *pmu = &vcpu->arch.pmu;
|
|
|
|
struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
|
|
|
|
bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;
|
|
|
|
|
|
|
|
if (likely(irqchip_in_kernel(vcpu->kvm)))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return pmu->irq_level != run_level;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Reflect the PMU overflow interrupt output level into the kvm_run structure
|
|
|
|
*/
|
|
|
|
void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
struct kvm_sync_regs *regs = &vcpu->run->s.regs;
|
|
|
|
|
|
|
|
/* Populate the timer bitmap for user space */
|
|
|
|
regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
|
|
|
|
if (vcpu->arch.pmu.irq_level)
|
|
|
|
regs->device_irq_level |= KVM_ARM_DEV_PMU;
|
|
|
|
}
|
|
|
|
|
2016-02-26 14:29:19 +03:00
|
|
|
/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Re-evaluate the PMU overflow state, so that an overflow which happened
 * while we were running in the host results in an interrupt being injected.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}
|
|
|
|
|
|
|
|
/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Re-evaluate the PMU overflow state, so that an overflow which happened
 * while we were running in the guest results in an interrupt being injected.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}
|
|
|
|
|
2020-09-24 14:07:04 +03:00
|
|
|
/**
|
|
|
|
* When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding
|
|
|
|
* to the event.
|
|
|
|
* This is why we need a callback to do it once outside of the NMI context.
|
|
|
|
*/
|
|
|
|
static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
|
|
|
|
{
|
|
|
|
struct kvm_vcpu *vcpu;
|
|
|
|
|
2022-11-13 19:38:31 +03:00
|
|
|
vcpu = container_of(work, struct kvm_vcpu, arch.pmu.overflow_work);
|
2020-09-24 14:07:04 +03:00
|
|
|
kvm_vcpu_kick(vcpu);
|
|
|
|
}
|
|
|
|
|
2022-11-13 19:38:18 +03:00
|
|
|
/*
|
|
|
|
* Perform an increment on any of the counters described in @mask,
|
|
|
|
* generating the overflow if required, and propagate it as a chained
|
|
|
|
* event if possible.
|
|
|
|
*/
|
|
|
|
static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
|
|
|
|
unsigned long mask, u32 event)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* Weed out disabled counters */
|
|
|
|
mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
|
|
|
|
|
|
|
|
for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) {
|
2022-11-13 19:38:32 +03:00
|
|
|
struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
|
2022-11-13 19:38:18 +03:00
|
|
|
u64 type, reg;
|
|
|
|
|
|
|
|
/* Filter on event type */
|
2022-11-13 19:38:23 +03:00
|
|
|
type = __vcpu_sys_reg(vcpu, counter_index_to_evtreg(i));
|
2022-11-13 19:38:18 +03:00
|
|
|
type &= kvm_pmu_event_mask(vcpu->kvm);
|
|
|
|
if (type != event)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* Increment this counter */
|
2022-11-13 19:38:23 +03:00
|
|
|
reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1;
|
2022-11-13 19:38:32 +03:00
|
|
|
if (!kvm_pmc_is_64bit(pmc))
|
2022-11-13 19:38:22 +03:00
|
|
|
reg = lower_32_bits(reg);
|
2022-11-13 19:38:23 +03:00
|
|
|
__vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg;
|
2022-11-13 19:38:18 +03:00
|
|
|
|
2022-11-13 19:38:21 +03:00
|
|
|
/* No overflow? move on */
|
2022-11-13 19:38:32 +03:00
|
|
|
if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg))
|
2022-11-13 19:38:18 +03:00
|
|
|
continue;
|
|
|
|
|
|
|
|
/* Mark overflow */
|
|
|
|
__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
|
|
|
|
|
2022-11-13 19:38:32 +03:00
|
|
|
if (kvm_pmu_counter_can_chain(pmc))
|
2022-11-13 19:38:18 +03:00
|
|
|
kvm_pmu_counter_increment(vcpu, BIT(i + 1),
|
|
|
|
ARMV8_PMUV3_PERFCTR_CHAIN);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-13 19:38:20 +03:00
|
|
|
/* Compute the sample period for a given counter value */
|
2022-11-13 19:38:32 +03:00
|
|
|
static u64 compute_period(struct kvm_pmc *pmc, u64 counter)
|
2022-11-13 19:38:20 +03:00
|
|
|
{
|
|
|
|
u64 val;
|
|
|
|
|
2022-12-05 15:05:51 +03:00
|
|
|
if (kvm_pmc_is_64bit(pmc) && kvm_pmc_has_64bit_overflow(pmc))
|
|
|
|
val = (-counter) & GENMASK(63, 0);
|
|
|
|
else
|
2022-11-13 19:38:20 +03:00
|
|
|
val = (-counter) & GENMASK(31, 0);
|
|
|
|
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
2016-02-26 14:29:19 +03:00
|
|
|
/**
|
2017-07-01 19:26:54 +03:00
|
|
|
* When the perf event overflows, set the overflow status and inform the vcpu.
|
2016-02-26 14:29:19 +03:00
|
|
|
*/
|
|
|
|
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
|
|
|
|
struct perf_sample_data *data,
|
|
|
|
struct pt_regs *regs)
|
|
|
|
{
|
|
|
|
struct kvm_pmc *pmc = perf_event->overflow_handler_context;
|
2019-10-06 12:28:50 +03:00
|
|
|
struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
|
2016-02-26 14:29:19 +03:00
|
|
|
struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
|
|
|
|
int idx = pmc->idx;
|
2019-10-06 12:28:50 +03:00
|
|
|
u64 period;
|
|
|
|
|
|
|
|
cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Reset the sample period to the architectural limit,
|
|
|
|
* i.e. the point where the counter overflows.
|
|
|
|
*/
|
2022-11-13 19:38:32 +03:00
|
|
|
period = compute_period(pmc, local64_read(&perf_event->count));
|
2019-10-06 12:28:50 +03:00
|
|
|
|
|
|
|
local64_set(&perf_event->hw.period_left, 0);
|
|
|
|
perf_event->attr.sample_period = period;
|
|
|
|
perf_event->hw.sample_period = period;
|
2016-02-26 14:29:19 +03:00
|
|
|
|
2016-03-16 17:38:53 +03:00
|
|
|
__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
|
2017-07-01 19:26:54 +03:00
|
|
|
|
2022-11-13 19:38:32 +03:00
|
|
|
if (kvm_pmu_counter_can_chain(pmc))
|
2022-11-13 19:38:18 +03:00
|
|
|
kvm_pmu_counter_increment(vcpu, BIT(idx + 1),
|
|
|
|
ARMV8_PMUV3_PERFCTR_CHAIN);
|
|
|
|
|
2017-07-01 19:26:54 +03:00
|
|
|
if (kvm_pmu_overflow_status(vcpu)) {
|
|
|
|
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
|
2020-09-24 14:07:04 +03:00
|
|
|
|
|
|
|
if (!in_nmi())
|
|
|
|
kvm_vcpu_kick(vcpu);
|
|
|
|
else
|
|
|
|
irq_work_queue(&vcpu->arch.pmu.overflow_work);
|
2017-07-01 19:26:54 +03:00
|
|
|
}
|
2019-10-06 12:28:50 +03:00
|
|
|
|
|
|
|
cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
|
2016-02-26 14:29:19 +03:00
|
|
|
}
|
|
|
|
|
2015-09-08 10:49:39 +03:00
|
|
|
/**
|
|
|
|
* kvm_pmu_software_increment - do software increment
|
|
|
|
* @vcpu: The vcpu pointer
|
|
|
|
* @val: the value guest writes to PMSWINC register
|
|
|
|
*/
|
|
|
|
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
|
|
|
|
{
|
2022-11-13 19:38:18 +03:00
|
|
|
kvm_pmu_counter_increment(vcpu, val, ARMV8_PMUV3_PERFCTR_SW_INCR);
|
2015-09-08 10:49:39 +03:00
|
|
|
}
|
|
|
|
|
2015-10-28 07:10:30 +03:00
|
|
|
/**
|
|
|
|
* kvm_pmu_handle_pmcr - handle PMCR register
|
|
|
|
* @vcpu: The vcpu pointer
|
|
|
|
* @val: the value guest writes to PMCR register
|
|
|
|
*/
|
|
|
|
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
KVM/arm64: Don't emulate a PMU for 32-bit guests if feature not set
kvm->arch.arm_pmu is set when userspace attempts to set the first PMU
attribute. As certain attributes are mandatory, arm_pmu ends up always
being set to a valid arm_pmu, otherwise KVM will refuse to run the VCPU.
However, this only happens if the VCPU has the PMU feature. If the VCPU
doesn't have the feature bit set, kvm->arch.arm_pmu will be left
uninitialized and equal to NULL.
KVM doesn't do ID register emulation for 32-bit guests and accesses to the
PMU registers aren't gated by the pmu_visibility() function. This is done
to prevent injecting unexpected undefined exceptions in guests which have
detected the presence of a hardware PMU. But even though the VCPU feature
is missing, KVM still attempts to emulate certain aspects of the PMU when
PMU registers are accessed. This leads to a NULL pointer dereference like
this one, which happens on an odroid-c4 board when running the
kvm-unit-tests pmu-cycle-counter test with kvmtool and without the PMU
feature being set:
[ 454.402699] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000150
[ 454.405865] Mem abort info:
[ 454.408596] ESR = 0x96000004
[ 454.411638] EC = 0x25: DABT (current EL), IL = 32 bits
[ 454.416901] SET = 0, FnV = 0
[ 454.419909] EA = 0, S1PTW = 0
[ 454.423010] FSC = 0x04: level 0 translation fault
[ 454.427841] Data abort info:
[ 454.430687] ISV = 0, ISS = 0x00000004
[ 454.434484] CM = 0, WnR = 0
[ 454.437404] user pgtable: 4k pages, 48-bit VAs, pgdp=000000000c924000
[ 454.443800] [0000000000000150] pgd=0000000000000000, p4d=0000000000000000
[ 454.450528] Internal error: Oops: 96000004 [#1] PREEMPT SMP
[ 454.456036] Modules linked in:
[ 454.459053] CPU: 1 PID: 267 Comm: kvm-vcpu-0 Not tainted 5.18.0-rc4 #113
[ 454.465697] Hardware name: Hardkernel ODROID-C4 (DT)
[ 454.470612] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 454.477512] pc : kvm_pmu_event_mask.isra.0+0x14/0x74
[ 454.482427] lr : kvm_pmu_set_counter_event_type+0x2c/0x80
[ 454.487775] sp : ffff80000a9839c0
[ 454.491050] x29: ffff80000a9839c0 x28: ffff000000a83a00 x27: 0000000000000000
[ 454.498127] x26: 0000000000000000 x25: 0000000000000000 x24: ffff00000a510000
[ 454.505198] x23: ffff000000a83a00 x22: ffff000003b01000 x21: 0000000000000000
[ 454.512271] x20: 000000000000001f x19: 00000000000003ff x18: 0000000000000000
[ 454.519343] x17: 000000008003fe98 x16: 0000000000000000 x15: 0000000000000000
[ 454.526416] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000
[ 454.533489] x11: 000000008003fdbc x10: 0000000000009d20 x9 : 000000000000001b
[ 454.540561] x8 : 0000000000000000 x7 : 0000000000000d00 x6 : 0000000000009d00
[ 454.547633] x5 : 0000000000000037 x4 : 0000000000009d00 x3 : 0d09000000000000
[ 454.554705] x2 : 000000000000001f x1 : 0000000000000000 x0 : 0000000000000000
[ 454.561779] Call trace:
[ 454.564191] kvm_pmu_event_mask.isra.0+0x14/0x74
[ 454.568764] kvm_pmu_set_counter_event_type+0x2c/0x80
[ 454.573766] access_pmu_evtyper+0x128/0x170
[ 454.577905] perform_access+0x34/0x80
[ 454.581527] kvm_handle_cp_32+0x13c/0x160
[ 454.585495] kvm_handle_cp15_32+0x1c/0x30
[ 454.589462] handle_exit+0x70/0x180
[ 454.592912] kvm_arch_vcpu_ioctl_run+0x1c4/0x5e0
[ 454.597485] kvm_vcpu_ioctl+0x23c/0x940
[ 454.601280] __arm64_sys_ioctl+0xa8/0xf0
[ 454.605160] invoke_syscall+0x48/0x114
[ 454.608869] el0_svc_common.constprop.0+0xd4/0xfc
[ 454.613527] do_el0_svc+0x28/0x90
[ 454.616803] el0_svc+0x34/0xb0
[ 454.619822] el0t_64_sync_handler+0xa4/0x130
[ 454.624049] el0t_64_sync+0x18c/0x190
[ 454.627675] Code: a9be7bfd 910003fd f9000bf3 52807ff3 (b9415001)
[ 454.633714] ---[ end trace 0000000000000000 ]---
In this particular case, Linux hasn't detected the presence of a hardware
PMU because the PMU node is missing from the DTB, so userspace would have
been unable to set the VCPU PMU feature even if it attempted it. What
happens is that the 32-bit guest reads ID_DFR0, which advertises the
presence of the PMU, and when it tries to program a counter, it triggers
the NULL pointer dereference because kvm->arch.arm_pmu is NULL.
kvm-arch.arm_pmu was introduced by commit 46b187821472 ("KVM: arm64:
Keep a per-VM pointer to the default PMU"). Until that commit, this
error would be triggered instead:
[ 73.388140] ------------[ cut here ]------------
[ 73.388189] Unknown PMU version 0
[ 73.390420] WARNING: CPU: 1 PID: 264 at arch/arm64/kvm/pmu-emul.c:36 kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.399821] Modules linked in:
[ 73.402835] CPU: 1 PID: 264 Comm: kvm-vcpu-0 Not tainted 5.17.0 #114
[ 73.409132] Hardware name: Hardkernel ODROID-C4 (DT)
[ 73.414048] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 73.420948] pc : kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.425863] lr : kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.430779] sp : ffff80000a8db9b0
[ 73.434055] x29: ffff80000a8db9b0 x28: ffff000000dbaac0 x27: 0000000000000000
[ 73.441131] x26: ffff000000dbaac0 x25: 00000000c600000d x24: 0000000000180720
[ 73.448203] x23: ffff800009ffbe10 x22: ffff00000b612000 x21: 0000000000000000
[ 73.455276] x20: 000000000000001f x19: 0000000000000000 x18: ffffffffffffffff
[ 73.462348] x17: 000000008003fe98 x16: 0000000000000000 x15: 0720072007200720
[ 73.469420] x14: 0720072007200720 x13: ffff800009d32488 x12: 00000000000004e6
[ 73.476493] x11: 00000000000001a2 x10: ffff800009d32488 x9 : ffff800009d32488
[ 73.483565] x8 : 00000000ffffefff x7 : ffff800009d8a488 x6 : ffff800009d8a488
[ 73.490638] x5 : ffff0000f461a9d8 x4 : 0000000000000000 x3 : 0000000000000001
[ 73.497710] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000000dbaac0
[ 73.504784] Call trace:
[ 73.507195] kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.511768] kvm_pmu_set_counter_event_type+0x2c/0x80
[ 73.516770] access_pmu_evtyper+0x128/0x16c
[ 73.520910] perform_access+0x34/0x80
[ 73.524532] kvm_handle_cp_32+0x13c/0x160
[ 73.528500] kvm_handle_cp15_32+0x1c/0x30
[ 73.532467] handle_exit+0x70/0x180
[ 73.535917] kvm_arch_vcpu_ioctl_run+0x20c/0x6e0
[ 73.540489] kvm_vcpu_ioctl+0x2b8/0x9e0
[ 73.544283] __arm64_sys_ioctl+0xa8/0xf0
[ 73.548165] invoke_syscall+0x48/0x114
[ 73.551874] el0_svc_common.constprop.0+0xd4/0xfc
[ 73.556531] do_el0_svc+0x28/0x90
[ 73.559808] el0_svc+0x28/0x80
[ 73.562826] el0t_64_sync_handler+0xa4/0x130
[ 73.567054] el0t_64_sync+0x1a0/0x1a4
[ 73.570676] ---[ end trace 0000000000000000 ]---
[ 73.575382] kvm: pmu event creation failed -2
The root cause remains the same: kvm->arch.pmuver was never set to
something sensible because the VCPU feature itself was never set.
The odroid-c4 is somewhat of a special case, because Linux doesn't probe
the PMU. But the above errors can easily be reproduced on any hardware,
with or without a PMU driver, as long as userspace doesn't set the PMU
feature.
Work around the fact that KVM advertises a PMU even when the VCPU feature
is not set by gating all PMU emulation on the feature. The guest can still
access the registers without KVM injecting an undefined exception.
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220425145530.723858-1-alexandru.elisei@arm.com
2022-04-25 17:55:30 +03:00
|
|
|
if (!kvm_vcpu_has_pmu(vcpu))
|
|
|
|
return;
|
|
|
|
|
2022-11-24 13:44:59 +03:00
|
|
|
/* Fixup PMCR_EL0 to reconcile the PMU version and the LP bit */
|
|
|
|
if (!kvm_pmu_is_3p5(vcpu))
|
|
|
|
val &= ~ARMV8_PMU_PMCR_LP;
|
|
|
|
|
2023-03-13 06:32:34 +03:00
|
|
|
/* The reset bits don't indicate any state, and shouldn't be saved. */
|
|
|
|
__vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P);
|
2022-11-24 13:44:59 +03:00
|
|
|
|
2015-10-28 07:10:30 +03:00
|
|
|
if (val & ARMV8_PMU_PMCR_E) {
|
2019-06-17 22:01:01 +03:00
|
|
|
kvm_pmu_enable_counter_mask(vcpu,
|
2021-07-19 15:39:00 +03:00
|
|
|
__vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
|
2015-10-28 07:10:30 +03:00
|
|
|
} else {
|
2021-07-19 15:39:01 +03:00
|
|
|
kvm_pmu_disable_counter_mask(vcpu,
|
|
|
|
__vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
|
2015-10-28 07:10:30 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if (val & ARMV8_PMU_PMCR_C)
|
|
|
|
kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
|
|
|
|
|
|
|
|
if (val & ARMV8_PMU_PMCR_P) {
|
2021-07-19 15:39:01 +03:00
|
|
|
unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
|
2021-06-18 13:51:39 +03:00
|
|
|
mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
|
2020-01-24 17:25:35 +03:00
|
|
|
for_each_set_bit(i, &mask, 32)
|
2022-11-13 19:38:32 +03:00
|
|
|
kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true);
|
2015-10-28 07:10:30 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-13 19:38:32 +03:00
|
|
|
static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc)
|
2015-07-03 09:27:25 +03:00
|
|
|
{
|
2022-11-13 19:38:32 +03:00
|
|
|
struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
|
2016-03-16 17:38:53 +03:00
|
|
|
return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
|
2022-11-13 19:38:32 +03:00
|
|
|
(__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(pmc->idx));
|
2015-07-03 09:27:25 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2019-06-17 22:01:03 +03:00
|
|
|
* kvm_pmu_create_perf_event - create a perf event for a counter
|
2022-11-13 19:38:32 +03:00
|
|
|
* @pmc: Counter context
|
2015-07-03 09:27:25 +03:00
|
|
|
*/
|
2022-11-13 19:38:32 +03:00
|
|
|
static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc)
|
2015-07-03 09:27:25 +03:00
|
|
|
{
|
2022-11-13 19:38:32 +03:00
|
|
|
struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
|
2022-01-27 19:17:56 +03:00
|
|
|
struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
|
2015-07-03 09:27:25 +03:00
|
|
|
struct perf_event *event;
|
|
|
|
struct perf_event_attr attr;
|
2022-11-13 19:38:32 +03:00
|
|
|
u64 eventsel, reg, data;
|
2019-06-17 22:01:03 +03:00
|
|
|
|
2022-11-13 19:38:32 +03:00
|
|
|
reg = counter_index_to_evtreg(pmc->idx);
|
2019-06-17 22:01:03 +03:00
|
|
|
data = __vcpu_sys_reg(vcpu, reg);
|
2015-07-03 09:27:25 +03:00
|
|
|
|
2022-11-13 19:38:32 +03:00
|
|
|
kvm_pmu_stop_counter(pmc);
|
2020-02-12 14:31:02 +03:00
|
|
|
if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
|
|
|
|
eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
|
|
|
|
else
|
|
|
|
eventsel = data & kvm_pmu_event_mask(vcpu->kvm);
|
|
|
|
|
2022-11-13 19:38:18 +03:00
|
|
|
/*
|
|
|
|
* Neither SW increment nor chained events need to be backed
|
|
|
|
* by a perf event.
|
|
|
|
*/
|
|
|
|
if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR ||
|
|
|
|
eventsel == ARMV8_PMUV3_PERFCTR_CHAIN)
|
2020-02-12 14:31:02 +03:00
|
|
|
return;
|
2015-07-03 09:27:25 +03:00
|
|
|
|
2020-02-12 14:31:02 +03:00
|
|
|
/*
|
|
|
|
* If we have a filter in place and that the event isn't allowed, do
|
|
|
|
* not install a perf event either.
|
|
|
|
*/
|
|
|
|
if (vcpu->kvm->arch.pmu_filter &&
|
|
|
|
!test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
|
2015-09-08 10:49:39 +03:00
|
|
|
return;
|
|
|
|
|
2015-07-03 09:27:25 +03:00
|
|
|
memset(&attr, 0, sizeof(struct perf_event_attr));
|
2022-01-27 19:17:56 +03:00
|
|
|
attr.type = arm_pmu->pmu.type;
|
2015-07-03 09:27:25 +03:00
|
|
|
attr.size = sizeof(attr);
|
|
|
|
attr.pinned = 1;
|
2022-11-13 19:38:32 +03:00
|
|
|
attr.disabled = !kvm_pmu_counter_is_enabled(pmc);
|
2015-07-03 09:27:25 +03:00
|
|
|
attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
|
|
|
|
attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
|
|
|
|
attr.exclude_hv = 1; /* Don't count EL2 events */
|
|
|
|
attr.exclude_host = 1; /* Don't count host events */
|
2020-02-12 14:31:02 +03:00
|
|
|
attr.config = eventsel;
|
2015-07-03 09:27:25 +03:00
|
|
|
|
2022-11-13 19:38:18 +03:00
|
|
|
/*
|
|
|
|
* If counting with a 64bit counter, advertise it to the perf
|
2022-11-13 19:38:20 +03:00
|
|
|
* code, carefully dealing with the initial sample period
|
|
|
|
* which also depends on the overflow.
|
2022-11-13 19:38:18 +03:00
|
|
|
*/
|
2022-11-13 19:38:32 +03:00
|
|
|
if (kvm_pmc_is_64bit(pmc))
|
2022-11-13 19:38:18 +03:00
|
|
|
attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT;
|
2022-11-13 19:38:20 +03:00
|
|
|
|
2022-11-13 19:38:32 +03:00
|
|
|
attr.sample_period = compute_period(pmc, kvm_pmu_get_pmc_value(pmc));
|
2015-07-03 09:27:25 +03:00
|
|
|
|
2022-11-13 19:38:18 +03:00
|
|
|
event = perf_event_create_kernel_counter(&attr, -1, current,
|
2016-02-26 14:29:19 +03:00
|
|
|
kvm_pmu_perf_overflow, pmc);
|
2019-06-17 22:01:05 +03:00
|
|
|
|
2015-07-03 09:27:25 +03:00
|
|
|
if (IS_ERR(event)) {
|
|
|
|
pr_err_once("kvm: pmu event creation failed %ld\n",
|
|
|
|
PTR_ERR(event));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
pmc->perf_event = event;
|
|
|
|
}
|
2016-01-11 17:46:15 +03:00
|
|
|
|
2019-06-17 22:01:03 +03:00
|
|
|
/**
|
|
|
|
* kvm_pmu_set_counter_event_type - set selected counter to monitor some event
|
|
|
|
* @vcpu: The vcpu pointer
|
|
|
|
* @data: The data guest writes to PMXEVTYPER_EL0
|
|
|
|
* @select_idx: The number of selected counter
|
|
|
|
*
|
|
|
|
* When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
|
|
|
|
* event with given hardware event number. Here we call perf_event API to
|
|
|
|
* emulate this action and create a kernel perf event for it.
|
|
|
|
*/
|
|
|
|
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
|
|
|
|
u64 select_idx)
|
|
|
|
{
|
2022-11-13 19:38:32 +03:00
|
|
|
struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, select_idx);
|
2020-03-17 14:11:56 +03:00
|
|
|
u64 reg, mask;
|
|
|
|
|
KVM/arm64: Don't emulate a PMU for 32-bit guests if feature not set
kvm->arch.arm_pmu is set when userspace attempts to set the first PMU
attribute. As certain attributes are mandatory, arm_pmu ends up always
being set to a valid arm_pmu, otherwise KVM will refuse to run the VCPU.
However, this only happens if the VCPU has the PMU feature. If the VCPU
doesn't have the feature bit set, kvm->arch.arm_pmu will be left
uninitialized and equal to NULL.
KVM doesn't do ID register emulation for 32-bit guests and accesses to the
PMU registers aren't gated by the pmu_visibility() function. This is done
to prevent injecting unexpected undefined exceptions in guests which have
detected the presence of a hardware PMU. But even though the VCPU feature
is missing, KVM still attempts to emulate certain aspects of the PMU when
PMU registers are accessed. This leads to a NULL pointer dereference like
this one, which happens on an odroid-c4 board when running the
kvm-unit-tests pmu-cycle-counter test with kvmtool and without the PMU
feature being set:
[ 454.402699] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000150
[ 454.405865] Mem abort info:
[ 454.408596] ESR = 0x96000004
[ 454.411638] EC = 0x25: DABT (current EL), IL = 32 bits
[ 454.416901] SET = 0, FnV = 0
[ 454.419909] EA = 0, S1PTW = 0
[ 454.423010] FSC = 0x04: level 0 translation fault
[ 454.427841] Data abort info:
[ 454.430687] ISV = 0, ISS = 0x00000004
[ 454.434484] CM = 0, WnR = 0
[ 454.437404] user pgtable: 4k pages, 48-bit VAs, pgdp=000000000c924000
[ 454.443800] [0000000000000150] pgd=0000000000000000, p4d=0000000000000000
[ 454.450528] Internal error: Oops: 96000004 [#1] PREEMPT SMP
[ 454.456036] Modules linked in:
[ 454.459053] CPU: 1 PID: 267 Comm: kvm-vcpu-0 Not tainted 5.18.0-rc4 #113
[ 454.465697] Hardware name: Hardkernel ODROID-C4 (DT)
[ 454.470612] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 454.477512] pc : kvm_pmu_event_mask.isra.0+0x14/0x74
[ 454.482427] lr : kvm_pmu_set_counter_event_type+0x2c/0x80
[ 454.487775] sp : ffff80000a9839c0
[ 454.491050] x29: ffff80000a9839c0 x28: ffff000000a83a00 x27: 0000000000000000
[ 454.498127] x26: 0000000000000000 x25: 0000000000000000 x24: ffff00000a510000
[ 454.505198] x23: ffff000000a83a00 x22: ffff000003b01000 x21: 0000000000000000
[ 454.512271] x20: 000000000000001f x19: 00000000000003ff x18: 0000000000000000
[ 454.519343] x17: 000000008003fe98 x16: 0000000000000000 x15: 0000000000000000
[ 454.526416] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000
[ 454.533489] x11: 000000008003fdbc x10: 0000000000009d20 x9 : 000000000000001b
[ 454.540561] x8 : 0000000000000000 x7 : 0000000000000d00 x6 : 0000000000009d00
[ 454.547633] x5 : 0000000000000037 x4 : 0000000000009d00 x3 : 0d09000000000000
[ 454.554705] x2 : 000000000000001f x1 : 0000000000000000 x0 : 0000000000000000
[ 454.561779] Call trace:
[ 454.564191] kvm_pmu_event_mask.isra.0+0x14/0x74
[ 454.568764] kvm_pmu_set_counter_event_type+0x2c/0x80
[ 454.573766] access_pmu_evtyper+0x128/0x170
[ 454.577905] perform_access+0x34/0x80
[ 454.581527] kvm_handle_cp_32+0x13c/0x160
[ 454.585495] kvm_handle_cp15_32+0x1c/0x30
[ 454.589462] handle_exit+0x70/0x180
[ 454.592912] kvm_arch_vcpu_ioctl_run+0x1c4/0x5e0
[ 454.597485] kvm_vcpu_ioctl+0x23c/0x940
[ 454.601280] __arm64_sys_ioctl+0xa8/0xf0
[ 454.605160] invoke_syscall+0x48/0x114
[ 454.608869] el0_svc_common.constprop.0+0xd4/0xfc
[ 454.613527] do_el0_svc+0x28/0x90
[ 454.616803] el0_svc+0x34/0xb0
[ 454.619822] el0t_64_sync_handler+0xa4/0x130
[ 454.624049] el0t_64_sync+0x18c/0x190
[ 454.627675] Code: a9be7bfd 910003fd f9000bf3 52807ff3 (b9415001)
[ 454.633714] ---[ end trace 0000000000000000 ]---
In this particular case, Linux hasn't detected the presence of a hardware
PMU because the PMU node is missing from the DTB, so userspace would have
been unable to set the VCPU PMU feature even if it attempted it. What
happens is that the 32-bit guest reads ID_DFR0, which advertises the
presence of the PMU, and when it tries to program a counter, it triggers
the NULL pointer dereference because kvm->arch.arm_pmu is NULL.
kvm-arch.arm_pmu was introduced by commit 46b187821472 ("KVM: arm64:
Keep a per-VM pointer to the default PMU"). Until that commit, this
error would be triggered instead:
[ 73.388140] ------------[ cut here ]------------
[ 73.388189] Unknown PMU version 0
[ 73.390420] WARNING: CPU: 1 PID: 264 at arch/arm64/kvm/pmu-emul.c:36 kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.399821] Modules linked in:
[ 73.402835] CPU: 1 PID: 264 Comm: kvm-vcpu-0 Not tainted 5.17.0 #114
[ 73.409132] Hardware name: Hardkernel ODROID-C4 (DT)
[ 73.414048] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 73.420948] pc : kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.425863] lr : kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.430779] sp : ffff80000a8db9b0
[ 73.434055] x29: ffff80000a8db9b0 x28: ffff000000dbaac0 x27: 0000000000000000
[ 73.441131] x26: ffff000000dbaac0 x25: 00000000c600000d x24: 0000000000180720
[ 73.448203] x23: ffff800009ffbe10 x22: ffff00000b612000 x21: 0000000000000000
[ 73.455276] x20: 000000000000001f x19: 0000000000000000 x18: ffffffffffffffff
[ 73.462348] x17: 000000008003fe98 x16: 0000000000000000 x15: 0720072007200720
[ 73.469420] x14: 0720072007200720 x13: ffff800009d32488 x12: 00000000000004e6
[ 73.476493] x11: 00000000000001a2 x10: ffff800009d32488 x9 : ffff800009d32488
[ 73.483565] x8 : 00000000ffffefff x7 : ffff800009d8a488 x6 : ffff800009d8a488
[ 73.490638] x5 : ffff0000f461a9d8 x4 : 0000000000000000 x3 : 0000000000000001
[ 73.497710] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff000000dbaac0
[ 73.504784] Call trace:
[ 73.507195] kvm_pmu_event_mask.isra.0+0x6c/0x74
[ 73.511768] kvm_pmu_set_counter_event_type+0x2c/0x80
[ 73.516770] access_pmu_evtyper+0x128/0x16c
[ 73.520910] perform_access+0x34/0x80
[ 73.524532] kvm_handle_cp_32+0x13c/0x160
[ 73.528500] kvm_handle_cp15_32+0x1c/0x30
[ 73.532467] handle_exit+0x70/0x180
[ 73.535917] kvm_arch_vcpu_ioctl_run+0x20c/0x6e0
[ 73.540489] kvm_vcpu_ioctl+0x2b8/0x9e0
[ 73.544283] __arm64_sys_ioctl+0xa8/0xf0
[ 73.548165] invoke_syscall+0x48/0x114
[ 73.551874] el0_svc_common.constprop.0+0xd4/0xfc
[ 73.556531] do_el0_svc+0x28/0x90
[ 73.559808] el0_svc+0x28/0x80
[ 73.562826] el0t_64_sync_handler+0xa4/0x130
[ 73.567054] el0t_64_sync+0x1a0/0x1a4
[ 73.570676] ---[ end trace 0000000000000000 ]---
[ 73.575382] kvm: pmu event creation failed -2
The root cause remains the same: kvm->arch.pmuver was never set to
something sensible because the VCPU feature itself was never set.
The odroid-c4 is somewhat of a special case, because Linux doesn't probe
the PMU. But the above errors can easily be reproduced on any hardware,
with or without a PMU driver, as long as userspace doesn't set the PMU
feature.
Work around the fact that KVM advertises a PMU even when the VCPU feature
is not set by gating all PMU emulation on the feature. The guest can still
access the registers without KVM injecting an undefined exception.
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220425145530.723858-1-alexandru.elisei@arm.com
2022-04-25 17:55:30 +03:00
|
|
|
if (!kvm_vcpu_has_pmu(vcpu))
|
|
|
|
return;
|
|
|
|
|
2020-03-17 14:11:56 +03:00
|
|
|
mask = ARMV8_PMU_EVTYPE_MASK;
|
|
|
|
mask &= ~ARMV8_PMU_EVTYPE_EVENT;
|
|
|
|
mask |= kvm_pmu_event_mask(vcpu->kvm);
|
2019-06-17 22:01:03 +03:00
|
|
|
|
2022-11-13 19:38:32 +03:00
|
|
|
reg = counter_index_to_evtreg(pmc->idx);
|
2019-06-17 22:01:03 +03:00
|
|
|
|
2020-03-17 14:11:56 +03:00
|
|
|
__vcpu_sys_reg(vcpu, reg) = data & mask;
|
2019-06-17 22:01:05 +03:00
|
|
|
|
2022-11-13 19:38:32 +03:00
|
|
|
kvm_pmu_create_perf_event(pmc);
|
2019-06-17 22:01:03 +03:00
|
|
|
}
|
|
|
|
|
2021-09-19 16:09:49 +03:00
|
|
|
void kvm_host_pmu_init(struct arm_pmu *pmu)
|
|
|
|
{
|
2022-01-27 19:17:57 +03:00
|
|
|
struct arm_pmu_entry *entry;
|
|
|
|
|
2022-11-28 16:56:29 +03:00
|
|
|
if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
|
|
|
|
pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
|
2022-01-27 19:17:57 +03:00
|
|
|
return;
|
|
|
|
|
|
|
|
mutex_lock(&arm_pmus_lock);
|
|
|
|
|
|
|
|
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
|
|
|
|
if (!entry)
|
|
|
|
goto out_unlock;
|
|
|
|
|
|
|
|
entry->arm_pmu = pmu;
|
|
|
|
list_add_tail(&entry->entry, &arm_pmus);
|
|
|
|
|
|
|
|
if (list_is_singular(&arm_pmus))
|
2021-09-19 16:09:49 +03:00
|
|
|
static_branch_enable(&kvm_arm_pmu_available);
|
2022-01-27 19:17:57 +03:00
|
|
|
|
|
|
|
out_unlock:
|
|
|
|
mutex_unlock(&arm_pmus_lock);
|
2021-09-19 16:09:49 +03:00
|
|
|
}
|
|
|
|
|
2022-01-27 19:17:56 +03:00
|
|
|
static struct arm_pmu *kvm_pmu_probe_armpmu(void)
|
2020-03-17 14:11:56 +03:00
|
|
|
{
|
|
|
|
struct perf_event_attr attr = { };
|
|
|
|
struct perf_event *event;
|
2022-01-27 19:17:56 +03:00
|
|
|
struct arm_pmu *pmu = NULL;
|
2020-03-17 14:11:56 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Create a dummy event that only counts user cycles. As we'll never
|
|
|
|
* leave this function with the event being live, it will never
|
|
|
|
* count anything. But it allows us to probe some of the PMU
|
|
|
|
* details. Yes, this is terrible.
|
|
|
|
*/
|
|
|
|
attr.type = PERF_TYPE_RAW;
|
|
|
|
attr.size = sizeof(attr);
|
|
|
|
attr.pinned = 1;
|
|
|
|
attr.disabled = 0;
|
|
|
|
attr.exclude_user = 0;
|
|
|
|
attr.exclude_kernel = 1;
|
|
|
|
attr.exclude_hv = 1;
|
|
|
|
attr.exclude_host = 1;
|
|
|
|
attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
|
|
|
|
attr.sample_period = GENMASK(63, 0);
|
|
|
|
|
|
|
|
event = perf_event_create_kernel_counter(&attr, -1, current,
|
|
|
|
kvm_pmu_perf_overflow, &attr);
|
|
|
|
|
|
|
|
if (IS_ERR(event)) {
|
|
|
|
pr_err_once("kvm: pmu event creation failed %ld\n",
|
|
|
|
PTR_ERR(event));
|
2022-01-27 19:17:56 +03:00
|
|
|
return NULL;
|
2020-03-17 14:11:56 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if (event->pmu) {
|
|
|
|
pmu = to_arm_pmu(event->pmu);
|
2022-11-28 16:56:29 +03:00
|
|
|
if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
|
2022-09-10 19:33:50 +03:00
|
|
|
pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
|
2022-01-27 19:17:56 +03:00
|
|
|
pmu = NULL;
|
2020-03-17 14:11:56 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
perf_event_disable(event);
|
|
|
|
perf_event_release_kernel(event);
|
|
|
|
|
2022-01-27 19:17:56 +03:00
|
|
|
return pmu;
|
2020-03-17 14:11:56 +03:00
|
|
|
}
|
|
|
|
|
2020-03-12 19:11:24 +03:00
|
|
|
/*
 * Compute the PMCEID0/PMCEID1 value exposed to the guest.
 *
 * @vcpu: The vcpu pointer
 * @pmceid1: false to build PMCEID0, true to build PMCEID1
 *
 * Starts from the host register value, adjusts it (CHAIN is always
 * advertised in PMCEID0; STALL_SLOT is hidden in PMCEID1 from PMUv3p4 on),
 * and then, if the VM has an event filter, clears every event the filter
 * does not allow. Returns 0 when the vcpu has no PMU.
 */
u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
	unsigned long *filter = vcpu->kvm->arch.pmu_filter;
	u64 pmceid, allowed = 0;
	int first_event, shift, nr_events;
	bool has_ext_events;

	/* No PMU feature: kvm->arch.arm_pmu is NULL, so emulate nothing. */
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (pmceid1) {
		pmceid = read_sysreg(pmceid1_el0);
		/*
		 * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
		 * as RAZ
		 */
		if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4)
			pmceid &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
		first_event = 32;
	} else {
		pmceid = read_sysreg(pmceid0_el0);
		/* always support CHAIN */
		pmceid |= BIT(ARMV8_PMUV3_PERFCTR_CHAIN);
		first_event = 0;
	}

	if (!filter)
		return pmceid;

	nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

	/*
	 * The upper 32 bits of the mask come from the filter entries starting
	 * at 0x4000 + first_event; they are consulted only when the event
	 * space is large enough to contain them.
	 */
	has_ext_events = nr_events >= (0x4000 + first_event + 32);

	/* Assemble the mask from the filter bitmap, eight events at a time. */
	for (shift = 0; shift < 32; shift += 8) {
		u64 chunk = bitmap_get_value8(filter, first_event + shift);

		allowed |= chunk << shift;

		if (has_ext_events) {
			chunk = bitmap_get_value8(filter,
						  0x4000 + first_event + shift);
			allowed |= chunk << (32 + shift);
		}
	}

	return pmceid & allowed;
}
|
|
|
|
|
2017-05-02 14:41:02 +03:00
|
|
|
int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
|
2016-01-11 16:35:32 +03:00
|
|
|
{
|
2020-11-26 17:49:16 +03:00
|
|
|
if (!kvm_vcpu_has_pmu(vcpu))
|
2017-05-02 14:41:02 +03:00
|
|
|
return 0;
|
2016-01-11 16:35:32 +03:00
|
|
|
|
2020-11-26 17:49:16 +03:00
|
|
|
if (!vcpu->arch.pmu.created)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2016-09-27 04:51:47 +03:00
|
|
|
/*
|
2017-05-02 14:41:02 +03:00
|
|
|
* A valid interrupt configuration for the PMU is either to have a
|
|
|
|
* properly configured interrupt number and using an in-kernel
|
2017-05-16 20:53:50 +03:00
|
|
|
* irqchip, or to not have an in-kernel GIC and not set an IRQ.
|
2016-09-27 04:51:47 +03:00
|
|
|
*/
|
2017-05-16 20:53:50 +03:00
|
|
|
if (irqchip_in_kernel(vcpu->kvm)) {
|
|
|
|
int irq = vcpu->arch.pmu.irq_num;
|
|
|
|
/*
|
|
|
|
* If we are using an in-kernel vgic, at this point we know
|
|
|
|
* the vgic will be initialized, so we can check the PMU irq
|
|
|
|
* number against the dimensions of the vgic and make sure
|
|
|
|
* it's valid.
|
|
|
|
*/
|
|
|
|
if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
|
|
|
|
return -EINVAL;
|
|
|
|
} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2017-05-02 14:41:02 +03:00
|
|
|
|
2021-06-03 18:50:02 +03:00
|
|
|
/* One-off reload of the PMU on first run */
|
|
|
|
kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
|
|
|
|
|
2017-05-02 14:41:02 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
|
|
|
|
{
|
|
|
|
if (irqchip_in_kernel(vcpu->kvm)) {
|
2017-05-04 14:32:53 +03:00
|
|
|
int ret;
|
|
|
|
|
2017-05-02 14:41:02 +03:00
|
|
|
/*
|
|
|
|
* If using the PMU with an in-kernel virtual GIC
|
|
|
|
* implementation, we require the GIC to be already
|
|
|
|
* initialized when initializing the PMU.
|
|
|
|
*/
|
|
|
|
if (!vgic_initialized(vcpu->kvm))
|
|
|
|
return -ENODEV;
|
|
|
|
|
|
|
|
if (!kvm_arm_pmu_irq_initialized(vcpu))
|
|
|
|
return -ENXIO;
|
2017-05-04 14:32:53 +03:00
|
|
|
|
|
|
|
ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
|
|
|
|
&vcpu->arch.pmu);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
2017-05-02 14:41:02 +03:00
|
|
|
}
|
2016-01-11 16:35:32 +03:00
|
|
|
|
2020-09-24 14:07:04 +03:00
|
|
|
init_irq_work(&vcpu->arch.pmu.overflow_work,
|
|
|
|
kvm_pmu_perf_overflow_notify_vcpu);
|
|
|
|
|
2017-05-02 14:41:02 +03:00
|
|
|
vcpu->arch.pmu.created = true;
|
2016-01-11 16:35:32 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-03-07 13:32:29 +03:00
|
|
|
/*
|
|
|
|
* For one VM the interrupt type must be same for each vcpu.
|
|
|
|
* As a PPI, the interrupt number is the same for all vcpus,
|
|
|
|
* while as an SPI it must be a separate number per vcpu.
|
|
|
|
*/
|
|
|
|
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
|
2016-01-11 16:35:32 +03:00
|
|
|
{
|
2021-11-16 19:04:02 +03:00
|
|
|
unsigned long i;
|
2016-01-11 16:35:32 +03:00
|
|
|
struct kvm_vcpu *vcpu;
|
|
|
|
|
|
|
|
kvm_for_each_vcpu(i, vcpu, kvm) {
|
|
|
|
if (!kvm_arm_pmu_irq_initialized(vcpu))
|
|
|
|
continue;
|
|
|
|
|
2016-03-07 13:32:29 +03:00
|
|
|
if (irq_is_ppi(irq)) {
|
2016-01-11 16:35:32 +03:00
|
|
|
if (vcpu->arch.pmu.irq_num != irq)
|
|
|
|
return false;
|
|
|
|
} else {
|
|
|
|
if (vcpu->arch.pmu.irq_num == irq)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
KVM: arm64: Add KVM_ARM_VCPU_PMU_V3_SET_PMU attribute
When KVM creates an event and there is more than one PMU present on the
system, perf_init_event() will go through the list of available PMUs and
will choose the first one that can create the event. The order of the PMUs
in this list depends on the probe order, which can change under various
circumstances, for example if the order of the PMU nodes change in the DTB
or if asynchronous driver probing is enabled on the kernel command line
(with the driver_async_probe=armv8-pmu option).
Another consequence of this approach is that on heterogeneous systems all
virtual machines that KVM creates will use the same PMU. This might cause
unexpected behaviour for userspace: when a VCPU is executing on the
physical CPU that uses this default PMU, PMU events in the guest work
correctly; but when the same VCPU executes on another CPU, PMU events in
the guest will suddenly stop counting.
Fortunately, perf core allows the user to specify on which PMU to create an
event by using the perf_event_attr->type field, which is used by
perf_init_event() as an index in the radix tree of available PMUs.
Add the KVM_ARM_VCPU_PMU_V3_CTRL(KVM_ARM_VCPU_PMU_V3_SET_PMU) VCPU
attribute to allow userspace to specify the arm_pmu that KVM will use when
creating events for that VCPU. KVM will make no attempt to run the VCPU on
the physical CPUs that share the PMU, leaving it up to userspace to manage
the VCPU threads' affinity accordingly.
To ensure that KVM doesn't expose an asymmetric system to the guest, the
PMU set for one VCPU will be used by all other VCPUs. Once a VCPU has run,
the PMU cannot be changed in order to avoid changing the list of available
events for a VCPU, or to change the semantics of existing events.
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220127161759.53553-6-alexandru.elisei@arm.com
2022-01-27 19:17:58 +03:00
|
|
|
static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
|
|
|
|
{
|
|
|
|
struct kvm *kvm = vcpu->kvm;
|
|
|
|
struct arm_pmu_entry *entry;
|
|
|
|
struct arm_pmu *arm_pmu;
|
|
|
|
int ret = -ENXIO;
|
|
|
|
|
|
|
|
mutex_lock(&kvm->lock);
|
|
|
|
mutex_lock(&arm_pmus_lock);
|
|
|
|
|
|
|
|
list_for_each_entry(entry, &arm_pmus, entry) {
|
|
|
|
arm_pmu = entry->arm_pmu;
|
|
|
|
if (arm_pmu->pmu.type == pmu_id) {
|
2022-03-11 20:39:47 +03:00
|
|
|
if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) ||
|
KVM: arm64: Add KVM_ARM_VCPU_PMU_V3_SET_PMU attribute
When KVM creates an event and there are more than one PMUs present on the
system, perf_init_event() will go through the list of available PMUs and
will choose the first one that can create the event. The order of the PMUs
in this list depends on the probe order, which can change under various
circumstances, for example if the order of the PMU nodes change in the DTB
or if asynchronous driver probing is enabled on the kernel command line
(with the driver_async_probe=armv8-pmu option).
Another consequence of this approach is that on heteregeneous systems all
virtual machines that KVM creates will use the same PMU. This might cause
unexpected behaviour for userspace: when a VCPU is executing on the
physical CPU that uses this default PMU, PMU events in the guest work
correctly; but when the same VCPU executes on another CPU, PMU events in
the guest will suddenly stop counting.
Fortunately, perf core allows user to specify on which PMU to create an
event by using the perf_event_attr->type field, which is used by
perf_init_event() as an index in the radix tree of available PMUs.
Add the KVM_ARM_VCPU_PMU_V3_CTRL(KVM_ARM_VCPU_PMU_V3_SET_PMU) VCPU
attribute to allow userspace to specify the arm_pmu that KVM will use when
creating events for that VCPU. KVM will make no attempt to run the VCPU on
the physical CPUs that share the PMU, leaving it up to userspace to manage
the VCPU threads' affinity accordingly.
To ensure that KVM doesn't expose an asymmetric system to the guest, the
PMU set for one VCPU will be used by all other VCPUs. Once a VCPU has run,
the PMU cannot be changed in order to avoid changing the list of available
events for a VCPU, or to change the semantics of existing events.
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220127161759.53553-6-alexandru.elisei@arm.com
2022-01-27 19:17:58 +03:00
|
|
|
(kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) {
|
|
|
|
ret = -EBUSY;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
kvm->arch.arm_pmu = arm_pmu;
|
2022-01-27 19:17:59 +03:00
|
|
|
cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus);
|
KVM: arm64: Add KVM_ARM_VCPU_PMU_V3_SET_PMU attribute
When KVM creates an event and there are more than one PMUs present on the
system, perf_init_event() will go through the list of available PMUs and
will choose the first one that can create the event. The order of the PMUs
in this list depends on the probe order, which can change under various
circumstances, for example if the order of the PMU nodes change in the DTB
or if asynchronous driver probing is enabled on the kernel command line
(with the driver_async_probe=armv8-pmu option).
Another consequence of this approach is that on heteregeneous systems all
virtual machines that KVM creates will use the same PMU. This might cause
unexpected behaviour for userspace: when a VCPU is executing on the
physical CPU that uses this default PMU, PMU events in the guest work
correctly; but when the same VCPU executes on another CPU, PMU events in
the guest will suddenly stop counting.
Fortunately, perf core allows user to specify on which PMU to create an
event by using the perf_event_attr->type field, which is used by
perf_init_event() as an index in the radix tree of available PMUs.
Add the KVM_ARM_VCPU_PMU_V3_CTRL(KVM_ARM_VCPU_PMU_V3_SET_PMU) VCPU
attribute to allow userspace to specify the arm_pmu that KVM will use when
creating events for that VCPU. KVM will make no attempt to run the VCPU on
the physical CPUs that share the PMU, leaving it up to userspace to manage
the VCPU threads' affinity accordingly.
To ensure that KVM doesn't expose an asymmetric system to the guest, the
PMU set for one VCPU will be used by all other VCPUs. Once a VCPU has run,
the PMU cannot be changed in order to avoid changing the list of available
events for a VCPU, or to change the semantics of existing events.
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220127161759.53553-6-alexandru.elisei@arm.com
2022-01-27 19:17:58 +03:00
|
|
|
ret = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
mutex_unlock(&arm_pmus_lock);
|
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2016-01-11 16:35:32 +03:00
|
|
|
int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
|
|
|
|
{
|
2022-01-27 19:17:54 +03:00
|
|
|
struct kvm *kvm = vcpu->kvm;
|
|
|
|
|
2020-11-12 21:13:27 +03:00
|
|
|
if (!kvm_vcpu_has_pmu(vcpu))
|
2020-03-12 20:27:36 +03:00
|
|
|
return -ENODEV;
|
|
|
|
|
|
|
|
if (vcpu->arch.pmu.created)
|
|
|
|
return -EBUSY;
|
|
|
|
|
2022-01-27 19:17:56 +03:00
|
|
|
mutex_lock(&kvm->lock);
|
|
|
|
if (!kvm->arch.arm_pmu) {
|
|
|
|
/* No PMU set, get the default one */
|
|
|
|
kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
|
|
|
|
if (!kvm->arch.arm_pmu) {
|
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
mutex_unlock(&kvm->lock);
|
2020-03-17 14:11:56 +03:00
|
|
|
|
2016-01-11 16:35:32 +03:00
|
|
|
switch (attr->attr) {
|
|
|
|
case KVM_ARM_VCPU_PMU_V3_IRQ: {
|
|
|
|
int __user *uaddr = (int __user *)(long)attr->addr;
|
|
|
|
int irq;
|
|
|
|
|
2022-01-27 19:17:54 +03:00
|
|
|
if (!irqchip_in_kernel(kvm))
|
2017-05-02 14:41:02 +03:00
|
|
|
return -EINVAL;
|
|
|
|
|
2016-01-11 16:35:32 +03:00
|
|
|
if (get_user(irq, uaddr))
|
|
|
|
return -EFAULT;
|
|
|
|
|
2016-03-07 13:32:29 +03:00
|
|
|
/* The PMU overflow interrupt can be a PPI or a valid SPI. */
|
2017-05-16 20:53:50 +03:00
|
|
|
if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
|
2016-03-07 13:32:29 +03:00
|
|
|
return -EINVAL;
|
|
|
|
|
2022-01-27 19:17:54 +03:00
|
|
|
if (!pmu_irq_is_valid(kvm, irq))
|
2016-01-11 16:35:32 +03:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (kvm_arm_pmu_irq_initialized(vcpu))
|
|
|
|
return -EBUSY;
|
|
|
|
|
|
|
|
kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
|
|
|
|
vcpu->arch.pmu.irq_num = irq;
|
|
|
|
return 0;
|
|
|
|
}
|
2020-02-12 14:31:02 +03:00
|
|
|
case KVM_ARM_VCPU_PMU_V3_FILTER: {
|
|
|
|
struct kvm_pmu_event_filter __user *uaddr;
|
|
|
|
struct kvm_pmu_event_filter filter;
|
|
|
|
int nr_events;
|
|
|
|
|
2022-01-27 19:17:54 +03:00
|
|
|
nr_events = kvm_pmu_event_mask(kvm) + 1;
|
2020-02-12 14:31:02 +03:00
|
|
|
|
|
|
|
uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
|
|
|
|
|
|
|
|
if (copy_from_user(&filter, uaddr, sizeof(filter)))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
if (((u32)filter.base_event + filter.nevents) > nr_events ||
|
|
|
|
(filter.action != KVM_PMU_EVENT_ALLOW &&
|
|
|
|
filter.action != KVM_PMU_EVENT_DENY))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2022-01-27 19:17:54 +03:00
|
|
|
mutex_lock(&kvm->lock);
|
|
|
|
|
2022-03-11 20:39:47 +03:00
|
|
|
if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) {
|
2022-01-27 19:17:54 +03:00
|
|
|
mutex_unlock(&kvm->lock);
|
|
|
|
return -EBUSY;
|
|
|
|
}
|
2020-02-12 14:31:02 +03:00
|
|
|
|
2022-01-27 19:17:54 +03:00
|
|
|
if (!kvm->arch.pmu_filter) {
|
|
|
|
kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
|
|
|
|
if (!kvm->arch.pmu_filter) {
|
|
|
|
mutex_unlock(&kvm->lock);
|
2020-02-12 14:31:02 +03:00
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The default depends on the first applied filter.
|
|
|
|
* If it allows events, the default is to deny.
|
|
|
|
* Conversely, if the first filter denies a set of
|
|
|
|
* events, the default is to allow.
|
|
|
|
*/
|
|
|
|
if (filter.action == KVM_PMU_EVENT_ALLOW)
|
2022-01-27 19:17:54 +03:00
|
|
|
bitmap_zero(kvm->arch.pmu_filter, nr_events);
|
2020-02-12 14:31:02 +03:00
|
|
|
else
|
2022-01-27 19:17:54 +03:00
|
|
|
bitmap_fill(kvm->arch.pmu_filter, nr_events);
|
2020-02-12 14:31:02 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if (filter.action == KVM_PMU_EVENT_ALLOW)
|
2022-01-27 19:17:54 +03:00
|
|
|
bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
|
2020-02-12 14:31:02 +03:00
|
|
|
else
|
2022-01-27 19:17:54 +03:00
|
|
|
bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
|
2020-02-12 14:31:02 +03:00
|
|
|
|
2022-01-27 19:17:54 +03:00
|
|
|
mutex_unlock(&kvm->lock);
|
2020-02-12 14:31:02 +03:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
KVM: arm64: Add KVM_ARM_VCPU_PMU_V3_SET_PMU attribute
When KVM creates an event and there are more than one PMUs present on the
system, perf_init_event() will go through the list of available PMUs and
will choose the first one that can create the event. The order of the PMUs
in this list depends on the probe order, which can change under various
circumstances, for example if the order of the PMU nodes change in the DTB
or if asynchronous driver probing is enabled on the kernel command line
(with the driver_async_probe=armv8-pmu option).
Another consequence of this approach is that on heteregeneous systems all
virtual machines that KVM creates will use the same PMU. This might cause
unexpected behaviour for userspace: when a VCPU is executing on the
physical CPU that uses this default PMU, PMU events in the guest work
correctly; but when the same VCPU executes on another CPU, PMU events in
the guest will suddenly stop counting.
Fortunately, perf core allows user to specify on which PMU to create an
event by using the perf_event_attr->type field, which is used by
perf_init_event() as an index in the radix tree of available PMUs.
Add the KVM_ARM_VCPU_PMU_V3_CTRL(KVM_ARM_VCPU_PMU_V3_SET_PMU) VCPU
attribute to allow userspace to specify the arm_pmu that KVM will use when
creating events for that VCPU. KVM will make no attempt to run the VCPU on
the physical CPUs that share the PMU, leaving it up to userspace to manage
the VCPU threads' affinity accordingly.
To ensure that KVM doesn't expose an asymmetric system to the guest, the
PMU set for one VCPU will be used by all other VCPUs. Once a VCPU has run,
the PMU cannot be changed in order to avoid changing the list of available
events for a VCPU, or to change the semantics of existing events.
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220127161759.53553-6-alexandru.elisei@arm.com
2022-01-27 19:17:58 +03:00
|
|
|
case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
|
|
|
|
int __user *uaddr = (int __user *)(long)attr->addr;
|
|
|
|
int pmu_id;
|
|
|
|
|
|
|
|
if (get_user(pmu_id, uaddr))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
|
|
|
|
}
|
2016-01-11 16:35:32 +03:00
|
|
|
case KVM_ARM_VCPU_PMU_V3_INIT:
|
|
|
|
return kvm_arm_pmu_v3_init(vcpu);
|
|
|
|
}
|
|
|
|
|
|
|
|
return -ENXIO;
|
|
|
|
}
|
|
|
|
|
|
|
|
int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
|
|
|
|
{
|
|
|
|
switch (attr->attr) {
|
|
|
|
case KVM_ARM_VCPU_PMU_V3_IRQ: {
|
|
|
|
int __user *uaddr = (int __user *)(long)attr->addr;
|
|
|
|
int irq;
|
|
|
|
|
2017-05-02 14:41:02 +03:00
|
|
|
if (!irqchip_in_kernel(vcpu->kvm))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2020-11-13 19:39:44 +03:00
|
|
|
if (!kvm_vcpu_has_pmu(vcpu))
|
2016-01-11 16:35:32 +03:00
|
|
|
return -ENODEV;
|
|
|
|
|
|
|
|
if (!kvm_arm_pmu_irq_initialized(vcpu))
|
|
|
|
return -ENXIO;
|
|
|
|
|
|
|
|
irq = vcpu->arch.pmu.irq_num;
|
|
|
|
return put_user(irq, uaddr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return -ENXIO;
|
|
|
|
}
|
|
|
|
|
|
|
|
int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
|
|
|
|
{
|
|
|
|
switch (attr->attr) {
|
|
|
|
case KVM_ARM_VCPU_PMU_V3_IRQ:
|
|
|
|
case KVM_ARM_VCPU_PMU_V3_INIT:
|
2020-02-12 14:31:02 +03:00
|
|
|
case KVM_ARM_VCPU_PMU_V3_FILTER:
|
KVM: arm64: Add KVM_ARM_VCPU_PMU_V3_SET_PMU attribute
When KVM creates an event and there are more than one PMUs present on the
system, perf_init_event() will go through the list of available PMUs and
will choose the first one that can create the event. The order of the PMUs
in this list depends on the probe order, which can change under various
circumstances, for example if the order of the PMU nodes change in the DTB
or if asynchronous driver probing is enabled on the kernel command line
(with the driver_async_probe=armv8-pmu option).
Another consequence of this approach is that on heteregeneous systems all
virtual machines that KVM creates will use the same PMU. This might cause
unexpected behaviour for userspace: when a VCPU is executing on the
physical CPU that uses this default PMU, PMU events in the guest work
correctly; but when the same VCPU executes on another CPU, PMU events in
the guest will suddenly stop counting.
Fortunately, perf core allows user to specify on which PMU to create an
event by using the perf_event_attr->type field, which is used by
perf_init_event() as an index in the radix tree of available PMUs.
Add the KVM_ARM_VCPU_PMU_V3_CTRL(KVM_ARM_VCPU_PMU_V3_SET_PMU) VCPU
attribute to allow userspace to specify the arm_pmu that KVM will use when
creating events for that VCPU. KVM will make no attempt to run the VCPU on
the physical CPUs that share the PMU, leaving it up to userspace to manage
the VCPU threads' affinity accordingly.
To ensure that KVM doesn't expose an asymmetric system to the guest, the
PMU set for one VCPU will be used by all other VCPUs. Once a VCPU has run,
the PMU cannot be changed in order to avoid changing the list of available
events for a VCPU, or to change the semantics of existing events.
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220127161759.53553-6-alexandru.elisei@arm.com
2022-01-27 19:17:58 +03:00
|
|
|
case KVM_ARM_VCPU_PMU_V3_SET_PMU:
|
2020-11-12 21:13:27 +03:00
|
|
|
if (kvm_vcpu_has_pmu(vcpu))
|
2016-01-11 16:35:32 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return -ENXIO;
|
|
|
|
}
|
2022-11-13 19:38:26 +03:00
|
|
|
|
|
|
|
u8 kvm_arm_pmu_get_pmuver_limit(void)
|
|
|
|
{
|
|
|
|
u64 tmp;
|
|
|
|
|
|
|
|
tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
|
|
|
|
tmp = cpuid_feature_cap_perfmon_field(tmp,
|
|
|
|
ID_AA64DFR0_EL1_PMUVer_SHIFT,
|
2022-11-13 19:38:30 +03:00
|
|
|
ID_AA64DFR0_EL1_PMUVer_V3P5);
|
2022-11-13 19:38:26 +03:00
|
|
|
return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp);
|
|
|
|
}
|