
Merge tag 'kvm-x86-pmu-6.4' of https://github.com/kvm-x86/linux into HEAD

KVM x86 PMU changes for 6.4:

 - Disallow virtualizing legacy LBRs if architectural LBRs are available,
   as the two are mutually exclusive in hardware

 - Disallow writes to immutable feature MSRs (notably PERF_CAPABILITIES)
   after KVM_RUN, and overhaul the vmx_pmu_caps selftest to better
   validate PERF_CAPABILITIES (a condensed sketch of the new guard follows
   this list)

 - Apply PMU filters to emulated events and add test coverage to the
   pmu_event_filter selftest

 - Misc cleanups and fixes
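
As a quick orientation for the second item above, here is a condensed, non-standalone sketch of how the post-KVM_RUN guard for immutable feature MSRs fits together. It is stitched from the x86.h and x86.c hunks further down (helper bodies and unrelated details are elided), not a drop-in replacement for them:

/* x86.h: a vCPU counts as having "run" once it has done at least one VM-Enter. */
static inline bool kvm_vcpu_has_run(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.last_vmentry_cpu != -1;
}

/* x86.c: after KVM_RUN, writes to immutable feature MSRs must be value-preserving no-ops. */
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
	u64 val;

	if (kvm_vcpu_has_run(vcpu) && kvm_is_immutable_feature_msr(index)) {
		/* Tolerate rewrites of the current value, e.g. userspace blindly stuffing MSRs at RESET. */
		if (do_get_msr(vcpu, index, &val) || *data != val)
			return -EINVAL;
		return 0;
	}

	return kvm_set_msr_ignored_check(vcpu, index, *data, true);
}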
Paolo Bonzini 2023-04-26 15:53:36 -04:00
Parent 807b758496 457bd7af1a
Commit 48b1893ae3
14 changed files: 565 additions and 275 deletions


@ -513,6 +513,7 @@ struct kvm_pmc {
#define MSR_ARCH_PERFMON_FIXED_CTR_MAX (MSR_ARCH_PERFMON_FIXED_CTR0 + KVM_PMC_MAX_FIXED - 1)
#define KVM_AMD_PMC_MAX_GENERIC 6
struct kvm_pmu {
u8 version;
unsigned nr_arch_gp_counters;
unsigned nr_arch_fixed_counters;
unsigned available_event_types;
@ -525,7 +526,6 @@ struct kvm_pmu {
u64 global_ovf_ctrl_mask;
u64 reserved_bits;
u64 raw_event_mask;
u8 version;
struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
struct irq_work irq_work;


@ -414,7 +414,7 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
* KVM_SET_CPUID{,2} again. To support this legacy behavior, check
* whether the supplied CPUID data is equal to what's already set.
*/
if (vcpu->arch.last_vmentry_cpu != -1) {
if (kvm_vcpu_has_run(vcpu)) {
r = kvm_cpuid_check_equal(vcpu, e2, nent);
if (r)
return r;


@ -5476,7 +5476,7 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
* Changing guest CPUID after KVM_RUN is forbidden, see the comment in
* kvm_arch_vcpu_ioctl().
*/
KVM_BUG_ON(vcpu->arch.last_vmentry_cpu != -1, vcpu->kvm);
KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm);
}
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)


@ -93,7 +93,7 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
#undef __KVM_X86_PMU_OP
}
static inline bool pmc_is_enabled(struct kvm_pmc *pmc)
static inline bool pmc_is_globally_enabled(struct kvm_pmc *pmc)
{
return static_call(kvm_x86_pmu_pmc_is_enabled)(pmc);
}
@ -400,6 +400,12 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
return is_fixed_event_allowed(filter, pmc->idx);
}
static bool pmc_event_is_allowed(struct kvm_pmc *pmc)
{
return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) &&
check_pmu_event_filter(pmc);
}
static void reprogram_counter(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@ -409,10 +415,7 @@ static void reprogram_counter(struct kvm_pmc *pmc)
pmc_pause_counter(pmc);
if (!pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc))
goto reprogram_complete;
if (!check_pmu_event_filter(pmc))
if (!pmc_event_is_allowed(pmc))
goto reprogram_complete;
if (pmc->counter < pmc->prev_counter)
@ -589,6 +592,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
*/
void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
{
if (KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm))
return;
bitmap_zero(vcpu_to_pmu(vcpu)->all_valid_pmc_idx, X86_PMC_IDX_MAX);
static_call(kvm_x86_pmu_refresh)(vcpu);
}
@ -646,7 +653,7 @@ static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
{
pmc->prev_counter = pmc->counter;
pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);
kvm_pmu_request_counter_reprogam(pmc);
kvm_pmu_request_counter_reprogram(pmc);
}
static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
@ -684,7 +691,7 @@ void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
if (!pmc || !pmc_is_enabled(pmc) || !pmc_speculative_in_use(pmc))
if (!pmc || !pmc_event_is_allowed(pmc))
continue;
/* Ignore checks for edge detect, pin control, invert and CMASK bits */


@ -195,7 +195,7 @@ static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
KVM_PMC_MAX_FIXED);
}
static inline void kvm_pmu_request_counter_reprogam(struct kvm_pmc *pmc)
static inline void kvm_pmu_request_counter_reprogram(struct kvm_pmc *pmc)
{
set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
kvm_make_request(KVM_REQ_PMU, pmc->vcpu);


@ -161,7 +161,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
data &= ~pmu->reserved_bits;
if (data != pmc->eventsel) {
pmc->eventsel = data;
kvm_pmu_request_counter_reprogam(pmc);
kvm_pmu_request_counter_reprogram(pmc);
}
return 0;
}


@ -4093,7 +4093,7 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
{
switch (index) {
case MSR_IA32_MCG_EXT_CTL:
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
return false;
case MSR_IA32_SMBASE:
if (!IS_ENABLED(CONFIG_KVM_SMM))


@ -57,7 +57,7 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i);
__set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
kvm_pmu_request_counter_reprogam(pmc);
kvm_pmu_request_counter_reprogram(pmc);
}
}
@ -76,13 +76,13 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
static void reprogram_counters(struct kvm_pmu *pmu, u64 diff)
{
int bit;
struct kvm_pmc *pmc;
for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) {
pmc = intel_pmc_idx_to_pmc(pmu, bit);
if (pmc)
kvm_pmu_request_counter_reprogam(pmc);
}
if (!diff)
return;
for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
set_bit(bit, pmu->reprogram_pmi);
kvm_make_request(KVM_REQ_PMU, pmu_to_vcpu(pmu));
}
static bool intel_hw_event_available(struct kvm_pmc *pmc)
@ -351,45 +351,47 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr) {
case MSR_CORE_PERF_FIXED_CTR_CTRL:
msr_info->data = pmu->fixed_ctr_ctrl;
return 0;
break;
case MSR_CORE_PERF_GLOBAL_STATUS:
msr_info->data = pmu->global_status;
return 0;
break;
case MSR_CORE_PERF_GLOBAL_CTRL:
msr_info->data = pmu->global_ctrl;
return 0;
break;
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
msr_info->data = 0;
return 0;
break;
case MSR_IA32_PEBS_ENABLE:
msr_info->data = pmu->pebs_enable;
return 0;
break;
case MSR_IA32_DS_AREA:
msr_info->data = pmu->ds_area;
return 0;
break;
case MSR_PEBS_DATA_CFG:
msr_info->data = pmu->pebs_data_cfg;
return 0;
break;
default:
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
(pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
u64 val = pmc_read_counter(pmc);
msr_info->data =
val & pmu->counter_bitmask[KVM_PMC_GP];
return 0;
break;
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
u64 val = pmc_read_counter(pmc);
msr_info->data =
val & pmu->counter_bitmask[KVM_PMC_FIXED];
return 0;
break;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
msr_info->data = pmc->eventsel;
return 0;
} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true))
return 0;
break;
} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true)) {
break;
}
return 1;
}
return 1;
return 0;
}
static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@ -402,44 +404,43 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr) {
case MSR_CORE_PERF_FIXED_CTR_CTRL:
if (pmu->fixed_ctr_ctrl == data)
return 0;
if (!(data & pmu->fixed_ctr_ctrl_mask)) {
if (data & pmu->fixed_ctr_ctrl_mask)
return 1;
if (pmu->fixed_ctr_ctrl != data)
reprogram_fixed_counters(pmu, data);
return 0;
}
break;
case MSR_CORE_PERF_GLOBAL_STATUS:
if (msr_info->host_initiated) {
pmu->global_status = data;
return 0;
}
break; /* RO MSR */
if (!msr_info->host_initiated)
return 1; /* RO MSR */
pmu->global_status = data;
break;
case MSR_CORE_PERF_GLOBAL_CTRL:
if (pmu->global_ctrl == data)
return 0;
if (kvm_valid_perf_global_ctrl(pmu, data)) {
if (!kvm_valid_perf_global_ctrl(pmu, data))
return 1;
if (pmu->global_ctrl != data) {
diff = pmu->global_ctrl ^ data;
pmu->global_ctrl = data;
reprogram_counters(pmu, diff);
return 0;
}
break;
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
if (!(data & pmu->global_ovf_ctrl_mask)) {
if (!msr_info->host_initiated)
pmu->global_status &= ~data;
return 0;
}
if (data & pmu->global_ovf_ctrl_mask)
return 1;
if (!msr_info->host_initiated)
pmu->global_status &= ~data;
break;
case MSR_IA32_PEBS_ENABLE:
if (pmu->pebs_enable == data)
return 0;
if (!(data & pmu->pebs_enable_mask)) {
if (data & pmu->pebs_enable_mask)
return 1;
if (pmu->pebs_enable != data) {
diff = pmu->pebs_enable ^ data;
pmu->pebs_enable = data;
reprogram_counters(pmu, diff);
return 0;
}
break;
case MSR_IA32_DS_AREA:
@ -447,15 +448,14 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
if (is_noncanonical_address(data, vcpu))
return 1;
pmu->ds_area = data;
return 0;
break;
case MSR_PEBS_DATA_CFG:
if (pmu->pebs_data_cfg == data)
return 0;
if (!(data & pmu->pebs_data_cfg_mask)) {
pmu->pebs_data_cfg = data;
return 0;
}
if (data & pmu->pebs_data_cfg_mask)
return 1;
pmu->pebs_data_cfg = data;
break;
default:
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
@ -463,33 +463,38 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if ((msr & MSR_PMC_FULL_WIDTH_BIT) &&
(data & ~pmu->counter_bitmask[KVM_PMC_GP]))
return 1;
if (!msr_info->host_initiated &&
!(msr & MSR_PMC_FULL_WIDTH_BIT))
data = (s64)(s32)data;
pmc->counter += data - pmc_read_counter(pmc);
pmc_update_sample_period(pmc);
return 0;
break;
} else if ((pmc = get_fixed_pmc(pmu, msr))) {
pmc->counter += data - pmc_read_counter(pmc);
pmc_update_sample_period(pmc);
return 0;
break;
} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
if (data == pmc->eventsel)
return 0;
reserved_bits = pmu->reserved_bits;
if ((pmc->idx == 2) &&
(pmu->raw_event_mask & HSW_IN_TX_CHECKPOINTED))
reserved_bits ^= HSW_IN_TX_CHECKPOINTED;
if (!(data & reserved_bits)) {
if (data & reserved_bits)
return 1;
if (data != pmc->eventsel) {
pmc->eventsel = data;
kvm_pmu_request_counter_reprogam(pmc);
return 0;
kvm_pmu_request_counter_reprogram(pmc);
}
} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false))
return 0;
break;
} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false)) {
break;
}
/* Not a known PMU MSR. */
return 1;
}
return 1;
return 0;
}
static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
@ -531,6 +536,16 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->pebs_enable_mask = ~0ull;
pmu->pebs_data_cfg_mask = ~0ull;
memset(&lbr_desc->records, 0, sizeof(lbr_desc->records));
/*
* Setting passthrough of LBR MSRs is done only in the VM-Entry loop,
* and PMU refresh is disallowed after the vCPU has run, i.e. this code
* should never be reached while KVM is passing through MSRs.
*/
if (KVM_BUG_ON(lbr_desc->msr_passthrough, vcpu->kvm))
return;
entry = kvm_find_cpuid_entry(vcpu, 0xa);
if (!entry || !vcpu->kvm->arch.enable_pmu)
return;


@ -1946,7 +1946,7 @@ static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx,
static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
{
switch (msr->index) {
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
if (!nested)
return 1;
return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
@ -2031,7 +2031,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = to_vmx(vcpu)->msr_ia32_sgxlepubkeyhash
[msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0];
break;
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
if (!nested_vmx_allowed(vcpu))
return 1;
if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
@ -2340,7 +2340,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmx->msr_ia32_sgxlepubkeyhash
[msr_index - MSR_IA32_SGXLEPUBKEYHASH0] = data;
break;
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
if (!msr_info->host_initiated)
return 1; /* they are read-only */
if (!nested_vmx_allowed(vcpu))
@ -6930,7 +6930,7 @@ static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index)
* real mode.
*/
return enable_unrestricted_guest || emulate_invalid_guest_state;
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
return nested;
case MSR_AMD64_VIRT_SPEC_CTRL:
case MSR_AMD64_TSC_RATIO:
@ -7756,9 +7756,11 @@ static u64 vmx_get_perf_capabilities(void)
if (boot_cpu_has(X86_FEATURE_PDCM))
rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
x86_perf_get_lbr(&lbr);
if (lbr.nr)
perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) {
x86_perf_get_lbr(&lbr);
if (lbr.nr)
perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
}
if (vmx_pebs_supported()) {
perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK;


@ -1556,38 +1556,40 @@ static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
static unsigned num_emulated_msrs;
/*
* List of msr numbers which are used to expose MSR-based features that
* can be used by a hypervisor to validate requested CPU features.
* List of MSRs that control the existence of MSR-based features, i.e. MSRs
* that are effectively CPUID leafs. VMX MSRs are also included in the set of
* feature MSRs, but are handled separately to allow expedited lookups.
*/
static const u32 msr_based_features_all[] = {
MSR_IA32_VMX_BASIC,
MSR_IA32_VMX_TRUE_PINBASED_CTLS,
MSR_IA32_VMX_PINBASED_CTLS,
MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
MSR_IA32_VMX_PROCBASED_CTLS,
MSR_IA32_VMX_TRUE_EXIT_CTLS,
MSR_IA32_VMX_EXIT_CTLS,
MSR_IA32_VMX_TRUE_ENTRY_CTLS,
MSR_IA32_VMX_ENTRY_CTLS,
MSR_IA32_VMX_MISC,
MSR_IA32_VMX_CR0_FIXED0,
MSR_IA32_VMX_CR0_FIXED1,
MSR_IA32_VMX_CR4_FIXED0,
MSR_IA32_VMX_CR4_FIXED1,
MSR_IA32_VMX_VMCS_ENUM,
MSR_IA32_VMX_PROCBASED_CTLS2,
MSR_IA32_VMX_EPT_VPID_CAP,
MSR_IA32_VMX_VMFUNC,
static const u32 msr_based_features_all_except_vmx[] = {
MSR_AMD64_DE_CFG,
MSR_IA32_UCODE_REV,
MSR_IA32_ARCH_CAPABILITIES,
MSR_IA32_PERF_CAPABILITIES,
};
static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all_except_vmx) +
(KVM_LAST_EMULATED_VMX_MSR - KVM_FIRST_EMULATED_VMX_MSR + 1)];
static unsigned int num_msr_based_features;
/*
* All feature MSRs except uCode revID, which tracks the currently loaded uCode
* patch, are immutable once the vCPU model is defined.
*/
static bool kvm_is_immutable_feature_msr(u32 msr)
{
int i;
if (msr >= KVM_FIRST_EMULATED_VMX_MSR && msr <= KVM_LAST_EMULATED_VMX_MSR)
return true;
for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++) {
if (msr == msr_based_features_all_except_vmx[i])
return msr != MSR_IA32_UCODE_REV;
}
return false;
}
/*
* Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM
* does not yet virtualize. These include:
@ -2205,6 +2207,22 @@ static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
u64 val;
/*
* Disallow writes to immutable feature MSRs after KVM_RUN. KVM does
* not support modifying the guest vCPU model on the fly, e.g. changing
* the nVMX capabilities while L2 is running is nonsensical. Ignore
* writes of the same value, e.g. to allow userspace to blindly stuff
* all MSRs when emulating RESET.
*/
if (kvm_vcpu_has_run(vcpu) && kvm_is_immutable_feature_msr(index)) {
if (do_get_msr(vcpu, index, &val) || *data != val)
return -EINVAL;
return 0;
}
return kvm_set_msr_ignored_check(vcpu, index, *data, true);
}
@ -3627,9 +3645,17 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (data & ~kvm_caps.supported_perf_cap)
return 1;
/*
* Note, this is not just a performance optimization! KVM
* disallows changing feature MSRs after the vCPU has run; PMU
* refresh will bug the VM if called after the vCPU has run.
*/
if (vcpu->arch.perf_capabilities == data)
break;
vcpu->arch.perf_capabilities = data;
kvm_pmu_refresh(vcpu);
return 0;
break;
case MSR_IA32_PRED_CMD:
if (!msr_info->host_initiated && !guest_has_pred_cmd_msr(vcpu))
return 1;
@ -7045,6 +7071,18 @@ out:
return r;
}
static void kvm_probe_feature_msr(u32 msr_index)
{
struct kvm_msr_entry msr = {
.index = msr_index,
};
if (kvm_get_msr_feature(&msr))
return;
msr_based_features[num_msr_based_features++] = msr_index;
}
static void kvm_probe_msr_to_save(u32 msr_index)
{
u32 dummy[2];
@ -7120,7 +7158,7 @@ static void kvm_probe_msr_to_save(u32 msr_index)
msrs_to_save[num_msrs_to_save++] = msr_index;
}
static void kvm_init_msr_list(void)
static void kvm_init_msr_lists(void)
{
unsigned i;
@ -7146,15 +7184,11 @@ static void kvm_init_msr_list(void)
emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
}
for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
struct kvm_msr_entry msr;
for (i = KVM_FIRST_EMULATED_VMX_MSR; i <= KVM_LAST_EMULATED_VMX_MSR; i++)
kvm_probe_feature_msr(i);
msr.index = msr_based_features_all[i];
if (kvm_get_msr_feature(&msr))
continue;
msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
}
for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++)
kvm_probe_feature_msr(msr_based_features_all_except_vmx[i]);
}
static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
@ -9488,7 +9522,7 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
kvm_caps.max_guest_tsc_khz = max;
}
kvm_caps.default_tsc_scaling_ratio = 1ULL << kvm_caps.tsc_scaling_ratio_frac_bits;
kvm_init_msr_list();
kvm_init_msr_lists();
return 0;
out_unwind_ops:


@ -40,6 +40,14 @@ void kvm_spurious_fault(void);
failed; \
})
/*
* The first...last VMX feature MSRs that are emulated by KVM. This may or may
* not cover all known VMX MSRs, as KVM doesn't emulate an MSR until there's an
* associated feature that KVM supports for nested virtualization.
*/
#define KVM_FIRST_EMULATED_VMX_MSR MSR_IA32_VMX_BASIC
#define KVM_LAST_EMULATED_VMX_MSR MSR_IA32_VMX_VMFUNC
#define KVM_DEFAULT_PLE_GAP 128
#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
#define KVM_DEFAULT_PLE_WINDOW_GROW 2
@ -83,6 +91,11 @@ static inline unsigned int __shrink_ple_window(unsigned int val,
void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu);
int kvm_check_nested_events(struct kvm_vcpu *vcpu);
static inline bool kvm_vcpu_has_run(struct kvm_vcpu *vcpu)
{
return vcpu->arch.last_vmentry_cpu != -1;
}
static inline bool kvm_is_exception_pending(struct kvm_vcpu *vcpu)
{
return vcpu->arch.exception.pending ||


@ -928,14 +928,45 @@ static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu,
uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index);
int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value);
static inline void vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index,
uint64_t msr_value)
{
int r = _vcpu_set_msr(vcpu, msr_index, msr_value);
/*
* Assert on an MSR access(es) and pretty print the MSR name when possible.
* Note, the caller provides the stringified name so that the name of macro is
* printed, not the value the macro resolves to (due to macro expansion).
*/
#define TEST_ASSERT_MSR(cond, fmt, msr, str, args...) \
do { \
if (__builtin_constant_p(msr)) { \
TEST_ASSERT(cond, fmt, str, args); \
} else if (!(cond)) { \
char buf[16]; \
\
snprintf(buf, sizeof(buf), "MSR 0x%x", msr); \
TEST_ASSERT(cond, fmt, buf, args); \
} \
} while (0)
TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r));
/*
* Returns true if KVM should return the last written value when reading an MSR
* from userspace, e.g. the MSR isn't a command MSR, doesn't emulate state that
* is changing, etc. This is NOT an exhaustive list! The intent is to filter
* out MSRs that are not durable _and_ that a selftest wants to write.
*/
static inline bool is_durable_msr(uint32_t msr)
{
return msr != MSR_IA32_TSC;
}
#define vcpu_set_msr(vcpu, msr, val) \
do { \
uint64_t r, v = val; \
\
TEST_ASSERT_MSR(_vcpu_set_msr(vcpu, msr, v) == 1, \
"KVM_SET_MSRS failed on %s, value = 0x%lx", msr, #msr, v); \
if (!is_durable_msr(msr)) \
break; \
r = vcpu_get_msr(vcpu, msr); \
TEST_ASSERT_MSR(r == v, "Set %s to '0x%lx', got back '0x%lx'", msr, #msr, v, r);\
} while (0)
void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
bool vm_is_unrestricted_guest(struct kvm_vm *vm);
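
For orientation only, a minimal usage sketch of the reworked vcpu_set_msr(): on failure the assert now pretty-prints the MSR by name (via the stringified argument), and for "durable" MSRs the macro also reads the value back and asserts it stuck. The call below mirrors the vmx_pmu_caps test further down:

	/* Writes the MSR, then (since PERF_CAPABILITIES is durable) reads it back and asserts the value matches. */
	vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);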


@ -54,6 +54,21 @@
#define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0)
/*
* "Retired instructions", from Processor Programming Reference
* (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
* Preliminary Processor Programming Reference (PPR) for AMD Family
* 17h Model 31h, Revision B0 Processors, and Preliminary Processor
* Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
* B1 Processors Volume 1 of 2.
* --- and ---
* "Instructions retired", from the Intel SDM, volume 3,
* "Pre-defined Architectural Performance Events."
*/
#define INST_RETIRED EVENT(0xc0, 0)
/*
* This event list comprises Intel's eight architectural events plus
* AMD's "retired branch instructions" for Zen[123] (and possibly
@ -61,7 +76,7 @@
*/
static const uint64_t event_list[] = {
EVENT(0x3c, 0),
EVENT(0xc0, 0),
INST_RETIRED,
EVENT(0x3c, 1),
EVENT(0x2e, 0x4f),
EVENT(0x2e, 0x41),
@ -71,13 +86,21 @@ static const uint64_t event_list[] = {
AMD_ZEN_BR_RETIRED,
};
struct {
uint64_t loads;
uint64_t stores;
uint64_t loads_stores;
uint64_t branches_retired;
uint64_t instructions_retired;
} pmc_results;
/*
* If we encounter a #GP during the guest PMU sanity check, then the guest
* PMU is not functional. Inform the hypervisor via GUEST_SYNC(0).
*/
static void guest_gp_handler(struct ex_regs *regs)
{
GUEST_SYNC(0);
GUEST_SYNC(-EFAULT);
}
/*
@ -92,12 +115,23 @@ static void check_msr(uint32_t msr, uint64_t bits_to_flip)
wrmsr(msr, v);
if (rdmsr(msr) != v)
GUEST_SYNC(0);
GUEST_SYNC(-EIO);
v ^= bits_to_flip;
wrmsr(msr, v);
if (rdmsr(msr) != v)
GUEST_SYNC(0);
GUEST_SYNC(-EIO);
}
static void run_and_measure_loop(uint32_t msr_base)
{
const uint64_t branches_retired = rdmsr(msr_base + 0);
const uint64_t insn_retired = rdmsr(msr_base + 1);
__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
pmc_results.branches_retired = rdmsr(msr_base + 0) - branches_retired;
pmc_results.instructions_retired = rdmsr(msr_base + 1) - insn_retired;
}
static void intel_guest_code(void)
@ -105,19 +139,18 @@ static void intel_guest_code(void)
check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
check_msr(MSR_P6_EVNTSEL0, 0xffff);
check_msr(MSR_IA32_PMC0, 0xffff);
GUEST_SYNC(1);
GUEST_SYNC(0);
for (;;) {
uint64_t br0, br1;
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED);
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
br0 = rdmsr(MSR_IA32_PMC0);
__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
br1 = rdmsr(MSR_IA32_PMC0);
GUEST_SYNC(br1 - br0);
wrmsr(MSR_P6_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
ARCH_PERFMON_EVENTSEL_OS | INST_RETIRED);
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
run_and_measure_loop(MSR_IA32_PMC0);
GUEST_SYNC(0);
}
}
@ -130,18 +163,17 @@ static void amd_guest_code(void)
{
check_msr(MSR_K7_EVNTSEL0, 0xffff);
check_msr(MSR_K7_PERFCTR0, 0xffff);
GUEST_SYNC(1);
GUEST_SYNC(0);
for (;;) {
uint64_t br0, br1;
wrmsr(MSR_K7_EVNTSEL0, 0);
wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED);
br0 = rdmsr(MSR_K7_PERFCTR0);
__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
br1 = rdmsr(MSR_K7_PERFCTR0);
GUEST_SYNC(br1 - br0);
wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
ARCH_PERFMON_EVENTSEL_OS | INST_RETIRED);
run_and_measure_loop(MSR_K7_PERFCTR0);
GUEST_SYNC(0);
}
}
@ -161,6 +193,19 @@ static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
return uc.args[1];
}
static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu)
{
uint64_t r;
memset(&pmc_results, 0, sizeof(pmc_results));
sync_global_to_guest(vcpu->vm, pmc_results);
r = run_vcpu_to_sync(vcpu);
TEST_ASSERT(!r, "Unexpected sync value: 0x%lx", r);
sync_global_from_guest(vcpu->vm, pmc_results);
}
/*
* In a nested environment or if the vPMU is disabled, the guest PMU
* might not work as architected (accessing the PMU MSRs may raise
@ -171,13 +216,13 @@ static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
*/
static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
{
bool success;
uint64_t r;
vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler);
success = run_vcpu_to_sync(vcpu);
r = run_vcpu_to_sync(vcpu);
vm_install_exception_handler(vcpu->vm, GP_VECTOR, NULL);
return success;
return !r;
}
static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents)
@ -237,91 +282,101 @@ static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
return f;
}
#define ASSERT_PMC_COUNTING_INSTRUCTIONS() \
do { \
uint64_t br = pmc_results.branches_retired; \
uint64_t ir = pmc_results.instructions_retired; \
\
if (br && br != NUM_BRANCHES) \
pr_info("%s: Branch instructions retired = %lu (expected %u)\n", \
__func__, br, NUM_BRANCHES); \
TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)", \
__func__, br); \
TEST_ASSERT(ir, "%s: Instructions retired = %lu (expected > 0)", \
__func__, ir); \
} while (0)
#define ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS() \
do { \
uint64_t br = pmc_results.branches_retired; \
uint64_t ir = pmc_results.instructions_retired; \
\
TEST_ASSERT(!br, "%s: Branch instructions retired = %lu (expected 0)", \
__func__, br); \
TEST_ASSERT(!ir, "%s: Instructions retired = %lu (expected 0)", \
__func__, ir); \
} while (0)
static void test_without_filter(struct kvm_vcpu *vcpu)
{
uint64_t count = run_vcpu_to_sync(vcpu);
run_vcpu_and_sync_pmc_results(vcpu);
if (count != NUM_BRANCHES)
pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
__func__, count, NUM_BRANCHES);
TEST_ASSERT(count, "Allowed PMU event is not counting");
ASSERT_PMC_COUNTING_INSTRUCTIONS();
}
static uint64_t test_with_filter(struct kvm_vcpu *vcpu,
struct kvm_pmu_event_filter *f)
static void test_with_filter(struct kvm_vcpu *vcpu,
struct kvm_pmu_event_filter *f)
{
vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
return run_vcpu_to_sync(vcpu);
run_vcpu_and_sync_pmc_results(vcpu);
}
static void test_amd_deny_list(struct kvm_vcpu *vcpu)
{
uint64_t event = EVENT(0x1C2, 0);
struct kvm_pmu_event_filter *f;
uint64_t count;
f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY, 0);
count = test_with_filter(vcpu, f);
test_with_filter(vcpu, f);
free(f);
if (count != NUM_BRANCHES)
pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
__func__, count, NUM_BRANCHES);
TEST_ASSERT(count, "Allowed PMU event is not counting");
ASSERT_PMC_COUNTING_INSTRUCTIONS();
}
static void test_member_deny_list(struct kvm_vcpu *vcpu)
{
struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
uint64_t count = test_with_filter(vcpu, f);
test_with_filter(vcpu, f);
free(f);
if (count)
pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
__func__, count);
TEST_ASSERT(!count, "Disallowed PMU Event is counting");
ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
}
static void test_member_allow_list(struct kvm_vcpu *vcpu)
{
struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
uint64_t count = test_with_filter(vcpu, f);
test_with_filter(vcpu, f);
free(f);
if (count != NUM_BRANCHES)
pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
__func__, count, NUM_BRANCHES);
TEST_ASSERT(count, "Allowed PMU event is not counting");
ASSERT_PMC_COUNTING_INSTRUCTIONS();
}
static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
{
struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
uint64_t count;
remove_event(f, INST_RETIRED);
remove_event(f, INTEL_BR_RETIRED);
remove_event(f, AMD_ZEN_BR_RETIRED);
count = test_with_filter(vcpu, f);
test_with_filter(vcpu, f);
free(f);
if (count != NUM_BRANCHES)
pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
__func__, count, NUM_BRANCHES);
TEST_ASSERT(count, "Allowed PMU event is not counting");
ASSERT_PMC_COUNTING_INSTRUCTIONS();
}
static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
{
struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
uint64_t count;
remove_event(f, INST_RETIRED);
remove_event(f, INTEL_BR_RETIRED);
remove_event(f, AMD_ZEN_BR_RETIRED);
count = test_with_filter(vcpu, f);
test_with_filter(vcpu, f);
free(f);
if (count)
pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
__func__, count);
TEST_ASSERT(!count, "Disallowed PMU Event is counting");
ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
}
/*
@ -450,51 +505,30 @@ static bool supports_event_mem_inst_retired(void)
#define EXCLUDE_MASKED_ENTRY(event_select, mask, match) \
KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, true)
struct perf_counter {
union {
uint64_t raw;
struct {
uint64_t loads:22;
uint64_t stores:22;
uint64_t loads_stores:20;
};
};
};
static uint64_t masked_events_guest_test(uint32_t msr_base)
static void masked_events_guest_test(uint32_t msr_base)
{
uint64_t ld0, ld1, st0, st1, ls0, ls1;
struct perf_counter c;
int val;
/*
* The acutal value of the counters don't determine the outcome of
* The actual value of the counters don't determine the outcome of
* the test. Only that they are zero or non-zero.
*/
ld0 = rdmsr(msr_base + 0);
st0 = rdmsr(msr_base + 1);
ls0 = rdmsr(msr_base + 2);
const uint64_t loads = rdmsr(msr_base + 0);
const uint64_t stores = rdmsr(msr_base + 1);
const uint64_t loads_stores = rdmsr(msr_base + 2);
int val;
__asm__ __volatile__("movl $0, %[v];"
"movl %[v], %%eax;"
"incl %[v];"
: [v]"+m"(val) :: "eax");
ld1 = rdmsr(msr_base + 0);
st1 = rdmsr(msr_base + 1);
ls1 = rdmsr(msr_base + 2);
c.loads = ld1 - ld0;
c.stores = st1 - st0;
c.loads_stores = ls1 - ls0;
return c.raw;
pmc_results.loads = rdmsr(msr_base + 0) - loads;
pmc_results.stores = rdmsr(msr_base + 1) - stores;
pmc_results.loads_stores = rdmsr(msr_base + 2) - loads_stores;
}
static void intel_masked_events_guest_code(void)
{
uint64_t r;
for (;;) {
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
@ -507,16 +541,13 @@ static void intel_masked_events_guest_code(void)
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x7);
r = masked_events_guest_test(MSR_IA32_PMC0);
GUEST_SYNC(r);
masked_events_guest_test(MSR_IA32_PMC0);
GUEST_SYNC(0);
}
}
static void amd_masked_events_guest_code(void)
{
uint64_t r;
for (;;) {
wrmsr(MSR_K7_EVNTSEL0, 0);
wrmsr(MSR_K7_EVNTSEL1, 0);
@ -529,26 +560,22 @@ static void amd_masked_events_guest_code(void)
wrmsr(MSR_K7_EVNTSEL2, ARCH_PERFMON_EVENTSEL_ENABLE |
ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD_STORE);
r = masked_events_guest_test(MSR_K7_PERFCTR0);
GUEST_SYNC(r);
masked_events_guest_test(MSR_K7_PERFCTR0);
GUEST_SYNC(0);
}
}
static struct perf_counter run_masked_events_test(struct kvm_vcpu *vcpu,
const uint64_t masked_events[],
const int nmasked_events)
static void run_masked_events_test(struct kvm_vcpu *vcpu,
const uint64_t masked_events[],
const int nmasked_events)
{
struct kvm_pmu_event_filter *f;
struct perf_counter r;
f = create_pmu_event_filter(masked_events, nmasked_events,
KVM_PMU_EVENT_ALLOW,
KVM_PMU_EVENT_FLAG_MASKED_EVENTS);
r.raw = test_with_filter(vcpu, f);
test_with_filter(vcpu, f);
free(f);
return r;
}
/* Matches KVM_PMU_EVENT_FILTER_MAX_EVENTS in pmu.c */
@ -673,7 +700,6 @@ static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events,
int nevents)
{
int ntests = ARRAY_SIZE(test_cases);
struct perf_counter c;
int i, n;
for (i = 0; i < ntests; i++) {
@ -685,13 +711,15 @@ static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events,
n = append_test_events(test, events, nevents);
c = run_masked_events_test(vcpu, events, n);
TEST_ASSERT(bool_eq(c.loads, test->flags & ALLOW_LOADS) &&
bool_eq(c.stores, test->flags & ALLOW_STORES) &&
bool_eq(c.loads_stores,
run_masked_events_test(vcpu, events, n);
TEST_ASSERT(bool_eq(pmc_results.loads, test->flags & ALLOW_LOADS) &&
bool_eq(pmc_results.stores, test->flags & ALLOW_STORES) &&
bool_eq(pmc_results.loads_stores,
test->flags & ALLOW_LOADS_STORES),
"%s loads: %u, stores: %u, loads + stores: %u",
test->msg, c.loads, c.stores, c.loads_stores);
"%s loads: %lu, stores: %lu, loads + stores: %lu",
test->msg, pmc_results.loads, pmc_results.stores,
pmc_results.loads_stores);
}
}


@ -14,12 +14,11 @@
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <sys/ioctl.h>
#include <linux/bitmap.h>
#include "kvm_util.h"
#include "vmx.h"
#define PMU_CAP_FW_WRITES (1ULL << 13)
#define PMU_CAP_LBR_FMT 0x3f
union perf_capabilities {
struct {
u64 lbr_format:6;
@ -36,59 +35,220 @@ union perf_capabilities {
u64 capabilities;
};
static void guest_code(void)
/*
* The LBR format and most PEBS features are immutable, all other features are
* fungible (if supported by the host and KVM).
*/
static const union perf_capabilities immutable_caps = {
.lbr_format = -1,
.pebs_trap = 1,
.pebs_arch_reg = 1,
.pebs_format = -1,
.pebs_baseline = 1,
};
static const union perf_capabilities format_caps = {
.lbr_format = -1,
.pebs_format = -1,
};
static void guest_code(uint64_t current_val)
{
wrmsr(MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT);
uint8_t vector;
int i;
vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, current_val);
GUEST_ASSERT_2(vector == GP_VECTOR, current_val, vector);
vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, 0);
GUEST_ASSERT_2(vector == GP_VECTOR, 0, vector);
for (i = 0; i < 64; i++) {
vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES,
current_val ^ BIT_ULL(i));
GUEST_ASSERT_2(vector == GP_VECTOR,
current_val ^ BIT_ULL(i), vector);
}
GUEST_DONE();
}
/*
* Verify that guest WRMSRs to PERF_CAPABILITIES #GP regardless of the value
* written, that the guest always sees the userspace controlled value, and that
* PERF_CAPABILITIES is immutable after KVM_RUN.
*/
static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, guest_code);
struct ucall uc;
int r, i;
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vcpu);
vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
vcpu_args_set(vcpu, 1, host_cap.capabilities);
vcpu_run(vcpu);
switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
REPORT_GUEST_ASSERT_2(uc, "val = 0x%lx, vector = %lu");
break;
case UCALL_DONE:
break;
default:
TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
}
ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), host_cap.capabilities);
vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
TEST_ASSERT(!r, "Post-KVM_RUN write '0' didn't fail");
for (i = 0; i < 64; i++) {
r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
host_cap.capabilities ^ BIT_ULL(i));
TEST_ASSERT(!r, "Post-KVM_RUN write '0x%llx'didn't fail",
host_cap.capabilities ^ BIT_ULL(i));
}
kvm_vm_free(vm);
}
/*
* Verify KVM allows writing PERF_CAPABILITIES with all KVM-supported features
* enabled, as well as '0' (to disable all features).
*/
static void test_basic_perf_capabilities(union perf_capabilities host_cap)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL);
vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
kvm_vm_free(vm);
}
static void test_fungible_perf_capabilities(union perf_capabilities host_cap)
{
const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL);
int bit;
for_each_set_bit(bit, &fungible_caps, 64) {
vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(bit));
vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
host_cap.capabilities & ~BIT_ULL(bit));
}
vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
kvm_vm_free(vm);
}
/*
* Verify KVM rejects attempts to set unsupported and/or immutable features in
* PERF_CAPABILITIES. Note, LBR format and PEBS format need to be validated
* separately as they are multi-bit values, e.g. toggling or setting a single
* bit can generate a false positive without dedicated safeguards.
*/
static void test_immutable_perf_capabilities(union perf_capabilities host_cap)
{
const uint64_t reserved_caps = (~host_cap.capabilities |
immutable_caps.capabilities) &
~format_caps.capabilities;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL);
union perf_capabilities val = host_cap;
int r, bit;
for_each_set_bit(bit, &reserved_caps, 64) {
r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
host_cap.capabilities ^ BIT_ULL(bit));
TEST_ASSERT(!r, "%s immutable feature 0x%llx (bit %d) didn't fail",
host_cap.capabilities & BIT_ULL(bit) ? "Setting" : "Clearing",
BIT_ULL(bit), bit);
}
/*
* KVM only supports the host's native LBR format, as well as '0' (to
* disable LBR support). Verify KVM rejects all other LBR formats.
*/
for (val.lbr_format = 1; val.lbr_format; val.lbr_format++) {
if (val.lbr_format == host_cap.lbr_format)
continue;
r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
TEST_ASSERT(!r, "Bad LBR FMT = 0x%x didn't fail, host = 0x%x",
val.lbr_format, host_cap.lbr_format);
}
/* Ditto for the PEBS format. */
for (val.pebs_format = 1; val.pebs_format; val.pebs_format++) {
if (val.pebs_format == host_cap.pebs_format)
continue;
r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
TEST_ASSERT(!r, "Bad PEBS FMT = 0x%x didn't fail, host = 0x%x",
val.pebs_format, host_cap.pebs_format);
}
kvm_vm_free(vm);
}
/*
* Test that LBR MSRs are writable when LBRs are enabled, and then verify that
* disabling the vPMU via CPUID also disables LBR support. Set bits 2:0 of
* LBR_TOS as those bits are writable across all uarch implementations (arch
* LBRs will need to poke a different MSR).
*/
static void test_lbr_perf_capabilities(union perf_capabilities host_cap)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
int r;
if (!host_cap.lbr_format)
return;
vm = vm_create_with_one_vcpu(&vcpu, NULL);
vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
vcpu_clear_cpuid_entry(vcpu, X86_PROPERTY_PMU_VERSION.function);
r = _vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU");
kvm_vm_free(vm);
}
int main(int argc, char *argv[])
{
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
int ret;
union perf_capabilities host_cap;
uint64_t val;
host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
host_cap.capabilities &= (PMU_CAP_FW_WRITES | PMU_CAP_LBR_FMT);
/* Create VM */
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
/* testcase 1, set capabilities when we have PDCM bit */
vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES);
host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
/* check capabilities can be retrieved with KVM_GET_MSR */
ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
TEST_ASSERT(host_cap.full_width_write,
"Full-width writes should always be supported");
/* check whatever we write with KVM_SET_MSR is _not_ modified */
vcpu_run(vcpu);
ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
/* testcase 2, check valid LBR formats are accepted */
vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), 0);
vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.lbr_format);
ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), (u64)host_cap.lbr_format);
/*
* Testcase 3, check that an "invalid" LBR format is rejected. Only an
* exact match of the host's format (and 0/disabled) is allowed.
*/
for (val = 1; val <= PMU_CAP_LBR_FMT; val++) {
if (val == (host_cap.capabilities & PMU_CAP_LBR_FMT))
continue;
ret = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val);
TEST_ASSERT(!ret, "Bad LBR FMT = 0x%lx didn't fail", val);
}
printf("Completed perf capability tests.\n");
kvm_vm_free(vm);
test_basic_perf_capabilities(host_cap);
test_fungible_perf_capabilities(host_cap);
test_immutable_perf_capabilities(host_cap);
test_guest_wrmsr_perf_capabilities(host_cap);
test_lbr_perf_capabilities(host_cap);
}