Merge branch 'kvm-late-6.1-fixes' into HEAD
x86:

* several fixes to nested VMX execution controls
* fixes and clarification to the documentation for Xen emulation
* do not unnecessarily release a pmu event with zero period
* MMU fixes
* fix Coverity warning in kvm_hv_flush_tlb()

selftests:

* fixes for the ucall mechanism in selftests
* other fixes mostly related to compilation with clang
Commit a5496886eb
Documentation/virt/kvm/api.rst

@@ -5343,9 +5343,9 @@ KVM_XEN_ATTR_TYPE_SHARED_INFO
   32 vCPUs in the shared_info page, KVM does not automatically do so
   and instead requires that KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO be used
   explicitly even when the vcpu_info for a given vCPU resides at the
-  "default" location in the shared_info page. This is because KVM is
-  not aware of the Xen CPU id which is used as the index into the
-  vcpu_info[] array, so cannot know the correct default location.
+  "default" location in the shared_info page. This is because KVM may
+  not be aware of the Xen CPU id which is used as the index into the
+  vcpu_info[] array, so may not know the correct default location.
 
   Note that the shared info page may be constantly written to by KVM;
   it contains the event channel bitmap used to deliver interrupts to

@@ -5356,23 +5356,29 @@ KVM_XEN_ATTR_TYPE_SHARED_INFO
   any vCPU has been running or any event channel interrupts can be
   routed to the guest.
 
+  Setting the gfn to KVM_XEN_INVALID_GFN will disable the shared info
+  page.
+
 KVM_XEN_ATTR_TYPE_UPCALL_VECTOR
   Sets the exception vector used to deliver Xen event channel upcalls.
   This is the HVM-wide vector injected directly by the hypervisor
   (not through the local APIC), typically configured by a guest via
-  HVM_PARAM_CALLBACK_IRQ.
+  HVM_PARAM_CALLBACK_IRQ. This can be disabled again (e.g. for guest
+  SHUTDOWN_soft_reset) by setting it to zero.
 
 KVM_XEN_ATTR_TYPE_EVTCHN
   This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
   support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It configures
   an outbound port number for interception of EVTCHNOP_send requests
-  from the guest. A given sending port number may be directed back
-  to a specified vCPU (by APIC ID) / port / priority on the guest,
-  or to trigger events on an eventfd. The vCPU and priority can be
-  changed by setting KVM_XEN_EVTCHN_UPDATE in a subsequent call,
-  but other fields cannot change for a given sending port. A port
-  mapping is removed by using KVM_XEN_EVTCHN_DEASSIGN in the flags
-  field.
+  from the guest. A given sending port number may be directed back to
+  a specified vCPU (by APIC ID) / port / priority on the guest, or to
+  trigger events on an eventfd. The vCPU and priority can be changed
+  by setting KVM_XEN_EVTCHN_UPDATE in a subsequent call, but other
+  fields cannot change for a given sending port. A port mapping is
+  removed by using KVM_XEN_EVTCHN_DEASSIGN in the flags field. Passing
+  KVM_XEN_EVTCHN_RESET in the flags field removes all interception of
+  outbound event channels. The values of the flags field are mutually
+  exclusive and cannot be combined as a bitmask.
 
 KVM_XEN_ATTR_TYPE_XEN_VERSION
   This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates

@@ -5388,7 +5394,7 @@ KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG
   support for KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG. It enables the
   XEN_RUNSTATE_UPDATE flag which allows guest vCPUs to safely read
   other vCPUs' vcpu_runstate_info. Xen guests enable this feature via
-  the VM_ASST_TYPE_runstate_update_flag of the HYPERVISOR_vm_assist
+  the VMASST_TYPE_runstate_update_flag of the HYPERVISOR_vm_assist
   hypercall.
 
 4.127 KVM_XEN_HVM_GET_ATTR

@@ -5446,15 +5452,18 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO
   As with the shared_info page for the VM, the corresponding page may be
   dirtied at any time if event channel interrupt delivery is enabled, so
   userspace should always assume that the page is dirty without relying
-  on dirty logging.
+  on dirty logging. Setting the gpa to KVM_XEN_INVALID_GPA will disable
+  the vcpu_info.
 
 KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
   Sets the guest physical address of an additional pvclock structure
   for a given vCPU. This is typically used for guest vsyscall support.
+  Setting the gpa to KVM_XEN_INVALID_GPA will disable the structure.
 
 KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR
   Sets the guest physical address of the vcpu_runstate_info for a given
   vCPU. This is how a Xen guest tracks CPU state such as steal time.
+  Setting the gpa to KVM_XEN_INVALID_GPA will disable the runstate area.
 
 KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT
   Sets the runstate (RUNSTATE_running/_runnable/_blocked/_offline) of

@@ -5487,7 +5496,8 @@ KVM_XEN_VCPU_ATTR_TYPE_TIMER
   This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
   support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It sets the
   event channel port/priority for the VIRQ_TIMER of the vCPU, as well
-  as allowing a pending timer to be saved/restored.
+  as allowing a pending timer to be saved/restored. Setting the timer
+  port to zero disables kernel handling of the singleshot timer.
 
 KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR
   This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates

@@ -5495,7 +5505,8 @@ KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR
   per-vCPU local APIC upcall vector, configured by a Xen guest with
   the HVMOP_set_evtchn_upcall_vector hypercall. This is typically
   used by Windows guests, and is distinct from the HVM-wide upcall
-  vector configured with HVM_PARAM_CALLBACK_IRQ.
+  vector configured with HVM_PARAM_CALLBACK_IRQ. It is disabled by
+  setting the vector to zero.
 
 4.129 KVM_XEN_VCPU_GET_ATTR

@@ -6577,11 +6588,6 @@ Please note that the kernel is allowed to use the kvm_run structure as the
 primary storage for certain register types. Therefore, the kernel may use the
 values in kvm_run even if the corresponding bit in kvm_dirty_regs is not set.
 
-::
-
-  };
-
 
 6. Capabilities that can be enabled on vCPUs
 ============================================
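As a companion to the documentation changes above, here is a minimal userspace sketch of driving the shared_info attribute through the documented ioctl. The helper name is hypothetical; the ioctl, structure, and KVM_XEN_INVALID_GFN sentinel are the real ones described above.

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Point KVM at the Xen shared_info page, or pass KVM_XEN_INVALID_GFN
 * to disable it again, as the documentation above describes. */
static int xen_set_shared_info(int vm_fd, __u64 gfn)
{
	struct kvm_xen_hvm_attr ha;

	memset(&ha, 0, sizeof(ha));
	ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO;
	ha.u.shared_info.gfn = gfn;

	return ioctl(vm_fd, KVM_XEN_HVM_SET_ATTR, &ha);
}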
Documentation/virt/kvm/locking.rst

@@ -16,17 +16,26 @@ The acquisition orders for mutexes are as follows:
 - kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
   them together is quite rare.
 
-- Unlike kvm->slots_lock, kvm->slots_arch_lock is released before
-  synchronize_srcu(&kvm->srcu). Therefore kvm->slots_arch_lock
-  can be taken inside a kvm->srcu read-side critical section,
-  while kvm->slots_lock cannot.
-
 - kvm->mn_active_invalidate_count ensures that pairs of
   invalidate_range_start() and invalidate_range_end() callbacks
   use the same memslots array. kvm->slots_lock and kvm->slots_arch_lock
   are taken on the waiting side in install_new_memslots, so MMU notifiers
   must not take either kvm->slots_lock or kvm->slots_arch_lock.
 
+For SRCU:
+
+- ``synchronize_srcu(&kvm->srcu)`` is called _inside_
+  the kvm->slots_lock critical section, therefore kvm->slots_lock
+  cannot be taken inside a kvm->srcu read-side critical section.
+  Instead, kvm->slots_arch_lock is released before the call
+  to ``synchronize_srcu()`` and _can_ be taken inside a
+  kvm->srcu read-side critical section.
+
+- kvm->lock is taken inside kvm->srcu, therefore
+  ``synchronize_srcu(&kvm->srcu)`` cannot be called inside
+  a kvm->lock critical section. If you cannot delay the
+  call until after kvm->lock is released, use ``call_srcu``.
+
 On x86:
 
 - vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock
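A kernel-style sketch of the two rules being documented; both function names are hypothetical, but the locks and SRCU primitives are the real ones from struct kvm.

#include <linux/kvm_host.h>

static void reader_side(struct kvm *kvm)
{
	int idx = srcu_read_lock(&kvm->srcu);

	/*
	 * Legal: kvm->slots_arch_lock is released before synchronize_srcu(),
	 * so it may be taken inside an SRCU read-side critical section.
	 * kvm->slots_lock may NOT be taken here.
	 */
	mutex_lock(&kvm->slots_arch_lock);
	mutex_unlock(&kvm->slots_arch_lock);

	srcu_read_unlock(&kvm->srcu, idx);
}

static void writer_side(struct kvm *kvm)
{
	mutex_lock(&kvm->lock);
	/*
	 * Illegal here: kvm->lock is taken inside kvm->srcu read-side
	 * sections, so waiting for a grace period under it can deadlock.
	 * Defer the wait until kvm->lock is dropped, or use call_srcu():
	 *
	 *	synchronize_srcu(&kvm->srcu);
	 */
	mutex_unlock(&kvm->lock);
}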
MAINTAINERS

@@ -11468,7 +11468,7 @@ F:	arch/x86/kvm/hyperv.*
 F:	arch/x86/kvm/kvm_onhyperv.*
 F:	arch/x86/kvm/svm/hyperv.*
 F:	arch/x86/kvm/svm/svm_onhyperv.*
-F:	arch/x86/kvm/vmx/evmcs.*
+F:	arch/x86/kvm/vmx/hyperv.*
 
 KVM X86 Xen (KVM/Xen)
 M:	David Woodhouse <dwmw2@infradead.org>
arch/x86/kvm/hyperv.c

@@ -1769,6 +1769,7 @@ static bool hv_is_vp_in_sparse_set(u32 vp_id, u64 valid_bank_mask, u64 sparse_ba
 }
 
 struct kvm_hv_hcall {
+	/* Hypercall input data */
 	u64 param;
 	u64 ingpa;
 	u64 outgpa;

@@ -1779,12 +1780,21 @@ struct kvm_hv_hcall {
 	bool fast;
 	bool rep;
 	sse128_t xmm[HV_HYPERCALL_MAX_XMM_REGISTERS];
+
+	/*
+	 * Current read offset when KVM reads hypercall input data gradually,
+	 * either offset in bytes from 'ingpa' for regular hypercalls or the
+	 * number of already consumed 'XMM halves' for 'fast' hypercalls.
+	 */
+	union {
+		gpa_t data_offset;
+		int consumed_xmm_halves;
+	};
 };
 
 static int kvm_hv_get_hc_data(struct kvm *kvm, struct kvm_hv_hcall *hc,
-			      u16 orig_cnt, u16 cnt_cap, u64 *data,
-			      int consumed_xmm_halves, gpa_t offset)
+			      u16 orig_cnt, u16 cnt_cap, u64 *data)
 {
 	/*
 	 * Preserve the original count when ignoring entries via a "cap", KVM

@@ -1799,11 +1809,11 @@ static int kvm_hv_get_hc_data(struct kvm *kvm, struct kvm_hv_hcall *hc,
 		 * Each XMM holds two sparse banks, but do not count halves that
 		 * have already been consumed for hypercall parameters.
 		 */
-		if (orig_cnt > 2 * HV_HYPERCALL_MAX_XMM_REGISTERS - consumed_xmm_halves)
+		if (orig_cnt > 2 * HV_HYPERCALL_MAX_XMM_REGISTERS - hc->consumed_xmm_halves)
 			return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
 		for (i = 0; i < cnt; i++) {
-			j = i + consumed_xmm_halves;
+			j = i + hc->consumed_xmm_halves;
 			if (j % 2)
 				data[i] = sse128_hi(hc->xmm[j / 2]);
 			else

@@ -1812,27 +1822,24 @@ static int kvm_hv_get_hc_data(struct kvm *kvm, struct kvm_hv_hcall *hc,
 		return 0;
 	}
 
-	return kvm_read_guest(kvm, hc->ingpa + offset, data,
+	return kvm_read_guest(kvm, hc->ingpa + hc->data_offset, data,
 			      cnt * sizeof(*data));
 }
 
 static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc,
-				 u64 *sparse_banks, int consumed_xmm_halves,
-				 gpa_t offset)
+				 u64 *sparse_banks)
 {
 	if (hc->var_cnt > HV_MAX_SPARSE_VCPU_BANKS)
 		return -EINVAL;
 
 	/* Cap var_cnt to ignore banks that cannot contain a legal VP index. */
 	return kvm_hv_get_hc_data(kvm, hc, hc->var_cnt, KVM_HV_MAX_SPARSE_VCPU_SET_BITS,
-				  sparse_banks, consumed_xmm_halves, offset);
+				  sparse_banks);
 }
 
-static int kvm_hv_get_tlb_flush_entries(struct kvm *kvm, struct kvm_hv_hcall *hc, u64 entries[],
-					int consumed_xmm_halves, gpa_t offset)
+static int kvm_hv_get_tlb_flush_entries(struct kvm *kvm, struct kvm_hv_hcall *hc, u64 entries[])
 {
-	return kvm_hv_get_hc_data(kvm, hc, hc->rep_cnt, hc->rep_cnt,
-				  entries, consumed_xmm_halves, offset);
+	return kvm_hv_get_hc_data(kvm, hc, hc->rep_cnt, hc->rep_cnt, entries);
 }
 
 static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu,

@@ -1926,8 +1933,6 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 	struct kvm_vcpu *v;
 	unsigned long i;
 	bool all_cpus;
-	int consumed_xmm_halves = 0;
-	gpa_t data_offset;
 
 	/*
 	 * The Hyper-V TLFS doesn't allow more than HV_MAX_SPARSE_VCPU_BANKS

@@ -1955,12 +1960,12 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 			flush.address_space = hc->ingpa;
 			flush.flags = hc->outgpa;
 			flush.processor_mask = sse128_lo(hc->xmm[0]);
-			consumed_xmm_halves = 1;
+			hc->consumed_xmm_halves = 1;
 		} else {
 			if (unlikely(kvm_read_guest(kvm, hc->ingpa,
 						    &flush, sizeof(flush))))
 				return HV_STATUS_INVALID_HYPERCALL_INPUT;
-			data_offset = sizeof(flush);
+			hc->data_offset = sizeof(flush);
 		}
 
 		trace_kvm_hv_flush_tlb(flush.processor_mask,

@@ -1985,12 +1990,12 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 			flush_ex.flags = hc->outgpa;
 			memcpy(&flush_ex.hv_vp_set,
 			       &hc->xmm[0], sizeof(hc->xmm[0]));
-			consumed_xmm_halves = 2;
+			hc->consumed_xmm_halves = 2;
 		} else {
 			if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex,
 						    sizeof(flush_ex))))
 				return HV_STATUS_INVALID_HYPERCALL_INPUT;
-			data_offset = sizeof(flush_ex);
+			hc->data_offset = sizeof(flush_ex);
 		}
 
 		trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,

@@ -2009,8 +2014,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 			if (!hc->var_cnt)
 				goto ret_success;
 
-			if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks,
-						  consumed_xmm_halves, data_offset))
+			if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks))
 				return HV_STATUS_INVALID_HYPERCALL_INPUT;
 		}
 

@@ -2021,8 +2025,10 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 		 * consumed_xmm_halves to make sure TLB flush entries are read
 		 * from the correct offset.
 		 */
-		data_offset += hc->var_cnt * sizeof(sparse_banks[0]);
-		consumed_xmm_halves += hc->var_cnt;
+		if (hc->fast)
+			hc->consumed_xmm_halves += hc->var_cnt;
+		else
+			hc->data_offset += hc->var_cnt * sizeof(sparse_banks[0]);
 	}
 
 	if (hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE ||

@@ -2030,8 +2036,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 	    hc->rep_cnt > ARRAY_SIZE(__tlb_flush_entries)) {
 		tlb_flush_entries = NULL;
 	} else {
-		if (kvm_hv_get_tlb_flush_entries(kvm, hc, __tlb_flush_entries,
-						 consumed_xmm_halves, data_offset))
+		if (kvm_hv_get_tlb_flush_entries(kvm, hc, __tlb_flush_entries))
 			return HV_STATUS_INVALID_HYPERCALL_INPUT;
 		tlb_flush_entries = __tlb_flush_entries;
 	}

@@ -2180,9 +2185,13 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 			if (!hc->var_cnt)
 				goto ret_success;
 
-			if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks, 1,
-						  offsetof(struct hv_send_ipi_ex,
-							   vp_set.bank_contents)))
+			if (!hc->fast)
+				hc->data_offset = offsetof(struct hv_send_ipi_ex,
+							   vp_set.bank_contents);
+			else
+				hc->consumed_xmm_halves = 1;
+
+			if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks))
 				return HV_STATUS_INVALID_HYPERCALL_INPUT;
 		}
 
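The indexing scheme that the new 'consumed_xmm_halves' field drives can be shown in isolation: for a 'fast' hypercall, each 128-bit XMM register carries two 64-bit values, and halves already consumed for fixed hypercall parameters must be skipped. A standalone userspace sketch (mock sse128 type; constants mirror the kernel's, everything else is illustrative):

#include <stdint.h>
#include <stdio.h>

#define MAX_XMM	6	/* mirrors HV_HYPERCALL_MAX_XMM_REGISTERS */

struct xmm { uint64_t lo, hi; };

static int get_fast_data(const struct xmm *xmm, int consumed_xmm_halves,
			 uint64_t *data, int cnt)
{
	for (int i = 0; i < cnt; i++) {
		int j = i + consumed_xmm_halves;

		if (j >= 2 * MAX_XMM)
			return -1;
		data[i] = (j % 2) ? xmm[j / 2].hi : xmm[j / 2].lo;
	}
	return 0;
}

int main(void)
{
	struct xmm regs[MAX_XMM] = { { 1, 2 }, { 3, 4 } };
	uint64_t out[3];

	/* One half was consumed for 'processor_mask', as in the patch. */
	if (!get_fast_data(regs, 1, out, 3))
		printf("%llu %llu %llu\n", (unsigned long long)out[0],
		       (unsigned long long)out[1], (unsigned long long)out[2]);
	return 0;	/* prints: 2 3 4 */
}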
arch/x86/kvm/irq_comm.c

@@ -426,8 +426,9 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
 		kvm_set_msi_irq(vcpu->kvm, entry, &irq);
 
 		if (irq.trig_mode &&
-		    kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
-					irq.dest_id, irq.dest_mode))
+		    (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
+					 irq.dest_id, irq.dest_mode) ||
+		     kvm_apic_pending_eoi(vcpu, irq.vector)))
 			__set_bit(irq.vector, ioapic_handled_vectors);
 	}
 }
arch/x86/kvm/lapic.h

@@ -188,11 +188,11 @@ static inline bool lapic_in_kernel(struct kvm_vcpu *vcpu)
 
 extern struct static_key_false_deferred apic_hw_disabled;
 
-static inline int kvm_apic_hw_enabled(struct kvm_lapic *apic)
+static inline bool kvm_apic_hw_enabled(struct kvm_lapic *apic)
 {
 	if (static_branch_unlikely(&apic_hw_disabled.key))
 		return apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
-	return MSR_IA32_APICBASE_ENABLE;
+	return true;
 }
 
 extern struct static_key_false_deferred apic_sw_disabled;
arch/x86/kvm/mmu/spte.h

@@ -363,7 +363,7 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check,
  * A shadow-present leaf SPTE may be non-writable for 4 possible reasons:
  *
  * 1. To intercept writes for dirty logging. KVM write-protects huge pages
- *    so that they can be split be split down into the dirty logging
+ *    so that they can be split down into the dirty logging
  *    granularity (4KiB) whenever the guest writes to them. KVM also
  *    write-protects 4KiB pages so that writes can be recorded in the dirty log
  *    (e.g. if not using PML). SPTEs are write-protected for dirty logging
arch/x86/kvm/mmu/tdp_mmu.c

@@ -1074,7 +1074,9 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
 	int ret = RET_PF_FIXED;
 	bool wrprot = false;
 
-	WARN_ON(sp->role.level != fault->goal_level);
+	if (WARN_ON_ONCE(sp->role.level != fault->goal_level))
+		return RET_PF_RETRY;
+
 	if (unlikely(!fault->slot))
 		new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
 	else

@@ -1173,9 +1175,6 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		if (fault->nx_huge_page_workaround_enabled)
 			disallowed_hugepage_adjust(fault, iter.old_spte, iter.level);
 
-		if (iter.level == fault->goal_level)
-			break;
-
 		/*
 		 * If SPTE has been frozen by another thread, just give up and
 		 * retry, avoiding unnecessary page table allocation and free.

@@ -1183,6 +1182,9 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		if (is_removed_spte(iter.old_spte))
 			goto retry;
 
+		if (iter.level == fault->goal_level)
+			goto map_target_level;
+
 		/* Step down into the lower level page table if it exists. */
 		if (is_shadow_present_pte(iter.old_spte) &&
 		    !is_large_pte(iter.old_spte))

@@ -1203,8 +1205,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 			r = tdp_mmu_link_sp(kvm, &iter, sp, true);
 
 		/*
-		 * Also force the guest to retry the access if the upper level SPTEs
-		 * aren't in place.
+		 * Force the guest to retry if installing an upper level SPTE
+		 * failed, e.g. because a different task modified the SPTE.
 		 */
 		if (r) {
 			tdp_mmu_free_sp(sp);

@@ -1214,11 +1216,20 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		if (fault->huge_page_disallowed &&
 		    fault->req_level >= iter.level) {
 			spin_lock(&kvm->arch.tdp_mmu_pages_lock);
-			track_possible_nx_huge_page(kvm, sp);
+			if (sp->nx_huge_page_disallowed)
+				track_possible_nx_huge_page(kvm, sp);
 			spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 		}
 	}
 
+	/*
+	 * The walk aborted before reaching the target level, e.g. because the
+	 * iterator detected an upper level SPTE was frozen during traversal.
+	 */
+	WARN_ON_ONCE(iter.level == fault->goal_level);
+	goto retry;
+
+map_target_level:
 	ret = tdp_mmu_map_handle_target_level(vcpu, fault, &iter);
 
 retry:
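The conversion from a bare WARN_ON() to "if (WARN_ON_ONCE(...))" works because the WARN_ON* macros return the value of the condition they test, so one statement can both warn (once) and bail out. A kernel-style sketch of the pattern, with a hypothetical function that is not from the patch:

#include <linux/bug.h>

static int handle_target_level(int level, int goal_level)
{
	/* Warn only on the first mismatch, then force a retry. */
	if (WARN_ON_ONCE(level != goal_level))
		return -EAGAIN;	/* stands in for RET_PF_RETRY */

	return 0;
}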
arch/x86/kvm/pmu.c

@@ -238,7 +238,8 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
 		return false;
 
 	/* recalibrate sample period and check if it's accepted by perf core */
-	if (perf_event_period(pmc->perf_event,
+	if (is_sampling_event(pmc->perf_event) &&
+	    perf_event_period(pmc->perf_event,
 			      get_sample_period(pmc, pmc->counter)))
 		return false;
 
arch/x86/kvm/pmu.h

@@ -140,7 +140,8 @@ static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
 
 static inline void pmc_update_sample_period(struct kvm_pmc *pmc)
 {
-	if (!pmc->perf_event || pmc->is_paused)
+	if (!pmc->perf_event || pmc->is_paused ||
+	    !is_sampling_event(pmc->perf_event))
 		return;
 
 	perf_event_period(pmc->perf_event,
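Both halves of the PMU fix rest on the same distinction: in perf, an event is a "sampling" event only if it was created with a non-zero sample period or frequency, and only sampling events have a period worth updating. A userspace sketch of that distinction (the helper name is an assumption; the perf_event_open() usage is the standard one):

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int open_hw_event(int sampling)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	/*
	 * sample_period == 0 creates a pure counting event; updating the
	 * period of such an event is meaningless, which is what the added
	 * is_sampling_event() checks guard against inside KVM.
	 */
	attr.sample_period = sampling ? 100000 : 0;

	return syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
}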
arch/x86/kvm/vmx/nested.c

@@ -5296,10 +5296,19 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
 		if (vmptr == vmx->nested.current_vmptr)
 			nested_release_vmcs12(vcpu);
 
-		kvm_vcpu_write_guest(vcpu,
-				     vmptr + offsetof(struct vmcs12,
-						      launch_state),
-				     &zero, sizeof(zero));
+		/*
+		 * Silently ignore memory errors on VMCLEAR, Intel's pseudocode
+		 * for VMCLEAR includes a "ensure that data for VMCS referenced
+		 * by the operand is in memory" clause that guards writes to
+		 * memory, i.e. doing nothing for I/O is architecturally valid.
+		 *
+		 * FIXME: Suppress failures if and only if no memslot is found,
+		 * i.e. exit to userspace if __copy_to_user() fails.
+		 */
+		(void)kvm_vcpu_write_guest(vcpu,
+					   vmptr + offsetof(struct vmcs12,
+							    launch_state),
+					   &zero, sizeof(zero));
 	} else if (vmx->nested.hv_evmcs && vmptr == vmx->nested.hv_evmcs_vmptr) {
 		nested_release_evmcs(vcpu);
 	}

@@ -6873,7 +6882,8 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps)
 		SECONDARY_EXEC_ENABLE_INVPCID |
 		SECONDARY_EXEC_RDSEED_EXITING |
 		SECONDARY_EXEC_XSAVES |
-		SECONDARY_EXEC_TSC_SCALING;
+		SECONDARY_EXEC_TSC_SCALING |
+		SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
 
 	/*
 	 * We can emulate "VMCS shadowing," even if the hardware
arch/x86/kvm/vmx/vmx.c

@@ -4459,6 +4459,13 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control,
 	 * controls for features that are/aren't exposed to the guest.
 	 */
 	if (nested) {
+		/*
+		 * All features that can be added or removed to VMX MSRs must
+		 * be supported in the first place for nested virtualization.
+		 */
+		if (WARN_ON_ONCE(!(vmcs_config.nested.secondary_ctls_high & control)))
+			enabled = false;
+
 		if (enabled)
 			vmx->nested.msrs.secondary_ctls_high |= control;
 		else
arch/x86/kvm/x86.c

@@ -13132,6 +13132,9 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
 			      struct x86_exception *e)
 {
 	if (r == X86EMUL_PROPAGATE_FAULT) {
+		if (KVM_BUG_ON(!e, vcpu->kvm))
+			return -EIO;
+
 		kvm_inject_emulated_page_fault(vcpu, e);
 		return 1;
 	}
arch/x86/kvm/xen.c

@@ -41,7 +41,7 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
 	int ret = 0;
 	int idx = srcu_read_lock(&kvm->srcu);
 
-	if (gfn == GPA_INVALID) {
+	if (gfn == KVM_XEN_INVALID_GFN) {
 		kvm_gpc_deactivate(gpc);
 		goto out;
 	}

@@ -659,7 +659,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 		if (kvm->arch.xen.shinfo_cache.active)
 			data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa);
 		else
-			data->u.shared_info.gfn = GPA_INVALID;
+			data->u.shared_info.gfn = KVM_XEN_INVALID_GFN;
 		r = 0;
 		break;
 

@@ -705,7 +705,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 		BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
 			     offsetof(struct compat_vcpu_info, time));
 
-		if (data->u.gpa == GPA_INVALID) {
+		if (data->u.gpa == KVM_XEN_INVALID_GPA) {
 			kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache);
 			r = 0;
 			break;

@@ -719,7 +719,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 		break;
 
 	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
-		if (data->u.gpa == GPA_INVALID) {
+		if (data->u.gpa == KVM_XEN_INVALID_GPA) {
 			kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache);
 			r = 0;
 			break;

@@ -739,7 +739,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 			r = -EOPNOTSUPP;
 			break;
 		}
-		if (data->u.gpa == GPA_INVALID) {
+		if (data->u.gpa == KVM_XEN_INVALID_GPA) {
 			r = 0;
 		deactivate_out:
 			kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache);

@@ -937,7 +937,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 		if (vcpu->arch.xen.vcpu_info_cache.active)
 			data->u.gpa = vcpu->arch.xen.vcpu_info_cache.gpa;
 		else
-			data->u.gpa = GPA_INVALID;
+			data->u.gpa = KVM_XEN_INVALID_GPA;
 		r = 0;
 		break;
 

@@ -945,7 +945,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 		if (vcpu->arch.xen.vcpu_time_info_cache.active)
 			data->u.gpa = vcpu->arch.xen.vcpu_time_info_cache.gpa;
 		else
-			data->u.gpa = GPA_INVALID;
+			data->u.gpa = KVM_XEN_INVALID_GPA;
 		r = 0;
 		break;
 

@@ -1069,6 +1069,7 @@ int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
 		u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
 				  : kvm->arch.xen_hvm_config.blob_size_32;
 		u8 *page;
+		int ret;
 
 		if (page_num >= blob_size)
 			return 1;

@@ -1079,10 +1080,10 @@ int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
 		if (IS_ERR(page))
 			return PTR_ERR(page);
 
-		if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE)) {
-			kfree(page);
+		ret = kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE);
+		kfree(page);
+		if (ret)
 			return 1;
-		}
 	}
 	return 0;
 }

@@ -1183,30 +1184,22 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
 static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
 				 u64 param, u64 *r)
 {
-	int idx, i;
 	struct sched_poll sched_poll;
 	evtchn_port_t port, *ports;
-	gpa_t gpa;
+	struct x86_exception e;
+	int i;
 
 	if (!lapic_in_kernel(vcpu) ||
 	    !(vcpu->kvm->arch.xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND))
 		return false;
 
-	idx = srcu_read_lock(&vcpu->kvm->srcu);
-	gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL);
-	srcu_read_unlock(&vcpu->kvm->srcu, idx);
-	if (!gpa) {
-		*r = -EFAULT;
-		return true;
-	}
-
 	if (IS_ENABLED(CONFIG_64BIT) && !longmode) {
 		struct compat_sched_poll sp32;
 
 		/* Sanity check that the compat struct definition is correct */
 		BUILD_BUG_ON(sizeof(sp32) != 16);
 
-		if (kvm_vcpu_read_guest(vcpu, gpa, &sp32, sizeof(sp32))) {
+		if (kvm_read_guest_virt(vcpu, param, &sp32, sizeof(sp32), &e)) {
 			*r = -EFAULT;
 			return true;
 		}

@@ -1220,8 +1213,8 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
 		sched_poll.nr_ports = sp32.nr_ports;
 		sched_poll.timeout = sp32.timeout;
 	} else {
-		if (kvm_vcpu_read_guest(vcpu, gpa, &sched_poll,
-					sizeof(sched_poll))) {
+		if (kvm_read_guest_virt(vcpu, param, &sched_poll,
+					sizeof(sched_poll), &e)) {
 			*r = -EFAULT;
 			return true;
 		}

@@ -1243,18 +1236,13 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
 	} else
 		ports = &port;
 
-	for (i = 0; i < sched_poll.nr_ports; i++) {
-		idx = srcu_read_lock(&vcpu->kvm->srcu);
-		gpa = kvm_mmu_gva_to_gpa_system(vcpu,
-						(gva_t)(sched_poll.ports + i),
-						NULL);
-		srcu_read_unlock(&vcpu->kvm->srcu, idx);
-
-		if (!gpa || kvm_vcpu_read_guest(vcpu, gpa,
-						&ports[i], sizeof(port))) {
-			*r = -EFAULT;
-			goto out;
-		}
+	if (kvm_read_guest_virt(vcpu, (gva_t)sched_poll.ports, ports,
+				sched_poll.nr_ports * sizeof(*ports), &e)) {
+		*r = -EFAULT;
+		return true;
+	}
 
+	for (i = 0; i < sched_poll.nr_ports; i++) {
 		if (ports[i] >= max_evtchn_port(vcpu->kvm)) {
 			*r = -EINVAL;
 			goto out;

@@ -1330,9 +1318,8 @@ static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd,
 				  int vcpu_id, u64 param, u64 *r)
 {
 	struct vcpu_set_singleshot_timer oneshot;
+	struct x86_exception e;
 	s64 delta;
-	gpa_t gpa;
-	int idx;
 
 	if (!kvm_xen_timer_enabled(vcpu))
 		return false;

@@ -1343,9 +1330,6 @@ static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd,
 			*r = -EINVAL;
 			return true;
 		}
-		idx = srcu_read_lock(&vcpu->kvm->srcu);
-		gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL);
-		srcu_read_unlock(&vcpu->kvm->srcu, idx);
 
 		/*
 		 * The only difference for 32-bit compat is the 4 bytes of

@@ -1363,9 +1347,8 @@ static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd,
 		BUILD_BUG_ON(sizeof_field(struct compat_vcpu_set_singleshot_timer, flags) !=
 			     sizeof_field(struct vcpu_set_singleshot_timer, flags));
 
-		if (!gpa ||
-		    kvm_vcpu_read_guest(vcpu, gpa, &oneshot, longmode ? sizeof(oneshot) :
-					sizeof(struct compat_vcpu_set_singleshot_timer))) {
+		if (kvm_read_guest_virt(vcpu, param, &oneshot, longmode ? sizeof(oneshot) :
+					sizeof(struct compat_vcpu_set_singleshot_timer), &e)) {
 			*r = -EFAULT;
 			return true;
 		}

@@ -1825,20 +1808,20 @@ static int kvm_xen_eventfd_update(struct kvm *kvm,
 {
 	u32 port = data->u.evtchn.send_port;
 	struct evtchnfd *evtchnfd;
+	int ret;
 
 	if (!port || port >= max_evtchn_port(kvm))
 		return -EINVAL;
 
+	/* Protect writes to evtchnfd as well as the idr lookup. */
 	mutex_lock(&kvm->lock);
 	evtchnfd = idr_find(&kvm->arch.xen.evtchn_ports, port);
-	mutex_unlock(&kvm->lock);
 
+	ret = -ENOENT;
 	if (!evtchnfd)
-		return -ENOENT;
+		goto out_unlock;
 
 	/* For an UPDATE, nothing may change except the priority/vcpu */
+	ret = -EINVAL;
 	if (evtchnfd->type != data->u.evtchn.type)
-		return -EINVAL;
+		goto out_unlock;
 
 	/*
 	 * Port cannot change, and if it's zero that was an eventfd

@@ -1846,20 +1829,21 @@ static int kvm_xen_eventfd_update(struct kvm *kvm,
 	 */
 	if (!evtchnfd->deliver.port.port ||
 	    evtchnfd->deliver.port.port != data->u.evtchn.deliver.port.port)
-		return -EINVAL;
+		goto out_unlock;
 
 	/* We only support 2 level event channels for now */
 	if (data->u.evtchn.deliver.port.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
-		return -EINVAL;
+		goto out_unlock;
 
-	mutex_lock(&kvm->lock);
 	evtchnfd->deliver.port.priority = data->u.evtchn.deliver.port.priority;
 	if (evtchnfd->deliver.port.vcpu_id != data->u.evtchn.deliver.port.vcpu) {
 		evtchnfd->deliver.port.vcpu_id = data->u.evtchn.deliver.port.vcpu;
 		evtchnfd->deliver.port.vcpu_idx = -1;
 	}
+
+	ret = 0;
+out_unlock:
 	mutex_unlock(&kvm->lock);
-	return 0;
+	return ret;
 }
 
 /*

@@ -1871,12 +1855,9 @@ static int kvm_xen_eventfd_assign(struct kvm *kvm,
 {
 	u32 port = data->u.evtchn.send_port;
 	struct eventfd_ctx *eventfd = NULL;
-	struct evtchnfd *evtchnfd = NULL;
+	struct evtchnfd *evtchnfd;
 	int ret = -EINVAL;
 
-	if (!port || port >= max_evtchn_port(kvm))
-		return -EINVAL;
-
 	evtchnfd = kzalloc(sizeof(struct evtchnfd), GFP_KERNEL);
 	if (!evtchnfd)
 		return -ENOMEM;

@@ -1952,8 +1933,7 @@ static int kvm_xen_eventfd_deassign(struct kvm *kvm, u32 port)
 	if (!evtchnfd)
 		return -ENOENT;
 
-	if (kvm)
-		synchronize_srcu(&kvm->srcu);
+	synchronize_srcu(&kvm->srcu);
 	if (!evtchnfd->deliver.port.port)
 		eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx);
 	kfree(evtchnfd);

@@ -1962,18 +1942,42 @@ static int kvm_xen_eventfd_deassign(struct kvm *kvm, u32 port)
 
 static int kvm_xen_eventfd_reset(struct kvm *kvm)
 {
-	struct evtchnfd *evtchnfd;
+	struct evtchnfd *evtchnfd, **all_evtchnfds;
 	int i;
+	int n = 0;
 
 	mutex_lock(&kvm->lock);
+
+	/*
+	 * Because synchronize_srcu() cannot be called inside the
+	 * critical section, first collect all the evtchnfd objects
+	 * in an array as they are removed from evtchn_ports.
+	 */
+	idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i)
+		n++;
+
+	all_evtchnfds = kmalloc_array(n, sizeof(struct evtchnfd *), GFP_KERNEL);
+	if (!all_evtchnfds) {
+		mutex_unlock(&kvm->lock);
+		return -ENOMEM;
+	}
+
+	n = 0;
 	idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) {
+		all_evtchnfds[n++] = evtchnfd;
 		idr_remove(&kvm->arch.xen.evtchn_ports, evtchnfd->send_port);
-		synchronize_srcu(&kvm->srcu);
+	}
+	mutex_unlock(&kvm->lock);
+
+	synchronize_srcu(&kvm->srcu);
+
+	while (n--) {
+		evtchnfd = all_evtchnfds[n];
 		if (!evtchnfd->deliver.port.port)
 			eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx);
 		kfree(evtchnfd);
 	}
-	mutex_unlock(&kvm->lock);
+	kfree(all_evtchnfds);
 
 	return 0;
 }

@@ -2002,20 +2006,22 @@ static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r)
 {
 	struct evtchnfd *evtchnfd;
 	struct evtchn_send send;
-	gpa_t gpa;
-	int idx;
+	struct x86_exception e;
 
-	idx = srcu_read_lock(&vcpu->kvm->srcu);
-	gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL);
-	srcu_read_unlock(&vcpu->kvm->srcu, idx);
-
-	if (!gpa || kvm_vcpu_read_guest(vcpu, gpa, &send, sizeof(send))) {
+	/* Sanity check: this structure is the same for 32-bit and 64-bit */
+	BUILD_BUG_ON(sizeof(send) != 4);
+	if (kvm_read_guest_virt(vcpu, param, &send, sizeof(send), &e)) {
 		*r = -EFAULT;
 		return true;
 	}
 
-	/* The evtchn_ports idr is protected by vcpu->kvm->srcu */
+	/*
+	 * evtchnfd is protected by kvm->srcu; the idr lookup instead
+	 * is protected by RCU.
+	 */
+	rcu_read_lock();
 	evtchnfd = idr_find(&vcpu->kvm->arch.xen.evtchn_ports, send.port);
+	rcu_read_unlock();
 	if (!evtchnfd)
 		return false;
 
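A kernel-style sketch of the lookup discipline the evtchn_send path settles on (the function is hypothetical and the struct is left opaque): the idr_find() itself runs under rcu_read_lock(), while the returned object stays alive because writers call synchronize_srcu(&kvm->srcu) before freeing it.

#include <linux/types.h>
#include <linux/idr.h>
#include <linux/rcupdate.h>

struct evtchnfd;	/* opaque here */

static struct evtchnfd *lookup_evtchn(struct idr *ports, u32 port)
{
	struct evtchnfd *f;

	rcu_read_lock();
	f = idr_find(ports, port);
	rcu_read_unlock();

	/* Caller must be in a kvm->srcu read-side section to use 'f'. */
	return f;
}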
include/uapi/linux/kvm.h

@@ -1767,6 +1767,7 @@ struct kvm_xen_hvm_attr {
 		__u8 runstate_update_flag;
 		struct {
 			__u64 gfn;
+#define KVM_XEN_INVALID_GFN ((__u64)-1)
 		} shared_info;
 		struct {
 			__u32 send_port;

@@ -1798,6 +1799,7 @@ struct kvm_xen_hvm_attr {
 	} u;
 };
 
+
 /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
 #define KVM_XEN_ATTR_TYPE_LONG_MODE		0x0
 #define KVM_XEN_ATTR_TYPE_SHARED_INFO		0x1

@@ -1823,6 +1825,7 @@ struct kvm_xen_vcpu_attr {
 	__u16 pad[3];
 	union {
 		__u64 gpa;
+#define KVM_XEN_INVALID_GPA ((__u64)-1)
 		__u64 pad[8];
 		struct {
 			__u64 state;
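A hypothetical userspace sketch using the new sentinel: disabling a vCPU's vcpu_info through KVM_XEN_VCPU_SET_ATTR. The helper name is an assumption; the ioctl, attribute type, and KVM_XEN_INVALID_GPA are the ones defined above.

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int xen_clear_vcpu_info(int vcpu_fd)
{
	struct kvm_xen_vcpu_attr va;

	memset(&va, 0, sizeof(va));
	va.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO;
	va.u.gpa = KVM_XEN_INVALID_GPA;	/* deactivates the vcpu_info cache */

	return ioctl(vcpu_fd, KVM_XEN_VCPU_SET_ATTR, &va);
}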
tools/testing/selftests/kvm/.gitignore

@@ -1,86 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
-/aarch64/aarch32_id_regs
-/aarch64/arch_timer
-/aarch64/debug-exceptions
-/aarch64/get-reg-list
-/aarch64/hypercalls
-/aarch64/page_fault_test
-/aarch64/psci_test
-/aarch64/vcpu_width_config
-/aarch64/vgic_init
-/aarch64/vgic_irq
-/s390x/memop
-/s390x/resets
-/s390x/sync_regs_test
-/s390x/tprot
-/x86_64/amx_test
-/x86_64/cpuid_test
-/x86_64/cr4_cpuid_sync_test
-/x86_64/debug_regs
-/x86_64/exit_on_emulation_failure_test
-/x86_64/fix_hypercall_test
-/x86_64/get_msr_index_features
-/x86_64/kvm_clock_test
-/x86_64/kvm_pv_test
-/x86_64/hyperv_clock
-/x86_64/hyperv_cpuid
-/x86_64/hyperv_evmcs
-/x86_64/hyperv_features
-/x86_64/hyperv_ipi
-/x86_64/hyperv_svm_test
-/x86_64/hyperv_tlb_flush
-/x86_64/max_vcpuid_cap_test
-/x86_64/mmio_warning_test
-/x86_64/monitor_mwait_test
-/x86_64/nested_exceptions_test
-/x86_64/nx_huge_pages_test
-/x86_64/platform_info_test
-/x86_64/pmu_event_filter_test
-/x86_64/set_boot_cpu_id
-/x86_64/set_sregs_test
-/x86_64/sev_migrate_tests
-/x86_64/smaller_maxphyaddr_emulation_test
-/x86_64/smm_test
-/x86_64/state_test
-/x86_64/svm_vmcall_test
-/x86_64/svm_int_ctl_test
-/x86_64/svm_nested_soft_inject_test
-/x86_64/svm_nested_shutdown_test
-/x86_64/sync_regs_test
-/x86_64/tsc_msrs_test
-/x86_64/tsc_scaling_sync
-/x86_64/ucna_injection_test
-/x86_64/userspace_io_test
-/x86_64/userspace_msr_exit_test
-/x86_64/vmx_apic_access_test
-/x86_64/vmx_close_while_nested_test
-/x86_64/vmx_dirty_log_test
-/x86_64/vmx_exception_with_invalid_guest_state
-/x86_64/vmx_invalid_nested_guest_state
-/x86_64/vmx_msrs_test
-/x86_64/vmx_preemption_timer_test
-/x86_64/vmx_set_nested_state_test
-/x86_64/vmx_tsc_adjust_test
-/x86_64/vmx_nested_tsc_scaling_test
-/x86_64/xapic_ipi_test
-/x86_64/xapic_state_test
-/x86_64/xen_shinfo_test
-/x86_64/xen_vmcall_test
-/x86_64/xss_msr_test
-/x86_64/vmx_pmu_caps_test
-/x86_64/triple_fault_event_test
-/access_tracking_perf_test
-/demand_paging_test
-/dirty_log_test
-/dirty_log_perf_test
-/hardware_disable_test
-/kvm_create_max_vcpus
-/kvm_page_table_test
-/max_guest_memory_test
-/memslot_modification_stress_test
-/memslot_perf_test
-/rseq_test
-/set_memory_region_test
-/steal_time
-/kvm_binary_stats_test
-/system_counter_offset_test
+*
+!/**/
+!*.c
+!*.h
+!*.S
+!*.sh
tools/testing/selftests/kvm/Makefile

@@ -7,35 +7,14 @@ top_srcdir = ../../../..
 include $(top_srcdir)/scripts/subarch.include
 ARCH		?= $(SUBARCH)
 
-# For cross-builds to work, UNAME_M has to map to ARCH and arch specific
-# directories and targets in this Makefile. "uname -m" doesn't map to
-# arch specific sub-directory names.
-#
-# UNAME_M variable to used to run the compiles pointing to the right arch
-# directories and build the right targets for these supported architectures.
-#
-# TEST_GEN_PROGS and LIBKVM are set using UNAME_M variable.
-# LINUX_TOOL_ARCH_INCLUDE is set using ARCH variable.
-#
-# x86_64 targets are named to include x86_64 as a suffix and directories
-# for includes are in x86_64 sub-directory. s390x and aarch64 follow the
-# same convention. "uname -m" doesn't result in the correct mapping for
-# s390x and aarch64.
-#
-# No change necessary for x86_64
-UNAME_M := $(shell uname -m)
-
-# Set UNAME_M for arm64 compile/install to work
-ifeq ($(ARCH),arm64)
-	UNAME_M := aarch64
-endif
-# Set UNAME_M s390x compile/install to work
-ifeq ($(ARCH),s390)
-	UNAME_M := s390x
-endif
-# Set UNAME_M riscv compile/install to work
-ifeq ($(ARCH),riscv)
-	UNAME_M := riscv
+ifeq ($(ARCH),x86)
+	ARCH_DIR := x86_64
+else ifeq ($(ARCH),arm64)
+	ARCH_DIR := aarch64
+else ifeq ($(ARCH),s390)
+	ARCH_DIR := s390x
+else
+	ARCH_DIR := $(ARCH)
 endif
 
 LIBKVM += lib/assert.c

@@ -196,10 +175,15 @@ TEST_GEN_PROGS_riscv += kvm_page_table_test
 TEST_GEN_PROGS_riscv += set_memory_region_test
 TEST_GEN_PROGS_riscv += kvm_binary_stats_test
 
-TEST_PROGS += $(TEST_PROGS_$(UNAME_M))
-TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
-TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(UNAME_M))
-LIBKVM += $(LIBKVM_$(UNAME_M))
+TEST_PROGS += $(TEST_PROGS_$(ARCH_DIR))
+TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR))
+TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR))
+LIBKVM += $(LIBKVM_$(ARCH_DIR))
+
+# lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most
+# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`,
+# which causes the environment variable to override the makefile).
+include ../lib.mk
 
 INSTALL_HDR_PATH = $(top_srcdir)/usr
 LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/

@@ -210,25 +194,23 @@ else
 LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
 endif
 CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
	-Wno-gnu-variable-sized-type-not-at-end \
	-fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \
	-fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
	-I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
-	-I$(<D) -Iinclude/$(UNAME_M) -I ../rseq -I.. $(EXTRA_CFLAGS) \
+	-I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \
	$(KHDR_INCLUDES)
 
-no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
-                                $(CC) -Werror -no-pie -x c - -o "$$TMP", -no-pie)
+no-pie-option := $(call try-run, echo 'int main(void) { return 0; }' | \
+                                $(CC) -Werror $(CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
 
 # On s390, build the testcases KVM-enabled
-pgste-option = $(call try-run, echo 'int main() { return 0; }' | \
+pgste-option = $(call try-run, echo 'int main(void) { return 0; }' | \
	$(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste)
 
 LDLIBS += -ldl
 LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
 
-# After inclusion, $(OUTPUT) is defined and
-# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
-include ../lib.mk
-
 LIBKVM_C := $(filter %.c,$(LIBKVM))
 LIBKVM_S := $(filter %.S,$(LIBKVM))
 LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
tools/testing/selftests/kvm/aarch64/page_fault_test.c

@@ -117,7 +117,7 @@ static void guest_cas(void)
 	GUEST_ASSERT(guest_check_lse());
 	asm volatile(".arch_extension lse\n"
 		     "casal %0, %1, [%2]\n"
-		     :: "r" (0), "r" (TEST_DATA), "r" (guest_test_memory));
+		     :: "r" (0ul), "r" (TEST_DATA), "r" (guest_test_memory));
 	val = READ_ONCE(*guest_test_memory);
 	GUEST_ASSERT_EQ(val, TEST_DATA);
 }
tools/testing/selftests/kvm/lib/aarch64/ucall.c

@@ -14,11 +14,13 @@ static vm_vaddr_t *ucall_exit_mmio_addr;
 
 void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
 {
-	virt_pg_map(vm, mmio_gpa, mmio_gpa);
+	vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR);
+
+	virt_map(vm, mmio_gva, mmio_gpa, 1);
 
 	vm->ucall_mmio_addr = mmio_gpa;
 
-	write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gpa);
+	write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva);
 }
 
 void ucall_arch_do_ucall(vm_vaddr_t uc)
tools/testing/selftests/kvm/lib/kvm_util.c

@@ -186,6 +186,15 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
 _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
	       "Missing new mode params?");
 
+/*
+ * Initializes vm->vpages_valid to match the canonical VA space of the
+ * architecture.
+ *
+ * The default implementation is valid for architectures which split the
+ * range addressed by a single page table into a low and high region
+ * based on the MSB of the VA. On architectures with this behavior
+ * the VA region spans [0, 2^(va_bits - 1)), [-(2^(va_bits - 1), -1].
+ */
 __weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
 {
 	sparsebit_set_num(vm->vpages_valid,

@@ -1416,10 +1425,10 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 
 	while (npages--) {
 		virt_pg_map(vm, vaddr, paddr);
+		sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
+
 		vaddr += page_size;
 		paddr += page_size;
-
-		sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
 	}
 }
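The virt_map() change is a pure ordering fix: the old code advanced vaddr before recording it in vpages_mapped, so the bitmap marked the page after the one actually mapped. A standalone illustration of why the set must happen before the increment (page-size constants assumed for the example):

#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

int main(void)
{
	uint64_t vaddr = 0x1000;
	unsigned char mapped[16] = { 0 };
	int npages = 1;

	while (npages--) {
		/* virt_pg_map(vm, vaddr, paddr) would go here. */
		mapped[vaddr >> PAGE_SHIFT] = 1;	/* before the increment */
		vaddr += PAGE_SIZE;
	}

	assert(mapped[1]);	/* page 0x1000 is recorded, not page 0x2000 */
	return 0;
}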
tools/testing/selftests/kvm/lib/ucall_common.c

@@ -4,6 +4,8 @@
 #include "linux/bitmap.h"
 #include "linux/atomic.h"
 
+#define GUEST_UCALL_FAILED	-1
+
 struct ucall_header {
 	DECLARE_BITMAP(in_use, KVM_MAX_VCPUS);
 	struct ucall ucalls[KVM_MAX_VCPUS];

@@ -41,7 +43,8 @@ static struct ucall *ucall_alloc(void)
 	struct ucall *uc;
 	int i;
 
-	GUEST_ASSERT(ucall_pool);
+	if (!ucall_pool)
+		goto ucall_failed;
 
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		if (!test_and_set_bit(i, ucall_pool->in_use)) {

@@ -51,7 +54,13 @@ static struct ucall *ucall_alloc(void)
 		}
 	}
 
-	GUEST_ASSERT(0);
+ucall_failed:
+	/*
+	 * If the vCPU cannot grab a ucall structure, make a bare ucall with a
+	 * magic value to signal to get_ucall() that things went sideways.
+	 * GUEST_ASSERT() depends on ucall_alloc() and so cannot be used here.
+	 */
+	ucall_arch_do_ucall(GUEST_UCALL_FAILED);
 	return NULL;
 }
 

@@ -93,6 +102,9 @@ uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
 
 	addr = ucall_arch_get_ucall(vcpu);
 	if (addr) {
+		TEST_ASSERT(addr != (void *)GUEST_UCALL_FAILED,
+			    "Guest failed to allocate ucall struct");
+
 		memcpy(uc, addr, sizeof(*uc));
 		vcpu_run_complete_io(vcpu);
 	} else {
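The ucall fix replaces a guest-side assertion that could never report anything useful (GUEST_ASSERT() itself needs a ucall structure) with a sentinel "address" that the host turns into an explicit test failure. A standalone sketch of that signalling scheme — the names mirror the patch, but the program is illustrative only:

#include <stdio.h>

#define GUEST_UCALL_FAILED	((void *)-1)

/* Guest side: report failure without needing an allocated ucall. */
static void *ucall_alloc(void *pool)
{
	if (!pool)
		return GUEST_UCALL_FAILED;
	return pool;
}

int main(void)
{
	void *addr = ucall_alloc(NULL);

	/* Host side: translate the sentinel into an explicit failure. */
	if (addr == GUEST_UCALL_FAILED) {
		fprintf(stderr, "Guest failed to allocate ucall struct\n");
		return 1;
	}
	return 0;
}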
tools/testing/selftests/kvm/lib/x86_64/processor.c

@@ -1031,7 +1031,7 @@ bool is_amd_cpu(void)
 void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
 {
 	if (!kvm_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) {
-		*pa_bits == kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32;
+		*pa_bits = kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32;
 		*va_bits = 32;
 	} else {
 		*pa_bits = kvm_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
tools/testing/selftests/kvm/memslot_perf_test.c

@@ -265,6 +265,9 @@ static uint64_t get_max_slots(struct vm_data *data, uint32_t host_page_size)
 	slots = data->nslots;
 	while (--slots > 1) {
 		pages_per_slot = mempages / slots;
+		if (!pages_per_slot)
+			continue;
+
 		rempages = mempages % pages_per_slot;
 		if (check_slot_pages(host_page_size, guest_page_size,
 				     pages_per_slot, rempages))
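The added check matters because "mempages % pages_per_slot" divides by pages_per_slot, and integer division or modulo by zero is undefined behaviour in C. A minimal sketch of the guarded computation (assumed types, not the test's full logic; slots is assumed non-zero, as the caller's loop guarantees):

#include <stdint.h>

/* Returns the leftover pages for a slot count, or -1 when the count is
 * too large for even one page per slot. */
static int64_t leftover_pages(uint64_t mempages, uint64_t slots)
{
	uint64_t pages_per_slot = mempages / slots;

	if (!pages_per_slot)	/* '%' below would divide by zero */
		return -1;

	return (int64_t)(mempages % pages_per_slot);
}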
tools/testing/selftests/kvm/x86_64/hyperv_ipi.c

@@ -193,8 +193,9 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
 	GUEST_SYNC(stage++);
 	/*
	 * 'XMM Fast' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL.
-	 * Nothing to write anything to XMM regs.
	 */
+	ipi_ex->vp_set.valid_bank_mask = 0;
+	hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2);
 	hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT,
			 IPI_VECTOR, HV_GENERIC_SET_ALL);
 	nop_loop();
tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c

@@ -41,8 +41,17 @@ static void guest_int_handler(struct ex_regs *regs)
 static void l2_guest_code_int(void)
 {
 	GUEST_ASSERT_1(int_fired == 1, int_fired);
-	vmmcall();
-	ud2();
+
+	/*
+	 * Same as the vmmcall() function, but with a ud2 sneaked after the
+	 * vmmcall. The caller injects an exception with the return address
+	 * increased by 2, so the "pop rbp" must be after the ud2 and we cannot
+	 * use vmmcall() directly.
+	 */
+	__asm__ __volatile__("push %%rbp; vmmcall; ud2; pop %%rbp"
+			     : : "a"(0xdeadbeef), "c"(0xbeefdead)
+			     : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+			       "r10", "r11", "r12", "r13", "r14", "r15");
 
 	GUEST_ASSERT_1(bp_fired == 1, bp_fired);
 	hlt();
tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c

@@ -49,11 +49,6 @@ enum {
 	NUM_VMX_PAGES,
 };
 
-struct kvm_single_msr {
-	struct kvm_msrs header;
-	struct kvm_msr_entry entry;
-} __attribute__((packed));
-
 /* The virtual machine object. */
 static struct kvm_vm *vm;
tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c

@@ -962,6 +962,12 @@ int main(int argc, char *argv[])
 	}
 
  done:
+	struct kvm_xen_hvm_attr evt_reset = {
+		.type = KVM_XEN_ATTR_TYPE_EVTCHN,
+		.u.evtchn.flags = KVM_XEN_EVTCHN_RESET,
+	};
+	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset);
+
 	alarm(0);
 	clock_gettime(CLOCK_REALTIME, &max_ts);
virt/kvm/kvm_mm.h

@@ -14,14 +14,10 @@
 #define KVM_MMU_LOCK_INIT(kvm)		rwlock_init(&(kvm)->mmu_lock)
 #define KVM_MMU_LOCK(kvm)		write_lock(&(kvm)->mmu_lock)
 #define KVM_MMU_UNLOCK(kvm)		write_unlock(&(kvm)->mmu_lock)
-#define KVM_MMU_READ_LOCK(kvm)		read_lock(&(kvm)->mmu_lock)
-#define KVM_MMU_READ_UNLOCK(kvm)	read_unlock(&(kvm)->mmu_lock)
 #else
 #define KVM_MMU_LOCK_INIT(kvm)		spin_lock_init(&(kvm)->mmu_lock)
 #define KVM_MMU_LOCK(kvm)		spin_lock(&(kvm)->mmu_lock)
 #define KVM_MMU_UNLOCK(kvm)		spin_unlock(&(kvm)->mmu_lock)
-#define KVM_MMU_READ_LOCK(kvm)		spin_lock(&(kvm)->mmu_lock)
-#define KVM_MMU_READ_UNLOCK(kvm)	spin_unlock(&(kvm)->mmu_lock)
 #endif /* KVM_HAVE_MMU_RWLOCK */
 
 kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible,