Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more KVM updates from Paolo Bonzini:
 "x86 KVM changes:

   - The usual accuracy improvements for nested virtualization

   - The usual round of code cleanups from Sean

   - Added back optimizations that were prematurely removed in 5.2 (the
     bare minimum needed to fix the regression was in 5.3-rc8, here
     comes the rest)

   - Support for UMWAIT/UMONITOR/TPAUSE

   - Direct L2->L0 TLB flushing when L0 is Hyper-V and L1 is KVM

   - Tell Windows guests if SMT is disabled on the host

   - More accurate detection of vmexit cost

   - Revert a pvqspinlock pessimization"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (56 commits)
  KVM: nVMX: cleanup and fix host 64-bit mode checks
  KVM: vmx: fix build warnings in hv_enable_direct_tlbflush() on i386
  KVM: x86: Don't check kvm_rebooting in __kvm_handle_fault_on_reboot()
  KVM: x86: Drop ____kvm_handle_fault_on_reboot()
  KVM: VMX: Add error handling to VMREAD helper
  KVM: VMX: Optimize VMX instruction error and fault handling
  KVM: x86: Check kvm_rebooting in kvm_spurious_fault()
  KVM: selftests: fix ucall on x86
  Revert "locking/pvqspinlock: Don't wait if vCPU is preempted"
  kvm: nvmx: limit atomic switch MSRs
  kvm: svm: Intercept RDPRU
  kvm: x86: Add "significant index" flag to a few CPUID leaves
  KVM: x86/mmu: Skip invalid pages during zapping iff root_count is zero
  KVM: x86/mmu: Explicitly track only a single invalid mmu generation
  KVM: x86/mmu: Revert "KVM: x86/mmu: Remove is_obsolete() call"
  KVM: x86/mmu: Revert "Revert "KVM: MMU: reclaim the zapped-obsolete page first""
  KVM: x86/mmu: Revert "Revert "KVM: MMU: collapse TLB flushes when zap all pages""
  KVM: x86/mmu: Revert "Revert "KVM: MMU: zap pages in batch""
  KVM: x86/mmu: Revert "Revert "KVM: MMU: add tracepoint for kvm_mmu_invalidate_all_pages""
  KVM: x86/mmu: Revert "Revert "KVM: MMU: show mmu_valid_gen in shadow page related tracepoints""
  ...
This commit is contained in: commit 8bbe0dec38
@@ -5309,3 +5309,16 @@ Architectures: x86
 This capability indicates that KVM supports paravirtualized Hyper-V IPI send
 hypercalls:
 HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx.
+8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH
+
+Architecture: x86
+
+This capability indicates that KVM running on top of the Hyper-V hypervisor
+enables Direct TLB flush for its guests, meaning that TLB flush
+hypercalls are handled by the Level 0 hypervisor (Hyper-V), bypassing KVM.
+Due to the different ABI for hypercall parameters between Hyper-V and
+KVM, enabling this capability effectively disables all hypercall
+handling by KVM (as some KVM hypercalls may be mistakenly treated as TLB
+flush hypercalls by Hyper-V), so userspace should disable KVM identification
+in CPUID and expose only the Hyper-V identification. In this case, the guest
+thinks it's running on Hyper-V and uses only Hyper-V hypercalls.
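For context only, and not part of the series itself: the capability is meant to be queried and then enabled per vCPU through the usual KVM_CHECK_EXTENSION / KVM_ENABLE_CAP flow. The sketch below is an assumption about how a VMM would consume it (function name, fds and error handling are made up; the constants come from the uapi headers added by this series):

    /* Hypothetical userspace sketch: enable Hyper-V direct TLB flush for one vCPU. */
    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    static int enable_direct_tlbflush(int vm_fd, int vcpu_fd)
    {
        struct kvm_enable_cap cap = { .cap = KVM_CAP_HYPERV_DIRECT_TLBFLUSH };

        /* Only available when KVM itself runs on Hyper-V with eVMCS. */
        if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_HYPERV_DIRECT_TLBFLUSH) <= 0)
            return -1;

        /* Per the text above, the VMM must also hide KVM's CPUID signature
         * and expose a Hyper-V identification before enabling this. */
        return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
    }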
@@ -180,7 +180,15 @@
 /* Recommend using enlightened VMCS */
 #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED     BIT(14)
 
+/*
+ * Virtual processor will never share a physical core with another virtual
+ * processor, except for virtual processors that are reported as sibling SMT
+ * threads.
+ */
+#define HV_X64_NO_NONARCH_CORESHARING           BIT(18)
+
 /* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */
+#define HV_X64_NESTED_DIRECT_FLUSH              BIT(17)
 #define HV_X64_NESTED_GUEST_MAPPING_FLUSH       BIT(18)
 #define HV_X64_NESTED_MSR_BITMAP                BIT(19)
@@ -524,14 +532,24 @@ struct hv_timer_message_payload {
     __u64 delivery_time;    /* When the message was delivered */
 } __packed;
 
+struct hv_nested_enlightenments_control {
+    struct {
+        __u32 directhypercall:1;
+        __u32 reserved:31;
+    } features;
+    struct {
+        __u32 reserved;
+    } hypercallControls;
+} __packed;
+
 /* Define virtual processor assist page structure. */
 struct hv_vp_assist_page {
     __u32 apic_assist;
-    __u32 reserved;
-    __u64 vtl_control[2];
-    __u64 nested_enlightenments_control[2];
-    __u32 enlighten_vmentry;
-    __u32 padding;
+    __u32 reserved1;
+    __u64 vtl_control[3];
+    struct hv_nested_enlightenments_control nested_control;
+    __u8 enlighten_vmentry;
+    __u8 reserved2[7];
     __u64 current_nested_vmcs;
 } __packed;
@@ -882,4 +900,7 @@ struct hv_tlb_flush_ex {
     u64 gva_list[];
 } __packed;
 
+struct hv_partition_assist_pg {
+    u32 tlb_lock_count;
+};
 #endif
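A quick way to convince yourself that the reworked assist-page layout hangs together is a compile-time check. This is purely an illustrative sketch (it assumes the struct above is in scope; the offsets are derived from the packed field sizes in the diff, not quoted from the TLFS):

    #include <stddef.h>

    /* 4 + 4 + 3*8 bytes put the nested enlightenments control at byte 32,
     * and the 8-byte control block plus 1 + 7 bytes put the current nested
     * VMCS pointer at byte 48 of the packed page. */
    _Static_assert(offsetof(struct hv_vp_assist_page, nested_control) == 32,
                   "nested enlightenments control expected at byte 32");
    _Static_assert(offsetof(struct hv_vp_assist_page, current_nested_vmcs) == 48,
                   "current nested VMCS pointer expected at byte 48");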
@@ -320,6 +320,7 @@ struct kvm_mmu_page {
     struct list_head link;
     struct hlist_node hash_link;
     bool unsync;
+    u8 mmu_valid_gen;
     bool mmio_cached;
 
     /*
@@ -335,7 +336,6 @@ struct kvm_mmu_page {
     int root_count;          /* Currently serving as active root */
     unsigned int unsync_children;
     struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
-    unsigned long mmu_valid_gen;
     DECLARE_BITMAP(unsync_child_bitmap, 512);
 
 #ifdef CONFIG_X86_32
@@ -844,6 +844,8 @@ struct kvm_hv {
 
     /* How many vCPUs have VP index != vCPU index */
     atomic_t num_mismatched_vp_indexes;
+
+    struct hv_partition_assist_pg *hv_pa_pg;
 };
 
 enum kvm_irqchip_mode {
@@ -857,12 +859,13 @@ struct kvm_arch {
     unsigned long n_requested_mmu_pages;
     unsigned long n_max_mmu_pages;
     unsigned int indirect_shadow_pages;
-    unsigned long mmu_valid_gen;
+    u8 mmu_valid_gen;
     struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
     /*
      * Hash table of struct kvm_mmu_page.
      */
     struct list_head active_mmu_pages;
+    struct list_head zapped_obsolete_pages;
     struct kvm_page_track_notifier_node mmu_sp_tracker;
     struct kvm_page_track_notifier_head track_notifier_head;
@@ -1213,6 +1216,7 @@ struct kvm_x86_ops {
     bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu);
 
     bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
+    int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
@@ -1312,18 +1316,42 @@ extern u64 kvm_default_tsc_scaling_ratio;
 
 extern u64 kvm_mce_cap_supported;
 
-enum emulation_result {
-    EMULATE_DONE,         /* no further processing */
-    EMULATE_USER_EXIT,    /* kvm_run ready for userspace exit */
-    EMULATE_FAIL,         /* can't emulate this instruction */
-};
+/*
+ * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing
+ *                      userspace I/O) to indicate that the emulation context
+ *                      should be reused as is, i.e. skip initialization of
+ *                      emulation context, instruction fetch and decode.
+ *
+ * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware.
+ *                    Indicates that only select instructions (tagged with
+ *                    EmulateOnUD) should be emulated (to minimize the emulator
+ *                    attack surface).  See also EMULTYPE_TRAP_UD_FORCED.
+ *
+ * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to
+ *                 decode the instruction length.  For use *only* by
+ *                 kvm_x86_ops->skip_emulated_instruction() implementations.
+ *
+ * EMULTYPE_ALLOW_RETRY - Set when the emulator should resume the guest to
+ *                        retry native execution under certain conditions.
+ *
+ * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was
+ *                           triggered by KVM's magic "force emulation" prefix,
+ *                           which is opt in via module param (off by default).
+ *                           Bypasses EmulateOnUD restriction despite emulating
+ *                           due to an intercepted #UD (see EMULTYPE_TRAP_UD).
+ *                           Used to test the full emulator from userspace.
+ *
+ * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware
+ *                      backdoor emulation, which is opt in via module param.
+ *                      VMware backdoor emulation handles select instructions
+ *                      and reinjects the #GP for all other cases.
+ */
 #define EMULTYPE_NO_DECODE      (1 << 0)
 #define EMULTYPE_TRAP_UD        (1 << 1)
 #define EMULTYPE_SKIP           (1 << 2)
 #define EMULTYPE_ALLOW_RETRY    (1 << 3)
-#define EMULTYPE_NO_UD_ON_FAIL  (1 << 4)
-#define EMULTYPE_VMWARE         (1 << 5)
+#define EMULTYPE_TRAP_UD_FORCED (1 << 4)
+#define EMULTYPE_VMWARE_GP      (1 << 5)
 int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
 int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
                                         void *insn, int insn_len);
@@ -1506,7 +1534,7 @@ enum {
 #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
 #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
 
-asmlinkage void __noreturn kvm_spurious_fault(void);
+asmlinkage void kvm_spurious_fault(void);
 
 /*
  * Hardware virtualization extension instructions may fault if a
@@ -1514,24 +1542,14 @@ asmlinkage void kvm_spurious_fault(void);
  * Usually after catching the fault we just panic; during reboot
  * instead the instruction is ignored.
  */
-#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn)  \
+#define __kvm_handle_fault_on_reboot(insn)                  \
     "666: \n\t"                                             \
     insn "\n\t"                                             \
     "jmp 668f \n\t"                                         \
     "667: \n\t"                                             \
     "call kvm_spurious_fault \n\t"                          \
     "668: \n\t"                                             \
-    ".pushsection .fixup, \"ax\" \n\t"                      \
-    "700: \n\t"                                             \
-    cleanup_insn "\n\t"                                     \
-    "cmpb $0, kvm_rebooting\n\t"                            \
-    "je 667b \n\t"                                          \
-    "jmp 668b \n\t"                                         \
-    ".popsection \n\t"                                      \
-    _ASM_EXTABLE(666b, 700b)
-
-#define __kvm_handle_fault_on_reboot(insn)                  \
-    ____kvm_handle_fault_on_reboot(insn, "")
+    _ASM_EXTABLE(666b, 667b)
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
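With enum emulation_result gone, the emulation helpers adopt the same return convention as every other exit handler: non-zero means "keep running the guest", zero means "exit to userspace" (the run structure has already been filled in). A minimal sketch of the new style, using a hypothetical handler rather than code from this series:

    /* Hypothetical exit handler, illustrating the convention only. */
    static int example_interception(struct kvm_vcpu *vcpu)
    {
        /* old style: return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; */
        return kvm_emulate_instruction(vcpu, 0);   /* 1 = continue, 0 = userspace exit */
    }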
@@ -52,6 +52,7 @@ enum {
     INTERCEPT_MWAIT,
     INTERCEPT_MWAIT_COND,
     INTERCEPT_XSETBV,
+    INTERCEPT_RDPRU,
 };
@@ -69,6 +69,7 @@
 #define SECONDARY_EXEC_PT_USE_GPA               0x01000000
 #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC      0x00400000
 #define SECONDARY_EXEC_TSC_SCALING              0x02000000
+#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE    0x04000000
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
 #define PIN_BASED_NMI_EXITING                   0x00000008
@@ -110,6 +111,7 @@
 #define VMX_MISC_SAVE_EFER_LMA                  0x00000020
 #define VMX_MISC_ACTIVITY_HLT                   0x00000040
 #define VMX_MISC_ZERO_LEN_INS                   0x40000000
+#define VMX_MISC_MSR_LIST_MULTIPLIER            512
 
 /* VMFUNC functions */
 #define VMX_VMFUNC_EPTP_SWITCHING               0x00000001
@@ -75,6 +75,7 @@
 #define SVM_EXIT_MWAIT         0x08b
 #define SVM_EXIT_MWAIT_COND    0x08c
 #define SVM_EXIT_XSETBV        0x08d
+#define SVM_EXIT_RDPRU         0x08e
 #define SVM_EXIT_NPF           0x400
 #define SVM_EXIT_AVIC_INCOMPLETE_IPI        0x401
 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS  0x402
@@ -86,6 +86,8 @@
 #define EXIT_REASON_PML_FULL            62
 #define EXIT_REASON_XSAVES              63
 #define EXIT_REASON_XRSTORS             64
+#define EXIT_REASON_UMWAIT              67
+#define EXIT_REASON_TPAUSE              68
 
 #define VMX_EXIT_REASONS \
     { EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
@@ -144,7 +146,9 @@
     { EXIT_REASON_RDSEED,                "RDSEED" }, \
     { EXIT_REASON_PML_FULL,              "PML_FULL" }, \
     { EXIT_REASON_XSAVES,                "XSAVES" }, \
-    { EXIT_REASON_XRSTORS,               "XRSTORS" }
+    { EXIT_REASON_XRSTORS,               "XRSTORS" }, \
+    { EXIT_REASON_UMWAIT,                "UMWAIT" }, \
+    { EXIT_REASON_TPAUSE,                "TPAUSE" }
 
 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL       2
@@ -17,6 +17,12 @@
  */
 static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE);
 
+u32 get_umwait_control_msr(void)
+{
+    return umwait_control_cached;
+}
+EXPORT_SYMBOL_GPL(get_umwait_control_msr);
+
 /*
  * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by
  * hardware or BIOS before kernel boot.
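The exported helper simply hands back the host's cached IA32_UMWAIT_CONTROL value so KVM can restore it around guest runs. As a rough, hedged illustration of what that value encodes (the field layout is taken from the SDM description of the MSR, not from this hunk):

    /* Illustrative decode only. */
    u32 ctrl           = get_umwait_control_msr();
    u32 max_tsc_quanta = ctrl & ~0x3u;  /* bits 31:2 - maximum wait in TSC quanta */
    int c02_disabled   = ctrl & 0x1u;   /* bit 0     - 1 forbids the deeper C0.2 state */

With the default above, UMWAIT/TPAUSE are capped at roughly 100000 TSC cycles with C0.2 permitted.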
@@ -304,7 +304,13 @@ static void do_host_cpuid(struct kvm_cpuid_entry2 *entry, u32 function,
     case 7:
     case 0xb:
     case 0xd:
+    case 0xf:
+    case 0x10:
+    case 0x12:
     case 0x14:
+    case 0x17:
+    case 0x18:
+    case 0x1f:
     case 0x8000001d:
         entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
         break;
@@ -360,7 +366,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
         F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
         F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
         F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
-        F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
+        F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
 
     /* cpuid 7.0.edx*/
     const u32 kvm_cpuid_7_0_edx_x86_features =
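The "significant index" flag tells userspace that these leaves take a subleaf in ECX, so KVM_GET_SUPPORTED_CPUID reports one entry per subleaf instead of a single entry. A small sketch of why that matters, using leaf 0x1f (V2 extended topology) as an example; this is illustration only, not code from the series:

    #include <cpuid.h>

    /* Each subleaf of 0x1f describes one topology domain level; walking stops
     * at the first subleaf whose level type (ECX bits 15:8) is zero. */
    static void walk_leaf_0x1f(void)
    {
        unsigned int eax, ebx, ecx, edx, subleaf;

        for (subleaf = 0; ; subleaf++) {
            __cpuid_count(0x1f, subleaf, eax, ebx, ecx, edx);
            if (!(ecx & 0xff00))    /* level type 0: invalid, stop */
                break;
        }
    }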
@@ -23,6 +23,7 @@
 #include "ioapic.h"
 #include "hyperv.h"
 
+#include <linux/cpu.h>
 #include <linux/kvm_host.h>
 #include <linux/highmem.h>
 #include <linux/sched/cputime.h>
@@ -645,7 +646,9 @@ static int stimer_notify_direct(struct kvm_vcpu_hv_stimer *stimer)
         .vector = stimer->config.apic_vector
     };
 
-    return !kvm_apic_set_irq(vcpu, &irq, NULL);
+    if (lapic_in_kernel(vcpu))
+        return !kvm_apic_set_irq(vcpu, &irq, NULL);
+    return 0;
 }
 
 static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
@@ -1852,7 +1855,13 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
 
             ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE;
             ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
-            ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
+
+            /*
+             * Direct Synthetic timers only make sense with in-kernel
+             * LAPIC
+             */
+            if (lapic_in_kernel(vcpu))
+                ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
 
             break;
@@ -1864,7 +1873,8 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
             ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
             if (evmcs_ver)
                 ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
-
+            if (!cpu_smt_possible())
+                ent->eax |= HV_X64_NO_NONARCH_CORESHARING;
             /*
              * Default number of spinlock retry attempts, matches
              * HyperV 2016.
@@ -65,7 +65,9 @@
 #define APIC_BROADCAST                  0xFF
 #define X2APIC_BROADCAST                0xFFFFFFFFul
 
-#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100
+static bool lapic_timer_advance_dynamic __read_mostly;
+#define LAPIC_TIMER_ADVANCE_ADJUST_MIN  100
+#define LAPIC_TIMER_ADVANCE_ADJUST_MAX  5000
 #define LAPIC_TIMER_ADVANCE_ADJUST_INIT 1000
 /* step-by-step approximation to mitigate fluctuation */
 #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
@@ -1485,26 +1487,25 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
     u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
     u64 ns;
 
+    /* Do not adjust for tiny fluctuations or large random spikes. */
+    if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX ||
+        abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN)
+        return;
+
     /* too early */
     if (advance_expire_delta < 0) {
         ns = -advance_expire_delta * 1000000ULL;
         do_div(ns, vcpu->arch.virtual_tsc_khz);
-        timer_advance_ns -= min((u32)ns,
-            timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+        timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
     } else {
         /* too late */
         ns = advance_expire_delta * 1000000ULL;
         do_div(ns, vcpu->arch.virtual_tsc_khz);
-        timer_advance_ns += min((u32)ns,
-            timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+        timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
     }
 
-    if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
-        apic->lapic_timer.timer_advance_adjust_done = true;
-    if (unlikely(timer_advance_ns > 5000)) {
+    if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_ADJUST_MAX))
         timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
-        apic->lapic_timer.timer_advance_adjust_done = false;
-    }
     apic->lapic_timer.timer_advance_ns = timer_advance_ns;
 }
@@ -1524,7 +1525,7 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
     if (guest_tsc < tsc_deadline)
         __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
 
-    if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
+    if (lapic_timer_advance_dynamic)
         adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
 }
@@ -2302,13 +2303,12 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
     apic->lapic_timer.timer.function = apic_timer_fn;
     if (timer_advance_ns == -1) {
         apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
-        apic->lapic_timer.timer_advance_adjust_done = false;
+        lapic_timer_advance_dynamic = true;
     } else {
         apic->lapic_timer.timer_advance_ns = timer_advance_ns;
-        apic->lapic_timer.timer_advance_adjust_done = true;
+        lapic_timer_advance_dynamic = false;
     }
-
 
     /*
      * APIC is created enabled. This will prevent kvm_lapic_set_base from
      * thinking that APIC state has changed.
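To make the new adjustment step concrete (the numbers below are made up, only the formula comes from the code above): with a 2 GHz guest TSC (virtual_tsc_khz = 2,000,000) and a timer that fired 1600 TSC cycles late, ns = 1600 * 1000000 / 2000000 = 800, so timer_advance_ns grows by 800 / LAPIC_TIMER_ADVANCE_ADJUST_STEP = 100 ns. Deltas below 100 or above 5000 are ignored outright, and if the accumulated advance ever exceeds 5000 ns it is reset to the 1000 ns initial value, which keeps the adjustment running continuously instead of freezing after an "adjust done" flag as before.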
@@ -35,7 +35,6 @@ struct kvm_timer {
     s64 advance_expire_delta;
     atomic_t pending;           /* accumulated triggered timers */
     bool hv_timer_in_use;
-    bool timer_advance_adjust_done;
 };
 
 struct kvm_lapic {
@@ -403,8 +403,6 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
     mask |= (gpa & shadow_nonpresent_or_rsvd_mask)
         << shadow_nonpresent_or_rsvd_mask_len;
 
-    page_header(__pa(sptep))->mmio_cached = true;
-
     trace_mark_mmio_spte(sptep, gfn, access, gen);
     mmu_spte_set(sptep, mask);
 }
@@ -2103,6 +2101,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct)
      * depends on valid pages being added to the head of the list.  See
      * comments in kvm_zap_obsolete_pages().
      */
+    sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
     list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
     kvm_mod_used_mmu_pages(vcpu->kvm, +1);
     return sp;
@@ -2252,7 +2251,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 #define for_each_valid_sp(_kvm, _sp, _gfn)                              \
     hlist_for_each_entry(_sp,                                           \
       &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
-        if (is_obsolete_sp((_kvm), (_sp)) || (_sp)->role.invalid) {     \
+        if (is_obsolete_sp((_kvm), (_sp))) {                            \
         } else
 
 #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn)                 \
@@ -2311,7 +2310,8 @@ static void mmu_audit_disable(void) { }
 
 static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
-    return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
+    return sp->role.invalid ||
+           unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
 }
 
 static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
@@ -2538,7 +2538,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
         if (level > PT_PAGE_TABLE_LEVEL && need_sync)
             flush |= kvm_sync_pages(vcpu, gfn, &invalid_list);
     }
-    sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
     clear_page(sp->spt);
     trace_kvm_mmu_get_page(sp, true);
 
@@ -2753,7 +2752,12 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
     } else {
         list_move(&sp->link, &kvm->arch.active_mmu_pages);
 
-        if (!sp->role.invalid)
+        /*
+         * Obsolete pages cannot be used on any vCPUs, see the comment
+         * in kvm_mmu_zap_all_fast().  Note, is_obsolete_sp() also
+         * treats invalid shadow pages as being obsolete.
+         */
+        if (!is_obsolete_sp(kvm, sp))
             kvm_reload_remote_mmus(kvm);
     }
 
@@ -5383,7 +5387,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
                        void *insn, int insn_len)
 {
     int r, emulation_type = 0;
-    enum emulation_result er;
     bool direct = vcpu->arch.mmu->direct_map;
 
     /* With shadow page tables, fault_address contains a GVA or nGPA. */
@@ -5450,19 +5453,8 @@ emulate:
             return 1;
     }
 
-    er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);
-
-    switch (er) {
-    case EMULATE_DONE:
-        return 1;
-    case EMULATE_USER_EXIT:
-        ++vcpu->stat.mmio_exits;
-        /* fall through */
-    case EMULATE_FAIL:
-        return 0;
-    default:
-        BUG();
-    }
+    return x86_emulate_instruction(vcpu, cr2, emulation_type, insn,
+                                   insn_len);
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
 
@@ -5684,12 +5676,11 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
     return ret;
 }
 
+#define BATCH_ZAP_PAGES 10
 static void kvm_zap_obsolete_pages(struct kvm *kvm)
 {
     struct kvm_mmu_page *sp, *node;
-    LIST_HEAD(invalid_list);
-    int ign;
+    int nr_zapped, batch = 0;
 
 restart:
     list_for_each_entry_safe_reverse(sp, node,
@@ -5702,46 +5693,39 @@ restart:
             break;
 
         /*
-         * Do not repeatedly zap a root page to avoid unnecessary
-         * KVM_REQ_MMU_RELOAD, otherwise we may not be able to
-         * progress:
-         *    vcpu 0                        vcpu 1
-         *                         call vcpu_enter_guest():
-         *                            1): handle KVM_REQ_MMU_RELOAD
-         *                                and require mmu-lock to
-         *                                load mmu
-         * repeat:
-         *    1): zap root page and
-         *        send KVM_REQ_MMU_RELOAD
-         *
-         *    2): if (cond_resched_lock(mmu-lock))
-         *
-         *                            2): hold mmu-lock and load mmu
-         *
-         *                            3): see KVM_REQ_MMU_RELOAD bit
-         *                                on vcpu->requests is set
-         *                                then return 1 to call
-         *                                vcpu_enter_guest() again.
-         *            goto repeat;
-         *
-         * Since we are reversely walking the list and the invalid
-         * list will be moved to the head, skip the invalid page
-         * can help us to avoid the infinity list walking.
+         * Skip invalid pages with a non-zero root count, zapping pages
+         * with a non-zero root count will never succeed, i.e. the page
+         * will get thrown back on active_mmu_pages and we'll get stuck
+         * in an infinite loop.
          */
-        if (sp->role.invalid)
+        if (sp->role.invalid && sp->root_count)
             continue;
 
-        if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
-            kvm_mmu_commit_zap_page(kvm, &invalid_list);
-            cond_resched_lock(&kvm->mmu_lock);
+        /*
+         * No need to flush the TLB since we're only zapping shadow
+         * pages with an obsolete generation number and all vCPUS have
+         * loaded a new root, i.e. the shadow pages being zapped cannot
+         * be in active use by the guest.
+         */
+        if (batch >= BATCH_ZAP_PAGES &&
+            cond_resched_lock(&kvm->mmu_lock)) {
+            batch = 0;
             goto restart;
         }
 
-        if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
+        if (__kvm_mmu_prepare_zap_page(kvm, sp,
+                &kvm->arch.zapped_obsolete_pages, &nr_zapped)) {
+            batch += nr_zapped;
             goto restart;
+        }
     }
 
-    kvm_mmu_commit_zap_page(kvm, &invalid_list);
+    /*
+     * Trigger a remote TLB flush before freeing the page tables to ensure
+     * KVM is not in the middle of a lockless shadow page table walk, which
+     * may reference the pages.
+     */
+    kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
 }
 
 /*
@@ -5755,13 +5739,39 @@ restart:
  */
 static void kvm_mmu_zap_all_fast(struct kvm *kvm)
 {
+    lockdep_assert_held(&kvm->slots_lock);
+
     spin_lock(&kvm->mmu_lock);
-    kvm->arch.mmu_valid_gen++;
+    trace_kvm_mmu_zap_all_fast(kvm);
+
+    /*
+     * Toggle mmu_valid_gen between '0' and '1'.  Because slots_lock is
+     * held for the entire duration of zapping obsolete pages, it's
+     * impossible for there to be multiple invalid generations associated
+     * with *valid* shadow pages at any given time, i.e. there is exactly
+     * one valid generation and (at most) one invalid generation.
+     */
+    kvm->arch.mmu_valid_gen = kvm->arch.mmu_valid_gen ? 0 : 1;
+
+    /*
+     * Notify all vcpus to reload its shadow page table and flush TLB.
+     * Then all vcpus will switch to new shadow page table with the new
+     * mmu_valid_gen.
+     *
+     * Note: we need to do this under the protection of mmu_lock,
+     * otherwise, vcpu would purge shadow page but miss tlb flush.
+     */
+    kvm_reload_remote_mmus(kvm);
 
     kvm_zap_obsolete_pages(kvm);
     spin_unlock(&kvm->mmu_lock);
 }
 
+static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
+{
+    return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
+}
+
 static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
             struct kvm_memory_slot *slot,
             struct kvm_page_track_notifier_node *node)
@@ -5959,7 +5969,7 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm,
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty);
 
-static void __kvm_mmu_zap_all(struct kvm *kvm, bool mmio_only)
+void kvm_mmu_zap_all(struct kvm *kvm)
 {
     struct kvm_mmu_page *sp, *node;
     LIST_HEAD(invalid_list);
@@ -5968,14 +5978,10 @@ void kvm_mmu_zap_all(struct kvm *kvm)
     spin_lock(&kvm->mmu_lock);
 restart:
     list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
-        if (mmio_only && !sp->mmio_cached)
-            continue;
         if (sp->role.invalid && sp->root_count)
             continue;
-        if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign)) {
-            WARN_ON_ONCE(mmio_only);
+        if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
             goto restart;
-        }
         if (cond_resched_lock(&kvm->mmu_lock))
             goto restart;
     }
@@ -5984,11 +5990,6 @@ restart:
     spin_unlock(&kvm->mmu_lock);
 }
 
-void kvm_mmu_zap_all(struct kvm *kvm)
-{
-    return __kvm_mmu_zap_all(kvm, false);
-}
-
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
 {
     WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
@@ -6010,7 +6011,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
      */
     if (unlikely(gen == 0)) {
         kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n");
-        __kvm_mmu_zap_all(kvm, true);
+        kvm_mmu_zap_all_fast(kvm);
     }
 }
 
@@ -6041,16 +6042,24 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
          * want to shrink a VM that only started to populate its MMU
          * anyway.
          */
-        if (!kvm->arch.n_used_mmu_pages)
+        if (!kvm->arch.n_used_mmu_pages &&
+            !kvm_has_zapped_obsolete_pages(kvm))
            continue;
 
        idx = srcu_read_lock(&kvm->srcu);
        spin_lock(&kvm->mmu_lock);
 
+       if (kvm_has_zapped_obsolete_pages(kvm)) {
+           kvm_mmu_commit_zap_page(kvm,
+                 &kvm->arch.zapped_obsolete_pages);
+           goto unlock;
+       }
+
        if (prepare_zap_oldest_mmu_page(kvm, &invalid_list))
            freed++;
        kvm_mmu_commit_zap_page(kvm, &invalid_list);
 
+unlock:
        spin_unlock(&kvm->mmu_lock);
        srcu_read_unlock(&kvm->srcu, idx);
 
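A hedged, paraphrased summary of the restored fast-zap flow (this only restates the code above, it is not new code from the series):

    /*
     * kvm_mmu_zap_all_fast(kvm):
     *     mmu_valid_gen = !mmu_valid_gen;   // every existing shadow page becomes obsolete
     *     kvm_reload_remote_mmus(kvm);      // vCPUs pick up new roots and flush TLBs
     *     kvm_zap_obsolete_pages(kvm);      // zap in batches of BATCH_ZAP_PAGES, allowing
     *                                       // rescheduling between batches; leftovers sit on
     *                                       // zapped_obsolete_pages so the shrinker can
     *                                       // finish the commit later.
     */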
@@ -8,16 +8,18 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvmmmu
 
 #define KVM_MMU_PAGE_FIELDS             \
+    __field(__u8, mmu_valid_gen)        \
     __field(__u64, gfn)                 \
     __field(__u32, role)                \
     __field(__u32, root_count)          \
     __field(bool, unsync)
 
 #define KVM_MMU_PAGE_ASSIGN(sp)                         \
+    __entry->mmu_valid_gen = sp->mmu_valid_gen;         \
     __entry->gfn = sp->gfn;                             \
     __entry->role = sp->role.word;                      \
     __entry->root_count = sp->root_count;               \
     __entry->unsync = sp->unsync;
 
 #define KVM_MMU_PAGE_PRINTK() ({                        \
@@ -29,8 +31,9 @@
                                                         \
     role.word = __entry->role;                          \
                                                         \
-    trace_seq_printf(p, "sp gfn %llx l%u %u-byte q%u%s %s%s"        \
+    trace_seq_printf(p, "sp gen %u gfn %llx l%u %u-byte q%u%s %s%s" \
              " %snxe %sad root %u %s%c",                \
+             __entry->mmu_valid_gen,                    \
              __entry->gfn, role.level,                  \
              role.gpte_is_8_bytes ? 8 : 4,              \
              role.quadrant,                             \
@@ -279,6 +282,27 @@ TRACE_EVENT(
     )
 );
 
+TRACE_EVENT(
+    kvm_mmu_zap_all_fast,
+    TP_PROTO(struct kvm *kvm),
+    TP_ARGS(kvm),
+
+    TP_STRUCT__entry(
+        __field(__u8, mmu_valid_gen)
+        __field(unsigned int, mmu_used_pages)
+    ),
+
+    TP_fast_assign(
+        __entry->mmu_valid_gen = kvm->arch.mmu_valid_gen;
+        __entry->mmu_used_pages = kvm->arch.n_used_mmu_pages;
+    ),
+
+    TP_printk("kvm-mmu-valid-gen %u used_pages %x",
+          __entry->mmu_valid_gen, __entry->mmu_used_pages
+    )
+);
+
+
 TRACE_EVENT(
     check_mmio_spte,
     TP_PROTO(u64 spte, unsigned int kvm_gen, unsigned int spte_gen),
@@ -777,17 +777,18 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
         svm->next_rip = svm->vmcb->control.next_rip;
     }
 
-    if (!svm->next_rip)
-        return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP);
-
-    if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
-        printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
-               __func__, kvm_rip_read(vcpu), svm->next_rip);
-    kvm_rip_write(vcpu, svm->next_rip);
+    if (!svm->next_rip) {
+        if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
+            return 0;
+    } else {
+        if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
+            pr_err("%s: ip 0x%lx next 0x%llx\n",
+                   __func__, kvm_rip_read(vcpu), svm->next_rip);
+        kvm_rip_write(vcpu, svm->next_rip);
+    }
     svm_set_interrupt_shadow(vcpu, 0);
 
-    return EMULATE_DONE;
+    return 1;
 }
 
 static void svm_queue_exception(struct kvm_vcpu *vcpu)
@@ -1539,6 +1540,7 @@ static void init_vmcb(struct vcpu_svm *svm)
     set_intercept(svm, INTERCEPT_SKINIT);
     set_intercept(svm, INTERCEPT_WBINVD);
     set_intercept(svm, INTERCEPT_XSETBV);
+    set_intercept(svm, INTERCEPT_RDPRU);
     set_intercept(svm, INTERCEPT_RSM);
 
     if (!kvm_mwait_in_guest(svm->vcpu.kvm)) {
@@ -2768,17 +2770,18 @@ static int gp_interception(struct vcpu_svm *svm)
 {
     struct kvm_vcpu *vcpu = &svm->vcpu;
     u32 error_code = svm->vmcb->control.exit_info_1;
-    int er;
 
     WARN_ON_ONCE(!enable_vmware_backdoor);
 
-    er = kvm_emulate_instruction(vcpu,
-        EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
-    if (er == EMULATE_USER_EXIT)
-        return 0;
-    else if (er != EMULATE_DONE)
+    /*
+     * VMware backdoor emulation on #GP interception only handles IN{S},
+     * OUT{S}, and RDPMC, none of which generate a non-zero error code.
+     */
+    if (error_code) {
         kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
-    return 1;
+        return 1;
+    }
+    return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
 }
 
 static bool is_erratum_383(void)
@@ -2876,7 +2879,7 @@ static int io_interception(struct vcpu_svm *svm)
     string = (io_info & SVM_IOIO_STR_MASK) != 0;
     in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
     if (string)
-        return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
+        return kvm_emulate_instruction(vcpu, 0);
 
     port = io_info >> 16;
     size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
@@ -3830,6 +3833,12 @@ static int xsetbv_interception(struct vcpu_svm *svm)
     return 1;
 }
 
+static int rdpru_interception(struct vcpu_svm *svm)
+{
+    kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+    return 1;
+}
+
 static int task_switch_interception(struct vcpu_svm *svm)
 {
     u16 tss_selector;
@@ -3883,24 +3892,15 @@ static int task_switch_interception(struct vcpu_svm *svm)
         int_type == SVM_EXITINTINFO_TYPE_SOFT ||
         (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
          (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) {
-        if (skip_emulated_instruction(&svm->vcpu) != EMULATE_DONE)
-            goto fail;
+        if (!skip_emulated_instruction(&svm->vcpu))
+            return 0;
     }
 
     if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
         int_vec = -1;
 
-    if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
-                has_error_code, error_code) == EMULATE_FAIL)
-        goto fail;
-
-    return 1;
-
-fail:
-    svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-    svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
-    svm->vcpu.run->internal.ndata = 0;
-    return 0;
+    return kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
+                   has_error_code, error_code);
 }
 
 static int cpuid_interception(struct vcpu_svm *svm)
@@ -3921,7 +3921,7 @@ static int iret_interception(struct vcpu_svm *svm)
 static int invlpg_interception(struct vcpu_svm *svm)
 {
     if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
-        return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
+        return kvm_emulate_instruction(&svm->vcpu, 0);
 
     kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
     return kvm_skip_emulated_instruction(&svm->vcpu);
@@ -3929,13 +3929,12 @@ static int invlpg_interception(struct vcpu_svm *svm)
 
 static int emulate_on_interception(struct vcpu_svm *svm)
 {
-    return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
+    return kvm_emulate_instruction(&svm->vcpu, 0);
 }
 
 static int rsm_interception(struct vcpu_svm *svm)
 {
-    return kvm_emulate_instruction_from_buffer(&svm->vcpu,
-                    rsm_ins_bytes, 2) == EMULATE_DONE;
+    return kvm_emulate_instruction_from_buffer(&svm->vcpu, rsm_ins_bytes, 2);
 }
 
 static int rdpmc_interception(struct vcpu_svm *svm)
@@ -4724,7 +4723,7 @@ static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
         ret = avic_unaccel_trap_write(svm);
     } else {
         /* Handling Fault */
-        ret = (kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
+        ret = kvm_emulate_instruction(&svm->vcpu, 0);
     }
 
     return ret;
@@ -4791,6 +4790,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
     [SVM_EXIT_MONITOR]          = monitor_interception,
     [SVM_EXIT_MWAIT]            = mwait_interception,
     [SVM_EXIT_XSETBV]           = xsetbv_interception,
+    [SVM_EXIT_RDPRU]            = rdpru_interception,
     [SVM_EXIT_NPF]              = npf_interception,
     [SVM_EXIT_RSM]              = rsm_interception,
     [SVM_EXIT_AVIC_INCOMPLETE_IPI]  = avic_incomplete_ipi_interception,
@@ -7099,13 +7099,6 @@ failed:
     return ret;
 }
 
-static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
-                   uint16_t *vmcs_version)
-{
-    /* Intel-only feature */
-    return -ENODEV;
-}
-
 static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
 {
     unsigned long cr4 = kvm_read_cr4(vcpu);
@@ -7311,7 +7304,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
     .mem_enc_reg_region = svm_register_enc_region,
     .mem_enc_unreg_region = svm_unregister_enc_region,
 
-    .nested_enable_evmcs = nested_enable_evmcs,
+    .nested_enable_evmcs = NULL,
     .nested_get_evmcs_version = NULL,
 
     .need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
@@ -247,6 +247,12 @@ static inline bool vmx_xsaves_supported(void)
         SECONDARY_EXEC_XSAVES;
 }
 
+static inline bool vmx_waitpkg_supported(void)
+{
+    return vmcs_config.cpu_based_2nd_exec_ctrl &
+        SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+}
+
 static inline bool cpu_has_vmx_tsc_scaling(void)
 {
     return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -178,6 +178,8 @@ static inline void evmcs_load(u64 phys_addr)
     struct hv_vp_assist_page *vp_ap =
         hv_get_vp_assist_page(smp_processor_id());
 
+    if (current_evmcs->hv_enlightenments_control.nested_flush_hypercall)
+        vp_ap->nested_control.features.directhypercall = 1;
     vp_ap->current_nested_vmcs = phys_addr;
     vp_ap->enlighten_vmentry = 1;
 }
@@ -198,6 +198,16 @@ static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
     pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator);
 }
 
+static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
+{
+    return fixed_bits_valid(control, low, high);
+}
+
+static inline u64 vmx_control_msr(u32 low, u32 high)
+{
+    return low | ((u64)high << 32);
+}
+
 static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
 {
     secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
@@ -866,16 +876,34 @@ static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
     return 0;
 }
 
+static u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu)
+{
+    struct vcpu_vmx *vmx = to_vmx(vcpu);
+    u64 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
+                       vmx->nested.msrs.misc_high);
+
+    return (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER;
+}
+
 /*
  * Load guest's/host's msr at nested entry/exit.
  * return 0 for success, entry index for failure.
+ *
+ * One of the failure modes for MSR load/store is when a list exceeds the
+ * virtual hardware's capacity. To maintain compatibility with hardware inasmuch
+ * as possible, process all valid entries before failing rather than precheck
+ * for a capacity violation.
  */
 static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
 {
     u32 i;
     struct vmx_msr_entry e;
+    u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
 
     for (i = 0; i < count; i++) {
+        if (unlikely(i >= max_msr_list_size))
+            goto fail;
+
         if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
                     &e, sizeof(e))) {
             pr_debug_ratelimited(
@@ -906,8 +934,12 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
     u64 data;
     u32 i;
     struct vmx_msr_entry e;
+    u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
 
     for (i = 0; i < count; i++) {
+        if (unlikely(i >= max_msr_list_size))
+            return -EINVAL;
+
         if (kvm_vcpu_read_guest(vcpu,
                     gpa + i * sizeof(e),
                     &e, 2 * sizeof(u32))) {
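For reference, the cap computed by the new helper follows the SDM's description of IA32_VMX_MISC: bits 27:25 of that MSR encode a value N, and the recommended maximum for each VM-entry/VM-exit MSR load/store list is (N + 1) * 512 entries, which is exactly (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER. Assuming the field is reported as zero (as it commonly is), the atomic switch lists L1 hands to L0 are now bounded at 512 entries instead of being effectively unlimited; treat the specific value as an assumption about what the nested MSRs advertise, not a statement from this series.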
@@ -1013,17 +1045,6 @@ static u16 nested_get_vpid02(struct kvm_vcpu *vcpu)
     return vmx->nested.vpid02 ? vmx->nested.vpid02 : vmx->vpid;
 }
 
-
-static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
-{
-    return fixed_bits_valid(control, low, high);
-}
-
-static inline u64 vmx_control_msr(u32 low, u32 high)
-{
-    return low | ((u64)high << 32);
-}
-
 static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
 {
     superset &= mask;
@@ -2089,6 +2110,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
                   SECONDARY_EXEC_ENABLE_INVPCID |
                   SECONDARY_EXEC_RDTSCP |
                   SECONDARY_EXEC_XSAVES |
+                  SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
                   SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
                   SECONDARY_EXEC_APIC_REGISTER_VIRT |
                   SECONDARY_EXEC_ENABLE_VMFUNC);
@@ -2642,8 +2664,23 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
         CC(!kvm_pat_valid(vmcs12->host_ia32_pat)))
         return -EINVAL;
 
-    ia32e = (vmcs12->vm_exit_controls &
-         VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
+#ifdef CONFIG_X86_64
+    ia32e = !!(vcpu->arch.efer & EFER_LMA);
+#else
+    ia32e = false;
+#endif
+
+    if (ia32e) {
+        if (CC(!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) ||
+            CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
+            return -EINVAL;
+    } else {
+        if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) ||
+            CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
+            CC(vmcs12->host_cr4 & X86_CR4_PCIDE) ||
+            CC((vmcs12->host_rip) >> 32))
+            return -EINVAL;
+    }
 
     if (CC(vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
         CC(vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
@@ -2662,7 +2699,8 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
         CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) ||
         CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) ||
         CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) ||
-        CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)))
+        CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) ||
+        CC(is_noncanonical_address(vmcs12->host_rip, vcpu)))
         return -EINVAL;
 #endif
 
@@ -5441,6 +5479,10 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
     case EXIT_REASON_ENCLS:
         /* SGX is never exposed to L1 */
         return false;
+    case EXIT_REASON_UMWAIT:
+    case EXIT_REASON_TPAUSE:
+        return nested_cpu_has2(vmcs12,
+            SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE);
     default:
         return true;
     }
@ -11,8 +11,13 @@
|
||||||
#include "vmcs.h"
|
#include "vmcs.h"
|
||||||
|
|
||||||
#define __ex(x) __kvm_handle_fault_on_reboot(x)
|
#define __ex(x) __kvm_handle_fault_on_reboot(x)
|
||||||
#define __ex_clear(x, reg) \
|
|
||||||
____kvm_handle_fault_on_reboot(x, "xor " reg ", " reg)
|
asmlinkage void vmread_error(unsigned long field, bool fault);
|
||||||
|
void vmwrite_error(unsigned long field, unsigned long value);
|
||||||
|
void vmclear_error(struct vmcs *vmcs, u64 phys_addr);
|
||||||
|
void vmptrld_error(struct vmcs *vmcs, u64 phys_addr);
|
||||||
|
void invvpid_error(unsigned long ext, u16 vpid, gva_t gva);
|
||||||
|
void invept_error(unsigned long ext, u64 eptp, gpa_t gpa);
|
||||||
|
|
||||||
static __always_inline void vmcs_check16(unsigned long field)
|
static __always_inline void vmcs_check16(unsigned long field)
|
||||||
{
|
{
|
||||||
|
@ -62,8 +67,22 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
|
||||||
{
|
{
|
||||||
unsigned long value;
|
unsigned long value;
|
||||||
|
|
||||||
asm volatile (__ex_clear("vmread %1, %0", "%k0")
|
asm volatile("1: vmread %2, %1\n\t"
|
||||||
: "=r"(value) : "r"(field));
|
".byte 0x3e\n\t" /* branch taken hint */
|
||||||
|
"ja 3f\n\t"
|
||||||
|
"mov %2, %%" _ASM_ARG1 "\n\t"
|
||||||
|
"xor %%" _ASM_ARG2 ", %%" _ASM_ARG2 "\n\t"
|
||||||
|
"2: call vmread_error\n\t"
|
||||||
|
"xor %k1, %k1\n\t"
|
||||||
|
"3:\n\t"
|
||||||
|
|
||||||
|
".pushsection .fixup, \"ax\"\n\t"
|
||||||
|
"4: mov %2, %%" _ASM_ARG1 "\n\t"
|
||||||
|
"mov $1, %%" _ASM_ARG2 "\n\t"
|
||||||
|
"jmp 2b\n\t"
|
||||||
|
".popsection\n\t"
|
||||||
|
_ASM_EXTABLE(1b, 4b)
|
||||||
|
: ASM_CALL_CONSTRAINT, "=r"(value) : "r"(field) : "cc");
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -103,21 +122,39 @@ static __always_inline unsigned long vmcs_readl(unsigned long field)
|
||||||
return __vmcs_readl(field);
|
return __vmcs_readl(field);
|
||||||
}
|
}
|
||||||
|
|
||||||
static noinline void vmwrite_error(unsigned long field, unsigned long value)
|
#define vmx_asm1(insn, op1, error_args...) \
|
||||||
{
|
do { \
|
||||||
printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n",
|
asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \
|
||||||
field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
|
".byte 0x2e\n\t" /* branch not taken hint */ \
|
||||||
dump_stack();
|
"jna %l[error]\n\t" \
|
||||||
}
|
_ASM_EXTABLE(1b, %l[fault]) \
|
||||||
|
: : op1 : "cc" : error, fault); \
|
||||||
|
return; \
|
||||||
|
error: \
|
||||||
|
insn##_error(error_args); \
|
||||||
|
return; \
|
||||||
|
fault: \
|
||||||
|
kvm_spurious_fault(); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define vmx_asm2(insn, op1, op2, error_args...) \
|
||||||
|
do { \
|
||||||
|
asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \
|
||||||
|
".byte 0x2e\n\t" /* branch not taken hint */ \
|
||||||
|
"jna %l[error]\n\t" \
|
||||||
|
_ASM_EXTABLE(1b, %l[fault]) \
|
||||||
|
: : op1, op2 : "cc" : error, fault); \
|
||||||
|
return; \
|
||||||
|
error: \
|
||||||
|
insn##_error(error_args); \
|
||||||
|
return; \
|
||||||
|
fault: \
|
||||||
|
kvm_spurious_fault(); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
static __always_inline void __vmcs_writel(unsigned long field, unsigned long value)
|
static __always_inline void __vmcs_writel(unsigned long field, unsigned long value)
|
||||||
{
|
{
|
||||||
bool error;
|
vmx_asm2(vmwrite, "r"(field), "rm"(value), field, value);
|
||||||
|
|
||||||
asm volatile (__ex("vmwrite %2, %1") CC_SET(na)
|
|
||||||
: CC_OUT(na) (error) : "r"(field), "rm"(value));
|
|
||||||
if (unlikely(error))
|
|
||||||
vmwrite_error(field, value);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static __always_inline void vmcs_write16(unsigned long field, u16 value)
|
static __always_inline void vmcs_write16(unsigned long field, u16 value)
|
||||||
|
@ -182,28 +219,18 @@ static __always_inline void vmcs_set_bits(unsigned long field, u32 mask)
|
||||||
static inline void vmcs_clear(struct vmcs *vmcs)
|
static inline void vmcs_clear(struct vmcs *vmcs)
|
||||||
{
|
{
|
||||||
u64 phys_addr = __pa(vmcs);
|
u64 phys_addr = __pa(vmcs);
|
||||||
bool error;
|
|
||||||
|
|
||||||
asm volatile (__ex("vmclear %1") CC_SET(na)
|
vmx_asm1(vmclear, "m"(phys_addr), vmcs, phys_addr);
|
||||||
: CC_OUT(na) (error) : "m"(phys_addr));
|
|
||||||
if (unlikely(error))
|
|
||||||
printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
|
|
||||||
vmcs, phys_addr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void vmcs_load(struct vmcs *vmcs)
|
static inline void vmcs_load(struct vmcs *vmcs)
|
||||||
{
|
{
|
||||||
u64 phys_addr = __pa(vmcs);
|
u64 phys_addr = __pa(vmcs);
|
||||||
bool error;
|
|
||||||
|
|
||||||
if (static_branch_unlikely(&enable_evmcs))
|
if (static_branch_unlikely(&enable_evmcs))
|
||||||
return evmcs_load(phys_addr);
|
return evmcs_load(phys_addr);
|
||||||
|
|
||||||
asm volatile (__ex("vmptrld %1") CC_SET(na)
|
vmx_asm1(vmptrld, "m"(phys_addr), vmcs, phys_addr);
|
||||||
: CC_OUT(na) (error) : "m"(phys_addr));
|
|
||||||
if (unlikely(error))
|
|
||||||
printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
|
|
||||||
vmcs, phys_addr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva)
|
static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva)
|
||||||
|
@ -213,11 +240,8 @@ static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva)
|
||||||
u64 rsvd : 48;
|
u64 rsvd : 48;
|
||||||
u64 gva;
|
u64 gva;
|
||||||
} operand = { vpid, 0, gva };
|
} operand = { vpid, 0, gva };
|
||||||
bool error;
|
|
||||||
|
|
||||||
asm volatile (__ex("invvpid %2, %1") CC_SET(na)
|
vmx_asm2(invvpid, "r"(ext), "m"(operand), ext, vpid, gva);
|
||||||
: CC_OUT(na) (error) : "r"(ext), "m"(operand));
|
|
||||||
BUG_ON(error);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa)
|
static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa)
|
||||||
|
@ -225,11 +249,8 @@ static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa)
|
||||||
struct {
|
struct {
|
||||||
u64 eptp, gpa;
|
u64 eptp, gpa;
|
||||||
} operand = {eptp, gpa};
|
} operand = {eptp, gpa};
|
||||||
bool error;
|
|
||||||
|
|
||||||
asm volatile (__ex("invept %2, %1") CC_SET(na)
|
vmx_asm2(invept, "r"(ext), "m"(operand), ext, eptp, gpa);
|
||||||
: CC_OUT(na) (error) : "r"(ext), "m"(operand));
|
|
||||||
BUG_ON(error);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool vpid_sync_vcpu_addr(int vpid, gva_t addr)
|
static inline bool vpid_sync_vcpu_addr(int vpid, gva_t addr)
|
||||||
|
|
|
@ -343,6 +343,48 @@ static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bit
|
||||||
|
|
||||||
void vmx_vmexit(void);
|
void vmx_vmexit(void);
|
||||||
|
|
||||||
|
#define vmx_insn_failed(fmt...) \
|
||||||
|
do { \
|
||||||
|
WARN_ONCE(1, fmt); \
|
||||||
|
pr_warn_ratelimited(fmt); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
asmlinkage void vmread_error(unsigned long field, bool fault)
|
||||||
|
{
|
||||||
|
if (fault)
|
||||||
|
kvm_spurious_fault();
|
||||||
|
else
|
||||||
|
vmx_insn_failed("kvm: vmread failed: field=%lx\n", field);
|
||||||
|
}
|
||||||
|
|
||||||
|
noinline void vmwrite_error(unsigned long field, unsigned long value)
|
||||||
|
{
|
||||||
|
vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%d\n",
|
||||||
|
field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
|
||||||
|
}
|
||||||
|
|
||||||
|
noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr)
|
||||||
|
{
|
||||||
|
vmx_insn_failed("kvm: vmclear failed: %p/%llx\n", vmcs, phys_addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr)
|
||||||
|
{
|
||||||
|
vmx_insn_failed("kvm: vmptrld failed: %p/%llx\n", vmcs, phys_addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva)
|
||||||
|
{
|
||||||
|
vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n",
|
||||||
|
ext, vpid, gva);
|
||||||
|
}
|
||||||
|
|
||||||
|
noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa)
|
||||||
|
{
|
||||||
|
vmx_insn_failed("kvm: invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n",
|
||||||
|
ext, eptp, gpa);
|
||||||
|
}
|
||||||
|
|
||||||
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
|
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
|
||||||
DEFINE_PER_CPU(struct vmcs *, current_vmcs);
|
DEFINE_PER_CPU(struct vmcs *, current_vmcs);
|
||||||
/*
|
/*
|
||||||
|
@ -486,6 +528,31 @@ static int hv_remote_flush_tlb(struct kvm *kvm)
|
||||||
return hv_remote_flush_tlb_with_range(kvm, NULL);
|
return hv_remote_flush_tlb_with_range(kvm, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
|
||||||
|
{
|
||||||
|
struct hv_enlightened_vmcs *evmcs;
|
||||||
|
struct hv_partition_assist_pg **p_hv_pa_pg =
|
||||||
|
&vcpu->kvm->arch.hyperv.hv_pa_pg;
|
||||||
|
/*
|
||||||
|
* Synthetic VM-Exit is not enabled in current code and so All
|
||||||
|
* evmcs in singe VM shares same assist page.
|
||||||
|
*/
|
||||||
|
if (!*p_hv_pa_pg)
|
||||||
|
*p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL);
|
||||||
|
|
||||||
|
if (!*p_hv_pa_pg)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs;
|
||||||
|
|
||||||
|
evmcs->partition_assist_page =
|
||||||
|
__pa(*p_hv_pa_pg);
|
||||||
|
evmcs->hv_vm_id = (unsigned long)vcpu->kvm;
|
||||||
|
evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* IS_ENABLED(CONFIG_HYPERV) */
|
#endif /* IS_ENABLED(CONFIG_HYPERV) */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1472,27 +1539,32 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
|
||||||
* Returns an int to be compatible with SVM implementation (which can fail).
|
|
||||||
* Do not use directly, use skip_emulated_instruction() instead.
|
|
||||||
*/
|
|
||||||
static int __skip_emulated_instruction(struct kvm_vcpu *vcpu)
|
|
||||||
{
|
{
|
||||||
unsigned long rip;
|
unsigned long rip;
|
||||||
|
|
||||||
rip = kvm_rip_read(vcpu);
|
/*
|
||||||
rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
|
* Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on
|
||||||
kvm_rip_write(vcpu, rip);
|
* undefined behavior: Intel's SDM doesn't mandate the VMCS field be
|
||||||
|
* set when EPT misconfig occurs. In practice, real hardware updates
|
||||||
|
* VM_EXIT_INSTRUCTION_LEN on EPT misconfig, but other hypervisors
|
||||||
|
* (namely Hyper-V) don't set it due to it being undefined behavior,
|
||||||
|
* i.e. we end up advancing IP with some random value.
|
||||||
|
*/
|
||||||
|
if (!static_cpu_has(X86_FEATURE_HYPERVISOR) ||
|
||||||
|
to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) {
|
||||||
|
rip = kvm_rip_read(vcpu);
|
||||||
|
rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
|
||||||
|
kvm_rip_write(vcpu, rip);
|
||||||
|
} else {
|
||||||
|
if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* skipping an emulated instruction also counts */
|
/* skipping an emulated instruction also counts */
|
||||||
vmx_set_interrupt_shadow(vcpu, 0);
|
vmx_set_interrupt_shadow(vcpu, 0);
|
||||||
|
|
||||||
return EMULATE_DONE;
|
return 1;
|
||||||
}
|
|
||||||
|
|
||||||
static inline void skip_emulated_instruction(struct kvm_vcpu *vcpu)
|
|
||||||
{
|
|
||||||
(void)__skip_emulated_instruction(vcpu);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
|
static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
|
||||||
|
@ -1527,8 +1599,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
|
||||||
int inc_eip = 0;
|
int inc_eip = 0;
|
||||||
if (kvm_exception_is_soft(nr))
|
if (kvm_exception_is_soft(nr))
|
||||||
inc_eip = vcpu->arch.event_exit_inst_len;
|
inc_eip = vcpu->arch.event_exit_inst_len;
|
||||||
if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE)
|
kvm_inject_realmode_interrupt(vcpu, nr, inc_eip);
|
||||||
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1700,6 +1771,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||||
#endif
|
#endif
|
||||||
case MSR_EFER:
|
case MSR_EFER:
|
||||||
return kvm_get_msr_common(vcpu, msr_info);
|
return kvm_get_msr_common(vcpu, msr_info);
|
||||||
|
case MSR_IA32_UMWAIT_CONTROL:
|
||||||
|
if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
msr_info->data = vmx->msr_ia32_umwait_control;
|
||||||
|
break;
|
||||||
case MSR_IA32_SPEC_CTRL:
|
case MSR_IA32_SPEC_CTRL:
|
||||||
if (!msr_info->host_initiated &&
|
if (!msr_info->host_initiated &&
|
||||||
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
|
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
|
||||||
|
@ -1873,6 +1950,16 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
|
||||||
return 1;
|
return 1;
|
||||||
vmcs_write64(GUEST_BNDCFGS, data);
|
vmcs_write64(GUEST_BNDCFGS, data);
|
||||||
break;
|
break;
|
||||||
|
case MSR_IA32_UMWAIT_CONTROL:
|
||||||
|
if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
/* The reserved bit 1 and non-32 bit [63:32] should be zero */
|
||||||
|
if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32)))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
vmx->msr_ia32_umwait_control = data;
|
||||||
|
break;
|
||||||
case MSR_IA32_SPEC_CTRL:
|
case MSR_IA32_SPEC_CTRL:
|
||||||
if (!msr_info->host_initiated &&
|
if (!msr_info->host_initiated &&
|
||||||
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
|
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
|
||||||
|
@ -2290,6 +2377,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
|
||||||
SECONDARY_EXEC_RDRAND_EXITING |
|
SECONDARY_EXEC_RDRAND_EXITING |
|
||||||
SECONDARY_EXEC_ENABLE_PML |
|
SECONDARY_EXEC_ENABLE_PML |
|
||||||
SECONDARY_EXEC_TSC_SCALING |
|
SECONDARY_EXEC_TSC_SCALING |
|
||||||
|
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
|
||||||
SECONDARY_EXEC_PT_USE_GPA |
|
SECONDARY_EXEC_PT_USE_GPA |
|
||||||
SECONDARY_EXEC_PT_CONCEAL_VMX |
|
SECONDARY_EXEC_PT_CONCEAL_VMX |
|
||||||
SECONDARY_EXEC_ENABLE_VMFUNC |
|
SECONDARY_EXEC_ENABLE_VMFUNC |
|
||||||
|
@ -4026,6 +4114,23 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (vmx_waitpkg_supported()) {
|
||||||
|
bool waitpkg_enabled =
|
||||||
|
guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG);
|
||||||
|
|
||||||
|
if (!waitpkg_enabled)
|
||||||
|
exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
|
||||||
|
|
||||||
|
if (nested) {
|
||||||
|
if (waitpkg_enabled)
|
||||||
|
vmx->nested.msrs.secondary_ctls_high |=
|
||||||
|
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
|
||||||
|
else
|
||||||
|
vmx->nested.msrs.secondary_ctls_high &=
|
||||||
|
~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
vmx->secondary_exec_control = exec_control;
|
vmx->secondary_exec_control = exec_control;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4160,6 +4265,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
||||||
vmx->rmode.vm86_active = 0;
|
vmx->rmode.vm86_active = 0;
|
||||||
vmx->spec_ctrl = 0;
|
vmx->spec_ctrl = 0;
|
||||||
|
|
||||||
|
vmx->msr_ia32_umwait_control = 0;
|
||||||
|
|
||||||
vcpu->arch.microcode_version = 0x100000000ULL;
|
vcpu->arch.microcode_version = 0x100000000ULL;
|
||||||
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
|
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
|
||||||
vmx->hv_deadline_tsc = -1;
|
vmx->hv_deadline_tsc = -1;
|
||||||
|
@ -4277,8 +4384,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
|
||||||
int inc_eip = 0;
|
int inc_eip = 0;
|
||||||
if (vcpu->arch.interrupt.soft)
|
if (vcpu->arch.interrupt.soft)
|
||||||
inc_eip = vcpu->arch.event_exit_inst_len;
|
inc_eip = vcpu->arch.event_exit_inst_len;
|
||||||
if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE)
|
kvm_inject_realmode_interrupt(vcpu, irq, inc_eip);
|
||||||
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
intr = irq | INTR_INFO_VALID_MASK;
|
intr = irq | INTR_INFO_VALID_MASK;
|
||||||
|
@ -4314,8 +4420,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
|
||||||
vmx->loaded_vmcs->nmi_known_unmasked = false;
|
vmx->loaded_vmcs->nmi_known_unmasked = false;
|
||||||
|
|
||||||
if (vmx->rmode.vm86_active) {
|
if (vmx->rmode.vm86_active) {
|
||||||
if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE)
|
kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0);
|
||||||
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4442,7 +4547,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
|
||||||
* Cause the #SS fault with 0 error code in VM86 mode.
|
* Cause the #SS fault with 0 error code in VM86 mode.
|
||||||
*/
|
*/
|
||||||
if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
|
if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
|
||||||
if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) {
|
if (kvm_emulate_instruction(vcpu, 0)) {
|
||||||
if (vcpu->arch.halt_request) {
|
if (vcpu->arch.halt_request) {
|
||||||
vcpu->arch.halt_request = 0;
|
vcpu->arch.halt_request = 0;
|
||||||
return kvm_vcpu_halt(vcpu);
|
return kvm_vcpu_halt(vcpu);
|
||||||
|
@ -4493,7 +4598,6 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
|
||||||
u32 intr_info, ex_no, error_code;
|
u32 intr_info, ex_no, error_code;
|
||||||
unsigned long cr2, rip, dr6;
|
unsigned long cr2, rip, dr6;
|
||||||
u32 vect_info;
|
u32 vect_info;
|
||||||
enum emulation_result er;
|
|
||||||
|
|
||||||
vect_info = vmx->idt_vectoring_info;
|
vect_info = vmx->idt_vectoring_info;
|
||||||
intr_info = vmx->exit_intr_info;
|
intr_info = vmx->exit_intr_info;
|
||||||
|
@ -4510,13 +4614,17 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
|
||||||
|
|
||||||
if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
|
if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
|
||||||
WARN_ON_ONCE(!enable_vmware_backdoor);
|
WARN_ON_ONCE(!enable_vmware_backdoor);
|
||||||
er = kvm_emulate_instruction(vcpu,
|
|
||||||
EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
|
/*
|
||||||
if (er == EMULATE_USER_EXIT)
|
* VMware backdoor emulation on #GP interception only handles
|
||||||
return 0;
|
* IN{S}, OUT{S}, and RDPMC, none of which generate a non-zero
|
||||||
else if (er != EMULATE_DONE)
|
* error code on #GP.
|
||||||
|
*/
|
||||||
|
if (error_code) {
|
||||||
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
|
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
|
||||||
return 1;
|
return 1;
|
||||||
|
}
|
||||||
|
return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -4558,7 +4666,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
|
||||||
vcpu->arch.dr6 &= ~DR_TRAP_BITS;
|
vcpu->arch.dr6 &= ~DR_TRAP_BITS;
|
||||||
vcpu->arch.dr6 |= dr6 | DR6_RTM;
|
vcpu->arch.dr6 |= dr6 | DR6_RTM;
|
||||||
if (is_icebp(intr_info))
|
if (is_icebp(intr_info))
|
||||||
skip_emulated_instruction(vcpu);
|
WARN_ON(!skip_emulated_instruction(vcpu));
|
||||||
|
|
||||||
kvm_queue_exception(vcpu, DB_VECTOR);
|
kvm_queue_exception(vcpu, DB_VECTOR);
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -4613,7 +4721,7 @@ static int handle_io(struct kvm_vcpu *vcpu)
|
||||||
++vcpu->stat.io_exits;
|
++vcpu->stat.io_exits;
|
||||||
|
|
||||||
if (string)
|
if (string)
|
||||||
return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
|
return kvm_emulate_instruction(vcpu, 0);
|
||||||
|
|
||||||
port = exit_qualification >> 16;
|
port = exit_qualification >> 16;
|
||||||
size = (exit_qualification & 7) + 1;
|
size = (exit_qualification & 7) + 1;
|
||||||
|
@ -4687,7 +4795,7 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
|
||||||
static int handle_desc(struct kvm_vcpu *vcpu)
|
static int handle_desc(struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
|
WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
|
||||||
return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
|
return kvm_emulate_instruction(vcpu, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int handle_cr(struct kvm_vcpu *vcpu)
|
static int handle_cr(struct kvm_vcpu *vcpu)
|
||||||
|
@ -4903,7 +5011,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu)
|
||||||
|
|
||||||
static int handle_invd(struct kvm_vcpu *vcpu)
|
static int handle_invd(struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
|
return kvm_emulate_instruction(vcpu, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int handle_invlpg(struct kvm_vcpu *vcpu)
|
static int handle_invlpg(struct kvm_vcpu *vcpu)
|
||||||
|
@ -4937,20 +5045,6 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int handle_xsaves(struct kvm_vcpu *vcpu)
|
|
||||||
{
|
|
||||||
kvm_skip_emulated_instruction(vcpu);
|
|
||||||
WARN(1, "this should never happen\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int handle_xrstors(struct kvm_vcpu *vcpu)
|
|
||||||
{
|
|
||||||
kvm_skip_emulated_instruction(vcpu);
|
|
||||||
WARN(1, "this should never happen\n");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int handle_apic_access(struct kvm_vcpu *vcpu)
|
static int handle_apic_access(struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
if (likely(fasteoi)) {
|
if (likely(fasteoi)) {
|
||||||
|
@ -4970,7 +5064,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
|
||||||
return kvm_skip_emulated_instruction(vcpu);
|
return kvm_skip_emulated_instruction(vcpu);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
|
return kvm_emulate_instruction(vcpu, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
|
static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
|
||||||
|
@ -5039,23 +5133,15 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
|
||||||
if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
|
if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
|
||||||
type != INTR_TYPE_EXT_INTR &&
|
type != INTR_TYPE_EXT_INTR &&
|
||||||
type != INTR_TYPE_NMI_INTR))
|
type != INTR_TYPE_NMI_INTR))
|
||||||
skip_emulated_instruction(vcpu);
|
WARN_ON(!skip_emulated_instruction(vcpu));
|
||||||
|
|
||||||
if (kvm_task_switch(vcpu, tss_selector,
|
|
||||||
type == INTR_TYPE_SOFT_INTR ? idt_index : -1, reason,
|
|
||||||
has_error_code, error_code) == EMULATE_FAIL) {
|
|
||||||
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
|
||||||
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
|
|
||||||
vcpu->run->internal.ndata = 0;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* TODO: What about debug traps on tss switch?
|
* TODO: What about debug traps on tss switch?
|
||||||
* Are we supposed to inject them and update dr6?
|
* Are we supposed to inject them and update dr6?
|
||||||
*/
|
*/
|
||||||
|
return kvm_task_switch(vcpu, tss_selector,
|
||||||
return 1;
|
type == INTR_TYPE_SOFT_INTR ? idt_index : -1,
|
||||||
|
reason, has_error_code, error_code);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int handle_ept_violation(struct kvm_vcpu *vcpu)
|
static int handle_ept_violation(struct kvm_vcpu *vcpu)
|
||||||
|
@ -5114,21 +5200,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
|
||||||
if (!is_guest_mode(vcpu) &&
|
if (!is_guest_mode(vcpu) &&
|
||||||
!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
|
!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
|
||||||
trace_kvm_fast_mmio(gpa);
|
trace_kvm_fast_mmio(gpa);
|
||||||
/*
|
return kvm_skip_emulated_instruction(vcpu);
|
||||||
* Doing kvm_skip_emulated_instruction() depends on undefined
|
|
||||||
* behavior: Intel's manual doesn't mandate
|
|
||||||
* VM_EXIT_INSTRUCTION_LEN to be set in VMCS when EPT MISCONFIG
|
|
||||||
* occurs and while on real hardware it was observed to be set,
|
|
||||||
* other hypervisors (namely Hyper-V) don't set it, we end up
|
|
||||||
* advancing IP with some random value. Disable fast mmio when
|
|
||||||
* running nested and keep it for real hardware in hope that
|
|
||||||
* VM_EXIT_INSTRUCTION_LEN will always be set correctly.
|
|
||||||
*/
|
|
||||||
if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
|
|
||||||
return kvm_skip_emulated_instruction(vcpu);
|
|
||||||
else
|
|
||||||
return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) ==
|
|
||||||
EMULATE_DONE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
|
return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
|
||||||
|
@ -5147,8 +5219,6 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu)
|
||||||
static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
|
static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||||
enum emulation_result err = EMULATE_DONE;
|
|
||||||
int ret = 1;
|
|
||||||
bool intr_window_requested;
|
bool intr_window_requested;
|
||||||
unsigned count = 130;
|
unsigned count = 130;
|
||||||
|
|
||||||
|
@ -5169,41 +5239,35 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
|
||||||
if (kvm_test_request(KVM_REQ_EVENT, vcpu))
|
if (kvm_test_request(KVM_REQ_EVENT, vcpu))
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
err = kvm_emulate_instruction(vcpu, 0);
|
if (!kvm_emulate_instruction(vcpu, 0))
|
||||||
|
return 0;
|
||||||
if (err == EMULATE_USER_EXIT) {
|
|
||||||
++vcpu->stat.mmio_exits;
|
|
||||||
ret = 0;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (err != EMULATE_DONE)
|
|
||||||
goto emulation_error;
|
|
||||||
|
|
||||||
if (vmx->emulation_required && !vmx->rmode.vm86_active &&
|
if (vmx->emulation_required && !vmx->rmode.vm86_active &&
|
||||||
vcpu->arch.exception.pending)
|
vcpu->arch.exception.pending) {
|
||||||
goto emulation_error;
|
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||||
|
vcpu->run->internal.suberror =
|
||||||
|
KVM_INTERNAL_ERROR_EMULATION;
|
||||||
|
vcpu->run->internal.ndata = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (vcpu->arch.halt_request) {
|
if (vcpu->arch.halt_request) {
|
||||||
vcpu->arch.halt_request = 0;
|
vcpu->arch.halt_request = 0;
|
||||||
ret = kvm_vcpu_halt(vcpu);
|
return kvm_vcpu_halt(vcpu);
|
||||||
goto out;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note, return 1 and not 0, vcpu_run() is responsible for
|
||||||
|
* morphing the pending signal into the proper return code.
|
||||||
|
*/
|
||||||
if (signal_pending(current))
|
if (signal_pending(current))
|
||||||
goto out;
|
return 1;
|
||||||
|
|
||||||
if (need_resched())
|
if (need_resched())
|
||||||
schedule();
|
schedule();
|
||||||
}
|
}
|
||||||
|
|
||||||
out:
|
return 1;
|
||||||
return ret;
|
|
||||||
|
|
||||||
emulation_error:
|
|
||||||
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
|
||||||
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
|
|
||||||
vcpu->run->internal.ndata = 0;
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void grow_ple_window(struct kvm_vcpu *vcpu)
|
static void grow_ple_window(struct kvm_vcpu *vcpu)
|
||||||
|
@ -5474,6 +5538,14 @@ static int handle_encls(struct kvm_vcpu *vcpu)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int handle_unexpected_vmexit(struct kvm_vcpu *vcpu)
|
||||||
|
{
|
||||||
|
kvm_skip_emulated_instruction(vcpu);
|
||||||
|
WARN_ONCE(1, "Unexpected VM-Exit Reason = 0x%x",
|
||||||
|
vmcs_read32(VM_EXIT_REASON));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The exit handlers return 1 if the exit was handled fully and guest execution
|
* The exit handlers return 1 if the exit was handled fully and guest execution
|
||||||
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
|
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
|
||||||
|
@ -5525,13 +5597,15 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
|
||||||
[EXIT_REASON_INVVPID] = handle_vmx_instruction,
|
[EXIT_REASON_INVVPID] = handle_vmx_instruction,
|
||||||
[EXIT_REASON_RDRAND] = handle_invalid_op,
|
[EXIT_REASON_RDRAND] = handle_invalid_op,
|
||||||
[EXIT_REASON_RDSEED] = handle_invalid_op,
|
[EXIT_REASON_RDSEED] = handle_invalid_op,
|
||||||
[EXIT_REASON_XSAVES] = handle_xsaves,
|
[EXIT_REASON_XSAVES] = handle_unexpected_vmexit,
|
||||||
[EXIT_REASON_XRSTORS] = handle_xrstors,
|
[EXIT_REASON_XRSTORS] = handle_unexpected_vmexit,
|
||||||
[EXIT_REASON_PML_FULL] = handle_pml_full,
|
[EXIT_REASON_PML_FULL] = handle_pml_full,
|
||||||
[EXIT_REASON_INVPCID] = handle_invpcid,
|
[EXIT_REASON_INVPCID] = handle_invpcid,
|
||||||
[EXIT_REASON_VMFUNC] = handle_vmx_instruction,
|
[EXIT_REASON_VMFUNC] = handle_vmx_instruction,
|
||||||
[EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
|
[EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
|
||||||
[EXIT_REASON_ENCLS] = handle_encls,
|
[EXIT_REASON_ENCLS] = handle_encls,
|
||||||
|
[EXIT_REASON_UMWAIT] = handle_unexpected_vmexit,
|
||||||
|
[EXIT_REASON_TPAUSE] = handle_unexpected_vmexit,
|
||||||
};
|
};
|
||||||
|
|
||||||
static const int kvm_vmx_max_exit_handlers =
|
static const int kvm_vmx_max_exit_handlers =
|
||||||
|
@ -6362,6 +6436,23 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
|
||||||
msrs[i].host, false);
|
msrs[i].host, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx)
|
||||||
|
{
|
||||||
|
u32 host_umwait_control;
|
||||||
|
|
||||||
|
if (!vmx_has_waitpkg(vmx))
|
||||||
|
return;
|
||||||
|
|
||||||
|
host_umwait_control = get_umwait_control_msr();
|
||||||
|
|
||||||
|
if (vmx->msr_ia32_umwait_control != host_umwait_control)
|
||||||
|
add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL,
|
||||||
|
vmx->msr_ia32_umwait_control,
|
||||||
|
host_umwait_control, false);
|
||||||
|
else
|
||||||
|
clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL);
|
||||||
|
}
|
||||||
|
|
||||||
static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
|
static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||||
|
@ -6456,6 +6547,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
||||||
pt_guest_enter(vmx);
|
pt_guest_enter(vmx);
|
||||||
|
|
||||||
atomic_switch_perf_msrs(vmx);
|
atomic_switch_perf_msrs(vmx);
|
||||||
|
atomic_switch_umwait_control_msr(vmx);
|
||||||
|
|
||||||
if (enable_preemption_timer)
|
if (enable_preemption_timer)
|
||||||
vmx_update_hv_timer(vcpu);
|
vmx_update_hv_timer(vcpu);
|
||||||
|
@ -6511,6 +6603,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
||||||
current_evmcs->hv_clean_fields |=
|
current_evmcs->hv_clean_fields |=
|
||||||
HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
|
HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
|
||||||
|
|
||||||
|
if (static_branch_unlikely(&enable_evmcs))
|
||||||
|
current_evmcs->hv_vp_id = vcpu->arch.hyperv.vp_index;
|
||||||
|
|
||||||
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
|
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
|
||||||
if (vmx->host_debugctlmsr)
|
if (vmx->host_debugctlmsr)
|
||||||
update_debugctlmsr(vmx->host_debugctlmsr);
|
update_debugctlmsr(vmx->host_debugctlmsr);
|
||||||
|
@ -6578,6 +6673,7 @@ static struct kvm *vmx_vm_alloc(void)
|
||||||
|
|
||||||
static void vmx_vm_free(struct kvm *kvm)
|
static void vmx_vm_free(struct kvm *kvm)
|
||||||
{
|
{
|
||||||
|
kfree(kvm->arch.hyperv.hv_pa_pg);
|
||||||
vfree(to_kvm_vmx(kvm));
|
vfree(to_kvm_vmx(kvm));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7706,7 +7802,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
|
||||||
|
|
||||||
.run = vmx_vcpu_run,
|
.run = vmx_vcpu_run,
|
||||||
.handle_exit = vmx_handle_exit,
|
.handle_exit = vmx_handle_exit,
|
||||||
.skip_emulated_instruction = __skip_emulated_instruction,
|
.skip_emulated_instruction = skip_emulated_instruction,
|
||||||
.set_interrupt_shadow = vmx_set_interrupt_shadow,
|
.set_interrupt_shadow = vmx_set_interrupt_shadow,
|
||||||
.get_interrupt_shadow = vmx_get_interrupt_shadow,
|
.get_interrupt_shadow = vmx_get_interrupt_shadow,
|
||||||
.patch_hypercall = vmx_patch_hypercall,
|
.patch_hypercall = vmx_patch_hypercall,
|
||||||
|
@ -7837,6 +7933,7 @@ static void vmx_exit(void)
|
||||||
if (!vp_ap)
|
if (!vp_ap)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
vp_ap->nested_control.features.directhypercall = 0;
|
||||||
vp_ap->current_nested_vmcs = 0;
|
vp_ap->current_nested_vmcs = 0;
|
||||||
vp_ap->enlighten_vmentry = 0;
|
vp_ap->enlighten_vmentry = 0;
|
||||||
}
|
}
|
||||||
|
@ -7876,6 +7973,11 @@ static int __init vmx_init(void)
|
||||||
pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n");
|
pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n");
|
||||||
static_branch_enable(&enable_evmcs);
|
static_branch_enable(&enable_evmcs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
|
||||||
|
vmx_x86_ops.enable_direct_tlbflush
|
||||||
|
= hv_enable_direct_tlbflush;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
enlightened_vmcs = false;
|
enlightened_vmcs = false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,6 +14,8 @@
|
||||||
extern const u32 vmx_msr_index[];
|
extern const u32 vmx_msr_index[];
|
||||||
extern u64 host_efer;
|
extern u64 host_efer;
|
||||||
|
|
||||||
|
extern u32 get_umwait_control_msr(void);
|
||||||
|
|
||||||
#define MSR_TYPE_R 1
|
#define MSR_TYPE_R 1
|
||||||
#define MSR_TYPE_W 2
|
#define MSR_TYPE_W 2
|
||||||
#define MSR_TYPE_RW 3
|
#define MSR_TYPE_RW 3
|
||||||
|
@ -211,6 +213,7 @@ struct vcpu_vmx {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
u64 spec_ctrl;
|
u64 spec_ctrl;
|
||||||
|
u32 msr_ia32_umwait_control;
|
||||||
|
|
||||||
u32 secondary_exec_control;
|
u32 secondary_exec_control;
|
||||||
|
|
||||||
|
@ -497,6 +500,12 @@ static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx)
|
||||||
vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
|
vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx)
|
||||||
|
{
|
||||||
|
return vmx->secondary_exec_control &
|
||||||
|
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
|
||||||
|
}
|
||||||
|
|
||||||
void dump_vmcs(void);
|
void dump_vmcs(void);
|
||||||
|
|
||||||
#endif /* __KVM_X86_VMX_H */
|
#endif /* __KVM_X86_VMX_H */
|
||||||
|
|
|
@ -360,7 +360,8 @@ EXPORT_SYMBOL_GPL(kvm_set_apic_base);
|
||||||
asmlinkage __visible void kvm_spurious_fault(void)
|
asmlinkage __visible void kvm_spurious_fault(void)
|
||||||
{
|
{
|
||||||
/* Fault while not rebooting. We want the trace. */
|
/* Fault while not rebooting. We want the trace. */
|
||||||
BUG();
|
if (!kvm_rebooting)
|
||||||
|
BUG();
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(kvm_spurious_fault);
|
EXPORT_SYMBOL_GPL(kvm_spurious_fault);
|
||||||
|
|
||||||
|
@ -1145,6 +1146,44 @@ static u32 msrs_to_save[] = {
|
||||||
MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
|
MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
|
||||||
MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
|
MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
|
||||||
MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
|
MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
|
||||||
|
MSR_IA32_UMWAIT_CONTROL,
|
||||||
|
|
||||||
|
MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
|
||||||
|
MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3,
|
||||||
|
MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
|
||||||
|
MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
|
||||||
|
MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
|
||||||
|
MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
|
||||||
|
MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
|
||||||
|
MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7,
|
||||||
|
MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9,
|
||||||
|
MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11,
|
||||||
|
MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13,
|
||||||
|
MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15,
|
||||||
|
MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17,
|
||||||
|
MSR_ARCH_PERFMON_PERFCTR0 + 18, MSR_ARCH_PERFMON_PERFCTR0 + 19,
|
||||||
|
MSR_ARCH_PERFMON_PERFCTR0 + 20, MSR_ARCH_PERFMON_PERFCTR0 + 21,
|
||||||
|
MSR_ARCH_PERFMON_PERFCTR0 + 22, MSR_ARCH_PERFMON_PERFCTR0 + 23,
|
||||||
|
MSR_ARCH_PERFMON_PERFCTR0 + 24, MSR_ARCH_PERFMON_PERFCTR0 + 25,
|
||||||
|
MSR_ARCH_PERFMON_PERFCTR0 + 26, MSR_ARCH_PERFMON_PERFCTR0 + 27,
|
||||||
|
MSR_ARCH_PERFMON_PERFCTR0 + 28, MSR_ARCH_PERFMON_PERFCTR0 + 29,
|
||||||
|
MSR_ARCH_PERFMON_PERFCTR0 + 30, MSR_ARCH_PERFMON_PERFCTR0 + 31,
|
||||||
|
MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
|
||||||
|
MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
|
||||||
|
MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
|
||||||
|
MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7,
|
||||||
|
MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9,
|
||||||
|
MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11,
|
||||||
|
MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
|
||||||
|
MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
|
||||||
|
MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
|
||||||
|
MSR_ARCH_PERFMON_EVENTSEL0 + 18, MSR_ARCH_PERFMON_EVENTSEL0 + 19,
|
||||||
|
MSR_ARCH_PERFMON_EVENTSEL0 + 20, MSR_ARCH_PERFMON_EVENTSEL0 + 21,
|
||||||
|
MSR_ARCH_PERFMON_EVENTSEL0 + 22, MSR_ARCH_PERFMON_EVENTSEL0 + 23,
|
||||||
|
MSR_ARCH_PERFMON_EVENTSEL0 + 24, MSR_ARCH_PERFMON_EVENTSEL0 + 25,
|
||||||
|
MSR_ARCH_PERFMON_EVENTSEL0 + 26, MSR_ARCH_PERFMON_EVENTSEL0 + 27,
|
||||||
|
MSR_ARCH_PERFMON_EVENTSEL0 + 28, MSR_ARCH_PERFMON_EVENTSEL0 + 29,
|
||||||
|
MSR_ARCH_PERFMON_EVENTSEL0 + 30, MSR_ARCH_PERFMON_EVENTSEL0 + 31,
|
||||||
};
|
};
|
||||||
|
|
||||||
static unsigned num_msrs_to_save;
|
static unsigned num_msrs_to_save;
|
||||||
|
@ -3169,7 +3208,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||||
case KVM_CAP_HYPERV_EVENTFD:
|
case KVM_CAP_HYPERV_EVENTFD:
|
||||||
case KVM_CAP_HYPERV_TLBFLUSH:
|
case KVM_CAP_HYPERV_TLBFLUSH:
|
||||||
case KVM_CAP_HYPERV_SEND_IPI:
|
case KVM_CAP_HYPERV_SEND_IPI:
|
||||||
case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
|
|
||||||
case KVM_CAP_HYPERV_CPUID:
|
case KVM_CAP_HYPERV_CPUID:
|
||||||
case KVM_CAP_PCI_SEGMENT:
|
case KVM_CAP_PCI_SEGMENT:
|
||||||
case KVM_CAP_DEBUGREGS:
|
case KVM_CAP_DEBUGREGS:
|
||||||
|
@ -3246,6 +3284,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||||
r = kvm_x86_ops->get_nested_state ?
|
r = kvm_x86_ops->get_nested_state ?
|
||||||
kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0;
|
kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0;
|
||||||
break;
|
break;
|
||||||
|
case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
|
||||||
|
r = kvm_x86_ops->enable_direct_tlbflush != NULL;
|
||||||
|
break;
|
||||||
|
case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
|
||||||
|
r = kvm_x86_ops->nested_enable_evmcs != NULL;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -4019,6 +4063,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
|
||||||
r = -EFAULT;
|
r = -EFAULT;
|
||||||
}
|
}
|
||||||
return r;
|
return r;
|
||||||
|
case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
|
||||||
|
if (!kvm_x86_ops->enable_direct_tlbflush)
|
||||||
|
return -ENOTTY;
|
||||||
|
|
||||||
|
return kvm_x86_ops->enable_direct_tlbflush(vcpu);
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
@ -5051,6 +5100,11 @@ static void kvm_init_msr_list(void)
|
||||||
u32 dummy[2];
|
u32 dummy[2];
|
||||||
unsigned i, j;
|
unsigned i, j;
|
||||||
|
|
||||||
|
BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
|
||||||
|
"Please update the fixed PMCs in msrs_to_save[]");
|
||||||
|
BUILD_BUG_ON_MSG(INTEL_PMC_MAX_GENERIC != 32,
|
||||||
|
"Please update the generic perfctr/eventsel MSRs in msrs_to_save[]");
|
||||||
|
|
||||||
for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
|
for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
|
||||||
if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
|
if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
|
||||||
continue;
|
continue;
|
||||||
|
@ -5389,7 +5443,6 @@ EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
|
||||||
int handle_ud(struct kvm_vcpu *vcpu)
|
int handle_ud(struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
int emul_type = EMULTYPE_TRAP_UD;
|
int emul_type = EMULTYPE_TRAP_UD;
|
||||||
enum emulation_result er;
|
|
||||||
char sig[5]; /* ud2; .ascii "kvm" */
|
char sig[5]; /* ud2; .ascii "kvm" */
|
||||||
struct x86_exception e;
|
struct x86_exception e;
|
||||||
|
|
||||||
|
@ -5398,15 +5451,10 @@ int handle_ud(struct kvm_vcpu *vcpu)
|
||||||
sig, sizeof(sig), &e) == 0 &&
|
sig, sizeof(sig), &e) == 0 &&
|
||||||
memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
|
memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
|
||||||
kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
|
kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
|
||||||
emul_type = 0;
|
emul_type = EMULTYPE_TRAP_UD_FORCED;
|
||||||
}
|
}
|
||||||
|
|
||||||
er = kvm_emulate_instruction(vcpu, emul_type);
|
return kvm_emulate_instruction(vcpu, emul_type);
|
||||||
if (er == EMULATE_USER_EXIT)
|
|
||||||
return 0;
|
|
||||||
if (er != EMULATE_DONE)
|
|
||||||
kvm_queue_exception(vcpu, UD_VECTOR);
|
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(handle_ud);
|
EXPORT_SYMBOL_GPL(handle_ud);
|
||||||
|
|
||||||
|
@ -6228,7 +6276,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
|
||||||
vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
|
vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
|
void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
|
||||||
{
|
{
|
||||||
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
|
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
|
||||||
int ret;
|
int ret;
|
||||||
|
@ -6240,37 +6288,43 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
|
||||||
ctxt->_eip = ctxt->eip + inc_eip;
|
ctxt->_eip = ctxt->eip + inc_eip;
|
||||||
ret = emulate_int_real(ctxt, irq);
|
ret = emulate_int_real(ctxt, irq);
|
||||||
|
|
||||||
if (ret != X86EMUL_CONTINUE)
|
if (ret != X86EMUL_CONTINUE) {
|
||||||
return EMULATE_FAIL;
|
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
|
||||||
|
} else {
|
||||||
ctxt->eip = ctxt->_eip;
|
ctxt->eip = ctxt->_eip;
|
||||||
kvm_rip_write(vcpu, ctxt->eip);
|
kvm_rip_write(vcpu, ctxt->eip);
|
||||||
kvm_set_rflags(vcpu, ctxt->eflags);
|
kvm_set_rflags(vcpu, ctxt->eflags);
|
||||||
|
}
|
||||||
return EMULATE_DONE;
|
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
|
EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
|
||||||
|
|
||||||
static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
|
static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
|
||||||
{
|
{
|
||||||
int r = EMULATE_DONE;
|
|
||||||
|
|
||||||
++vcpu->stat.insn_emulation_fail;
|
++vcpu->stat.insn_emulation_fail;
|
||||||
trace_kvm_emulate_insn_failed(vcpu);
|
trace_kvm_emulate_insn_failed(vcpu);
|
||||||
|
|
||||||
if (emulation_type & EMULTYPE_NO_UD_ON_FAIL)
|
if (emulation_type & EMULTYPE_VMWARE_GP) {
|
||||||
return EMULATE_FAIL;
|
kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (emulation_type & EMULTYPE_SKIP) {
|
||||||
|
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||||
|
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
|
||||||
|
vcpu->run->internal.ndata = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
kvm_queue_exception(vcpu, UD_VECTOR);
|
||||||
|
|
||||||
if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
|
if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
|
||||||
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
||||||
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
|
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
|
||||||
vcpu->run->internal.ndata = 0;
|
vcpu->run->internal.ndata = 0;
|
||||||
r = EMULATE_USER_EXIT;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
kvm_queue_exception(vcpu, UD_VECTOR);
|
return 1;
|
||||||
|
|
||||||
return r;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
|
static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
|
||||||
|
@ -6425,7 +6479,7 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
|
||||||
return dr6;
|
return dr6;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
|
static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
struct kvm_run *kvm_run = vcpu->run;
|
struct kvm_run *kvm_run = vcpu->run;
|
||||||
|
|
||||||
|
@ -6434,10 +6488,10 @@ static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
|
||||||
kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
|
kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
|
||||||
kvm_run->debug.arch.exception = DB_VECTOR;
|
kvm_run->debug.arch.exception = DB_VECTOR;
|
||||||
kvm_run->exit_reason = KVM_EXIT_DEBUG;
|
kvm_run->exit_reason = KVM_EXIT_DEBUG;
|
||||||
*r = EMULATE_USER_EXIT;
|
return 0;
|
||||||
} else {
|
|
||||||
kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
|
|
||||||
}
|
}
|
||||||
|
kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
|
int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
|
||||||
|
@ -6446,7 +6500,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
r = kvm_x86_ops->skip_emulated_instruction(vcpu);
|
r = kvm_x86_ops->skip_emulated_instruction(vcpu);
|
||||||
if (unlikely(r != EMULATE_DONE))
|
if (unlikely(!r))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -6458,8 +6512,8 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
|
||||||
* that sets the TF flag".
|
* that sets the TF flag".
|
||||||
*/
|
*/
|
||||||
if (unlikely(rflags & X86_EFLAGS_TF))
|
if (unlikely(rflags & X86_EFLAGS_TF))
|
||||||
kvm_vcpu_do_singlestep(vcpu, &r);
|
r = kvm_vcpu_do_singlestep(vcpu);
|
||||||
return r == EMULATE_DONE;
|
return r;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
|
EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
|
||||||
|
|
||||||
|
@ -6478,7 +6532,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
|
||||||
kvm_run->debug.arch.pc = eip;
|
kvm_run->debug.arch.pc = eip;
|
||||||
kvm_run->debug.arch.exception = DB_VECTOR;
|
kvm_run->debug.arch.exception = DB_VECTOR;
|
||||||
kvm_run->exit_reason = KVM_EXIT_DEBUG;
|
kvm_run->exit_reason = KVM_EXIT_DEBUG;
|
||||||
*r = EMULATE_USER_EXIT;
|
*r = 0;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6494,7 +6548,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
|
||||||
vcpu->arch.dr6 &= ~DR_TRAP_BITS;
|
vcpu->arch.dr6 &= ~DR_TRAP_BITS;
|
||||||
vcpu->arch.dr6 |= dr6 | DR6_RTM;
|
vcpu->arch.dr6 |= dr6 | DR6_RTM;
|
||||||
kvm_queue_exception(vcpu, DB_VECTOR);
|
kvm_queue_exception(vcpu, DB_VECTOR);
|
||||||
*r = EMULATE_DONE;
|
*r = 1;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6578,11 +6632,14 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
|
||||||
trace_kvm_emulate_insn_start(vcpu);
|
trace_kvm_emulate_insn_start(vcpu);
|
||||||
++vcpu->stat.insn_emulation;
|
++vcpu->stat.insn_emulation;
|
||||||
if (r != EMULATION_OK) {
|
if (r != EMULATION_OK) {
|
||||||
if (emulation_type & EMULTYPE_TRAP_UD)
|
if ((emulation_type & EMULTYPE_TRAP_UD) ||
|
||||||
return EMULATE_FAIL;
|
(emulation_type & EMULTYPE_TRAP_UD_FORCED)) {
|
||||||
|
kvm_queue_exception(vcpu, UD_VECTOR);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
|
if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
|
||||||
emulation_type))
|
emulation_type))
|
||||||
return EMULATE_DONE;
|
return 1;
|
||||||
if (ctxt->have_exception) {
|
if (ctxt->have_exception) {
|
||||||
/*
|
/*
|
||||||
* #UD should result in just EMULATION_FAILED, and trap-like
|
* #UD should result in just EMULATION_FAILED, and trap-like
|
||||||
|
@ -6591,28 +6648,32 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
|
||||||
WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR ||
|
WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR ||
|
||||||
exception_type(ctxt->exception.vector) == EXCPT_TRAP);
|
 				     exception_type(ctxt->exception.vector) == EXCPT_TRAP);
 			inject_emulated_exception(vcpu);
-			return EMULATE_DONE;
+			return 1;
 		}
-		if (emulation_type & EMULTYPE_SKIP)
-			return EMULATE_FAIL;
 		return handle_emulation_failure(vcpu, emulation_type);
 		}
 	}
 
-	if ((emulation_type & EMULTYPE_VMWARE) &&
-	    !is_vmware_backdoor_opcode(ctxt))
-		return EMULATE_FAIL;
+	if ((emulation_type & EMULTYPE_VMWARE_GP) &&
+	    !is_vmware_backdoor_opcode(ctxt)) {
+		kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+		return 1;
+	}
 
+	/*
+	 * Note, EMULTYPE_SKIP is intended for use *only* by vendor callbacks
+	 * for kvm_skip_emulated_instruction().  The caller is responsible for
+	 * updating interruptibility state and injecting single-step #DBs.
+	 */
 	if (emulation_type & EMULTYPE_SKIP) {
 		kvm_rip_write(vcpu, ctxt->_eip);
 		if (ctxt->eflags & X86_EFLAGS_RF)
 			kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
-		kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
-		return EMULATE_DONE;
+		return 1;
 	}
 
 	if (retry_instruction(ctxt, cr2, emulation_type))
-		return EMULATE_DONE;
+		return 1;
 
 	/* this is needed for vmware backdoor interface to work since it
 	   changes registers values  during IO operation */
@@ -6628,18 +6689,18 @@ restart:
 	r = x86_emulate_insn(ctxt);
 
 	if (r == EMULATION_INTERCEPTED)
-		return EMULATE_DONE;
+		return 1;
 
 	if (r == EMULATION_FAILED) {
 		if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
 					  emulation_type))
-			return EMULATE_DONE;
+			return 1;
 
 		return handle_emulation_failure(vcpu, emulation_type);
 	}
 
 	if (ctxt->have_exception) {
-		r = EMULATE_DONE;
+		r = 1;
 		if (inject_emulated_exception(vcpu))
 			return r;
 	} else if (vcpu->arch.pio.count) {
@@ -6650,16 +6711,18 @@ restart:
 			writeback = false;
 			vcpu->arch.complete_userspace_io = complete_emulated_pio;
 		}
-		r = EMULATE_USER_EXIT;
+		r = 0;
 	} else if (vcpu->mmio_needed) {
+		++vcpu->stat.mmio_exits;
+
 		if (!vcpu->mmio_is_write)
 			writeback = false;
-		r = EMULATE_USER_EXIT;
+		r = 0;
 		vcpu->arch.complete_userspace_io = complete_emulated_mmio;
 	} else if (r == EMULATION_RESTART)
 		goto restart;
 	else
-		r = EMULATE_DONE;
+		r = 1;
 
 	if (writeback) {
 		unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
@@ -6668,8 +6731,8 @@ restart:
 		if (!ctxt->have_exception ||
 		    exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
 			kvm_rip_write(vcpu, ctxt->eip);
-			if (r == EMULATE_DONE && ctxt->tf)
-				kvm_vcpu_do_singlestep(vcpu, &r);
+			if (r && ctxt->tf)
+				r = kvm_vcpu_do_singlestep(vcpu);
 			__kvm_set_rflags(vcpu, ctxt->eflags);
 		}
 
@@ -8263,12 +8326,11 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
 {
 	int r;
 
 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 	r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-	if (r != EMULATE_DONE)
-		return 0;
-	return 1;
+
+	return r;
 }
 
 static int complete_emulated_pio(struct kvm_vcpu *vcpu)
@@ -8636,14 +8698,17 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 
 	ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
 				   has_error_code, error_code);
-
-	if (ret)
-		return EMULATE_FAIL;
+	if (ret) {
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+		vcpu->run->internal.ndata = 0;
+		return 0;
+	}
 
 	kvm_rip_write(vcpu, ctxt->eip);
 	kvm_set_rflags(vcpu, ctxt->eflags);
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
-	return EMULATE_DONE;
+	return 1;
 }
 EXPORT_SYMBOL_GPL(kvm_task_switch);
 
@@ -9361,6 +9426,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
+	INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
 	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 	atomic_set(&kvm->arch.noncoherent_dma_count, 0);
 
@@ -9690,8 +9756,13 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	 * Scan sptes if dirty logging has been stopped, dropping those
 	 * which can be collapsed into a single large-page spte.  Later
 	 * page faults will create the large-page sptes.
+	 *
+	 * There is no need to do this in any of the following cases:
+	 * CREATE:      No dirty mappings will already exist.
+	 * MOVE/DELETE: The old mappings will already have been cleaned up by
+	 *		kvm_arch_flush_shadow_memslot()
 	 */
-	if ((change != KVM_MR_DELETE) &&
+	if (change == KVM_MR_FLAGS_ONLY &&
 	    (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
 	    !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
 		kvm_mmu_zap_collapsible_sptes(kvm, new);
@@ -261,7 +261,7 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
 }
 
 void kvm_set_pending_timer(struct kvm_vcpu *vcpu);
-int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
+void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
 
 void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr);
 u64 get_kvmclock_ns(struct kvm *kvm);
@@ -201,12 +201,14 @@ enum cpuhp_smt_control {
 extern enum cpuhp_smt_control cpu_smt_control;
 extern void cpu_smt_disable(bool force);
 extern void cpu_smt_check_topology(void);
+extern bool cpu_smt_possible(void);
 extern int cpuhp_smt_enable(void);
 extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval);
 #else
 # define cpu_smt_control	(CPU_SMT_NOT_IMPLEMENTED)
 static inline void cpu_smt_disable(bool force) { }
 static inline void cpu_smt_check_topology(void) { }
+static inline bool cpu_smt_possible(void) { return false; }
 static inline int cpuhp_smt_enable(void) { return 0; }
 static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; }
 #endif
@@ -999,6 +999,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_ARM_PTRAUTH_GENERIC 172
 #define KVM_CAP_PMU_EVENT_FILTER 173
 #define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174
+#define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
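Note: the new capability number can be probed from userspace like any other KVM extension, via KVM_CHECK_EXTENSION on the /dev/kvm fd. A minimal sketch, not part of this series; the fallback #define simply mirrors the value added in the hunk above, and error handling is omitted:

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	#ifndef KVM_CAP_HYPERV_DIRECT_TLBFLUSH
	#define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175	/* value from the hunk above */
	#endif

	int main(void)
	{
		/* KVM_CHECK_EXTENSION on the system fd reports capability support. */
		int kvm = open("/dev/kvm", O_RDWR);
		int ret = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_HYPERV_DIRECT_TLBFLUSH);

		printf("KVM_CAP_HYPERV_DIRECT_TLBFLUSH: %s\n",
		       ret > 0 ? "available" : "not available");
		return 0;
	}

KVM_CHECK_EXTENSION returns a positive value when an extension is implemented, so a simple "> 0" test is enough here.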
kernel/cpu.c
@@ -392,8 +392,7 @@ enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
 
 void __init cpu_smt_disable(bool force)
 {
-	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
-		cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
+	if (!cpu_smt_possible())
 		return;
 
 	if (force) {
@@ -438,6 +437,14 @@ static inline bool cpu_smt_allowed(unsigned int cpu)
 	 */
 	return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
 }
+
+/* Returns true if SMT is not supported of forcefully (irreversibly) disabled */
+bool cpu_smt_possible(void)
+{
+	return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
+		cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
+}
+EXPORT_SYMBOL_GPL(cpu_smt_possible);
 #else
 static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
 #endif
@@ -269,7 +269,7 @@ pv_wait_early(struct pv_node *prev, int loop)
 	if ((loop & PV_PREV_CHECK_MASK) != 0)
 		return false;
 
-	return READ_ONCE(prev->state) != vcpu_running || vcpu_is_preempted(prev->cpu);
+	return READ_ONCE(prev->state) != vcpu_running;
 }
 
 /*
@@ -138,7 +138,6 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
 		"do_task_dead",
 		"__module_put_and_exit",
 		"complete_and_exit",
-		"kvm_spurious_fault",
 		"__reiserfs_panic",
 		"lbug_with_loc",
 		"fortify_panic",
@@ -19,8 +19,6 @@
 #include "kvm_util.h"
 #include "processor.h"
 
-#define DEBUG printf
-
 #define VCPU_ID				1
 
 /* The memory slot index to track dirty pages */
@@ -249,14 +247,12 @@ static void vm_dirty_log_verify(unsigned long *bmap)
 }
 
 static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
-				uint64_t extra_mem_pages, void *guest_code,
-				unsigned long type)
+				uint64_t extra_mem_pages, void *guest_code)
 {
 	struct kvm_vm *vm;
 	uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
 
-	vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
-			O_RDWR, type);
+	vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
 	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
 #ifdef __x86_64__
 	vm_create_irqchip(vm);
@@ -265,67 +261,35 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
 	return vm;
 }
 
+#define DIRTY_MEM_BITS 30 /* 1G */
+#define PAGE_SHIFT_4K  12
+
 static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 		     unsigned long interval, uint64_t phys_offset)
 {
-	unsigned int guest_pa_bits, guest_page_shift;
 	pthread_t vcpu_thread;
 	struct kvm_vm *vm;
-	uint64_t max_gfn;
 	unsigned long *bmap;
-	unsigned long type = 0;
 
-	switch (mode) {
-	case VM_MODE_P52V48_4K:
-		guest_pa_bits = 52;
-		guest_page_shift = 12;
-		break;
-	case VM_MODE_P52V48_64K:
-		guest_pa_bits = 52;
-		guest_page_shift = 16;
-		break;
-	case VM_MODE_P48V48_4K:
-		guest_pa_bits = 48;
-		guest_page_shift = 12;
-		break;
-	case VM_MODE_P48V48_64K:
-		guest_pa_bits = 48;
-		guest_page_shift = 16;
-		break;
-	case VM_MODE_P40V48_4K:
-		guest_pa_bits = 40;
-		guest_page_shift = 12;
-		break;
-	case VM_MODE_P40V48_64K:
-		guest_pa_bits = 40;
-		guest_page_shift = 16;
-		break;
-	default:
-		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
-	}
-
-	DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode));
-
-#ifdef __x86_64__
 	/*
-	 * FIXME
-	 * The x86_64 kvm selftests framework currently only supports a
-	 * single PML4 which restricts the number of physical address
-	 * bits we can change to 39.
+	 * We reserve page table for 2 times of extra dirty mem which
+	 * will definitely cover the original (1G+) test range.  Here
+	 * we do the calculation with 4K page size which is the
+	 * smallest so the page number will be enough for all archs
+	 * (e.g., 64K page size guest will need even less memory for
+	 * page tables).
 	 */
-	guest_pa_bits = 39;
-#endif
-#ifdef __aarch64__
-	if (guest_pa_bits != 40)
-		type = KVM_VM_TYPE_ARM_IPA_SIZE(guest_pa_bits);
-#endif
-	max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1;
-	guest_page_size = (1ul << guest_page_shift);
+	vm = create_vm(mode, VCPU_ID,
+		       2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K),
+		       guest_code);
+
+	guest_page_size = vm_get_page_size(vm);
 	/*
 	 * A little more than 1G of guest page sized pages.  Cover the
 	 * case where the size is not aligned to 64 pages.
 	 */
-	guest_num_pages = (1ul << (30 - guest_page_shift)) + 16;
+	guest_num_pages = (1ul << (DIRTY_MEM_BITS -
+				   vm_get_page_shift(vm))) + 16;
 #ifdef __s390x__
 	/* Round up to multiple of 1M (segment size) */
 	guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL;
@@ -335,7 +299,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 		!!((guest_num_pages * guest_page_size) % host_page_size);
 
 	if (!phys_offset) {
-		guest_test_phys_mem = (max_gfn - guest_num_pages) * guest_page_size;
+		guest_test_phys_mem = (vm_get_max_gfn(vm) -
+				       guest_num_pages) * guest_page_size;
 		guest_test_phys_mem &= ~(host_page_size - 1);
 	} else {
 		guest_test_phys_mem = phys_offset;
@@ -351,8 +316,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 	bmap = bitmap_alloc(host_num_pages);
 	host_bmap_track = bitmap_alloc(host_num_pages);
 
-	vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code, type);
-
 #ifdef USE_CLEAR_DIRTY_LOG
 	struct kvm_enable_cap cap = {};
 
@@ -482,7 +445,7 @@ int main(int argc, char *argv[])
 #endif
 
 #ifdef __x86_64__
-	vm_guest_mode_params_init(VM_MODE_P52V48_4K, true, true);
+	vm_guest_mode_params_init(VM_MODE_PXXV48_4K, true, true);
 #endif
 #ifdef __aarch64__
 	vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true);
@@ -24,6 +24,12 @@ struct kvm_vm;
 typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
 typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
 
+#ifndef NDEBUG
+#define DEBUG(...) printf(__VA_ARGS__);
+#else
+#define DEBUG(...)
+#endif
+
 /* Minimum allocated guest virtual and physical addresses */
 #define KVM_UTIL_MIN_VADDR 0x2000
 
@@ -38,11 +44,14 @@ enum vm_guest_mode {
 	VM_MODE_P48V48_64K,
 	VM_MODE_P40V48_4K,
 	VM_MODE_P40V48_64K,
+	VM_MODE_PXXV48_4K,	/* For 48bits VA but ANY bits PA */
 	NUM_VM_MODES,
 };
 
-#ifdef __aarch64__
+#if defined(__aarch64__)
 #define VM_MODE_DEFAULT VM_MODE_P40V48_4K
+#elif defined(__x86_64__)
+#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
 #else
 #define VM_MODE_DEFAULT VM_MODE_P52V48_4K
 #endif
@@ -60,8 +69,7 @@ int kvm_check_cap(long cap);
 int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
 
 struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
-struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages,
-			  int perm, unsigned long type);
+struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
 void kvm_vm_free(struct kvm_vm *vmp);
 void kvm_vm_restart(struct kvm_vm *vmp, int perm);
 void kvm_vm_release(struct kvm_vm *vmp);
@@ -146,6 +154,10 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
 
 bool vm_is_unrestricted_guest(struct kvm_vm *vm);
 
+unsigned int vm_get_page_size(struct kvm_vm *vm);
+unsigned int vm_get_page_shift(struct kvm_vm *vm);
+unsigned int vm_get_max_gfn(struct kvm_vm *vm);
+
 struct kvm_userspace_memory_region *
 kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
 				 uint64_t end);
@@ -325,6 +325,9 @@ uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index);
 void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
 		  uint64_t msr_value);
 
+uint32_t kvm_get_cpuid_max(void);
+void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
+
 /*
  * Basic CPU control in CR0
  */
@@ -264,6 +264,9 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini
 	case VM_MODE_P52V48_4K:
 		TEST_ASSERT(false, "AArch64 does not support 4K sized pages "
 			    "with 52-bit physical address ranges");
+	case VM_MODE_PXXV48_4K:
+		TEST_ASSERT(false, "AArch64 does not support 4K sized pages "
+			    "with ANY-bit physical address ranges");
 	case VM_MODE_P52V48_64K:
 		tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
 		tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
@@ -8,6 +8,7 @@
 #include "test_util.h"
 #include "kvm_util.h"
 #include "kvm_util_internal.h"
+#include "processor.h"
 
 #include <assert.h>
 #include <sys/mman.h>
@@ -84,7 +85,7 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
 	return ret;
 }
 
-static void vm_open(struct kvm_vm *vm, int perm, unsigned long type)
+static void vm_open(struct kvm_vm *vm, int perm)
 {
 	vm->kvm_fd = open(KVM_DEV_PATH, perm);
 	if (vm->kvm_fd < 0)
@@ -95,18 +96,19 @@ static void vm_open(struct kvm_vm *vm, int perm, unsigned long type)
 		exit(KSFT_SKIP);
 	}
 
-	vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, type);
+	vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type);
 	TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
 		"rc: %i errno: %i", vm->fd, errno);
 }
 
 const char * const vm_guest_mode_string[] = {
 	"PA-bits:52, VA-bits:48, 4K pages",
 	"PA-bits:52, VA-bits:48, 64K pages",
 	"PA-bits:48, VA-bits:48, 4K pages",
 	"PA-bits:48, VA-bits:48, 64K pages",
 	"PA-bits:40, VA-bits:48, 4K pages",
 	"PA-bits:40, VA-bits:48, 64K pages",
+	"PA-bits:ANY, VA-bits:48, 4K pages",
 };
 _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES,
 	       "Missing new mode strings?");
@@ -130,17 +132,17 @@ _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES,
  * descriptor to control the created VM is created with the permissions
  * given by perm (e.g. O_RDWR).
  */
-struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages,
-			  int perm, unsigned long type)
+struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 {
 	struct kvm_vm *vm;
 
+	DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+
 	vm = calloc(1, sizeof(*vm));
 	TEST_ASSERT(vm != NULL, "Insufficient Memory");
 
 	vm->mode = mode;
-	vm->type = type;
-	vm_open(vm, perm, type);
+	vm->type = 0;
 
 	/* Setup mode specific traits. */
 	switch (vm->mode) {
@@ -186,10 +188,32 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages,
 		vm->page_size = 0x10000;
 		vm->page_shift = 16;
 		break;
+	case VM_MODE_PXXV48_4K:
+#ifdef __x86_64__
+		kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
+		TEST_ASSERT(vm->va_bits == 48, "Linear address width "
+			    "(%d bits) not supported", vm->va_bits);
+		vm->pgtable_levels = 4;
+		vm->page_size = 0x1000;
+		vm->page_shift = 12;
+		DEBUG("Guest physical address width detected: %d\n",
+		      vm->pa_bits);
+#else
+		TEST_ASSERT(false, "VM_MODE_PXXV48_4K not supported on "
+			    "non-x86 platforms");
+#endif
+		break;
 	default:
 		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
 	}
 
+#ifdef __aarch64__
+	if (vm->pa_bits != 40)
+		vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
+#endif
+
+	vm_open(vm, perm);
+
 	/* Limit to VA-bit canonical virtual addresses. */
 	vm->vpages_valid = sparsebit_alloc();
 	sparsebit_set_num(vm->vpages_valid,
@@ -212,7 +236,7 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages,
 
 struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 {
-	return _vm_create(mode, phy_pages, perm, 0);
+	return _vm_create(mode, phy_pages, perm);
 }
 
 /*
@@ -232,7 +256,7 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm)
 {
 	struct userspace_mem_region *region;
 
-	vm_open(vmp, perm, vmp->type);
+	vm_open(vmp, perm);
 	if (vmp->has_irqchip)
 		vm_create_irqchip(vmp);
 
@@ -1628,3 +1652,18 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm)
 
 	return val == 'Y';
 }
+
+unsigned int vm_get_page_size(struct kvm_vm *vm)
+{
+	return vm->page_size;
+}
+
+unsigned int vm_get_page_shift(struct kvm_vm *vm)
+{
+	return vm->page_shift;
+}
+
+unsigned int vm_get_max_gfn(struct kvm_vm *vm)
+{
+	return vm->max_gfn;
+}
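These accessors are what let dirty_log_test drop its per-mode switch: the test-region placement seen earlier reduces to arithmetic on the values they return. A rough sketch of that idiom, assuming the selftest library (kvm_util.h), an already-created vm handle, and a host_page_size from getpagesize(); the helper name is made up for illustration:

	/* Sketch: place a ~1G test region just below the highest guest GFN. */
	static uint64_t pick_test_phys_base(struct kvm_vm *vm, uint64_t host_page_size)
	{
		uint64_t guest_page_size = vm_get_page_size(vm);
		uint64_t guest_num_pages = (1ul << (30 - vm_get_page_shift(vm))) + 16;
		uint64_t base = (vm_get_max_gfn(vm) - guest_num_pages) * guest_page_size;

		return base & ~(host_page_size - 1);	/* align down to a host page */
	}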
@@ -228,7 +228,7 @@ void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
 
 void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
 {
-	TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
+	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 
 	/* If needed, create page map l4 table. */
@@ -261,7 +261,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	uint16_t index[4];
 	struct pageMapL4Entry *pml4e;
 
-	TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
+	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 
 	TEST_ASSERT((vaddr % vm->page_size) == 0,
@@ -547,7 +547,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
 	struct pageDirectoryEntry *pde;
 	struct pageTableEntry *pte;
 
-	TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use "
+	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 
 	index[0] = (gva >> 12) & 0x1ffu;
@@ -621,7 +621,7 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
 	kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);
 
 	switch (vm->mode) {
-	case VM_MODE_P52V48_4K:
+	case VM_MODE_PXXV48_4K:
 		sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
 		sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
 		sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
@@ -1157,3 +1157,25 @@ bool is_intel_cpu(void)
 	chunk = (const uint32_t *)("GenuineIntel");
 	return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
 }
+
+uint32_t kvm_get_cpuid_max(void)
+{
+	return kvm_get_supported_cpuid_entry(0x80000000)->eax;
+}
+
+void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
+{
+	struct kvm_cpuid_entry2 *entry;
+	bool pae;
+
+	/* SDM 4.1.4 */
+	if (kvm_get_cpuid_max() < 0x80000008) {
+		pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6);
+		*pa_bits = pae ? 36 : 32;
+		*va_bits = 32;
+	} else {
+		entry = kvm_get_supported_cpuid_entry(0x80000008);
+		*pa_bits = entry->eax & 0xff;
+		*va_bits = (entry->eax >> 8) & 0xff;
+	}
+}
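The helper above relies on the CPUID convention from SDM 4.1.4: leaf 0x80000008 reports the physical-address width in EAX[7:0] and the linear-address width in EAX[15:8], with a PAE-based fallback when that leaf is absent. The selftest queries KVM's supported CPUID rather than the host CPU directly, but the decoding is the same; a small host-side sketch using GCC's cpuid.h (illustrative only, not selftest code):

	#include <cpuid.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx, pa_bits, va_bits;

		if (__get_cpuid_max(0x80000000, NULL) < 0x80000008) {
			/* No 0x80000008 leaf: fall back on the PAE bit, as above. */
			__get_cpuid(1, &eax, &ebx, &ecx, &edx);
			pa_bits = (edx & (1u << 6)) ? 36 : 32;
			va_bits = 32;
		} else {
			__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
			pa_bits = eax & 0xff;
			va_bits = (eax >> 8) & 0xff;
		}
		printf("PA bits: %u, VA bits: %u\n", pa_bits, va_bits);
		return 0;
	}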
@@ -32,7 +32,7 @@ void ucall(uint64_t cmd, int nargs, ...)
 	va_end(va);
 
 	asm volatile("in %[port], %%al"
-		: : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax");
+		: : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax", "memory");
 }
 
 uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
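The added "memory" clobber matters because the asm only receives a pointer to the ucall structure: without it the compiler may keep the freshly written fields in registers or sink the stores past the asm, so the host reads stale data when the port access traps. The general shape of the idiom, outside the selftest (illustrative names only):

	#include <stdint.h>

	struct msg { uint64_t cmd; uint64_t args[4]; };

	/* Illustrative sketch mirroring the ucall() idiom above. */
	static inline void notify_hypervisor(struct msg *m, uint16_t port)
	{
		/*
		 * "D" (m) only passes the pointer in %rdi; the hypervisor reads
		 * the structure through that pointer when the PIO access traps.
		 * The "memory" clobber forces the compiler to complete the
		 * stores that filled *m before the instruction executes.
		 */
		asm volatile("in %[port], %%al"
			     : : [port] "d" (port), "D" (m) : "rax", "memory");
	}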
@@ -26,6 +26,25 @@ static void guest_code(void)
 {
 }
 
+static int smt_possible(void)
+{
+	char buf[16];
+	FILE *f;
+	bool res = 1;
+
+	f = fopen("/sys/devices/system/cpu/smt/control", "r");
+	if (f) {
+		if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) {
+			if (!strncmp(buf, "forceoff", 8) ||
+			    !strncmp(buf, "notsupported", 12))
+				res = 0;
+		}
+		fclose(f);
+	}
+
+	return res;
+}
+
 static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
 			  int evmcs_enabled)
 {
@@ -59,6 +78,14 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
 		TEST_ASSERT(!entry->padding[0] && !entry->padding[1] &&
 			    !entry->padding[2], "padding should be zero");
 
+		if (entry->function == 0x40000004) {
+			int nononarchcs = !!(entry->eax & (1UL << 18));
+
+			TEST_ASSERT(nononarchcs == !smt_possible(),
+				    "NoNonArchitecturalCoreSharing bit"
+				    " doesn't reflect SMT setting");
+		}
+
 		/*
 		 * If needed for debug:
 		 * fprintf(stdout,