KVM: MMU: fast invalidate all mmio sptes
This patch tries to introduce a very simple and scalable way to invalidate all mmio sptes - it need not walk any shadow pages or hold the mmu-lock. KVM maintains a global mmio valid generation-number, which is stored in kvm->memslots.generation, and every mmio spte stores the current global generation-number in its available bits when it is created. When KVM needs to zap all mmio sptes, it simply increases the global generation-number. When a guest does an mmio access, KVM intercepts an MMIO #PF, then walks the shadow page table and gets the mmio spte. If the generation-number on the spte does not equal the global generation-number, it goes to the normal #PF handler to update the mmio spte. Since 19 bits are used to store the generation-number on an mmio spte, we zap all mmio sptes when the number wraps around. Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com> Reviewed-by: Gleb Natapov <gleb@redhat.com> Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Родитель
b37fbea6ce
Коммит
f8f559422b
|
@ -773,7 +773,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
|
||||||
struct kvm_memory_slot *slot,
|
struct kvm_memory_slot *slot,
|
||||||
gfn_t gfn_offset, unsigned long mask);
|
gfn_t gfn_offset, unsigned long mask);
|
||||||
void kvm_mmu_zap_all(struct kvm *kvm);
|
void kvm_mmu_zap_all(struct kvm *kvm);
|
||||||
void kvm_mmu_zap_mmio_sptes(struct kvm *kvm);
|
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm);
|
||||||
unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
|
unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
|
||||||
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
|
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
|
||||||
|
|
||||||
|
|
|
@ -205,9 +205,11 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
|
||||||
#define MMIO_SPTE_GEN_LOW_SHIFT 3
|
#define MMIO_SPTE_GEN_LOW_SHIFT 3
|
||||||
#define MMIO_SPTE_GEN_HIGH_SHIFT 52
|
#define MMIO_SPTE_GEN_HIGH_SHIFT 52
|
||||||
|
|
||||||
|
#define MMIO_GEN_SHIFT 19
|
||||||
#define MMIO_GEN_LOW_SHIFT 9
|
#define MMIO_GEN_LOW_SHIFT 9
|
||||||
#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 1)
|
#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 1)
|
||||||
#define MMIO_MAX_GEN ((1 << 19) - 1)
|
#define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1)
|
||||||
|
#define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1)
|
||||||
|
|
||||||
static u64 generation_mmio_spte_mask(unsigned int gen)
|
static u64 generation_mmio_spte_mask(unsigned int gen)
|
||||||
{
|
{
|
||||||
|
@ -231,17 +233,23 @@ static unsigned int get_mmio_spte_generation(u64 spte)
|
||||||
return gen;
|
return gen;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
|
||||||
|
{
|
||||||
|
return kvm_memslots(kvm)->generation & MMIO_GEN_MASK;
|
||||||
|
}
|
||||||
|
|
||||||
static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
|
static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
|
||||||
unsigned access)
|
unsigned access)
|
||||||
{
|
{
|
||||||
struct kvm_mmu_page *sp = page_header(__pa(sptep));
|
struct kvm_mmu_page *sp = page_header(__pa(sptep));
|
||||||
u64 mask = generation_mmio_spte_mask(0);
|
unsigned int gen = kvm_current_mmio_generation(kvm);
|
||||||
|
u64 mask = generation_mmio_spte_mask(gen);
|
||||||
|
|
||||||
access &= ACC_WRITE_MASK | ACC_USER_MASK;
|
access &= ACC_WRITE_MASK | ACC_USER_MASK;
|
||||||
mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT;
|
mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT;
|
||||||
sp->mmio_cached = true;
|
sp->mmio_cached = true;
|
||||||
|
|
||||||
trace_mark_mmio_spte(sptep, gfn, access, 0);
|
trace_mark_mmio_spte(sptep, gfn, access, gen);
|
||||||
mmu_spte_set(sptep, mask);
|
mmu_spte_set(sptep, mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -273,6 +281,12 @@ static bool set_mmio_spte(struct kvm *kvm, u64 *sptep, gfn_t gfn,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool check_mmio_spte(struct kvm *kvm, u64 spte)
|
||||||
|
{
|
||||||
|
return likely(get_mmio_spte_generation(spte) ==
|
||||||
|
kvm_current_mmio_generation(kvm));
|
||||||
|
}
|
||||||
|
|
||||||
static inline u64 rsvd_bits(int s, int e)
|
static inline u64 rsvd_bits(int s, int e)
|
||||||
{
|
{
|
||||||
return ((1ULL << (e - s + 1)) - 1) << s;
|
return ((1ULL << (e - s + 1)) - 1) << s;
|
||||||
|
@ -3237,6 +3251,9 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
|
||||||
gfn_t gfn = get_mmio_spte_gfn(spte);
|
gfn_t gfn = get_mmio_spte_gfn(spte);
|
||||||
unsigned access = get_mmio_spte_access(spte);
|
unsigned access = get_mmio_spte_access(spte);
|
||||||
|
|
||||||
|
if (!check_mmio_spte(vcpu->kvm, spte))
|
||||||
|
return RET_MMIO_PF_INVALID;
|
||||||
|
|
||||||
if (direct)
|
if (direct)
|
||||||
addr = 0;
|
addr = 0;
|
||||||
|
|
||||||
|
@ -3278,8 +3295,12 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
|
||||||
|
|
||||||
pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
|
pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
|
||||||
|
|
||||||
if (unlikely(error_code & PFERR_RSVD_MASK))
|
if (unlikely(error_code & PFERR_RSVD_MASK)) {
|
||||||
return handle_mmio_page_fault(vcpu, gva, error_code, true);
|
r = handle_mmio_page_fault(vcpu, gva, error_code, true);
|
||||||
|
|
||||||
|
if (likely(r != RET_MMIO_PF_INVALID))
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
r = mmu_topup_memory_caches(vcpu);
|
r = mmu_topup_memory_caches(vcpu);
|
||||||
if (r)
|
if (r)
|
||||||
|
@ -3355,8 +3376,12 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
|
||||||
ASSERT(vcpu);
|
ASSERT(vcpu);
|
||||||
ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
|
ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
|
||||||
|
|
||||||
if (unlikely(error_code & PFERR_RSVD_MASK))
|
if (unlikely(error_code & PFERR_RSVD_MASK)) {
|
||||||
return handle_mmio_page_fault(vcpu, gpa, error_code, true);
|
r = handle_mmio_page_fault(vcpu, gpa, error_code, true);
|
||||||
|
|
||||||
|
if (likely(r != RET_MMIO_PF_INVALID))
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
r = mmu_topup_memory_caches(vcpu);
|
r = mmu_topup_memory_caches(vcpu);
|
||||||
if (r)
|
if (r)
|
||||||
|
@ -4329,7 +4354,7 @@ void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm)
|
||||||
spin_unlock(&kvm->mmu_lock);
|
spin_unlock(&kvm->mmu_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
void kvm_mmu_zap_mmio_sptes(struct kvm *kvm)
|
static void kvm_mmu_zap_mmio_sptes(struct kvm *kvm)
|
||||||
{
|
{
|
||||||
struct kvm_mmu_page *sp, *node;
|
struct kvm_mmu_page *sp, *node;
|
||||||
LIST_HEAD(invalid_list);
|
LIST_HEAD(invalid_list);
|
||||||
|
@ -4352,6 +4377,19 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
|
||||||
return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
|
return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* The very rare case: if the generation-number is round,
|
||||||
|
* zap all shadow pages.
|
||||||
|
*
|
||||||
|
* The max value is MMIO_MAX_GEN - 1 since it is not called
|
||||||
|
* when mark memslot invalid.
|
||||||
|
*/
|
||||||
|
if (unlikely(kvm_current_mmio_generation(kvm) >= (MMIO_MAX_GEN - 1)))
|
||||||
|
kvm_mmu_zap_mmio_sptes(kvm);
|
||||||
|
}
|
||||||
|
|
||||||
static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
|
static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
|
||||||
{
|
{
|
||||||
struct kvm *kvm;
|
struct kvm *kvm;
|
||||||
|
|
|
@ -56,12 +56,15 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
|
||||||
/*
|
/*
|
||||||
* Return values of handle_mmio_page_fault_common:
|
* Return values of handle_mmio_page_fault_common:
|
||||||
* RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
|
* RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
|
||||||
* directly.
|
* directly.
|
||||||
|
* RET_MMIO_PF_INVALID: invalid spte is detected then let the real page
|
||||||
|
* fault path update the mmio spte.
|
||||||
* RET_MMIO_PF_RETRY: let CPU fault again on the address.
|
* RET_MMIO_PF_RETRY: let CPU fault again on the address.
|
||||||
* RET_MMIO_PF_BUG: bug is detected.
|
* RET_MMIO_PF_BUG: bug is detected.
|
||||||
*/
|
*/
|
||||||
enum {
|
enum {
|
||||||
RET_MMIO_PF_EMULATE = 1,
|
RET_MMIO_PF_EMULATE = 1,
|
||||||
|
RET_MMIO_PF_INVALID = 2,
|
||||||
RET_MMIO_PF_RETRY = 0,
|
RET_MMIO_PF_RETRY = 0,
|
||||||
RET_MMIO_PF_BUG = -1
|
RET_MMIO_PF_BUG = -1
|
||||||
};
|
};
|
||||||
|
|
|
@ -552,9 +552,12 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
|
||||||
|
|
||||||
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
|
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
|
||||||
|
|
||||||
if (unlikely(error_code & PFERR_RSVD_MASK))
|
if (unlikely(error_code & PFERR_RSVD_MASK)) {
|
||||||
return handle_mmio_page_fault(vcpu, addr, error_code,
|
r = handle_mmio_page_fault(vcpu, addr, error_code,
|
||||||
mmu_is_nested(vcpu));
|
mmu_is_nested(vcpu));
|
||||||
|
if (likely(r != RET_MMIO_PF_INVALID))
|
||||||
|
return r;
|
||||||
|
};
|
||||||
|
|
||||||
r = mmu_topup_memory_caches(vcpu);
|
r = mmu_topup_memory_caches(vcpu);
|
||||||
if (r)
|
if (r)
|
||||||
|
|
|
@ -5369,6 +5369,10 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
|
||||||
if (likely(ret == RET_MMIO_PF_EMULATE))
|
if (likely(ret == RET_MMIO_PF_EMULATE))
|
||||||
return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) ==
|
return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) ==
|
||||||
EMULATE_DONE;
|
EMULATE_DONE;
|
||||||
|
|
||||||
|
if (unlikely(ret == RET_MMIO_PF_INVALID))
|
||||||
|
return kvm_mmu_page_fault(vcpu, gpa, 0, NULL, 0);
|
||||||
|
|
||||||
if (unlikely(ret == RET_MMIO_PF_RETRY))
|
if (unlikely(ret == RET_MMIO_PF_RETRY))
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
|
|
|
@ -7084,8 +7084,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
|
||||||
* If memory slot is created, or moved, we need to clear all
|
* If memory slot is created, or moved, we need to clear all
|
||||||
* mmio sptes.
|
* mmio sptes.
|
||||||
*/
|
*/
|
||||||
if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE))
|
kvm_mmu_invalidate_mmio_sptes(kvm);
|
||||||
kvm_mmu_zap_mmio_sptes(kvm);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void kvm_arch_flush_shadow_all(struct kvm *kvm)
|
void kvm_arch_flush_shadow_all(struct kvm *kvm)
|
||||||
|
|
Загрузка…
Ссылка в новой задаче