Second batch of KVM changes for the 4.11 merge window
PPC: * correct assumption about ASDR on POWER9 * fix MMIO emulation on POWER9 x86: * add a simple test for ioperm * cleanup TSS (going through KVM tree as the whole undertaking was caused by VMX's use of TSS) * fix nVMX interrupt delivery * fix some performance counters in the guest And two cleanup patches. -----BEGIN PGP SIGNATURE----- iQEcBAABCAAGBQJYuu5qAAoJEED/6hsPKofoRAUH/jkx/KFDcw3FggixysWVgRai iLSbbAZemnSLFSOkOU/t7Bz0fXCUgB0tAcMJd9ow01Dg1zObiTpuUIo6qEPaYHdX gqtUzlHuyECZEcgK0RXS9kDYLrvw7EFocxnDWQfV91qCZSS6nBSSLF3ST1rNV69W mUvcZG+MciDcZUe1lTexoswVTh1m7avvozEnQ5OHnZR9yicoXiadBQjzL6yqWoqf Ml/29zRk5+MvloTudxjkAKm3mh7psW88jNMh37TXbAA7i+Xwl9cU6GLR9mFWstoP 7Ot7ecq9mNAUO3lTIQh7lqvB60LMFznS4IlYK7MbplC3kvJLkfzhTWaN1aGvh90= =cqHo -----END PGP SIGNATURE----- Merge tag 'kvm-4.11-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull more KVM updates from Radim Krčmář: "Second batch of KVM changes for the 4.11 merge window: PPC: - correct assumption about ASDR on POWER9 - fix MMIO emulation on POWER9 x86: - add a simple test for ioperm - cleanup TSS (going through KVM tree as the whole undertaking was caused by VMX's use of TSS) - fix nVMX interrupt delivery - fix some performance counters in the guest ... and two cleanup patches" * tag 'kvm-4.11-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: nVMX: Fix pending events injection x86/kvm/vmx: remove unused variable in segment_base() selftests/x86: Add a basic selftest for ioperm x86/asm: Tidy up TSS limit code kvm: convert kvm.users_count from atomic_t to refcount_t KVM: x86: never specify a sample period for virtualized in_tx_cp counters KVM: PPC: Book3S HV: Don't use ASDR for real-mode HPT faults on POWER9 KVM: PPC: Book3S HV: Fix software walk of guest process page tables
This commit is contained in:
Коммит
2d62e0768d
|
@ -46,7 +46,7 @@ extern struct patb_entry *partition_tb;
|
|||
|
||||
/* Bits in patb0 field */
|
||||
#define PATB_HR (1UL << 63)
|
||||
#define RPDB_MASK 0x0ffffffffffff00fUL
|
||||
#define RPDB_MASK 0x0fffffffffffff00UL
|
||||
#define RPDB_SHIFT (1UL << 8)
|
||||
#define RTS1_SHIFT 61 /* top 2 bits of radix tree size */
|
||||
#define RTS1_MASK (3UL << RTS1_SHIFT)
|
||||
|
@ -57,6 +57,7 @@ extern struct patb_entry *partition_tb;
|
|||
/* Bits in patb1 field */
|
||||
#define PATB_GR (1UL << 63) /* guest uses radix; must match HR */
|
||||
#define PRTS_MASK 0x1f /* process table size field */
|
||||
#define PRTB_MASK 0x0ffffffffffff000UL
|
||||
|
||||
/*
|
||||
* Limit process table to PAGE_SIZE table. This
|
||||
|
|
|
@ -32,6 +32,7 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
|
|||
u32 pid;
|
||||
int ret, level, ps;
|
||||
__be64 prte, rpte;
|
||||
unsigned long ptbl;
|
||||
unsigned long root, pte, index;
|
||||
unsigned long rts, bits, offset;
|
||||
unsigned long gpa;
|
||||
|
@ -53,8 +54,8 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
|
|||
return -EINVAL;
|
||||
|
||||
/* Read partition table to find root of tree for effective PID */
|
||||
ret = kvm_read_guest(kvm, kvm->arch.process_table + pid * 16,
|
||||
&prte, sizeof(prte));
|
||||
ptbl = (kvm->arch.process_table & PRTB_MASK) + (pid * 16);
|
||||
ret = kvm_read_guest(kvm, ptbl, &prte, sizeof(prte));
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
|
|
@ -1787,12 +1787,12 @@ kvmppc_hdsi:
|
|||
/* HPTE not found fault or protection fault? */
|
||||
andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
|
||||
beq 1f /* if not, send it to the guest */
|
||||
andi. r0, r11, MSR_DR /* data relocation enabled? */
|
||||
beq 3f
|
||||
BEGIN_FTR_SECTION
|
||||
mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */
|
||||
b 4f
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||
andi. r0, r11, MSR_DR /* data relocation enabled? */
|
||||
beq 3f
|
||||
clrrdi r0, r4, 28
|
||||
PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
|
||||
li r0, BOOK3S_INTERRUPT_DATA_SEGMENT
|
||||
|
@ -1879,12 +1879,12 @@ kvmppc_hisi:
|
|||
bne .Lradix_hisi /* for radix, just save ASDR */
|
||||
andis. r0, r11, SRR1_ISI_NOPT@h
|
||||
beq 1f
|
||||
andi. r0, r11, MSR_IR /* instruction relocation enabled? */
|
||||
beq 3f
|
||||
BEGIN_FTR_SECTION
|
||||
mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */
|
||||
b 4f
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
||||
andi. r0, r11, MSR_IR /* instruction relocation enabled? */
|
||||
beq 3f
|
||||
clrrdi r0, r10, 28
|
||||
PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
|
||||
li r0, BOOK3S_INTERRUPT_INST_SEGMENT
|
||||
|
|
|
@ -205,6 +205,8 @@ static inline void native_load_tr_desc(void)
|
|||
asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
|
||||
}
|
||||
|
||||
DECLARE_PER_CPU(bool, __tss_limit_invalid);
|
||||
|
||||
static inline void force_reload_TR(void)
|
||||
{
|
||||
struct desc_struct *d = get_cpu_gdt_table(smp_processor_id());
|
||||
|
@ -220,18 +222,20 @@ static inline void force_reload_TR(void)
|
|||
write_gdt_entry(d, GDT_ENTRY_TSS, &tss, DESC_TSS);
|
||||
|
||||
load_TR_desc();
|
||||
this_cpu_write(__tss_limit_invalid, false);
|
||||
}
|
||||
|
||||
DECLARE_PER_CPU(bool, need_tr_refresh);
|
||||
|
||||
static inline void refresh_TR(void)
|
||||
/*
|
||||
* Call this if you need the TSS limit to be correct, which should be the case
|
||||
* if and only if you have TIF_IO_BITMAP set or you're switching to a task
|
||||
* with TIF_IO_BITMAP set.
|
||||
*/
|
||||
static inline void refresh_tss_limit(void)
|
||||
{
|
||||
DEBUG_LOCKS_WARN_ON(preemptible());
|
||||
|
||||
if (unlikely(this_cpu_read(need_tr_refresh))) {
|
||||
if (unlikely(this_cpu_read(__tss_limit_invalid)))
|
||||
force_reload_TR();
|
||||
this_cpu_write(need_tr_refresh, false);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -250,7 +254,7 @@ static inline void invalidate_tss_limit(void)
|
|||
if (unlikely(test_thread_flag(TIF_IO_BITMAP)))
|
||||
force_reload_TR();
|
||||
else
|
||||
this_cpu_write(need_tr_refresh, true);
|
||||
this_cpu_write(__tss_limit_invalid, true);
|
||||
}
|
||||
|
||||
static inline void native_load_gdt(const struct desc_ptr *dtr)
|
||||
|
|
|
@ -48,8 +48,14 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
|
|||
t->io_bitmap_ptr = bitmap;
|
||||
set_thread_flag(TIF_IO_BITMAP);
|
||||
|
||||
/*
|
||||
* Now that we have an IO bitmap, we need our TSS limit to be
|
||||
* correct. It's fine if we are preempted after doing this:
|
||||
* with TIF_IO_BITMAP set, context switches will keep our TSS
|
||||
* limit correct.
|
||||
*/
|
||||
preempt_disable();
|
||||
refresh_TR();
|
||||
refresh_tss_limit();
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
|
|
|
@ -69,8 +69,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
|
|||
};
|
||||
EXPORT_PER_CPU_SYMBOL(cpu_tss);
|
||||
|
||||
DEFINE_PER_CPU(bool, need_tr_refresh);
|
||||
EXPORT_PER_CPU_SYMBOL_GPL(need_tr_refresh);
|
||||
DEFINE_PER_CPU(bool, __tss_limit_invalid);
|
||||
EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
|
||||
|
||||
/*
|
||||
* this gets called so that we can store lazy state into memory and copy the
|
||||
|
@ -222,7 +222,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
|
|||
* Make sure that the TSS limit is correct for the CPU
|
||||
* to notice the IO bitmap.
|
||||
*/
|
||||
refresh_TR();
|
||||
refresh_tss_limit();
|
||||
} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
|
||||
/*
|
||||
* Clear any possible leftover bits:
|
||||
|
|
|
@ -113,12 +113,19 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
|
|||
.config = config,
|
||||
};
|
||||
|
||||
attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);
|
||||
|
||||
if (in_tx)
|
||||
attr.config |= HSW_IN_TX;
|
||||
if (in_tx_cp)
|
||||
if (in_tx_cp) {
|
||||
/*
|
||||
* HSW_IN_TX_CHECKPOINTED is not supported with nonzero
|
||||
* period. Just clear the sample period so at least
|
||||
* allocating the counter doesn't fail.
|
||||
*/
|
||||
attr.sample_period = 0;
|
||||
attr.config |= HSW_IN_TX_CHECKPOINTED;
|
||||
|
||||
attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);
|
||||
}
|
||||
|
||||
event = perf_event_create_kernel_counter(&attr, -1, current,
|
||||
intr ? kvm_perf_overflow_intr :
|
||||
|
|
|
@ -2053,7 +2053,6 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
|
|||
static unsigned long segment_base(u16 selector)
|
||||
{
|
||||
struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
|
||||
struct desc_struct *d;
|
||||
struct desc_struct *table;
|
||||
unsigned long v;
|
||||
|
||||
|
@ -10642,6 +10641,11 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
|
|||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
|
||||
if (vcpu->arch.exception.pending ||
|
||||
vcpu->arch.nmi_injected ||
|
||||
vcpu->arch.interrupt.pending)
|
||||
return -EBUSY;
|
||||
|
||||
if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
|
||||
vmx->nested.preemption_timer_expired) {
|
||||
if (vmx->nested.nested_run_pending)
|
||||
|
@ -10651,8 +10655,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
|
|||
}
|
||||
|
||||
if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
|
||||
if (vmx->nested.nested_run_pending ||
|
||||
vcpu->arch.interrupt.pending)
|
||||
if (vmx->nested.nested_run_pending)
|
||||
return -EBUSY;
|
||||
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
|
||||
NMI_VECTOR | INTR_TYPE_NMI_INTR |
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include <linux/context_tracking.h>
|
||||
#include <linux/irqbypass.h>
|
||||
#include <linux/swait.h>
|
||||
#include <linux/refcount.h>
|
||||
#include <asm/signal.h>
|
||||
|
||||
#include <linux/kvm.h>
|
||||
|
@ -401,7 +402,7 @@ struct kvm {
|
|||
#endif
|
||||
struct kvm_vm_stat stat;
|
||||
struct kvm_arch arch;
|
||||
atomic_t users_count;
|
||||
refcount_t users_count;
|
||||
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
|
||||
struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
|
||||
spinlock_t ring_lock;
|
||||
|
|
|
@ -5,7 +5,7 @@ include ../lib.mk
|
|||
.PHONY: all all_32 all_64 warn_32bit_failure clean
|
||||
|
||||
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
|
||||
check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test \
|
||||
check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \
|
||||
protection_keys test_vdso
|
||||
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
|
||||
test_FCMOV test_FCOMI test_FISTTP \
|
||||
|
|
|
@ -0,0 +1,170 @@
|
|||
/*
|
||||
* ioperm.c - Test case for ioperm(2)
|
||||
* Copyright (c) 2015 Andrew Lutomirski
|
||||
*/
|
||||
|
||||
#define _GNU_SOURCE
|
||||
#include <err.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <signal.h>
|
||||
#include <setjmp.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <stdbool.h>
|
||||
#include <sched.h>
|
||||
#include <sys/io.h>
|
||||
|
||||
/* Number of failed checks; main() bumps it when the forked child dies or fails. */
static int nerrs = 0;
|
||||
|
||||
/*
 * sethandler - install a three-argument (SA_SIGINFO) handler for @sig.
 *
 * @sig:     signal number to hook.
 * @handler: sa_sigaction-style callback to install.
 * @flags:   extra sigaction flags, OR-ed with SA_SIGINFO.
 *
 * The signal mask used while the handler runs is left empty.  If
 * sigaction() fails the process exits with status 1 via err().
 */
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
		       int flags)
{
	/* Zero everything first, exactly as a memset() would. */
	struct sigaction act = { .sa_flags = 0 };

	act.sa_flags = SA_SIGINFO | flags;
	act.sa_sigaction = handler;
	sigemptyset(&act.sa_mask);

	if (sigaction(sig, &act, NULL) != 0)
		err(1, "sigaction");
}
|
||||
|
||||
/*
 * clearhandler - restore the default disposition (SIG_DFL) for @sig.
 *
 * No flags are set and the handler mask is empty.  If sigaction() fails
 * the process exits with status 1 via err().
 */
static void clearhandler(int sig)
{
	/* Zero everything first, exactly as a memset() would. */
	struct sigaction dfl = { .sa_flags = 0 };

	dfl.sa_handler = SIG_DFL;
	sigemptyset(&dfl.sa_mask);

	if (sigaction(sig, &dfl, NULL) != 0)
		err(1, "sigaction");
}
|
||||
|
||||
/*
 * Jump target shared between try_outb() and the SIGSEGV handler.  It is
 * only ever used with sigsetjmp()/siglongjmp(), so it must be a
 * sigjmp_buf rather than a plain jmp_buf.
 */
static sigjmp_buf jmpbuf;

/*
 * sigsegv - SA_SIGINFO-style signal handler that unwinds to jmpbuf.
 *
 * None of the arguments are inspected; control is transferred back to
 * the matching sigsetjmp(), which then returns 1.
 */
static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
{
	(void)sig;
	(void)si;
	(void)ctx_void;

	siglongjmp(jmpbuf, 1);
}
|
||||
|
||||
static bool try_outb(unsigned short port)
|
||||
{
|
||||
sethandler(SIGSEGV, sigsegv, SA_RESETHAND);
|
||||
if (sigsetjmp(jmpbuf, 1) != 0) {
|
||||
return false;
|
||||
} else {
|
||||
asm volatile ("outb %%al, %w[port]"
|
||||
: : [port] "Nd" (port), "a" (0));
|
||||
return true;
|
||||
}
|
||||
clearhandler(SIGSEGV);
|
||||
}
|
||||
|
||||
/*
 * expect_ok - require that a write to @port is permitted.
 *
 * Prints an [OK] line when try_outb() succeeds; otherwise prints a
 * [FAIL] line and terminates the process with exit status 1.
 */
static void expect_ok(unsigned short port)
{
	bool worked = try_outb(port);

	if (worked) {
		printf("[OK]\toutb to 0x%02hx worked\n", port);
		return;
	}

	printf("[FAIL]\toutb to 0x%02hx failed\n", port);
	exit(1);
}
|
||||
|
||||
/*
 * expect_gp - require that a write to @port is refused.
 *
 * Prints an [OK] line when try_outb() reports failure; otherwise prints
 * a [FAIL] line and terminates the process with exit status 1.
 */
static void expect_gp(unsigned short port)
{
	bool worked = try_outb(port);

	if (!worked) {
		printf("[OK]\toutb to 0x%02hx failed\n", port);
		return;
	}

	printf("[FAIL]\toutb to 0x%02hx worked\n", port);
	exit(1);
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
cpu_set_t cpuset;
|
||||
CPU_ZERO(&cpuset);
|
||||
CPU_SET(0, &cpuset);
|
||||
if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
|
||||
err(1, "sched_setaffinity to CPU 0");
|
||||
|
||||
expect_gp(0x80);
|
||||
expect_gp(0xed);
|
||||
|
||||
/*
|
||||
* Probe for ioperm support. Note that clearing ioperm bits
|
||||
* works even as nonroot.
|
||||
*/
|
||||
printf("[RUN]\tenable 0x80\n");
|
||||
if (ioperm(0x80, 1, 1) != 0) {
|
||||
printf("[OK]\tioperm(0x80, 1, 1) failed (%d) -- try running as root\n",
|
||||
errno);
|
||||
return 0;
|
||||
}
|
||||
expect_ok(0x80);
|
||||
expect_gp(0xed);
|
||||
|
||||
printf("[RUN]\tdisable 0x80\n");
|
||||
if (ioperm(0x80, 1, 0) != 0) {
|
||||
printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno);
|
||||
return 1;
|
||||
}
|
||||
expect_gp(0x80);
|
||||
expect_gp(0xed);
|
||||
|
||||
/* Make sure that fork() preserves ioperm. */
|
||||
if (ioperm(0x80, 1, 1) != 0) {
|
||||
printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno);
|
||||
return 1;
|
||||
}
|
||||
|
||||
pid_t child = fork();
|
||||
if (child == -1)
|
||||
err(1, "fork");
|
||||
|
||||
if (child == 0) {
|
||||
printf("[RUN]\tchild: check that we inherited permissions\n");
|
||||
expect_ok(0x80);
|
||||
expect_gp(0xed);
|
||||
return 0;
|
||||
} else {
|
||||
int status;
|
||||
if (waitpid(child, &status, 0) != child ||
|
||||
!WIFEXITED(status)) {
|
||||
printf("[FAIL]\tChild died\n");
|
||||
nerrs++;
|
||||
} else if (WEXITSTATUS(status) != 0) {
|
||||
printf("[FAIL]\tChild failed\n");
|
||||
nerrs++;
|
||||
} else {
|
||||
printf("[OK]\tChild succeeded\n");
|
||||
}
|
||||
}
|
||||
|
||||
/* Test the capability checks. */
|
||||
|
||||
printf("\tDrop privileges\n");
|
||||
if (setresuid(1, 1, 1) != 0) {
|
||||
printf("[WARN]\tDropping privileges failed\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
printf("[RUN]\tdisable 0x80\n");
|
||||
if (ioperm(0x80, 1, 0) != 0) {
|
||||
printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno);
|
||||
return 1;
|
||||
}
|
||||
printf("[OK]\tit worked\n");
|
||||
|
||||
printf("[RUN]\tenable 0x80 again\n");
|
||||
if (ioperm(0x80, 1, 1) == 0) {
|
||||
printf("[FAIL]\tit succeeded but should have failed.\n");
|
||||
return 1;
|
||||
}
|
||||
printf("[OK]\tit failed\n");
|
||||
return 0;
|
||||
}
|
|
@ -619,7 +619,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
|
|||
mutex_init(&kvm->lock);
|
||||
mutex_init(&kvm->irq_lock);
|
||||
mutex_init(&kvm->slots_lock);
|
||||
atomic_set(&kvm->users_count, 1);
|
||||
refcount_set(&kvm->users_count, 1);
|
||||
INIT_LIST_HEAD(&kvm->devices);
|
||||
|
||||
r = kvm_arch_init_vm(kvm, type);
|
||||
|
@ -749,13 +749,13 @@ static void kvm_destroy_vm(struct kvm *kvm)
|
|||
|
||||
void kvm_get_kvm(struct kvm *kvm)
|
||||
{
|
||||
atomic_inc(&kvm->users_count);
|
||||
refcount_inc(&kvm->users_count);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_get_kvm);
|
||||
|
||||
void kvm_put_kvm(struct kvm *kvm)
|
||||
{
|
||||
if (atomic_dec_and_test(&kvm->users_count))
|
||||
if (refcount_dec_and_test(&kvm->users_count))
|
||||
kvm_destroy_vm(kvm);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_put_kvm);
|
||||
|
@ -3641,7 +3641,7 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file,
|
|||
* To avoid the race between open and the removal of the debugfs
|
||||
* directory we test against the users count.
|
||||
*/
|
||||
if (!atomic_add_unless(&stat_data->kvm->users_count, 1, 0))
|
||||
if (!refcount_inc_not_zero(&stat_data->kvm->users_count))
|
||||
return -ENOENT;
|
||||
|
||||
if (simple_attr_open(inode, file, get, set, fmt)) {
|
||||
|
|
Загрузка…
Ссылка в новой задаче