KVM: x86: Report host tsc and realtime values in KVM_GET_CLOCK
Handling the migration of TSCs correctly is difficult, in part because Linux does not provide userspace with the ability to retrieve a (TSC, realtime) clock pair for a single instant in time. In lieu of a more convenient facility, KVM can report similar information in the kvm_clock structure.

Provide userspace with a host TSC & realtime pair iff the realtime clock is based on the TSC. If userspace provides KVM_SET_CLOCK with a valid realtime value, advance the KVM clock by the amount of elapsed time. Do not step the KVM clock backwards, though, as it is a monotonic oscillator.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Oliver Upton <oupton@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210916181538.968978-5-oupton@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Родитель
3d5e7a28b1
Коммит
c68dc1b577
|
@@ -1010,20 +1010,37 @@ such as migration.
|
|||
When KVM_CAP_ADJUST_CLOCK is passed to KVM_CHECK_EXTENSION, it returns the
|
||||
set of bits that KVM can return in struct kvm_clock_data's flag member.
|
||||
|
||||
The only flag defined now is KVM_CLOCK_TSC_STABLE. If set, the returned
|
||||
value is the exact kvmclock value seen by all VCPUs at the instant
|
||||
when KVM_GET_CLOCK was called. If clear, the returned value is simply
|
||||
CLOCK_MONOTONIC plus a constant offset; the offset can be modified
|
||||
with KVM_SET_CLOCK. KVM will try to make all VCPUs follow this clock,
|
||||
but the exact value read by each VCPU could differ, because the host
|
||||
TSC is not stable.
|
||||
The following flags are defined:
|
||||
|
||||
KVM_CLOCK_TSC_STABLE
|
||||
If set, the returned value is the exact kvmclock
|
||||
value seen by all VCPUs at the instant when KVM_GET_CLOCK was called.
|
||||
If clear, the returned value is simply CLOCK_MONOTONIC plus a constant
|
||||
offset; the offset can be modified with KVM_SET_CLOCK. KVM will try
|
||||
to make all VCPUs follow this clock, but the exact value read by each
|
||||
VCPU could differ, because the host TSC is not stable.
|
||||
|
||||
KVM_CLOCK_REALTIME
|
||||
If set, the `realtime` field in the kvm_clock_data
|
||||
structure is populated with the value of the host's real time
|
||||
clocksource at the instant when KVM_GET_CLOCK was called. If clear,
|
||||
the `realtime` field does not contain a value.
|
||||
|
||||
KVM_CLOCK_HOST_TSC
|
||||
If set, the `host_tsc` field in the kvm_clock_data
|
||||
structure is populated with the value of the host's timestamp counter (TSC)
|
||||
at the instant when KVM_GET_CLOCK was called. If clear, the `host_tsc` field
|
||||
does not contain a value.
|
||||
|
||||
::
|
||||
|
||||
struct kvm_clock_data {
|
||||
__u64 clock; /* kvmclock current value */
|
||||
__u32 flags;
|
||||
__u32 pad[9];
|
||||
__u32 pad0;
|
||||
__u64 realtime;
|
||||
__u64 host_tsc;
|
||||
__u32 pad[4];
|
||||
};
|
||||
|
||||
|
||||
|
@@ -1040,12 +1057,25 @@ Sets the current timestamp of kvmclock to the value specified in its parameter.
|
|||
In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity in scenarios
|
||||
such as migration.
|
||||
|
||||
The following flags can be passed:
|
||||
|
||||
KVM_CLOCK_REALTIME
|
||||
If set, KVM will compare the value of the `realtime` field
|
||||
with the value of the host's real time clocksource at the instant when
|
||||
KVM_SET_CLOCK was called. The difference in elapsed time is added to the final
|
||||
kvmclock value that will be provided to guests.
|
||||
|
||||
Other flags returned by ``KVM_GET_CLOCK`` are accepted but ignored.
|
||||
|
||||
::
|
||||
|
||||
struct kvm_clock_data {
|
||||
__u64 clock; /* kvmclock current value */
|
||||
__u32 flags;
|
||||
__u32 pad[9];
|
||||
__u32 pad0;
|
||||
__u64 realtime;
|
||||
__u64 host_tsc;
|
||||
__u32 pad[4];
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@@ -1942,4 +1942,7 @@ int kvm_cpu_dirty_log_size(void);
|
|||
|
||||
int alloc_all_memslots_rmaps(struct kvm *kvm);
|
||||
|
||||
#define KVM_CLOCK_VALID_FLAGS \
|
||||
(KVM_CLOCK_TSC_STABLE | KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC)
|
||||
|
||||
#endif /* _ASM_X86_KVM_HOST_H */
|
||||
|
|
|
@@ -2787,6 +2787,7 @@ static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
|
|||
struct pvclock_vcpu_time_info hv_clock;
|
||||
unsigned long flags;
|
||||
|
||||
data->flags = 0;
|
||||
spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
|
||||
if (!ka->use_master_clock) {
|
||||
spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
|
||||
|
@@ -2803,10 +2804,20 @@ static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
|
|||
get_cpu();
|
||||
|
||||
if (__this_cpu_read(cpu_tsc_khz)) {
|
||||
#ifdef CONFIG_X86_64
|
||||
struct timespec64 ts;
|
||||
|
||||
if (kvm_get_walltime_and_clockread(&ts, &data->host_tsc)) {
|
||||
data->realtime = ts.tv_nsec + NSEC_PER_SEC * ts.tv_sec;
|
||||
data->flags |= KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC;
|
||||
} else
|
||||
#endif
|
||||
data->host_tsc = rdtsc();
|
||||
|
||||
kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
|
||||
&hv_clock.tsc_shift,
|
||||
&hv_clock.tsc_to_system_mul);
|
||||
data->clock = __pvclock_read_cycles(&hv_clock, rdtsc());
|
||||
data->clock = __pvclock_read_cycles(&hv_clock, data->host_tsc);
|
||||
} else {
|
||||
data->clock = get_kvmclock_base_ns() + ka->kvmclock_offset;
|
||||
}
|
||||
|
@@ -2818,12 +2829,6 @@ u64 get_kvmclock_ns(struct kvm *kvm)
|
|||
{
|
||||
struct kvm_clock_data data;
|
||||
|
||||
/*
|
||||
* Zero flags as it's accessed RMW, leave everything else uninitialized
|
||||
* as clock is always written and no other fields are consumed.
|
||||
*/
|
||||
data.flags = 0;
|
||||
|
||||
get_kvmclock(kvm, &data);
|
||||
return data.clock;
|
||||
}
|
||||
|
@@ -4050,7 +4055,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
|||
r = KVM_SYNC_X86_VALID_FIELDS;
|
||||
break;
|
||||
case KVM_CAP_ADJUST_CLOCK:
|
||||
r = KVM_CLOCK_TSC_STABLE;
|
||||
r = KVM_CLOCK_VALID_FLAGS;
|
||||
break;
|
||||
case KVM_CAP_X86_DISABLE_EXITS:
|
||||
r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE |
|
||||
|
@@ -5847,12 +5852,16 @@ static int kvm_vm_ioctl_set_clock(struct kvm *kvm, void __user *argp)
|
|||
{
|
||||
struct kvm_arch *ka = &kvm->arch;
|
||||
struct kvm_clock_data data;
|
||||
u64 now_ns;
|
||||
u64 now_raw_ns;
|
||||
|
||||
if (copy_from_user(&data, argp, sizeof(data)))
|
||||
return -EFAULT;
|
||||
|
||||
if (data.flags)
|
||||
/*
|
||||
* Only KVM_CLOCK_REALTIME is used, but allow passing the
|
||||
* result of KVM_GET_CLOCK back to KVM_SET_CLOCK.
|
||||
*/
|
||||
if (data.flags & ~KVM_CLOCK_VALID_FLAGS)
|
||||
return -EINVAL;
|
||||
|
||||
kvm_hv_invalidate_tsc_page(kvm);
|
||||
|
@@ -5866,11 +5875,21 @@ static int kvm_vm_ioctl_set_clock(struct kvm *kvm, void __user *argp)
|
|||
* is slightly ahead) here we risk going negative on unsigned
|
||||
* 'system_time' when 'data.clock' is very small.
|
||||
*/
|
||||
if (kvm->arch.use_master_clock)
|
||||
now_ns = ka->master_kernel_ns;
|
||||
if (data.flags & KVM_CLOCK_REALTIME) {
|
||||
u64 now_real_ns = ktime_get_real_ns();
|
||||
|
||||
/*
|
||||
* Avoid stepping the kvmclock backwards.
|
||||
*/
|
||||
if (now_real_ns > data.realtime)
|
||||
data.clock += now_real_ns - data.realtime;
|
||||
}
|
||||
|
||||
if (ka->use_master_clock)
|
||||
now_raw_ns = ka->master_kernel_ns;
|
||||
else
|
||||
now_ns = get_kvmclock_base_ns();
|
||||
ka->kvmclock_offset = data.clock - now_ns;
|
||||
now_raw_ns = get_kvmclock_base_ns();
|
||||
ka->kvmclock_offset = data.clock - now_raw_ns;
|
||||
kvm_end_pvclock_update(kvm);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@@ -1231,11 +1231,16 @@ struct kvm_irqfd {
|
|||
|
||||
/* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */
|
||||
#define KVM_CLOCK_TSC_STABLE 2
|
||||
#define KVM_CLOCK_REALTIME (1 << 2)
|
||||
#define KVM_CLOCK_HOST_TSC (1 << 3)
|
||||
|
||||
struct kvm_clock_data {
|
||||
__u64 clock;
|
||||
__u32 flags;
|
||||
__u32 pad[9];
|
||||
__u32 pad0;
|
||||
__u64 realtime;
|
||||
__u64 host_tsc;
|
||||
__u32 pad[4];
|
||||
};
|
||||
|
||||
/* For KVM_CAP_SW_TLB */
|
||||
|
|
Загрузка…
Ссылка в новой задаче