Merge branch kvm-arm64/mmu/mte into kvmarm-master/next
KVM/arm64 support for MTE, courtesy of Steven Price. It allows the
guest to use memory tagging, and offers a new userspace API to
save/restore the tags.

* kvm-arm64/mmu/mte:
  KVM: arm64: Document MTE capability and ioctl
  KVM: arm64: Add ioctl to fetch/store tags in a guest
  KVM: arm64: Expose KVM_ARM_CAP_MTE
  KVM: arm64: Save/restore MTE registers
  KVM: arm64: Introduce MTE VM feature
  arm64: mte: Sync tags for pages where PTE is untagged

Signed-off-by: Marc Zyngier <maz@kernel.org>
commit 9f03db6673
@@ -5034,6 +5034,43 @@ see KVM_XEN_VCPU_SET_ATTR above.
 The KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST type may not be used
 with the KVM_XEN_VCPU_GET_ATTR ioctl.
 
+4.130 KVM_ARM_MTE_COPY_TAGS
+---------------------------
+
+:Capability: KVM_CAP_ARM_MTE
+:Architectures: arm64
+:Type: vm ioctl
+:Parameters: struct kvm_arm_copy_mte_tags
+:Returns: number of bytes copied, < 0 on error (-EINVAL for incorrect
+          arguments, -EFAULT if memory cannot be accessed).
+
+::
+
+  struct kvm_arm_copy_mte_tags {
+        __u64 guest_ipa;
+        __u64 length;
+        void __user *addr;
+        __u64 flags;
+        __u64 reserved[2];
+  };
+
+Copies Memory Tagging Extension (MTE) tags to/from guest tag memory. The
+``guest_ipa`` and ``length`` fields must be ``PAGE_SIZE`` aligned. The ``addr``
+field must point to a buffer which the tags will be copied to or from.
+
+``flags`` specifies the direction of copy, either ``KVM_ARM_TAGS_TO_GUEST`` or
+``KVM_ARM_TAGS_FROM_GUEST``.
+
+The size of the buffer to store the tags is ``(length / 16)`` bytes
+(granules in MTE are 16 bytes long). Each byte contains a single tag
+value. This matches the format of ``PTRACE_PEEKMTETAGS`` and
+``PTRACE_POKEMTETAGS``.
+
+If an error occurs before any data is copied then a negative error code is
+returned. If some tags have been copied before an error occurs then the number
+of bytes successfully copied is returned. If the call completes successfully
+then ``length`` is returned.
+
 5. The kvm_run structure
 ========================
 
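An illustrative, non-normative sketch of how a VMM might drive the ioctl
documented above: it assumes a ``vm_fd`` obtained earlier from
``KVM_CREATE_VM`` and uapi headers carrying the definitions this series
adds, and elides most error handling::

  #include <stdio.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  /* Save all MTE tags for a page-aligned IPA range out of the guest. */
  static int save_guest_tags(int vm_fd, __u64 ipa, __u64 len, void *buf)
  {
        /* buf must hold len / 16 bytes: one tag byte per 16-byte granule */
        struct kvm_arm_copy_mte_tags copy = {
                .guest_ipa = ipa,       /* must be PAGE_SIZE aligned */
                .length    = len,       /* must be PAGE_SIZE aligned */
                .addr      = buf,
                .flags     = KVM_ARM_TAGS_FROM_GUEST,
        };
        int ret = ioctl(vm_fd, KVM_ARM_MTE_COPY_TAGS, &copy);

        /* 0 <= ret < len signals a partial copy, per the text above */
        if (ret >= 0 && (__u64)ret != len)
                fprintf(stderr, "partial tag copy: %d bytes\n", ret);
        return ret;
  }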
@@ -6362,6 +6399,30 @@ default.
 
 See Documentation/x86/sgx/2.Kernel-internals.rst for more details.
 
+7.26 KVM_CAP_ARM_MTE
+--------------------
+
+:Architectures: arm64
+:Parameters: none
+
+This capability indicates that KVM (and the hardware) supports exposing the
+Memory Tagging Extensions (MTE) to the guest. It must also be enabled by the
+VMM before creating any VCPUs to allow the guest access. Note that MTE is only
+available to a guest running in AArch64 mode and enabling this capability will
+cause attempts to create AArch32 VCPUs to fail.
+
+When enabled the guest is able to access tags associated with any memory given
+to the guest. KVM will ensure that the tags are maintained during swap or
+hibernation of the host; however the VMM needs to manually save/restore the
+tags as appropriate if the VM is migrated.
+
+When this capability is enabled all memory in memslots must be mapped as
+not-shareable (no MAP_SHARED), attempts to create a memslot with a
+MAP_SHARED mmap will result in an -EINVAL return.
+
+When enabled the VMM may make use of the ``KVM_ARM_MTE_COPY_TAGS`` ioctl to
+perform a bulk copy of tags to/from the guest.
+
 8. Other capabilities.
 ======================
 
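A minimal sketch of the ordering requirement described in 7.26 (the
capability must be enabled before the first vCPU exists); ``vm_fd`` is
again an assumed, pre-existing VM file descriptor::

  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  static int enable_mte(int vm_fd)
  {
        struct kvm_enable_cap cap = { .cap = KVM_CAP_ARM_MTE };

        if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_MTE) <= 0)
                return -1;      /* host kernel or CPU lacks MTE */

        /* Must precede the first KVM_CREATE_VCPU, else -EINVAL */
        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
  }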
@@ -12,7 +12,8 @@
 #include <asm/types.h>
 
 /* Hyp Configuration Register (HCR) bits */
-#define HCR_ATA         (UL(1) << 56)
+#define HCR_ATA_SHIFT   56
+#define HCR_ATA         (UL(1) << HCR_ATA_SHIFT)
 #define HCR_FWB         (UL(1) << 46)
 #define HCR_API         (UL(1) << 41)
 #define HCR_APK         (UL(1) << 40)
@@ -84,6 +84,9 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
         if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
             vcpu_el1_is_32bit(vcpu))
                 vcpu->arch.hcr_el2 |= HCR_TID2;
+
+        if (kvm_has_mte(vcpu->kvm))
+                vcpu->arch.hcr_el2 |= HCR_ATA;
 }
 
 static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
@@ -133,6 +133,9 @@ struct kvm_arch {
 
         u8 pfr0_csv2;
         u8 pfr0_csv3;
+
+        /* Memory Tagging Extension enabled for the guest */
+        bool mte_enabled;
 };
 
 struct kvm_vcpu_fault_info {
@@ -207,6 +210,12 @@ enum vcpu_sysreg {
         CNTP_CVAL_EL0,
         CNTP_CTL_EL0,
 
+        /* Memory Tagging Extension registers */
+        RGSR_EL1,       /* Random Allocation Tag Seed Register */
+        GCR_EL1,        /* Tag Control Register */
+        TFSR_EL1,       /* Tag Fault Status Register (EL1) */
+        TFSRE0_EL1,     /* Tag Fault Status Register (EL0) */
+
         /* 32bit specific registers. Keep them at the end of the range */
         DACR32_EL2,     /* Domain Access Control Register */
         IFSR32_EL2,     /* Instruction Fault Status Register */
@@ -722,6 +731,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
 int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
                                struct kvm_device_attr *attr);
 
+long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+                                struct kvm_arm_copy_mte_tags *copy_tags);
+
 /* Guest/host FPSIMD coordination helpers */
 int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
@@ -770,6 +782,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
 #define kvm_arm_vcpu_sve_finalized(vcpu) \
         ((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
 
+#define kvm_has_mte(kvm) (system_supports_mte() && (kvm)->arch.mte_enabled)
 #define kvm_vcpu_has_pmu(vcpu)                          \
         (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
 
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2021 ARM Ltd.
+ */
+#ifndef __ASM_KVM_MTE_H
+#define __ASM_KVM_MTE_H
+
+#ifdef __ASSEMBLY__
+
+#include <asm/sysreg.h>
+
+#ifdef CONFIG_ARM64_MTE
+
+.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
+alternative_if_not ARM64_MTE
+        b       .L__skip_switch\@
+alternative_else_nop_endif
+        mrs     \reg1, hcr_el2
+        tbz     \reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@
+
+        mrs_s   \reg1, SYS_RGSR_EL1
+        str     \reg1, [\h_ctxt, #CPU_RGSR_EL1]
+        mrs_s   \reg1, SYS_GCR_EL1
+        str     \reg1, [\h_ctxt, #CPU_GCR_EL1]
+
+        ldr     \reg1, [\g_ctxt, #CPU_RGSR_EL1]
+        msr_s   SYS_RGSR_EL1, \reg1
+        ldr     \reg1, [\g_ctxt, #CPU_GCR_EL1]
+        msr_s   SYS_GCR_EL1, \reg1
+
+.L__skip_switch\@:
+.endm
+
+.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
+alternative_if_not ARM64_MTE
+        b       .L__skip_switch\@
+alternative_else_nop_endif
+        mrs     \reg1, hcr_el2
+        tbz     \reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@
+
+        mrs_s   \reg1, SYS_RGSR_EL1
+        str     \reg1, [\g_ctxt, #CPU_RGSR_EL1]
+        mrs_s   \reg1, SYS_GCR_EL1
+        str     \reg1, [\g_ctxt, #CPU_GCR_EL1]
+
+        ldr     \reg1, [\h_ctxt, #CPU_RGSR_EL1]
+        msr_s   SYS_RGSR_EL1, \reg1
+        ldr     \reg1, [\h_ctxt, #CPU_GCR_EL1]
+        msr_s   SYS_GCR_EL1, \reg1
+
+        isb
+
+.L__skip_switch\@:
+.endm
+
+#else /* !CONFIG_ARM64_MTE */
+
+.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
+.endm
+
+.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
+.endm
+
+#endif /* CONFIG_ARM64_MTE */
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_KVM_MTE_H */
@@ -7,6 +7,7 @@
 
 #define MTE_GRANULE_SIZE        UL(16)
 #define MTE_GRANULE_MASK        (~(MTE_GRANULE_SIZE - 1))
+#define MTE_GRANULES_PER_PAGE   (PAGE_SIZE / MTE_GRANULE_SIZE)
 #define MTE_TAG_SHIFT           56
 #define MTE_TAG_SIZE            4
 #define MTE_TAG_MASK            GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT)
@@ -37,7 +37,7 @@ void mte_free_tag_storage(char *storage);
 /* track which pages have valid allocation tags */
 #define PG_mte_tagged   PG_arch_2
 
-void mte_sync_tags(pte_t *ptep, pte_t pte);
+void mte_sync_tags(pte_t old_pte, pte_t pte);
 void mte_copy_page_tags(void *kto, const void *kfrom);
 void mte_thread_init_user(void);
 void mte_thread_switch(struct task_struct *next);
@@ -53,7 +53,7 @@ int mte_ptrace_copy_tags(struct task_struct *child, long request,
 /* unused if !CONFIG_ARM64_MTE, silence the compiler */
 #define PG_mte_tagged   0
 
-static inline void mte_sync_tags(pte_t *ptep, pte_t pte)
+static inline void mte_sync_tags(pte_t old_pte, pte_t pte)
 {
 }
 static inline void mte_copy_page_tags(void *kto, const void *kfrom)
@@ -314,9 +314,25 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
         if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
                 __sync_icache_dcache(pte);
 
-        if (system_supports_mte() &&
-            pte_present(pte) && pte_tagged(pte) && !pte_special(pte))
-                mte_sync_tags(ptep, pte);
+        /*
+         * If the PTE would provide user space access to the tags associated
+         * with it then ensure that the MTE tags are synchronised. Although
+         * pte_access_permitted() returns false for exec only mappings, they
+         * don't expose tags (instruction fetches don't check tags).
+         */
+        if (system_supports_mte() && pte_access_permitted(pte, false) &&
+            !pte_special(pte)) {
+                pte_t old_pte = READ_ONCE(*ptep);
+                /*
+                 * We only need to synchronise if the new PTE has tags enabled
+                 * or if swapping in (in which case another mapping may have
+                 * set tags in the past even if this PTE isn't tagged).
+                 * (!pte_none() && !pte_present()) is an open coded version of
+                 * is_swap_pte()
+                 */
+                if (pte_tagged(pte) || (!pte_none(old_pte) && !pte_present(old_pte)))
+                        mte_sync_tags(old_pte, pte);
+        }
 
         __check_racy_pte_update(mm, ptep, pte);
 
@@ -651,7 +651,8 @@
 
 #define INIT_SCTLR_EL2_MMU_ON                                           \
         (SCTLR_ELx_M  | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I |     \
-         SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | SCTLR_EL2_RES1)
+         SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 |             \
+         SCTLR_ELx_ITFSB | SCTLR_EL2_RES1)
 
 #define INIT_SCTLR_EL2_MMU_OFF \
         (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
@@ -184,6 +184,17 @@ struct kvm_vcpu_events {
         __u32 reserved[12];
 };
 
+struct kvm_arm_copy_mte_tags {
+        __u64 guest_ipa;
+        __u64 length;
+        void __user *addr;
+        __u64 flags;
+        __u64 reserved[2];
+};
+
+#define KVM_ARM_TAGS_TO_GUEST           0
+#define KVM_ARM_TAGS_FROM_GUEST         1
+
 /* If you need to interpret the index values, here is the key: */
 #define KVM_REG_ARM_COPROC_MASK         0x000000000FFF0000
 #define KVM_REG_ARM_COPROC_SHIFT        16
@@ -111,6 +111,8 @@ int main(void)
   DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags));
   DEFINE(VCPU_HCR_EL2,          offsetof(struct kvm_vcpu, arch.hcr_el2));
   DEFINE(CPU_USER_PT_REGS,      offsetof(struct kvm_cpu_context, regs));
+  DEFINE(CPU_RGSR_EL1,          offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1]));
+  DEFINE(CPU_GCR_EL1,           offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1]));
   DEFINE(CPU_APIAKEYLO_EL1,     offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
   DEFINE(CPU_APIBKEYLO_EL1,     offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1]));
   DEFINE(CPU_APDAKEYLO_EL1,     offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1]));
@@ -32,10 +32,9 @@ DEFINE_STATIC_KEY_FALSE(mte_async_mode);
 EXPORT_SYMBOL_GPL(mte_async_mode);
 #endif
 
-static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
+static void mte_sync_page_tags(struct page *page, pte_t old_pte,
+                               bool check_swap, bool pte_is_tagged)
 {
-        pte_t old_pte = READ_ONCE(*ptep);
-
         if (check_swap && is_swap_pte(old_pte)) {
                 swp_entry_t entry = pte_to_swp_entry(old_pte);
 
@@ -43,6 +42,9 @@ static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
                         return;
         }
 
+        if (!pte_is_tagged)
+                return;
+
         page_kasan_tag_reset(page);
         /*
          * We need smp_wmb() in between setting the flags and clearing the
@@ -55,16 +57,22 @@ static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
                 mte_clear_page_tags(page_address(page));
 }
 
-void mte_sync_tags(pte_t *ptep, pte_t pte)
+void mte_sync_tags(pte_t old_pte, pte_t pte)
 {
         struct page *page = pte_page(pte);
         long i, nr_pages = compound_nr(page);
         bool check_swap = nr_pages == 1;
+        bool pte_is_tagged = pte_tagged(pte);
+
+        /* Early out if there's nothing to do */
+        if (!check_swap && !pte_is_tagged)
+                return;
 
         /* if PG_mte_tagged is set, tags have already been initialised */
         for (i = 0; i < nr_pages; i++, page++) {
                 if (!test_and_set_bit(PG_mte_tagged, &page->flags))
-                        mte_sync_page_tags(page, ptep, check_swap);
+                        mte_sync_page_tags(page, old_pte, check_swap,
+                                           pte_is_tagged);
         }
 }
 
@@ -93,6 +93,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
                 r = 0;
                 kvm->arch.return_nisv_io_abort_to_user = true;
                 break;
+        case KVM_CAP_ARM_MTE:
+                if (!system_supports_mte() || kvm->created_vcpus)
+                        return -EINVAL;
+                r = 0;
+                kvm->arch.mte_enabled = true;
+                break;
         default:
                 r = -EINVAL;
                 break;
@@ -237,6 +243,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                  */
                 r = 1;
                 break;
+        case KVM_CAP_ARM_MTE:
+                r = system_supports_mte();
+                break;
         case KVM_CAP_STEAL_TIME:
                 r = kvm_arm_pvtime_supported();
                 break;
@@ -1354,6 +1363,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
                 return 0;
         }
+        case KVM_ARM_MTE_COPY_TAGS: {
+                struct kvm_arm_copy_mte_tags copy_tags;
+
+                if (copy_from_user(&copy_tags, argp, sizeof(copy_tags)))
+                        return -EFAULT;
+                return kvm_vm_ioctl_mte_copy_tags(kvm, &copy_tags);
+        }
         default:
                 return -EINVAL;
         }
@@ -995,3 +995,85 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
 
         return ret;
 }
+
+long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+                                struct kvm_arm_copy_mte_tags *copy_tags)
+{
+        gpa_t guest_ipa = copy_tags->guest_ipa;
+        size_t length = copy_tags->length;
+        void __user *tags = copy_tags->addr;
+        gpa_t gfn;
+        bool write = !(copy_tags->flags & KVM_ARM_TAGS_FROM_GUEST);
+        int ret = 0;
+
+        if (!kvm_has_mte(kvm))
+                return -EINVAL;
+
+        if (copy_tags->reserved[0] || copy_tags->reserved[1])
+                return -EINVAL;
+
+        if (copy_tags->flags & ~KVM_ARM_TAGS_FROM_GUEST)
+                return -EINVAL;
+
+        if (length & ~PAGE_MASK || guest_ipa & ~PAGE_MASK)
+                return -EINVAL;
+
+        gfn = gpa_to_gfn(guest_ipa);
+
+        mutex_lock(&kvm->slots_lock);
+
+        while (length > 0) {
+                kvm_pfn_t pfn = gfn_to_pfn_prot(kvm, gfn, write, NULL);
+                void *maddr;
+                unsigned long num_tags;
+                struct page *page;
+
+                if (is_error_noslot_pfn(pfn)) {
+                        ret = -EFAULT;
+                        goto out;
+                }
+
+                page = pfn_to_online_page(pfn);
+                if (!page) {
+                        /* Reject ZONE_DEVICE memory */
+                        ret = -EFAULT;
+                        goto out;
+                }
+                maddr = page_address(page);
+
+                if (!write) {
+                        if (test_bit(PG_mte_tagged, &page->flags))
+                                num_tags = mte_copy_tags_to_user(tags, maddr,
+                                                        MTE_GRANULES_PER_PAGE);
+                        else
+                                /* No tags in memory, so write zeros */
+                                num_tags = MTE_GRANULES_PER_PAGE -
+                                        clear_user(tags, MTE_GRANULES_PER_PAGE);
+                        kvm_release_pfn_clean(pfn);
+                } else {
+                        num_tags = mte_copy_tags_from_user(maddr, tags,
+                                                        MTE_GRANULES_PER_PAGE);
+                        kvm_release_pfn_dirty(pfn);
+                }
+
+                if (num_tags != MTE_GRANULES_PER_PAGE) {
+                        ret = -EFAULT;
+                        goto out;
+                }
+
+                /* Set the flag after checking the write completed fully */
+                if (write)
+                        set_bit(PG_mte_tagged, &page->flags);
+
+                gfn++;
+                tags += num_tags;
+                length -= PAGE_SIZE;
+        }
+
+out:
+        mutex_unlock(&kvm->slots_lock);
+        /* If some data has been copied report the number of bytes copied */
+        if (length != copy_tags->length)
+                return copy_tags->length - length;
+        return ret;
+}
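For completeness, the restore direction on a migration target is symmetric
to the earlier save sketch. This is again illustrative only, under the same
assumptions (pre-existing ``vm_fd``, page-aligned ``ipa``/``len``); as the
implementation above shows, a successful write also marks the pages
``PG_mte_tagged``::

  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  static int restore_guest_tags(int vm_fd, __u64 ipa, __u64 len,
                                const void *buf)
  {
        struct kvm_arm_copy_mte_tags copy = {
                .guest_ipa = ipa,
                .length    = len,
                .addr      = (void *)buf,       /* len / 16 tag bytes */
                .flags     = KVM_ARM_TAGS_TO_GUEST,
        };

        /* Returns len on success, or bytes copied before a fault */
        return ioctl(vm_fd, KVM_ARM_MTE_COPY_TAGS, &copy);
  }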
@@ -13,6 +13,7 @@
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
+#include <asm/kvm_mte.h>
 #include <asm/kvm_ptrauth.h>
 
 .text
@@ -51,6 +52,9 @@ alternative_else_nop_endif
 
         add     x29, x0, #VCPU_CONTEXT
 
+        // mte_switch_to_guest(g_ctxt, h_ctxt, tmp1)
+        mte_switch_to_guest x29, x1, x2
+
         // Macro ptrauth_switch_to_guest format:
         //      ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3)
         // The below macro to restore guest keys is not implemented in C code
@@ -142,6 +146,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
         // when this feature is enabled for kernel code.
         ptrauth_switch_to_hyp x1, x2, x3, x4, x5
 
+        // mte_switch_to_hyp(g_ctxt, h_ctxt, reg1)
+        mte_switch_to_hyp x1, x2, x3
+
         // Restore hyp's sp_el0
         restore_sp_el0 x2, x3
 
@@ -112,7 +112,8 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
         new |= (old & PSR_C_BIT);
         new |= (old & PSR_V_BIT);
 
-        // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests)
+        if (kvm_has_mte(vcpu->kvm))
+                new |= PSR_TCO_BIT;
 
         new |= (old & PSR_DIT_BIT);
 
@@ -14,6 +14,7 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
 
 static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
 {
@@ -26,6 +27,16 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
         ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0);
 }
 
+static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
+{
+        struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;
+
+        if (!vcpu)
+                vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);
+
+        return kvm_has_mte(kern_hyp_va(vcpu->kvm));
+}
+
 static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
 {
         ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1);
@@ -46,6 +57,11 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
         ctxt_sys_reg(ctxt, PAR_EL1)     = read_sysreg_par();
         ctxt_sys_reg(ctxt, TPIDR_EL1)   = read_sysreg(tpidr_el1);
 
+        if (ctxt_has_mte(ctxt)) {
+                ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR);
+                ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1);
+        }
+
         ctxt_sys_reg(ctxt, SP_EL1)      = read_sysreg(sp_el1);
         ctxt_sys_reg(ctxt, ELR_EL1)     = read_sysreg_el1(SYS_ELR);
         ctxt_sys_reg(ctxt, SPSR_EL1)    = read_sysreg_el1(SYS_SPSR);
@@ -107,6 +123,11 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
         write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1),       par_el1);
         write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1),     tpidr_el1);
 
+        if (ctxt_has_mte(ctxt)) {
+                write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR);
+                write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1);
+        }
+
         if (!has_vhe() &&
             cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) &&
             ctxt->__hyp_running_vcpu) {
@@ -853,6 +853,45 @@ static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
         return PAGE_SHIFT;
 }
 
+/*
+ * The page will be mapped in stage 2 as Normal Cacheable, so the VM will be
+ * able to see the page's tags and therefore they must be initialised first. If
+ * PG_mte_tagged is set, tags have already been initialised.
+ *
+ * The race in the test/set of the PG_mte_tagged flag is handled by:
+ * - preventing VM_SHARED mappings in a memslot with MTE preventing two VMs
+ *   racing to santise the same page
+ * - mmap_lock protects between a VM faulting a page in and the VMM performing
+ *   an mprotect() to add VM_MTE
+ */
+static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
+                             unsigned long size)
+{
+        unsigned long i, nr_pages = size >> PAGE_SHIFT;
+        struct page *page;
+
+        if (!kvm_has_mte(kvm))
+                return 0;
+
+        /*
+         * pfn_to_online_page() is used to reject ZONE_DEVICE pages
+         * that may not support tags.
+         */
+        page = pfn_to_online_page(pfn);
+
+        if (!page)
+                return -EFAULT;
+
+        for (i = 0; i < nr_pages; i++, page++) {
+                if (!test_bit(PG_mte_tagged, &page->flags)) {
+                        mte_clear_page_tags(page_address(page));
+                        set_bit(PG_mte_tagged, &page->flags);
+                }
+        }
+
+        return 0;
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                           struct kvm_memory_slot *memslot, unsigned long hva,
                           unsigned long fault_status)
@@ -861,6 +900,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
         bool write_fault, writable, force_pte = false;
         bool exec_fault;
         bool device = false;
+        bool shared;
         unsigned long mmu_seq;
         struct kvm *kvm = vcpu->kvm;
         struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
@@ -907,6 +947,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                 vma_shift = get_vma_page_shift(vma, hva);
         }
 
+        shared = (vma->vm_flags & VM_PFNMAP);
+
         switch (vma_shift) {
 #ifndef __PAGETABLE_PMD_FOLDED
         case PUD_SHIFT:
@@ -1011,6 +1053,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
         if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
                 vma_pagesize = transparent_hugepage_adjust(memslot, hva,
                                                            &pfn, &fault_ipa);
+
+        if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
+                /* Check the VMM hasn't introduced a new VM_SHARED VMA */
+                if (!shared)
+                        ret = sanitise_mte_tags(kvm, pfn, vma_pagesize);
+                else
+                        ret = -EFAULT;
+                if (ret)
+                        goto out_unlock;
+        }
+
         if (writable)
                 prot |= KVM_PGTABLE_PROT_W;
 
@@ -1206,12 +1259,17 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
         kvm_pfn_t pfn = pte_pfn(range->pte);
+        int ret;
 
         if (!kvm->arch.mmu.pgt)
                 return false;
 
         WARN_ON(range->end - range->start != 1);
 
+        ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE);
+        if (ret)
+                return false;
+
         /*
          * We've moved a page around, probably through CoW, so let's treat
          * it just like a translation fault and the map handler will clean
@@ -1414,6 +1472,14 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                 if (!vma)
                         break;
 
+                /*
+                 * VM_SHARED mappings are not allowed with MTE to avoid races
+                 * when updating the PG_mte_tagged page flag, see
+                 * sanitise_mte_tags for more details.
+                 */
+                if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED)
+                        return -EINVAL;
+
                 if (vma->vm_flags & VM_PFNMAP) {
                         /* IO region dirty page logging not allowed */
                         if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
@@ -176,6 +176,10 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
         if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
                 return false;
 
+        /* MTE is incompatible with AArch32 */
+        if (kvm_has_mte(vcpu->kvm) && is32bit)
+                return false;
+
         /* Check that the vcpus are either all 32bit or all 64bit */
         kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
                 if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit)
@@ -1047,6 +1047,13 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
                 break;
         case SYS_ID_AA64PFR1_EL1:
                 val &= ~FEATURE(ID_AA64PFR1_MTE);
+                if (kvm_has_mte(vcpu->kvm)) {
+                        u64 pfr, mte;
+
+                        pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
+                        mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT);
+                        val |= FIELD_PREP(FEATURE(ID_AA64PFR1_MTE), mte);
+                }
                 break;
         case SYS_ID_AA64ISAR1_EL1:
                 if (!vcpu_has_ptrauth(vcpu))
@@ -1302,6 +1309,23 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
         return true;
 }
 
+static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
+                                   const struct sys_reg_desc *rd)
+{
+        if (kvm_has_mte(vcpu->kvm))
+                return 0;
+
+        return REG_HIDDEN;
+}
+
+#define MTE_REG(name) {                         \
+        SYS_DESC(SYS_##name),                   \
+        .access = undef_access,                 \
+        .reset = reset_unknown,                 \
+        .reg = name,                            \
+        .visibility = mte_visibility,           \
+}
+
 /* sys_reg_desc initialiser for known cpufeature ID registers */
 #define ID_SANITISED(name) {                    \
         SYS_DESC(SYS_##name),                   \
@@ -1470,8 +1494,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
         { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
         { SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
 
-        { SYS_DESC(SYS_RGSR_EL1), undef_access },
-        { SYS_DESC(SYS_GCR_EL1), undef_access },
+        MTE_REG(RGSR_EL1),
+        MTE_REG(GCR_EL1),
 
         { SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility },
         { SYS_DESC(SYS_TRFCR_EL1), undef_access },
@@ -1498,8 +1522,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
         { SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi },
         { SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi },
 
-        { SYS_DESC(SYS_TFSR_EL1), undef_access },
-        { SYS_DESC(SYS_TFSRE0_EL1), undef_access },
+        MTE_REG(TFSR_EL1),
+        MTE_REG(TFSRE0_EL1),
 
         { SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 },
         { SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },
@@ -1083,6 +1083,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_SGX_ATTRIBUTE 196
 #define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197
 #define KVM_CAP_PTP_KVM 198
+#define KVM_CAP_ARM_MTE 199
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1428,6 +1429,7 @@ struct kvm_s390_ucas_mapping {
 /* Available with KVM_CAP_PMU_EVENT_FILTER */
 #define KVM_SET_PMU_EVENT_FILTER  _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter)
 #define KVM_PPC_SVM_OFF           _IO(KVMIO, 0xb3)
+#define KVM_ARM_MTE_COPY_TAGS     _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags)
 
 /* ioctl for vm fd */
 #define KVM_CREATE_DEVICE         _IOWR(KVMIO, 0xe0, struct kvm_create_device)