aboutsummaryrefslogtreecommitdiff
path: root/arch/arm/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/kvm')
-rw-r--r--arch/arm/kvm/arm.c31
-rw-r--r--arch/arm/kvm/guest.c1
-rw-r--r--arch/arm/kvm/interrupts.S10
-rw-r--r--arch/arm/kvm/interrupts_head.S20
-rw-r--r--arch/arm/kvm/mmu.c100
5 files changed, 144 insertions, 18 deletions
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 7f35063c8882..23eda4c12251 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -213,6 +213,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
int err;
struct kvm_vcpu *vcpu;
+ if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) {
+ err = -EBUSY;
+ goto out;
+ }
+
vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
if (!vcpu) {
err = -ENOMEM;
@@ -419,6 +424,7 @@ static void update_vttbr(struct kvm *kvm)
static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
{
+ struct kvm *kvm = vcpu->kvm;
int ret;
if (likely(vcpu->arch.has_run_once))
@@ -430,12 +436,20 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
* Initialize the VGIC before running a vcpu the first time on
* this VM.
*/
- if (unlikely(!vgic_initialized(vcpu->kvm))) {
- ret = kvm_vgic_init(vcpu->kvm);
+ if (unlikely(!vgic_initialized(kvm))) {
+ ret = kvm_vgic_init(kvm);
if (ret)
return ret;
}
+ /*
+ * Enable the arch timers only if we have an in-kernel VGIC
+ * and it has been properly initialized, since we cannot handle
+ * interrupts from the virtual timer with a userspace gic.
+ */
+ if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
+ kvm_timer_enable(kvm);
+
return 0;
}
@@ -659,10 +673,21 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
return ret;
/*
+ * Ensure a rebooted VM will fault in RAM pages and detect if the
+ * guest MMU is turned off and flush the caches as needed.
+ */
+ if (vcpu->arch.has_run_once)
+ stage2_unmap_vm(vcpu->kvm);
+
+ vcpu_reset_hcr(vcpu);
+
+ /*
* Handle the "start in power-off" case by marking the VCPU as paused.
*/
- if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
+ if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
vcpu->arch.pause = true;
+ else
+ vcpu->arch.pause = false;
return 0;
}
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index cc0b78769bd8..8c97208b9b97 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
- vcpu->arch.hcr = HCR_GUEST_MASK;
return 0;
}
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 01dcb0e752d9..d66d608f7ce7 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -159,13 +159,9 @@ __kvm_vcpu_return:
@ Don't trap coprocessor accesses for host kernel
set_hstr vmexit
set_hdcr vmexit
- set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11))
+ set_hcptr vmexit, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)), after_vfp_restore
#ifdef CONFIG_VFPv3
- @ Save floating point registers we if let guest use them.
- tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11))
- bne after_vfp_restore
-
@ Switch VFP/NEON hardware state to the host's
add r7, vcpu, #VCPU_VFP_GUEST
store_vfp_state r7
@@ -177,6 +173,8 @@ after_vfp_restore:
@ Restore FPEXC_EN which we clobbered on entry
pop {r2}
VFPFMXR FPEXC, r2
+#else
+after_vfp_restore:
#endif
@ Reset Hyp-role
@@ -472,7 +470,7 @@ switch_to_guest_vfp:
push {r3-r7}
@ NEON/VFP used. Turn on VFP access.
- set_hcptr vmexit, (HCPTR_TCP(10) | HCPTR_TCP(11))
+ set_hcptr vmtrap, (HCPTR_TCP(10) | HCPTR_TCP(11))
@ Switch VFP/NEON hardware state to the guest's
add r7, r0, #VCPU_VFP_HOST
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 98c8c5b9a87f..f8f322102989 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -592,8 +592,13 @@ ARM_BE8(rev r6, r6 )
.endm
/* Configures the HCPTR (Hyp Coprocessor Trap Register) on entry/return
- * (hardware reset value is 0). Keep previous value in r2. */
-.macro set_hcptr operation, mask
+ * (hardware reset value is 0). Keep previous value in r2.
+ * An ISB is emited on vmexit/vmtrap, but executed on vmexit only if
+ * VFP wasn't already enabled (always executed on vmtrap).
+ * If a label is specified with vmexit, it is branched to if VFP wasn't
+ * enabled.
+ */
+.macro set_hcptr operation, mask, label = none
mrc p15, 4, r2, c1, c1, 2
ldr r3, =\mask
.if \operation == vmentry
@@ -602,6 +607,17 @@ ARM_BE8(rev r6, r6 )
bic r3, r2, r3 @ Don't trap defined coproc-accesses
.endif
mcr p15, 4, r3, c1, c1, 2
+ .if \operation != vmentry
+ .if \operation == vmexit
+ tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11))
+ beq 1f
+ .endif
+ isb
+ .if \label != none
+ b \label
+ .endif
+1:
+ .endif
.endm
/* Configures the HDCR (Hyp Debug Configuration Register) on entry/return
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index eea03069161b..9bd826ce3897 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -194,10 +194,11 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
phys_addr_t addr = start, end = start + size;
phys_addr_t next;
- pgd = pgdp + pgd_index(addr);
+ pgd = pgdp + kvm_pgd_index(addr);
do {
next = kvm_pgd_addr_end(addr, end);
- unmap_puds(kvm, pgd, addr, next);
+ if (!pgd_none(*pgd))
+ unmap_puds(kvm, pgd, addr, next);
} while (pgd++, addr = next, addr != end);
}
@@ -263,7 +264,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
phys_addr_t next;
pgd_t *pgd;
- pgd = kvm->arch.pgd + pgd_index(addr);
+ pgd = kvm->arch.pgd + kvm_pgd_index(addr);
do {
next = kvm_pgd_addr_end(addr, end);
stage2_flush_puds(kvm, pgd, addr, next);
@@ -555,6 +556,71 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
unmap_range(kvm, kvm->arch.pgd, start, size);
}
+static void stage2_unmap_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *memslot)
+{
+ hva_t hva = memslot->userspace_addr;
+ phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
+ phys_addr_t size = PAGE_SIZE * memslot->npages;
+ hva_t reg_end = hva + size;
+
+ /*
+ * A memory region could potentially cover multiple VMAs, and any holes
+ * between them, so iterate over all of them to find out if we should
+ * unmap any of them.
+ *
+ * +--------------------------------------------+
+ * +---------------+----------------+ +----------------+
+ * | : VMA 1 | VMA 2 | | VMA 3 : |
+ * +---------------+----------------+ +----------------+
+ * | memory region |
+ * +--------------------------------------------+
+ */
+ do {
+ struct vm_area_struct *vma = find_vma(current->mm, hva);
+ hva_t vm_start, vm_end;
+
+ if (!vma || vma->vm_start >= reg_end)
+ break;
+
+ /*
+ * Take the intersection of this VMA with the memory region
+ */
+ vm_start = max(hva, vma->vm_start);
+ vm_end = min(reg_end, vma->vm_end);
+
+ if (!(vma->vm_flags & VM_PFNMAP)) {
+ gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
+ unmap_stage2_range(kvm, gpa, vm_end - vm_start);
+ }
+ hva = vm_end;
+ } while (hva < reg_end);
+}
+
+/**
+ * stage2_unmap_vm - Unmap Stage-2 RAM mappings
+ * @kvm: The struct kvm pointer
+ *
+ * Go through the memregions and unmap any reguler RAM
+ * backing memory already mapped to the VM.
+ */
+void stage2_unmap_vm(struct kvm *kvm)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ int idx;
+
+ idx = srcu_read_lock(&kvm->srcu);
+ spin_lock(&kvm->mmu_lock);
+
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(memslot, slots)
+ stage2_unmap_memslot(kvm, memslot);
+
+ spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
+}
+
/**
* kvm_free_stage2_pgd - free all stage-2 tables
* @kvm: The KVM struct pointer for the VM.
@@ -583,7 +649,7 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
pud_t *pud;
pmd_t *pmd;
- pgd = kvm->arch.pgd + pgd_index(addr);
+ pgd = kvm->arch.pgd + kvm_pgd_index(addr);
pud = pud_offset(pgd, addr);
if (pud_none(*pud)) {
if (!cache)
@@ -754,6 +820,11 @@ static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
return kvm_vcpu_dabt_iswrite(vcpu);
}
+static bool kvm_is_device_pfn(unsigned long pfn)
+{
+ return !pfn_valid(pfn);
+}
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_memory_slot *memslot, unsigned long hva,
unsigned long fault_status)
@@ -777,6 +848,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
/* Let's check if we will get back a huge page backed by hugetlbfs */
down_read(&current->mm->mmap_sem);
vma = find_vma_intersection(current->mm, hva, hva + 1);
+ if (unlikely(!vma)) {
+ kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
+ up_read(&current->mm->mmap_sem);
+ return -EFAULT;
+ }
+
if (is_vm_hugetlb_page(vma)) {
hugetlb = true;
gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
@@ -817,7 +894,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (is_error_pfn(pfn))
return -EFAULT;
- if (kvm_is_mmio_pfn(pfn))
+ if (kvm_is_device_pfn(pfn))
mem_type = PAGE_S2_DEVICE;
spin_lock(&kvm->mmu_lock);
@@ -843,7 +920,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
- mem_type == PAGE_S2_DEVICE);
+ pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
}
@@ -916,6 +993,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
goto out_unlock;
}
+ /* Userspace should not be able to register out-of-bounds IPAs */
+ VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
+
ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
if (ret == 0)
ret = 1;
@@ -1140,6 +1220,14 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
{
+ /*
+ * Prevent userspace from creating a memory region outside of the IPA
+ * space addressable by the KVM guest IPA space.
+ */
+ if (memslot->base_gfn + memslot->npages >=
+ (KVM_PHYS_SIZE >> PAGE_SHIFT))
+ return -EFAULT;
+
return 0;
}