aboutsummaryrefslogtreecommitdiff
path: root/arch/arm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm')
-rw-r--r--arch/arm/boot/dts/dove.dtsi4
-rw-r--r--arch/arm/include/asm/elf.h2
-rw-r--r--arch/arm/include/asm/kvm_emulate.h5
-rw-r--r--arch/arm/include/asm/kvm_mmu.h87
-rw-r--r--arch/arm/include/uapi/asm/kvm.h8
-rw-r--r--arch/arm/kernel/hibernate.c5
-rw-r--r--arch/arm/kernel/process.c10
-rw-r--r--arch/arm/kernel/reboot.h6
-rw-r--r--arch/arm/kvm/arm.c38
-rw-r--r--arch/arm/kvm/guest.c1
-rw-r--r--arch/arm/kvm/mmu.c251
-rw-r--r--arch/arm/kvm/psci.c18
-rw-r--r--arch/arm/mach-mvebu/pmsu.c16
-rw-r--r--arch/arm/mach-s3c64xx/crag6410.h1
-rw-r--r--arch/arm/mach-s3c64xx/mach-crag6410.c1
-rw-r--r--arch/arm/mm/hugetlbpage.c6
16 files changed, 379 insertions, 80 deletions
diff --git a/arch/arm/boot/dts/dove.dtsi b/arch/arm/boot/dts/dove.dtsi
index a5441d5482a6..3cc8b8320345 100644
--- a/arch/arm/boot/dts/dove.dtsi
+++ b/arch/arm/boot/dts/dove.dtsi
@@ -154,7 +154,7 @@
uart2: serial@12200 {
compatible = "ns16550a";
- reg = <0x12000 0x100>;
+ reg = <0x12200 0x100>;
reg-shift = <2>;
interrupts = <9>;
clocks = <&core_clk 0>;
@@ -163,7 +163,7 @@
uart3: serial@12300 {
compatible = "ns16550a";
- reg = <0x12100 0x100>;
+ reg = <0x12300 0x100>;
reg-shift = <2>;
interrupts = <10>;
clocks = <&core_clk 0>;
diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h
index afb9cafd3786..674d03f4ba15 100644
--- a/arch/arm/include/asm/elf.h
+++ b/arch/arm/include/asm/elf.h
@@ -115,7 +115,7 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs);
the loader. We need to make sure that it is out of the way of the program
that it will "exec", and that there is sufficient room for the brk. */
-#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3)
+#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2)
/* When the program starts, a1 contains a pointer to a function to be
registered with atexit, as per the SVR4 ABI. A value of 0 means we
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index b9db269c6e61..66ce17655bb9 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -33,6 +33,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
+static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.hcr = HCR_GUEST_MASK;
+}
+
static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
{
return 1;
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index acb0d5712716..16d9d788d0b8 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -44,6 +44,7 @@
#ifndef __ASSEMBLY__
+#include <linux/highmem.h>
#include <asm/cacheflush.h>
#include <asm/pgalloc.h>
@@ -52,6 +53,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
void free_boot_hyp_pgd(void);
void free_hyp_pgds(void);
+void stage2_unmap_vm(struct kvm *kvm);
int kvm_alloc_stage2_pgd(struct kvm *kvm);
void kvm_free_stage2_pgd(struct kvm *kvm);
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
@@ -126,29 +128,28 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
(__boundary - 1 < (end) - 1)? __boundary: (end); \
})
+#define kvm_pgd_index(addr) pgd_index(addr)
+
static inline bool kvm_page_empty(void *ptr)
{
struct page *ptr_page = virt_to_page(ptr);
return page_count(ptr_page) == 1;
}
-
#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
#define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
#define kvm_pud_table_empty(kvm, pudp) (0)
#define KVM_PREALLOC_LEVEL 0
-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
+static inline void *kvm_get_hwpgd(struct kvm *kvm)
{
- return 0;
+ return kvm->arch.pgd;
}
-static inline void kvm_free_hwpgd(struct kvm *kvm) { }
-
-static inline void *kvm_get_hwpgd(struct kvm *kvm)
+static inline unsigned int kvm_get_hwpgd_size(void)
{
- return kvm->arch.pgd;
+ return PTRS_PER_S2_PGD * sizeof(pgd_t);
}
struct kvm;
@@ -160,12 +161,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101;
}
-static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
- unsigned long size)
+static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
+ unsigned long size,
+ bool ipa_uncached)
{
- if (!vcpu_has_cache_enabled(vcpu))
- kvm_flush_dcache_to_poc((void *)hva, size);
-
/*
* If we are going to insert an instruction page and the icache is
* either VIPT or PIPT, there is a potential problem where the host
@@ -177,15 +176,73 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
*
* VIVT caches are tagged using both the ASID and the VMID and doesn't
* need any kind of flushing (DDI 0406C.b - Page B3-1392).
+ *
+ * We need to do this through a kernel mapping (using the
+ * user-space mapping has proved to be the wrong
+ * solution). For that, we need to kmap one page at a time,
+ * and iterate over the range.
*/
- if (icache_is_pipt()) {
- __cpuc_coherent_user_range(hva, hva + size);
- } else if (!icache_is_vivt_asid_tagged()) {
+
+ bool need_flush = !vcpu_has_cache_enabled(vcpu) || ipa_uncached;
+
+ VM_BUG_ON(size & ~PAGE_MASK);
+
+ if (!need_flush && !icache_is_pipt())
+ goto vipt_cache;
+
+ while (size) {
+ void *va = kmap_atomic_pfn(pfn);
+
+ if (need_flush)
+ kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+
+ if (icache_is_pipt())
+ __cpuc_coherent_user_range((unsigned long)va,
+ (unsigned long)va + PAGE_SIZE);
+
+ size -= PAGE_SIZE;
+ pfn++;
+
+ kunmap_atomic(va);
+ }
+
+vipt_cache:
+ if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) {
/* any kind of VIPT cache */
__flush_icache_all();
}
}
+static inline void __kvm_flush_dcache_pte(pte_t pte)
+{
+ void *va = kmap_atomic(pte_page(pte));
+
+ kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+
+ kunmap_atomic(va);
+}
+
+static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
+{
+ unsigned long size = PMD_SIZE;
+ pfn_t pfn = pmd_pfn(pmd);
+
+ while (size) {
+ void *va = kmap_atomic_pfn(pfn);
+
+ kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+
+ pfn++;
+ size -= PAGE_SIZE;
+
+ kunmap_atomic(va);
+ }
+}
+
+static inline void __kvm_flush_dcache_pud(pud_t pud)
+{
+}
+
#define kvm_virt_to_phys(x) virt_to_idmap((unsigned long)(x))
void stage2_flush_vm(struct kvm *kvm);
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 09ee408c1a67..b404cf886029 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -193,8 +193,14 @@ struct kvm_arch_memory_slot {
#define KVM_ARM_IRQ_CPU_IRQ 0
#define KVM_ARM_IRQ_CPU_FIQ 1
-/* Highest supported SPI, from VGIC_NR_IRQS */
+/*
+ * This used to hold the highest supported SPI, but it is now obsolete
+ * and only here to provide source code level compatibility with older
+ * userland. The highest SPI number can be set via KVM_DEV_ARM_VGIC_GRP_NR_IRQS.
+ */
+#ifndef __KERNEL__
#define KVM_ARM_IRQ_GIC_MAX 127
+#endif
/* PSCI interface */
#define KVM_PSCI_FN_BASE 0x95c1ba5e
diff --git a/arch/arm/kernel/hibernate.c b/arch/arm/kernel/hibernate.c
index c4cc50e58c13..cfb354ff2a60 100644
--- a/arch/arm/kernel/hibernate.c
+++ b/arch/arm/kernel/hibernate.c
@@ -22,6 +22,7 @@
#include <asm/suspend.h>
#include <asm/memory.h>
#include <asm/sections.h>
+#include "reboot.h"
int pfn_is_nosave(unsigned long pfn)
{
@@ -61,7 +62,7 @@ static int notrace arch_save_image(unsigned long unused)
ret = swsusp_save();
if (ret == 0)
- soft_restart(virt_to_phys(cpu_resume));
+ _soft_restart(virt_to_phys(cpu_resume), false);
return ret;
}
@@ -86,7 +87,7 @@ static void notrace arch_restore_image(void *unused)
for (pbe = restore_pblist; pbe; pbe = pbe->next)
copy_page(pbe->orig_address, pbe->address);
- soft_restart(virt_to_phys(cpu_resume));
+ _soft_restart(virt_to_phys(cpu_resume), false);
}
static u64 resume_stack[PAGE_SIZE/2/sizeof(u64)] __nosavedata;
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index fe972a2f3df3..ecefea4e2929 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -41,6 +41,7 @@
#include <asm/system_misc.h>
#include <asm/mach/time.h>
#include <asm/tls.h>
+#include "reboot.h"
#ifdef CONFIG_CC_STACKPROTECTOR
#include <linux/stackprotector.h>
@@ -95,7 +96,7 @@ static void __soft_restart(void *addr)
BUG();
}
-void soft_restart(unsigned long addr)
+void _soft_restart(unsigned long addr, bool disable_l2)
{
u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack);
@@ -104,7 +105,7 @@ void soft_restart(unsigned long addr)
local_fiq_disable();
/* Disable the L2 if we're the last man standing. */
- if (num_online_cpus() == 1)
+ if (disable_l2)
outer_disable();
/* Change to the new stack and continue with the reset. */
@@ -114,6 +115,11 @@ void soft_restart(unsigned long addr)
BUG();
}
+void soft_restart(unsigned long addr)
+{
+ _soft_restart(addr, num_online_cpus() == 1);
+}
+
/*
* Function pointers to optional machine specific functions
*/
diff --git a/arch/arm/kernel/reboot.h b/arch/arm/kernel/reboot.h
new file mode 100644
index 000000000000..c87f05816d6b
--- /dev/null
+++ b/arch/arm/kernel/reboot.h
@@ -0,0 +1,6 @@
+#ifndef REBOOT_H
+#define REBOOT_H
+
+extern void _soft_restart(unsigned long addr, bool disable_l2);
+
+#endif
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9e193c8a959e..ed5834e8e2ac 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -213,6 +213,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
int err;
struct kvm_vcpu *vcpu;
+ if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) {
+ err = -EBUSY;
+ goto out;
+ }
+
vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
if (!vcpu) {
err = -ENOMEM;
@@ -419,6 +424,7 @@ static void update_vttbr(struct kvm *kvm)
static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
{
+ struct kvm *kvm = vcpu->kvm;
int ret;
if (likely(vcpu->arch.has_run_once))
@@ -427,15 +433,23 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
vcpu->arch.has_run_once = true;
/*
- * Initialize the VGIC before running a vcpu the first time on
- * this VM.
+ * Map the VGIC hardware resources before running a vcpu the first
+ * time on this VM.
*/
- if (unlikely(!vgic_initialized(vcpu->kvm))) {
- ret = kvm_vgic_init(vcpu->kvm);
+ if (unlikely(!vgic_initialized(kvm))) {
+ ret = kvm_vgic_map_resources(kvm);
if (ret)
return ret;
}
+ /*
+ * Enable the arch timers only if we have an in-kernel VGIC
+ * and it has been properly initialized, since we cannot handle
+ * interrupts from the virtual timer with a userspace gic.
+ */
+ if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
+ kvm_timer_enable(kvm);
+
return 0;
}
@@ -639,8 +653,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
if (!irqchip_in_kernel(kvm))
return -ENXIO;
- if (irq_num < VGIC_NR_PRIVATE_IRQS ||
- irq_num > KVM_ARM_IRQ_GIC_MAX)
+ if (irq_num < VGIC_NR_PRIVATE_IRQS)
return -EINVAL;
return kvm_vgic_inject_irq(kvm, 0, irq_num, level);
@@ -659,10 +672,21 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
return ret;
/*
+ * Ensure a rebooted VM will fault in RAM pages and detect if the
+ * guest MMU is turned off and flush the caches as needed.
+ */
+ if (vcpu->arch.has_run_once)
+ stage2_unmap_vm(vcpu->kvm);
+
+ vcpu_reset_hcr(vcpu);
+
+ /*
* Handle the "start in power-off" case by marking the VCPU as paused.
*/
- if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
+ if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
vcpu->arch.pause = true;
+ else
+ vcpu->arch.pause = false;
return 0;
}
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index cc0b78769bd8..8c97208b9b97 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
- vcpu->arch.hcr = HCR_GUEST_MASK;
return 0;
}
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 8664ff17cbbe..cba52cf6ed3f 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -58,6 +58,26 @@ static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
}
+/*
+ * D-Cache management functions. They take the page table entries by
+ * value, as they are flushing the cache using the kernel mapping (or
+ * kmap on 32bit).
+ */
+static void kvm_flush_dcache_pte(pte_t pte)
+{
+ __kvm_flush_dcache_pte(pte);
+}
+
+static void kvm_flush_dcache_pmd(pmd_t pmd)
+{
+ __kvm_flush_dcache_pmd(pmd);
+}
+
+static void kvm_flush_dcache_pud(pud_t pud)
+{
+ __kvm_flush_dcache_pud(pud);
+}
+
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
int min, int max)
{
@@ -119,6 +139,26 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
put_page(virt_to_page(pmd));
}
+/*
+ * Unmapping vs dcache management:
+ *
+ * If a guest maps certain memory pages as uncached, all writes will
+ * bypass the data cache and go directly to RAM. However, the CPUs
+ * can still speculate reads (not writes) and fill cache lines with
+ * data.
+ *
+ * Those cache lines will be *clean* cache lines though, so a
+ * clean+invalidate operation is equivalent to an invalidate
+ * operation, because no cache lines are marked dirty.
+ *
+ * Those clean cache lines could be filled prior to an uncached write
+ * by the guest, and the cache coherent IO subsystem would therefore
+ * end up writing old data to disk.
+ *
+ * This is why right after unmapping a page/section and invalidating
+ * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
+ * the IO subsystem will never hit in the cache.
+ */
static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
phys_addr_t addr, phys_addr_t end)
{
@@ -128,9 +168,16 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
start_pte = pte = pte_offset_kernel(pmd, addr);
do {
if (!pte_none(*pte)) {
+ pte_t old_pte = *pte;
+
kvm_set_pte(pte, __pte(0));
- put_page(virt_to_page(pte));
kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+ /* No need to invalidate the cache for device mappings */
+ if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+ kvm_flush_dcache_pte(old_pte);
+
+ put_page(virt_to_page(pte));
}
} while (pte++, addr += PAGE_SIZE, addr != end);
@@ -149,8 +196,13 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud,
next = kvm_pmd_addr_end(addr, end);
if (!pmd_none(*pmd)) {
if (kvm_pmd_huge(*pmd)) {
+ pmd_t old_pmd = *pmd;
+
pmd_clear(pmd);
kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+ kvm_flush_dcache_pmd(old_pmd);
+
put_page(virt_to_page(pmd));
} else {
unmap_ptes(kvm, pmd, addr, next);
@@ -173,8 +225,13 @@ static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
next = kvm_pud_addr_end(addr, end);
if (!pud_none(*pud)) {
if (pud_huge(*pud)) {
+ pud_t old_pud = *pud;
+
pud_clear(pud);
kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+ kvm_flush_dcache_pud(old_pud);
+
put_page(virt_to_page(pud));
} else {
unmap_pmds(kvm, pud, addr, next);
@@ -194,7 +251,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
phys_addr_t addr = start, end = start + size;
phys_addr_t next;
- pgd = pgdp + pgd_index(addr);
+ pgd = pgdp + kvm_pgd_index(addr);
do {
next = kvm_pgd_addr_end(addr, end);
if (!pgd_none(*pgd))
@@ -209,10 +266,9 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
pte = pte_offset_kernel(pmd, addr);
do {
- if (!pte_none(*pte)) {
- hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
- kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE);
- }
+ if (!pte_none(*pte) &&
+ (pte_val(*pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+ kvm_flush_dcache_pte(*pte);
} while (pte++, addr += PAGE_SIZE, addr != end);
}
@@ -226,12 +282,10 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
do {
next = kvm_pmd_addr_end(addr, end);
if (!pmd_none(*pmd)) {
- if (kvm_pmd_huge(*pmd)) {
- hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
- kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE);
- } else {
+ if (kvm_pmd_huge(*pmd))
+ kvm_flush_dcache_pmd(*pmd);
+ else
stage2_flush_ptes(kvm, pmd, addr, next);
- }
}
} while (pmd++, addr = next, addr != end);
}
@@ -246,12 +300,10 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
do {
next = kvm_pud_addr_end(addr, end);
if (!pud_none(*pud)) {
- if (pud_huge(*pud)) {
- hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
- kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE);
- } else {
+ if (pud_huge(*pud))
+ kvm_flush_dcache_pud(*pud);
+ else
stage2_flush_pmds(kvm, pud, addr, next);
- }
}
} while (pud++, addr = next, addr != end);
}
@@ -264,7 +316,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
phys_addr_t next;
pgd_t *pgd;
- pgd = kvm->arch.pgd + pgd_index(addr);
+ pgd = kvm->arch.pgd + kvm_pgd_index(addr);
do {
next = kvm_pgd_addr_end(addr, end);
stage2_flush_puds(kvm, pgd, addr, next);
@@ -541,6 +593,20 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
__phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
}
+/* Free the HW pgd, one page at a time */
+static void kvm_free_hwpgd(void *hwpgd)
+{
+ free_pages_exact(hwpgd, kvm_get_hwpgd_size());
+}
+
+/* Allocate the HW PGD, making sure that each page gets its own refcount */
+static void *kvm_alloc_hwpgd(void)
+{
+ unsigned int size = kvm_get_hwpgd_size();
+
+ return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
+}
+
/**
* kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
* @kvm: The KVM struct pointer for the VM.
@@ -554,15 +620,31 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
*/
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
- int ret;
pgd_t *pgd;
+ void *hwpgd;
if (kvm->arch.pgd != NULL) {
kvm_err("kvm_arch already initialized?\n");
return -EINVAL;
}
+ hwpgd = kvm_alloc_hwpgd();
+ if (!hwpgd)
+ return -ENOMEM;
+
+ /* When the kernel uses more levels of page tables than the
+ * guest, we allocate a fake PGD and pre-populate it to point
+ * to the next-level page table, which will be the real
+ * initial page table pointed to by the VTTBR.
+ *
+ * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
+ * the PMD and the kernel will use folded pud.
+ * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
+ * pages.
+ */
if (KVM_PREALLOC_LEVEL > 0) {
+ int i;
+
/*
* Allocate fake pgd for the page table manipulation macros to
* work. This is not used by the hardware and we have no
@@ -570,30 +652,32 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
*/
pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
GFP_KERNEL | __GFP_ZERO);
+
+ if (!pgd) {
+ kvm_free_hwpgd(hwpgd);
+ return -ENOMEM;
+ }
+
+ /* Plug the HW PGD into the fake one. */
+ for (i = 0; i < PTRS_PER_S2_PGD; i++) {
+ if (KVM_PREALLOC_LEVEL == 1)
+ pgd_populate(NULL, pgd + i,
+ (pud_t *)hwpgd + i * PTRS_PER_PUD);
+ else if (KVM_PREALLOC_LEVEL == 2)
+ pud_populate(NULL, pud_offset(pgd, 0) + i,
+ (pmd_t *)hwpgd + i * PTRS_PER_PMD);
+ }
} else {
/*
* Allocate actual first-level Stage-2 page table used by the
* hardware for Stage-2 page table walks.
*/
- pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+ pgd = (pgd_t *)hwpgd;
}
- if (!pgd)
- return -ENOMEM;
-
- ret = kvm_prealloc_hwpgd(kvm, pgd);
- if (ret)
- goto out_err;
-
kvm_clean_pgd(pgd);
kvm->arch.pgd = pgd;
return 0;
-out_err:
- if (KVM_PREALLOC_LEVEL > 0)
- kfree(pgd);
- else
- free_pages((unsigned long)pgd, S2_PGD_ORDER);
- return ret;
}
/**
@@ -612,6 +696,71 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
unmap_range(kvm, kvm->arch.pgd, start, size);
}
+static void stage2_unmap_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *memslot)
+{
+ hva_t hva = memslot->userspace_addr;
+ phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
+ phys_addr_t size = PAGE_SIZE * memslot->npages;
+ hva_t reg_end = hva + size;
+
+ /*
+ * A memory region could potentially cover multiple VMAs, and any holes
+ * between them, so iterate over all of them to find out if we should
+ * unmap any of them.
+ *
+ * +--------------------------------------------+
+ * +---------------+----------------+ +----------------+
+ * | : VMA 1 | VMA 2 | | VMA 3 : |
+ * +---------------+----------------+ +----------------+
+ * | memory region |
+ * +--------------------------------------------+
+ */
+ do {
+ struct vm_area_struct *vma = find_vma(current->mm, hva);
+ hva_t vm_start, vm_end;
+
+ if (!vma || vma->vm_start >= reg_end)
+ break;
+
+ /*
+ * Take the intersection of this VMA with the memory region
+ */
+ vm_start = max(hva, vma->vm_start);
+ vm_end = min(reg_end, vma->vm_end);
+
+ if (!(vma->vm_flags & VM_PFNMAP)) {
+ gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
+ unmap_stage2_range(kvm, gpa, vm_end - vm_start);
+ }
+ hva = vm_end;
+ } while (hva < reg_end);
+}
+
+/**
+ * stage2_unmap_vm - Unmap Stage-2 RAM mappings
+ * @kvm: The struct kvm pointer
+ *
+ * Go through the memregions and unmap any reguler RAM
+ * backing memory already mapped to the VM.
+ */
+void stage2_unmap_vm(struct kvm *kvm)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ int idx;
+
+ idx = srcu_read_lock(&kvm->srcu);
+ spin_lock(&kvm->mmu_lock);
+
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(memslot, slots)
+ stage2_unmap_memslot(kvm, memslot);
+
+ spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
+}
+
/**
* kvm_free_stage2_pgd - free all stage-2 tables
* @kvm: The KVM struct pointer for the VM.
@@ -629,11 +778,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
return;
unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
- kvm_free_hwpgd(kvm);
+ kvm_free_hwpgd(kvm_get_hwpgd(kvm));
if (KVM_PREALLOC_LEVEL > 0)
kfree(kvm->arch.pgd);
- else
- free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+
kvm->arch.pgd = NULL;
}
@@ -643,7 +791,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
pgd_t *pgd;
pud_t *pud;
- pgd = kvm->arch.pgd + pgd_index(addr);
+ pgd = kvm->arch.pgd + kvm_pgd_index(addr);
if (WARN_ON(pgd_none(*pgd))) {
if (!cache)
return NULL;
@@ -840,6 +988,12 @@ static bool kvm_is_device_pfn(unsigned long pfn)
return !pfn_valid(pfn);
}
+static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
+ unsigned long size, bool uncached)
+{
+ __coherent_cache_guest_page(vcpu, pfn, size, uncached);
+}
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_memory_slot *memslot, unsigned long hva,
unsigned long fault_status)
@@ -853,6 +1007,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct vm_area_struct *vma;
pfn_t pfn;
pgprot_t mem_type = PAGE_S2;
+ bool fault_ipa_uncached;
write_fault = kvm_is_write_fault(vcpu);
if (fault_status == FSC_PERM && !write_fault) {
@@ -919,6 +1074,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (!hugetlb && !force_pte)
hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
+ fault_ipa_uncached = memslot->flags & KVM_MEMSLOT_INCOHERENT;
+
if (hugetlb) {
pmd_t new_pmd = pfn_pmd(pfn, mem_type);
new_pmd = pmd_mkhuge(new_pmd);
@@ -926,7 +1083,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
kvm_set_s2pmd_writable(&new_pmd);
kvm_set_pfn_dirty(pfn);
}
- coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE);
+ coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
} else {
pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -934,7 +1091,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
kvm_set_s2pte_writable(&new_pte);
kvm_set_pfn_dirty(pfn);
}
- coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
+ coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached);
ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
}
@@ -1294,11 +1451,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
hva = vm_end;
} while (hva < reg_end);
- if (ret) {
- spin_lock(&kvm->mmu_lock);
+ spin_lock(&kvm->mmu_lock);
+ if (ret)
unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
- spin_unlock(&kvm->mmu_lock);
- }
+ else
+ stage2_flush_memslot(kvm, memslot);
+ spin_unlock(&kvm->mmu_lock);
return ret;
}
@@ -1310,6 +1468,15 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
unsigned long npages)
{
+ /*
+ * Readonly memslots are not incoherent with the caches by definition,
+ * but in practice, they are used mostly to emulate ROMs or NOR flashes
+ * that the guest may consider devices and hence map as uncached.
+ * To prevent incoherency issues in these cases, tag all readonly
+ * regions as incoherent.
+ */
+ if (slot->flags & KVM_MEM_READONLY)
+ slot->flags |= KVM_MEMSLOT_INCOHERENT;
return 0;
}
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 09cf37737ee2..58cb3248d277 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -15,6 +15,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/preempt.h>
#include <linux/kvm_host.h>
#include <linux/wait.h>
@@ -166,6 +167,23 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
{
+ int i;
+ struct kvm_vcpu *tmp;
+
+ /*
+ * The KVM ABI specifies that a system event exit may call KVM_RUN
+ * again and may perform shutdown/reboot at a later time that when the
+ * actual request is made. Since we are implementing PSCI and a
+ * caller of PSCI reboot and shutdown expects that the system shuts
+ * down or reboots immediately, let's make sure that VCPUs are not run
+ * after this call is handled and before the VCPUs have been
+ * re-initialized.
+ */
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+ tmp->arch.pause = true;
+ kvm_vcpu_kick(tmp);
+ }
+
memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
vcpu->run->system_event.type = type;
vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
diff --git a/arch/arm/mach-mvebu/pmsu.c b/arch/arm/mach-mvebu/pmsu.c
index bbd8664d1bac..6f8a85c5965c 100644
--- a/arch/arm/mach-mvebu/pmsu.c
+++ b/arch/arm/mach-mvebu/pmsu.c
@@ -415,6 +415,9 @@ static __init int armada_38x_cpuidle_init(void)
void __iomem *mpsoc_base;
u32 reg;
+ pr_warn("CPU idle is currently broken on Armada 38x: disabling");
+ return 0;
+
np = of_find_compatible_node(NULL, NULL,
"marvell,armada-380-coherency-fabric");
if (!np)
@@ -476,6 +479,16 @@ static int __init mvebu_v7_cpu_pm_init(void)
return 0;
of_node_put(np);
+ /*
+ * Currently the CPU idle support for Armada 38x is broken, as
+ * the CPU hotplug uses some of the CPU idle functions it is
+ * broken too, so let's disable it
+ */
+ if (of_machine_is_compatible("marvell,armada380")) {
+ cpu_hotplug_disable();
+ pr_warn("CPU hotplug support is currently broken on Armada 38x: disabling");
+ }
+
if (of_machine_is_compatible("marvell,armadaxp"))
ret = armada_xp_cpuidle_init();
else if (of_machine_is_compatible("marvell,armada370"))
@@ -489,7 +502,8 @@ static int __init mvebu_v7_cpu_pm_init(void)
return ret;
mvebu_v7_pmsu_enable_l2_powerdown_onidle();
- platform_device_register(&mvebu_v7_cpuidle_device);
+ if (mvebu_v7_cpuidle_device.name)
+ platform_device_register(&mvebu_v7_cpuidle_device);
cpu_pm_register_notifier(&mvebu_v7_cpu_pm_notifier);
return 0;
diff --git a/arch/arm/mach-s3c64xx/crag6410.h b/arch/arm/mach-s3c64xx/crag6410.h
index 7bc66682687e..dcbe17f5e5f8 100644
--- a/arch/arm/mach-s3c64xx/crag6410.h
+++ b/arch/arm/mach-s3c64xx/crag6410.h
@@ -14,6 +14,7 @@
#include <mach/gpio-samsung.h>
#define GLENFARCLAS_PMIC_IRQ_BASE IRQ_BOARD_START
+#define BANFF_PMIC_IRQ_BASE (IRQ_BOARD_START + 64)
#define PCA935X_GPIO_BASE GPIO_BOARD_START
#define CODEC_GPIO_BASE (GPIO_BOARD_START + 8)
diff --git a/arch/arm/mach-s3c64xx/mach-crag6410.c b/arch/arm/mach-s3c64xx/mach-crag6410.c
index 10b913baab28..65c426bc45f7 100644
--- a/arch/arm/mach-s3c64xx/mach-crag6410.c
+++ b/arch/arm/mach-s3c64xx/mach-crag6410.c
@@ -554,6 +554,7 @@ static struct wm831x_touch_pdata touch_pdata = {
static struct wm831x_pdata crag_pmic_pdata = {
.wm831x_num = 1,
+ .irq_base = BANFF_PMIC_IRQ_BASE,
.gpio_base = BANFF_PMIC_GPIO_BASE,
.soft_shutdown = true,
diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c
index 66781bf34077..c72412415093 100644
--- a/arch/arm/mm/hugetlbpage.c
+++ b/arch/arm/mm/hugetlbpage.c
@@ -36,12 +36,6 @@
* of type casting from pmd_t * to pte_t *.
*/
-struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
- int write)
-{
- return ERR_PTR(-EINVAL);
-}
-
int pud_huge(pud_t pud)
{
return 0;