77 files changed, 888 insertions, 439 deletions
diff --git a/arch/arm/boot/dts/dove.dtsi b/arch/arm/boot/dts/dove.dtsi
index a5441d5482a6..3cc8b8320345 100644
--- a/arch/arm/boot/dts/dove.dtsi
+++ b/arch/arm/boot/dts/dove.dtsi
@@ -154,7 +154,7 @@
 
 			uart2: serial@12200 {
 				compatible = "ns16550a";
-				reg = <0x12000 0x100>;
+				reg = <0x12200 0x100>;
 				reg-shift = <2>;
 				interrupts = <9>;
 				clocks = <&core_clk 0>;
@@ -163,7 +163,7 @@
 
 			uart3: serial@12300 {
 				compatible = "ns16550a";
-				reg = <0x12100 0x100>;
+				reg = <0x12300 0x100>;
 				reg-shift = <2>;
 				interrupts = <10>;
 				clocks = <&core_clk 0>;
diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h
index afb9cafd3786..674d03f4ba15 100644
--- a/arch/arm/include/asm/elf.h
+++ b/arch/arm/include/asm/elf.h
@@ -115,7 +115,7 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs);
    the loader.  We need to make sure that it is out of the way of the program
    that it will "exec", and that there is sufficient room for the brk.  */
 
-#define ELF_ET_DYN_BASE	(2 * TASK_SIZE / 3)
+#define ELF_ET_DYN_BASE	(TASK_SIZE / 3 * 2)
 
 /* When the program starts, a1 contains a pointer to a function to be 
    registered with atexit, as per the SVR4 ABI.  A value of 0 means we 
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index b9db269c6e61..66ce17655bb9 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -33,6 +33,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
 
+static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr = HCR_GUEST_MASK;
+}
+
 static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
 {
 	return 1;
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index acb0d5712716..16d9d788d0b8 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -44,6 +44,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/highmem.h>
 #include <asm/cacheflush.h>
 #include <asm/pgalloc.h>
 
@@ -52,6 +53,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
 void free_boot_hyp_pgd(void);
 void free_hyp_pgds(void);
 
+void stage2_unmap_vm(struct kvm *kvm);
 int kvm_alloc_stage2_pgd(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm *kvm);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
@@ -126,29 +128,28 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
 })
 
+#define kvm_pgd_index(addr)			pgd_index(addr)
+
 static inline bool kvm_page_empty(void *ptr)
 {
 	struct page *ptr_page = virt_to_page(ptr);
 	return page_count(ptr_page) == 1;
 }
 
-
 #define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
 #define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
 #define kvm_pud_table_empty(kvm, pudp) (0)
 
 #define KVM_PREALLOC_LEVEL	0
 
-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
+static inline void *kvm_get_hwpgd(struct kvm *kvm)
 {
-	return 0;
+	return kvm->arch.pgd;
 }
 
-static inline void kvm_free_hwpgd(struct kvm *kvm) { }
-
-static inline void *kvm_get_hwpgd(struct kvm *kvm)
+static inline unsigned int kvm_get_hwpgd_size(void)
 {
-	return kvm->arch.pgd;
+	return PTRS_PER_S2_PGD * sizeof(pgd_t);
 }
 
 struct kvm;
@@ -160,12 +161,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 	return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101;
 }
 
-static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
-					     unsigned long size)
+static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
+					       unsigned long size,
+					       bool ipa_uncached)
 {
-	if (!vcpu_has_cache_enabled(vcpu))
-		kvm_flush_dcache_to_poc((void *)hva, size);
-	
 	/*
 	 * If we are going to insert an instruction page and the icache is
 	 * either VIPT or PIPT, there is a potential problem where the host
@@ -177,15 +176,73 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
 	 *
 	 * VIVT caches are tagged using both the ASID and the VMID and doesn't
 	 * need any kind of flushing (DDI 0406C.b - Page B3-1392).
+	 *
+	 * We need to do this through a kernel mapping (using the
+	 * user-space mapping has proved to be the wrong
+	 * solution). For that, we need to kmap one page at a time,
+	 * and iterate over the range.
 	 */
-	if (icache_is_pipt()) {
-		__cpuc_coherent_user_range(hva, hva + size);
-	} else if (!icache_is_vivt_asid_tagged()) {
+
+	bool need_flush = !vcpu_has_cache_enabled(vcpu) || ipa_uncached;
+
+	VM_BUG_ON(size & ~PAGE_MASK);
+
+	if (!need_flush && !icache_is_pipt())
+		goto vipt_cache;
+
+	while (size) {
+		void *va = kmap_atomic_pfn(pfn);
+
+		if (need_flush)
+			kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+
+		if (icache_is_pipt())
+			__cpuc_coherent_user_range((unsigned long)va,
+						   (unsigned long)va + PAGE_SIZE);
+
+		size -= PAGE_SIZE;
+		pfn++;
+
+		kunmap_atomic(va);
+	}
+
+vipt_cache:
+	if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) {
 		/* any kind of VIPT cache */
 		__flush_icache_all();
 	}
 }
 
+static inline void __kvm_flush_dcache_pte(pte_t pte)
+{
+	void *va = kmap_atomic(pte_page(pte));
+
+	kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+
+	kunmap_atomic(va);
+}
+
+static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
+{
+	unsigned long size = PMD_SIZE;
+	pfn_t pfn = pmd_pfn(pmd);
+
+	while (size) {
+		void *va = kmap_atomic_pfn(pfn);
+
+		kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+
+		pfn++;
+		size -= PAGE_SIZE;
+
+		kunmap_atomic(va);
+	}
+}
+
+static inline void __kvm_flush_dcache_pud(pud_t pud)
+{
+}
+
 #define kvm_virt_to_phys(x)		virt_to_idmap((unsigned long)(x))
 
 void stage2_flush_vm(struct kvm *kvm);
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 09ee408c1a67..b404cf886029 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -193,8 +193,14 @@ struct kvm_arch_memory_slot {
 #define KVM_ARM_IRQ_CPU_IRQ		0
 #define KVM_ARM_IRQ_CPU_FIQ		1
 
-/* Highest supported SPI, from VGIC_NR_IRQS */
+/*
+ * This used to hold the highest supported SPI, but it is now obsolete
+ * and only here to provide source code level compatibility with older
+ * userland. The highest SPI number can be set via KVM_DEV_ARM_VGIC_GRP_NR_IRQS.
+ */
+#ifndef __KERNEL__
 #define KVM_ARM_IRQ_GIC_MAX		127
+#endif
 
 /* PSCI interface */
 #define KVM_PSCI_FN_BASE		0x95c1ba5e
diff --git a/arch/arm/kernel/hibernate.c b/arch/arm/kernel/hibernate.c
index c4cc50e58c13..cfb354ff2a60 100644
--- a/arch/arm/kernel/hibernate.c
+++ b/arch/arm/kernel/hibernate.c
@@ -22,6 +22,7 @@
 #include <asm/suspend.h>
 #include <asm/memory.h>
 #include <asm/sections.h>
+#include "reboot.h"
 
 int pfn_is_nosave(unsigned long pfn)
 {
@@ -61,7 +62,7 @@ static int notrace arch_save_image(unsigned long unused)
 
 	ret = swsusp_save();
 	if (ret == 0)
-		soft_restart(virt_to_phys(cpu_resume));
+		_soft_restart(virt_to_phys(cpu_resume), false);
 	return ret;
 }
 
@@ -86,7 +87,7 @@ static void notrace arch_restore_image(void *unused)
 	for (pbe = restore_pblist; pbe; pbe = pbe->next)
 		copy_page(pbe->orig_address, pbe->address);
 
-	soft_restart(virt_to_phys(cpu_resume));
+	_soft_restart(virt_to_phys(cpu_resume), false);
 }
 
 static u64 resume_stack[PAGE_SIZE/2/sizeof(u64)] __nosavedata;
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index fe972a2f3df3..ecefea4e2929 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -41,6 +41,7 @@
 #include <asm/system_misc.h>
 #include <asm/mach/time.h>
 #include <asm/tls.h>
+#include "reboot.h"
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 #include <linux/stackprotector.h>
@@ -95,7 +96,7 @@ static void __soft_restart(void *addr)
 	BUG();
 }
 
-void soft_restart(unsigned long addr)
+void _soft_restart(unsigned long addr, bool disable_l2)
 {
 	u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack);
 
@@ -104,7 +105,7 @@ void soft_restart(unsigned long addr)
 	local_fiq_disable();
 
 	/* Disable the L2 if we're the last man standing. */
-	if (num_online_cpus() == 1)
+	if (disable_l2)
 		outer_disable();
 
 	/* Change to the new stack and continue with the reset. */
@@ -114,6 +115,11 @@ void soft_restart(unsigned long addr)
 	BUG();
 }
 
+void soft_restart(unsigned long addr)
+{
+	_soft_restart(addr, num_online_cpus() == 1);
+}
+
 /*
  * Function pointers to optional machine specific functions
  */
diff --git a/arch/arm/kernel/reboot.h b/arch/arm/kernel/reboot.h
new file mode 100644
index 000000000000..c87f05816d6b
--- /dev/null
+++ b/arch/arm/kernel/reboot.h
@@ -0,0 +1,6 @@
+#ifndef REBOOT_H
+#define REBOOT_H
+
+extern void _soft_restart(unsigned long addr, bool disable_l2);
+
+#endif
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9e193c8a959e..ed5834e8e2ac 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -213,6 +213,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 	int err;
 	struct kvm_vcpu *vcpu;
 
+	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) {
+		err = -EBUSY;
+		goto out;
+	}
+
 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
 	if (!vcpu) {
 		err = -ENOMEM;
@@ -419,6 +424,7 @@ static void update_vttbr(struct kvm *kvm)
 
 static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 {
+	struct kvm *kvm = vcpu->kvm;
 	int ret;
 
 	if (likely(vcpu->arch.has_run_once))
@@ -427,15 +433,23 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 	vcpu->arch.has_run_once = true;
 
 	/*
-	 * Initialize the VGIC before running a vcpu the first time on
-	 * this VM.
+	 * Map the VGIC hardware resources before running a vcpu the first
+	 * time on this VM.
 	 */
-	if (unlikely(!vgic_initialized(vcpu->kvm))) {
-		ret = kvm_vgic_init(vcpu->kvm);
+	if (unlikely(!vgic_initialized(kvm))) {
+		ret = kvm_vgic_map_resources(kvm);
 		if (ret)
 			return ret;
 	}
 
+	/*
+	 * Enable the arch timers only if we have an in-kernel VGIC
+	 * and it has been properly initialized, since we cannot handle
+	 * interrupts from the virtual timer with a userspace gic.
+	 */
+	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
+		kvm_timer_enable(kvm);
+
 	return 0;
 }
 
@@ -639,8 +653,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
 		if (!irqchip_in_kernel(kvm))
 			return -ENXIO;
 
-		if (irq_num < VGIC_NR_PRIVATE_IRQS ||
-		    irq_num > KVM_ARM_IRQ_GIC_MAX)
+		if (irq_num < VGIC_NR_PRIVATE_IRQS)
 			return -EINVAL;
 
 		return kvm_vgic_inject_irq(kvm, 0, irq_num, level);
@@ -659,10 +672,21 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 		return ret;
 
 	/*
+	 * Ensure a rebooted VM will fault in RAM pages and detect if the
+	 * guest MMU is turned off and flush the caches as needed.
+	 */
+	if (vcpu->arch.has_run_once)
+		stage2_unmap_vm(vcpu->kvm);
+
+	vcpu_reset_hcr(vcpu);
+
+	/*
 	 * Handle the "start in power-off" case by marking the VCPU as paused.
 	 */
-	if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
+	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
 		vcpu->arch.pause = true;
+	else
+		vcpu->arch.pause = false;
 
 	return 0;
 }
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index cc0b78769bd8..8c97208b9b97 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.hcr = HCR_GUEST_MASK;
 	return 0;
 }
 
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 8664ff17cbbe..cba52cf6ed3f 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -58,6 +58,26 @@ static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
 }
 
+/*
+ * D-Cache management functions. They take the page table entries by
+ * value, as they are flushing the cache using the kernel mapping (or
+ * kmap on 32bit).
+ */
+static void kvm_flush_dcache_pte(pte_t pte)
+{
+	__kvm_flush_dcache_pte(pte);
+}
+
+static void kvm_flush_dcache_pmd(pmd_t pmd)
+{
+	__kvm_flush_dcache_pmd(pmd);
+}
+
+static void kvm_flush_dcache_pud(pud_t pud)
+{
+	__kvm_flush_dcache_pud(pud);
+}
+
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
 				  int min, int max)
 {
@@ -119,6 +139,26 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 	put_page(virt_to_page(pmd));
 }
 
+/*
+ * Unmapping vs dcache management:
+ *
+ * If a guest maps certain memory pages as uncached, all writes will
+ * bypass the data cache and go directly to RAM.  However, the CPUs
+ * can still speculate reads (not writes) and fill cache lines with
+ * data.
+ *
+ * Those cache lines will be *clean* cache lines though, so a
+ * clean+invalidate operation is equivalent to an invalidate
+ * operation, because no cache lines are marked dirty.
+ *
+ * Those clean cache lines could be filled prior to an uncached write
+ * by the guest, and the cache coherent IO subsystem would therefore
+ * end up writing old data to disk.
+ *
+ * This is why right after unmapping a page/section and invalidating
+ * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
+ * the IO subsystem will never hit in the cache.
+ */
 static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
 		       phys_addr_t addr, phys_addr_t end)
 {
@@ -128,9 +168,16 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
 	start_pte = pte = pte_offset_kernel(pmd, addr);
 	do {
 		if (!pte_none(*pte)) {
+			pte_t old_pte = *pte;
+
 			kvm_set_pte(pte, __pte(0));
-			put_page(virt_to_page(pte));
 			kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+			/* No need to invalidate the cache for device mappings */
+			if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+				kvm_flush_dcache_pte(old_pte);
+
+			put_page(virt_to_page(pte));
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 
@@ -149,8 +196,13 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud,
 		next = kvm_pmd_addr_end(addr, end);
 		if (!pmd_none(*pmd)) {
 			if (kvm_pmd_huge(*pmd)) {
+				pmd_t old_pmd = *pmd;
+
 				pmd_clear(pmd);
 				kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+				kvm_flush_dcache_pmd(old_pmd);
+
 				put_page(virt_to_page(pmd));
 			} else {
 				unmap_ptes(kvm, pmd, addr, next);
@@ -173,8 +225,13 @@ static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
 		next = kvm_pud_addr_end(addr, end);
 		if (!pud_none(*pud)) {
 			if (pud_huge(*pud)) {
+				pud_t old_pud = *pud;
+
 				pud_clear(pud);
 				kvm_tlb_flush_vmid_ipa(kvm, addr);
+
+				kvm_flush_dcache_pud(old_pud);
+
 				put_page(virt_to_page(pud));
 			} else {
 				unmap_pmds(kvm, pud, addr, next);
@@ -194,7 +251,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 	phys_addr_t addr = start, end = start + size;
 	phys_addr_t next;
 
-	pgd = pgdp + pgd_index(addr);
+	pgd = pgdp + kvm_pgd_index(addr);
 	do {
 		next = kvm_pgd_addr_end(addr, end);
 		if (!pgd_none(*pgd))
@@ -209,10 +266,9 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
 
 	pte = pte_offset_kernel(pmd, addr);
 	do {
-		if (!pte_none(*pte)) {
-			hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
-			kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE);
-		}
+		if (!pte_none(*pte) &&
+		    (pte_val(*pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+			kvm_flush_dcache_pte(*pte);
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 }
 
@@ -226,12 +282,10 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
 	do {
 		next = kvm_pmd_addr_end(addr, end);
 		if (!pmd_none(*pmd)) {
-			if (kvm_pmd_huge(*pmd)) {
-				hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
-				kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE);
-			} else {
+			if (kvm_pmd_huge(*pmd))
+				kvm_flush_dcache_pmd(*pmd);
+			else
 				stage2_flush_ptes(kvm, pmd, addr, next);
-			}
 		}
 	} while (pmd++, addr = next, addr != end);
 }
@@ -246,12 +300,10 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
 	do {
 		next = kvm_pud_addr_end(addr, end);
 		if (!pud_none(*pud)) {
-			if (pud_huge(*pud)) {
-				hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
-				kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE);
-			} else {
+			if (pud_huge(*pud))
+				kvm_flush_dcache_pud(*pud);
+			else
 				stage2_flush_pmds(kvm, pud, addr, next);
-			}
 		}
 	} while (pud++, addr = next, addr != end);
 }
@@ -264,7 +316,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
 	phys_addr_t next;
 	pgd_t *pgd;
 
-	pgd = kvm->arch.pgd + pgd_index(addr);
+	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
 	do {
 		next = kvm_pgd_addr_end(addr, end);
 		stage2_flush_puds(kvm, pgd, addr, next);
@@ -541,6 +593,20 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
 				     __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
 }
 
+/* Free the HW pgd, one page at a time */
+static void kvm_free_hwpgd(void *hwpgd)
+{
+	free_pages_exact(hwpgd, kvm_get_hwpgd_size());
+}
+
+/* Allocate the HW PGD, making sure that each page gets its own refcount */
+static void *kvm_alloc_hwpgd(void)
+{
+	unsigned int size = kvm_get_hwpgd_size();
+
+	return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
+}
+
 /**
  * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
  * @kvm:	The KVM struct pointer for the VM.
@@ -554,15 +620,31 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
  */
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
-	int ret;
 	pgd_t *pgd;
+	void *hwpgd;
 
 	if (kvm->arch.pgd != NULL) {
 		kvm_err("kvm_arch already initialized?\n");
 		return -EINVAL;
 	}
 
+	hwpgd = kvm_alloc_hwpgd();
+	if (!hwpgd)
+		return -ENOMEM;
+
+	/* When the kernel uses more levels of page tables than the
+	 * guest, we allocate a fake PGD and pre-populate it to point
+	 * to the next-level page table, which will be the real
+	 * initial page table pointed to by the VTTBR.
+	 *
+	 * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
+	 * the PMD and the kernel will use folded pud.
+	 * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
+	 * pages.
+	 */
 	if (KVM_PREALLOC_LEVEL > 0) {
+		int i;
+
 		/*
 		 * Allocate fake pgd for the page table manipulation macros to
 		 * work.  This is not used by the hardware and we have no
@@ -570,30 +652,32 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
 		 */
 		pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
 				       GFP_KERNEL | __GFP_ZERO);
+
+		if (!pgd) {
+			kvm_free_hwpgd(hwpgd);
+			return -ENOMEM;
+		}
+
+		/* Plug the HW PGD into the fake one. */
+		for (i = 0; i < PTRS_PER_S2_PGD; i++) {
+			if (KVM_PREALLOC_LEVEL == 1)
+				pgd_populate(NULL, pgd + i,
+					     (pud_t *)hwpgd + i * PTRS_PER_PUD);
+			else if (KVM_PREALLOC_LEVEL == 2)
+				pud_populate(NULL, pud_offset(pgd, 0) + i,
+					     (pmd_t *)hwpgd + i * PTRS_PER_PMD);
+		}
 	} else {
 		/*
 		 * Allocate actual first-level Stage-2 page table used by the
 		 * hardware for Stage-2 page table walks.
 		 */
-		pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+		pgd = (pgd_t *)hwpgd;
 	}
 
-	if (!pgd)
-		return -ENOMEM;
-
-	ret = kvm_prealloc_hwpgd(kvm, pgd);
-	if (ret)
-		goto out_err;
-
 	kvm_clean_pgd(pgd);
 	kvm->arch.pgd = pgd;
 	return 0;
-out_err:
-	if (KVM_PREALLOC_LEVEL > 0)
-		kfree(pgd);
-	else
-		free_pages((unsigned long)pgd, S2_PGD_ORDER);
-	return ret;
 }
 
 /**
@@ -612,6 +696,71 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
 	unmap_range(kvm, kvm->arch.pgd, start, size);
 }
 
+static void stage2_unmap_memslot(struct kvm *kvm,
+				 struct kvm_memory_slot *memslot)
+{
+	hva_t hva = memslot->userspace_addr;
+	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
+	phys_addr_t size = PAGE_SIZE * memslot->npages;
+	hva_t reg_end = hva + size;
+
+	/*
+	 * A memory region could potentially cover multiple VMAs, and any holes
+	 * between them, so iterate over all of them to find out if we should
+	 * unmap any of them.
+	 *
+	 *     +--------------------------------------------+
+	 * +---------------+----------------+   +----------------+
+	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
+	 * +---------------+----------------+   +----------------+
+	 *     |               memory region                |
+	 *     +--------------------------------------------+
+	 */
+	do {
+		struct vm_area_struct *vma = find_vma(current->mm, hva);
+		hva_t vm_start, vm_end;
+
+		if (!vma || vma->vm_start >= reg_end)
+			break;
+
+		/*
+		 * Take the intersection of this VMA with the memory region
+		 */
+		vm_start = max(hva, vma->vm_start);
+		vm_end = min(reg_end, vma->vm_end);
+
+		if (!(vma->vm_flags & VM_PFNMAP)) {
+			gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
+			unmap_stage2_range(kvm, gpa, vm_end - vm_start);
+		}
+		hva = vm_end;
+	} while (hva < reg_end);
+}
+
+/**
+ * stage2_unmap_vm - Unmap Stage-2 RAM mappings
+ * @kvm: The struct kvm pointer
+ *
+ * Go through the memregions and unmap any reguler RAM
+ * backing memory already mapped to the VM.
+ */
+void stage2_unmap_vm(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int idx;
+
+	idx = srcu_read_lock(&kvm->srcu);
+	spin_lock(&kvm->mmu_lock);
+
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, slots)
+		stage2_unmap_memslot(kvm, memslot);
+
+	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
+}
+
 /**
  * kvm_free_stage2_pgd - free all stage-2 tables
  * @kvm:	The KVM struct pointer for the VM.
@@ -629,11 +778,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 		return;
 
 	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
-	kvm_free_hwpgd(kvm);
+	kvm_free_hwpgd(kvm_get_hwpgd(kvm));
 	if (KVM_PREALLOC_LEVEL > 0)
 		kfree(kvm->arch.pgd);
-	else
-		free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+
 	kvm->arch.pgd = NULL;
 }
 
@@ -643,7 +791,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
 	pgd_t *pgd;
 	pud_t *pud;
 
-	pgd = kvm->arch.pgd + pgd_index(addr);
+	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
 	if (WARN_ON(pgd_none(*pgd))) {
 		if (!cache)
 			return NULL;
@@ -840,6 +988,12 @@ static bool kvm_is_device_pfn(unsigned long pfn)
 	return !pfn_valid(pfn);
 }
 
+static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
+				      unsigned long size, bool uncached)
+{
+	__coherent_cache_guest_page(vcpu, pfn, size, uncached);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  unsigned long fault_status)
@@ -853,6 +1007,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	struct vm_area_struct *vma;
 	pfn_t pfn;
 	pgprot_t mem_type = PAGE_S2;
+	bool fault_ipa_uncached;
 
 	write_fault = kvm_is_write_fault(vcpu);
 	if (fault_status == FSC_PERM && !write_fault) {
@@ -919,6 +1074,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (!hugetlb && !force_pte)
 		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
 
+	fault_ipa_uncached = memslot->flags & KVM_MEMSLOT_INCOHERENT;
+
 	if (hugetlb) {
 		pmd_t new_pmd = pfn_pmd(pfn, mem_type);
 		new_pmd = pmd_mkhuge(new_pmd);
@@ -926,7 +1083,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_s2pmd_writable(&new_pmd);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE);
+		coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
 		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
 	} else {
 		pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -934,7 +1091,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_s2pte_writable(&new_pte);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
+		coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached);
 		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
 			pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
 	}
@@ -1294,11 +1451,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 		hva = vm_end;
 	} while (hva < reg_end);
 
-	if (ret) {
-		spin_lock(&kvm->mmu_lock);
+	spin_lock(&kvm->mmu_lock);
+	if (ret)
 		unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
-		spin_unlock(&kvm->mmu_lock);
-	}
+	else
+		stage2_flush_memslot(kvm, memslot);
+	spin_unlock(&kvm->mmu_lock);
 	return ret;
 }
 
@@ -1310,6 +1468,15 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 			    unsigned long npages)
 {
+	/*
+	 * Readonly memslots are not incoherent with the caches by definition,
+	 * but in practice, they are used mostly to emulate ROMs or NOR flashes
+	 * that the guest may consider devices and hence map as uncached.
+	 * To prevent incoherency issues in these cases, tag all readonly
+	 * regions as incoherent.
+	 */
+	if (slot->flags & KVM_MEM_READONLY)
+		slot->flags |= KVM_MEMSLOT_INCOHERENT;
 	return 0;
 }
 
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 09cf37737ee2..58cb3248d277 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -15,6 +15,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/preempt.h>
 #include <linux/kvm_host.h>
 #include <linux/wait.h>
 
@@ -166,6 +167,23 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
 
 static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
 {
+	int i;
+	struct kvm_vcpu *tmp;
+
+	/*
+	 * The KVM ABI specifies that a system event exit may call KVM_RUN
+	 * again and may perform shutdown/reboot at a later time that when the
+	 * actual request is made.  Since we are implementing PSCI and a
+	 * caller of PSCI reboot and shutdown expects that the system shuts
+	 * down or reboots immediately, let's make sure that VCPUs are not run
+	 * after this call is handled and before the VCPUs have been
+	 * re-initialized.
+	 */
+	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+		tmp->arch.pause = true;
+		kvm_vcpu_kick(tmp);
+	}
+
 	memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
 	vcpu->run->system_event.type = type;
 	vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
diff --git a/arch/arm/mach-mvebu/pmsu.c b/arch/arm/mach-mvebu/pmsu.c
index bbd8664d1bac..6f8a85c5965c 100644
--- a/arch/arm/mach-mvebu/pmsu.c
+++ b/arch/arm/mach-mvebu/pmsu.c
@@ -415,6 +415,9 @@ static __init int armada_38x_cpuidle_init(void)
 	void __iomem *mpsoc_base;
 	u32 reg;
 
+	pr_warn("CPU idle is currently broken on Armada 38x: disabling");
+	return 0;
+
 	np = of_find_compatible_node(NULL, NULL,
 				     "marvell,armada-380-coherency-fabric");
 	if (!np)
@@ -476,6 +479,16 @@ static int __init mvebu_v7_cpu_pm_init(void)
 		return 0;
 	of_node_put(np);
 
+	/*
+	 * Currently the CPU idle support for Armada 38x is broken, as
+	 * the CPU hotplug uses some of the CPU idle functions it is
+	 * broken too, so let's disable it
+	 */
+	if (of_machine_is_compatible("marvell,armada380")) {
+		cpu_hotplug_disable();
+		pr_warn("CPU hotplug support is currently broken on Armada 38x: disabling");
+	}
+
 	if (of_machine_is_compatible("marvell,armadaxp"))
 		ret = armada_xp_cpuidle_init();
 	else if (of_machine_is_compatible("marvell,armada370"))
@@ -489,7 +502,8 @@ static int __init mvebu_v7_cpu_pm_init(void)
 		return ret;
 
 	mvebu_v7_pmsu_enable_l2_powerdown_onidle();
-	platform_device_register(&mvebu_v7_cpuidle_device);
+	if (mvebu_v7_cpuidle_device.name)
+		platform_device_register(&mvebu_v7_cpuidle_device);
 	cpu_pm_register_notifier(&mvebu_v7_cpu_pm_notifier);
 
 	return 0;
diff --git a/arch/arm/mach-s3c64xx/crag6410.h b/arch/arm/mach-s3c64xx/crag6410.h
index 7bc66682687e..dcbe17f5e5f8 100644
--- a/arch/arm/mach-s3c64xx/crag6410.h
+++ b/arch/arm/mach-s3c64xx/crag6410.h
@@ -14,6 +14,7 @@
 #include <mach/gpio-samsung.h>
 
 #define GLENFARCLAS_PMIC_IRQ_BASE	IRQ_BOARD_START
+#define BANFF_PMIC_IRQ_BASE		(IRQ_BOARD_START + 64)
 
 #define PCA935X_GPIO_BASE		GPIO_BOARD_START
 #define CODEC_GPIO_BASE			(GPIO_BOARD_START + 8)
diff --git a/arch/arm/mach-s3c64xx/mach-crag6410.c b/arch/arm/mach-s3c64xx/mach-crag6410.c
index 10b913baab28..65c426bc45f7 100644
--- a/arch/arm/mach-s3c64xx/mach-crag6410.c
+++ b/arch/arm/mach-s3c64xx/mach-crag6410.c
@@ -554,6 +554,7 @@ static struct wm831x_touch_pdata touch_pdata = {
 
 static struct wm831x_pdata crag_pmic_pdata = {
 	.wm831x_num = 1,
+	.irq_base = BANFF_PMIC_IRQ_BASE,
 	.gpio_base = BANFF_PMIC_GPIO_BASE,
 	.soft_shutdown = true,
 
diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c
index 66781bf34077..c72412415093 100644
--- a/arch/arm/mm/hugetlbpage.c
+++ b/arch/arm/mm/hugetlbpage.c
@@ -36,12 +36,6 @@
  * of type casting from pmd_t * to pte_t *.
  */
 
-struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
-			      int write)
-{
-	return ERR_PTR(-EINVAL);
-}
-
 int pud_huge(pud_t pud)
 {
 	return 0;
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 5674a55b5518..865a7e28ea2d 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -38,6 +38,13 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
 
+static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+	if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
+		vcpu->arch.hcr_el2 &= ~HCR_RW;
+}
+
 static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
 {
 	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 2012c4ba8d67..dbd32127dbb6 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -200,6 +200,7 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
 struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
 
 u64 kvm_call_hyp(void *hypfn, ...);
+void force_vm_exit(const cpumask_t *mask);
 
 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		int exception_index);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 0caf7a59f6a1..a205e957d5c4 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -83,6 +83,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
 void free_boot_hyp_pgd(void);
 void free_hyp_pgds(void);
 
+void stage2_unmap_vm(struct kvm *kvm);
 int kvm_alloc_stage2_pgd(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm *kvm);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
@@ -136,6 +137,8 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 #define PTRS_PER_S2_PGD		(1 << PTRS_PER_S2_PGD_SHIFT)
 #define S2_PGD_ORDER		get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
 
+#define kvm_pgd_index(addr)	(((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
+
 /*
  * If we are concatenating first level stage-2 page tables, we would have less
  * than or equal to 16 pointers in the fake PGD, because that's what the
@@ -149,43 +152,6 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 #define KVM_PREALLOC_LEVEL	(0)
 #endif
 
-/**
- * kvm_prealloc_hwpgd - allocate inital table for VTTBR
- * @kvm:	The KVM struct pointer for the VM.
- * @pgd:	The kernel pseudo pgd
- *
- * When the kernel uses more levels of page tables than the guest, we allocate
- * a fake PGD and pre-populate it to point to the next-level page table, which
- * will be the real initial page table pointed to by the VTTBR.
- *
- * When KVM_PREALLOC_LEVEL==2, we allocate a single page for the PMD and
- * the kernel will use folded pud.  When KVM_PREALLOC_LEVEL==1, we
- * allocate 2 consecutive PUD pages.
- */
-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
-{
-	unsigned int i;
-	unsigned long hwpgd;
-
-	if (KVM_PREALLOC_LEVEL == 0)
-		return 0;
-
-	hwpgd = __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTRS_PER_S2_PGD_SHIFT);
-	if (!hwpgd)
-		return -ENOMEM;
-
-	for (i = 0; i < PTRS_PER_S2_PGD; i++) {
-		if (KVM_PREALLOC_LEVEL == 1)
-			pgd_populate(NULL, pgd + i,
-				     (pud_t *)hwpgd + i * PTRS_PER_PUD);
-		else if (KVM_PREALLOC_LEVEL == 2)
-			pud_populate(NULL, pud_offset(pgd, 0) + i,
-				     (pmd_t *)hwpgd + i * PTRS_PER_PMD);
-	}
-
-	return 0;
-}
-
 static inline void *kvm_get_hwpgd(struct kvm *kvm)
 {
 	pgd_t *pgd = kvm->arch.pgd;
@@ -202,12 +168,11 @@ static inline void *kvm_get_hwpgd(struct kvm *kvm)
 	return pmd_offset(pud, 0);
 }
 
-static inline void kvm_free_hwpgd(struct kvm *kvm)
+static inline unsigned int kvm_get_hwpgd_size(void)
 {
-	if (KVM_PREALLOC_LEVEL > 0) {
-		unsigned long hwpgd = (unsigned long)kvm_get_hwpgd(kvm);
-		free_pages(hwpgd, PTRS_PER_S2_PGD_SHIFT);
-	}
+	if (KVM_PREALLOC_LEVEL > 0)
+		return PTRS_PER_S2_PGD * PAGE_SIZE;
+	return PTRS_PER_S2_PGD * sizeof(pgd_t);
 }
 
 static inline bool kvm_page_empty(void *ptr)
@@ -242,20 +207,42 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 	return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
 }
 
-static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
-					     unsigned long size)
+static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
+					       unsigned long size,
+					       bool ipa_uncached)
 {
-	if (!vcpu_has_cache_enabled(vcpu))
-		kvm_flush_dcache_to_poc((void *)hva, size);
+	void *va = page_address(pfn_to_page(pfn));
+
+	if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
+		kvm_flush_dcache_to_poc(va, size);
 
 	if (!icache_is_aliasing()) {		/* PIPT */
-		flush_icache_range(hva, hva + size);
+		flush_icache_range((unsigned long)va,
+				   (unsigned long)va + size);
 	} else if (!icache_is_aivivt()) {	/* non ASID-tagged VIVT */
 		/* any kind of VIPT cache */
 		__flush_icache_all();
 	}
 }
 
+static inline void __kvm_flush_dcache_pte(pte_t pte)
+{
+	struct page *page = pte_page(pte);
+	kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE);
+}
+
+static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
+{
+	struct page *page = pmd_page(pmd);
+	kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE);
+}
+
+static inline void __kvm_flush_dcache_pud(pud_t pud)
+{
+	struct page *page = pud_page(pud);
+	kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
+}
+
 #define kvm_virt_to_phys(x)		__virt_to_phys((unsigned long)(x))
 
 void stage2_flush_vm(struct kvm *kvm);
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 8e38878c87c6..d9e9822efcee 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -179,8 +179,14 @@ struct kvm_arch_memory_slot {
 #define KVM_ARM_IRQ_CPU_IRQ		0
 #define KVM_ARM_IRQ_CPU_FIQ		1
 
-/* Highest supported SPI, from VGIC_NR_IRQS */
+/*
+ * This used to hold the highest supported SPI, but it is now obsolete
+ * and only here to provide source code level compatibility with older
+ * userland. The highest SPI number can be set via KVM_DEV_ARM_VGIC_GRP_NR_IRQS.
+ */
+#ifndef __KERNEL__
 #define KVM_ARM_IRQ_GIC_MAX		127
+#endif
 
 /* PSCI interface */
 #define KVM_PSCI_FN_BASE		0x95c1ba5e
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 76794692c20b..84d5959ff874 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
 	return 0;
 }
 
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index b72aa9f9215c..a767f6a4ce54 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -1014,6 +1014,7 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
 	 * Instead, we invalidate Stage-2 for this IPA, and the
 	 * whole of Stage-1. Weep...
 	 */
+	lsr	x1, x1, #12
 	tlbi	ipas2e1is, x1
 	/*
 	 * We have to ensure completion of the invalidation at Stage-2,
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 70a7816535cd..0b4326578985 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -90,7 +90,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 			if (!cpu_has_32bit_el1())
 				return -EINVAL;
 			cpu_reset = &default_regs_reset32;
-			vcpu->arch.hcr_el2 &= ~HCR_RW;
 		} else {
 			cpu_reset = &default_regs_reset;
 		}
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index df34a70caca1..6efbb52cb92e 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -67,8 +67,7 @@ static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
 
 		*ret_page = phys_to_page(phys);
 		ptr = (void *)val;
-		if (flags & __GFP_ZERO)
-			memset(ptr, 0, size);
+		memset(ptr, 0, size);
 	}
 
 	return ptr;
@@ -113,8 +112,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
 
 		*dma_handle = phys_to_dma(dev, page_to_phys(page));
 		addr = page_address(page);
-		if (flags & __GFP_ZERO)
-			memset(addr, 0, size);
+		memset(addr, 0, size);
 		return addr;
 	} else {
 		return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 023747bf4dd7..2de9d2e59d96 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -38,12 +38,6 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 }
 #endif
 
-struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
-			      int write)
-{
-	return ERR_PTR(-EINVAL);
-}
-
 int pmd_huge(pmd_t pmd)
 {
 	return !(pmd_val(pmd) & PMD_TABLE_BIT);
diff --git a/arch/c6x/kernel/time.c b/arch/c6x/kernel/time.c
index 356ee84cad95..04845aaf5985 100644
--- a/arch/c6x/kernel/time.c
+++ b/arch/c6x/kernel/time.c
@@ -49,7 +49,7 @@ u64 sched_clock(void)
 	return (tsc * sched_clock_multiplier) >> SCHED_CLOCK_SHIFT;
 }
 
-void time_init(void)
+void __init time_init(void)
 {
 	u64 tmp = (u64)NSEC_PER_SEC << SCHED_CLOCK_SHIFT;
 
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index 76069c18ee42..52b7604b5215 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -114,12 +114,6 @@ int pud_huge(pud_t pud)
 	return 0;
 }
 
-struct page *
-follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write)
-{
-	return NULL;
-}
-
 void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 			unsigned long addr, unsigned long end,
 			unsigned long floor, unsigned long ceiling)
diff --git a/arch/metag/mm/hugetlbpage.c b/arch/metag/mm/hugetlbpage.c
index 3c32075d2945..7ca80ac42ed5 100644
--- a/arch/metag/mm/hugetlbpage.c
+++ b/arch/metag/mm/hugetlbpage.c
@@ -94,12 +94,6 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 	return 0;
 }
 
-struct page *follow_huge_addr(struct mm_struct *mm,
-			      unsigned long address, int write)
-{
-	return ERR_PTR(-EINVAL);
-}
-
 int pmd_huge(pmd_t pmd)
 {
 	return pmd_page_shift(pmd) > PAGE_SHIFT;
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 9536ef912f59..155bb7f5ad28 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2048,7 +2048,7 @@ config MIPS_CMP
 
 config MIPS_CPS
 	bool "MIPS Coherent Processing System support"
-	depends on SYS_SUPPORTS_MIPS_CPS
+	depends on SYS_SUPPORTS_MIPS_CPS && !64BIT
 	select MIPS_CM
 	select MIPS_CPC
 	select MIPS_CPS_PM if HOTPLUG_CPU
diff --git a/arch/mips/bcm47xx/board.c b/arch/mips/bcm47xx/board.c
index b3ae068ca4fa..3fd369d74444 100644
--- a/arch/mips/bcm47xx/board.c
+++ b/arch/mips/bcm47xx/board.c
@@ -247,8 +247,8 @@ static __init const struct bcm47xx_board_type *bcm47xx_board_get_nvram(void)
 	}
 
 	if (bcm47xx_nvram_getenv("hardware_version", buf1, sizeof(buf1)) >= 0 &&
-	    bcm47xx_nvram_getenv("boardtype", buf2, sizeof(buf2)) >= 0) {
-		for (e2 = bcm47xx_board_list_boot_hw; e2->value1; e2++) {
+	    bcm47xx_nvram_getenv("boardnum", buf2, sizeof(buf2)) >= 0) {
+		for (e2 = bcm47xx_board_list_hw_version_num; e2->value1; e2++) {
 			if (!strstarts(buf1, e2->value1) &&
 			    !strcmp(buf2, e2->value2))
 				return &e2->board;
diff --git a/arch/mips/bcm63xx/prom.c b/arch/mips/bcm63xx/prom.c
index e1f27d653f60..7019e2967009 100644
--- a/arch/mips/bcm63xx/prom.c
+++ b/arch/mips/bcm63xx/prom.c
@@ -17,7 +17,6 @@
 #include <bcm63xx_cpu.h>
 #include <bcm63xx_io.h>
 #include <bcm63xx_regs.h>
-#include <bcm63xx_gpio.h>
 
 void __init prom_init(void)
 {
@@ -53,9 +52,6 @@ void __init prom_init(void)
 	reg &= ~mask;
 	bcm_perf_writel(reg, PERF_CKCTL_REG);
 
-	/* register gpiochip */
-	bcm63xx_gpio_init();
-
 	/* do low level board init */
 	board_prom_init();
 
diff --git a/arch/mips/bcm63xx/setup.c b/arch/mips/bcm63xx/setup.c
index 6660c7ddf87b..240fb4ffa55c 100644
--- a/arch/mips/bcm63xx/setup.c
+++ b/arch/mips/bcm63xx/setup.c
@@ -20,6 +20,7 @@
 #include <bcm63xx_cpu.h>
 #include <bcm63xx_regs.h>
 #include <bcm63xx_io.h>
+#include <bcm63xx_gpio.h>
 
 void bcm63xx_machine_halt(void)
 {
@@ -160,6 +161,9 @@ void __init plat_mem_setup(void)
 
 int __init bcm63xx_register_devices(void)
 {
+	/* register gpiochip */
+	bcm63xx_gpio_init();
+
 	return board_register_devices();
 }
 
diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c
index 02f244475207..c76a289b95b5 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -306,7 +306,7 @@ void __init plat_swiotlb_setup(void)
 		swiotlbsize = 64 * (1<<20);
 	}
 #endif
-#ifdef CONFIG_USB_OCTEON_OHCI
+#ifdef CONFIG_USB_OHCI_HCD_PLATFORM
 	/* OCTEON II ohci is only 32-bit. */
 	if (OCTEON_IS_MODEL(OCTEON_CN6XXX) && max_addr >= 0x100000000ul)
 		swiotlbsize = 64 * (1<<20);
diff --git a/arch/mips/include/asm/asm-eva.h b/arch/mips/include/asm/asm-eva.h
index e41c56e375b1..1e38f0e1ea3e 100644
--- a/arch/mips/include/asm/asm-eva.h
+++ b/arch/mips/include/asm/asm-eva.h
@@ -11,6 +11,36 @@
 #define __ASM_ASM_EVA_H
 
 #ifndef __ASSEMBLY__
+
+/* Kernel variants */
+
+#define kernel_cache(op, base)		"cache " op ", " base "\n"
+#define kernel_ll(reg, addr)		"ll " reg ", " addr "\n"
+#define kernel_sc(reg, addr)		"sc " reg ", " addr "\n"
+#define kernel_lw(reg, addr)		"lw " reg ", " addr "\n"
+#define kernel_lwl(reg, addr)		"lwl " reg ", " addr "\n"
+#define kernel_lwr(reg, addr)		"lwr " reg ", " addr "\n"
+#define kernel_lh(reg, addr)		"lh " reg ", " addr "\n"
+#define kernel_lb(reg, addr)		"lb " reg ", " addr "\n"
+#define kernel_lbu(reg, addr)		"lbu " reg ", " addr "\n"
+#define kernel_sw(reg, addr)		"sw " reg ", " addr "\n"
+#define kernel_swl(reg, addr)		"swl " reg ", " addr "\n"
+#define kernel_swr(reg, addr)		"swr " reg ", " addr "\n"
+#define kernel_sh(reg, addr)		"sh " reg ", " addr "\n"
+#define kernel_sb(reg, addr)		"sb " reg ", " addr "\n"
+
+#ifdef CONFIG_32BIT
+/*
+ * No 'sd' or 'ld' instructions in 32-bit but the code will
+ * do the correct thing
+ */
+#define kernel_sd(reg, addr)		user_sw(reg, addr)
+#define kernel_ld(reg, addr)		user_lw(reg, addr)
+#else
+#define kernel_sd(reg, addr)		"sd " reg", " addr "\n"
+#define kernel_ld(reg, addr)		"ld " reg", " addr "\n"
+#endif /* CONFIG_32BIT */
+
 #ifdef CONFIG_EVA
 
 #define __BUILD_EVA_INSN(insn, reg, addr)				\
@@ -41,37 +71,60 @@
 
 #else
 
-#define user_cache(op, base)		"cache " op ", " base "\n"
-#define user_ll(reg, addr)		"ll " reg ", " addr "\n"
-#define user_sc(reg, addr)		"sc " reg ", " addr "\n"
-#define user_lw(reg, addr)		"lw " reg ", " addr "\n"
-#define user_lwl(reg, addr)		"lwl " reg ", " addr "\n"
-#define user_lwr(reg, addr)		"lwr " reg ", " addr "\n"
-#define user_lh(reg, addr)		"lh " reg ", " addr "\n"
-#define user_lb(reg, addr)		"lb " reg ", " addr "\n"
-#define user_lbu(reg, addr)		"lbu " reg ", " addr "\n"
-#define user_sw(reg, addr)		"sw " reg ", " addr "\n"
-#define user_swl(reg, addr)		"swl " reg ", " addr "\n"
-#define user_swr(reg, addr)		"swr " reg ", " addr "\n"
-#define user_sh(reg, addr)		"sh " reg ", " addr "\n"
-#define user_sb(reg, addr)		"sb " reg ", " addr "\n"
+#define user_cache(op, base)		kernel_cache(op, base)
+#define user_ll(reg, addr)		kernel_ll(reg, addr)
+#define user_sc(reg, addr)		kernel_sc(reg, addr)
+#define user_lw(reg, addr)		kernel_lw(reg, addr)
+#define user_lwl(reg, addr)		kernel_lwl(reg, addr)
+#define user_lwr(reg, addr)		kernel_lwr(reg, addr)
+#define user_lh(reg, addr)		kernel_lh(reg, addr)
+#define user_lb(reg, addr)		kernel_lb(reg, addr)
+#define user_lbu(reg, addr)		kernel_lbu(reg, addr)
+#define user_sw(reg, addr)		kernel_sw(reg, addr)
+#define user_swl(reg, addr)		kernel_swl(reg, addr)
+#define user_swr(reg, addr)		kernel_swr(reg, addr)
+#define user_sh(reg, addr)		kernel_sh(reg, addr)
+#define user_sb(reg, addr)		kernel_sb(reg, addr)
 
 #ifdef CONFIG_32BIT
-/*
- * No 'sd' or 'ld' instructions in 32-bit but the code will
- * do the correct thing
- */
-#define user_sd(reg, addr)		user_sw(reg, addr)
-#define user_ld(reg, addr)		user_lw(reg, addr)
+#define user_sd(reg, addr)		kernel_sw(reg, addr)
+#define user_ld(reg, addr)		kernel_lw(reg, addr)
 #else
-#define user_sd(reg, addr)		"sd " reg", " addr "\n"
-#define user_ld(reg, addr)		"ld " reg", " addr "\n"
+#define user_sd(reg, addr)		kernel_sd(reg, addr)
+#define user_ld(reg, addr)		kernel_ld(reg, addr)
 #endif /* CONFIG_32BIT */
 
 #endif /* CONFIG_EVA */
 
 #else /* __ASSEMBLY__ */
 
+#define kernel_cache(op, base)		cache op, base
+#define kernel_ll(reg, addr)		ll reg, addr
+#define kernel_sc(reg, addr)		sc reg, addr
+#define kernel_lw(reg, addr)		lw reg, addr
+#define kernel_lwl(reg, addr)		lwl reg, addr
+#define kernel_lwr(reg, addr)		lwr reg, addr
+#define kernel_lh(reg, addr)		lh reg, addr
+#define kernel_lb(reg, addr)		lb reg, addr
+#define kernel_lbu(reg, addr)		lbu reg, addr
+#define kernel_sw(reg, addr)		sw reg, addr
+#define kernel_swl(reg, addr)		swl reg, addr
+#define kernel_swr(reg, addr)		swr reg, addr
+#define kernel_sh(reg, addr)		sh reg, addr
+#define kernel_sb(reg, addr)		sb reg, addr
+
+#ifdef CONFIG_32BIT
+/*
+ * No 'sd' or 'ld' instructions in 32-bit but the code will
+ * do the correct thing
+ */
+#define kernel_sd(reg, addr)		user_sw(reg, addr)
+#define kernel_ld(reg, addr)		user_lw(reg, addr)
+#else
+#define kernel_sd(reg, addr)		sd reg, addr
+#define kernel_ld(reg, addr)		ld reg, addr
+#endif /* CONFIG_32BIT */
+
 #ifdef CONFIG_EVA
 
 #define __BUILD_EVA_INSN(insn, reg, addr)			\
@@ -101,31 +154,27 @@
 #define user_sd(reg, addr)		user_sw(reg, addr)
 #else
 
-#define user_cache(op, base)		cache op, base
-#define user_ll(reg, addr)		ll reg, addr
-#define user_sc(reg, addr)		sc reg, addr
-#define user_lw(reg, addr)		lw reg, addr
-#define user_lwl(reg, addr)		lwl reg, addr
-#define user_lwr(reg, addr)		lwr reg, addr
-#define user_lh(reg, addr)		lh reg, addr
-#define user_lb(reg, addr)		lb reg, addr
-#define user_lbu(reg, addr)		lbu reg, addr
-#define user_sw(reg, addr)		sw reg, addr
-#define user_swl(reg, addr)		swl reg, addr
-#define user_swr(reg, addr)		swr reg, addr
-#define user_sh(reg, addr)		sh reg, addr
-#define user_sb(reg, addr)		sb reg, addr
+#define user_cache(op, base)		kernel_cache(op, base)
+#define user_ll(reg, addr)		kernel_ll(reg, addr)
+#define user_sc(reg, addr)		kernel_sc(reg, addr)
+#define user_lw(reg, addr)		kernel_lw(reg, addr)
+#define user_lwl(reg, addr)		kernel_lwl(reg, addr)
+#define user_lwr(reg, addr)		kernel_lwr(reg, addr)
+#define user_lh(reg, addr)		kernel_lh(reg, addr)
+#define user_lb(reg, addr)		kernel_lb(reg, addr)
+#define user_lbu(reg, addr)		kernel_lbu(reg, addr)
+#define user_sw(reg, addr)		kernel_sw(reg, addr)
+#define user_swl(reg, addr)		kernel_swl(reg, addr)
+#define user_swr(reg, addr)		kernel_swr(reg, addr)
+#define user_sh(reg, addr)		kernel_sh(reg, addr)
+#define user_sb(reg, addr)		kernel_sb(reg, addr)
 
 #ifdef CONFIG_32BIT
-/*
- * No 'sd' or 'ld' instructions in 32-bit but the code will
- * do the correct thing
- */
-#define user_sd(reg, addr)		user_sw(reg, addr)
-#define user_ld(reg, addr)		user_lw(reg, addr)
+#define user_sd(reg, addr)		kernel_sw(reg, addr)
+#define user_ld(reg, addr)		kernel_lw(reg, addr)
 #else
-#define user_sd(reg, addr)		sd reg, addr
-#define user_ld(reg, addr)		ld reg, addr
+#define user_sd(reg, addr)		kernel_sd(reg, addr)
+#define user_ld(reg, addr)		kernel_sd(reg, addr)
 #endif /* CONFIG_32BIT */
 
 #endif /* CONFIG_EVA */
diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h
index dd562414cd5e..99f71e87ce31 100644
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h
@@ -150,6 +150,7 @@ static inline void lose_fpu(int save)
 		}
 		disable_msa();
 		clear_thread_flag(TIF_USEDMSA);
+		__disable_fpu();
 	} else if (is_fpu_owner()) {
 		if (save)
 			_save_fp(current);
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index f2c249796ea8..4e3205a3bee2 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -321,6 +321,7 @@ enum mips_mmu_types {
 #define T_TRAP			13	/* Trap instruction */
 #define T_VCEI			14	/* Virtual coherency exception */
 #define T_FPE			15	/* Floating point exception */
+#define T_MSADIS		21	/* MSA disabled exception */
 #define T_WATCH			23	/* Watch address reference */
 #define T_VCED			31	/* Virtual coherency data */
 
@@ -577,6 +578,7 @@ struct kvm_mips_callbacks {
 	int (*handle_syscall)(struct kvm_vcpu *vcpu);
 	int (*handle_res_inst)(struct kvm_vcpu *vcpu);
 	int (*handle_break)(struct kvm_vcpu *vcpu);
+	int (*handle_msa_disabled)(struct kvm_vcpu *vcpu);
 	int (*vm_init)(struct kvm *kvm);
 	int (*vcpu_init)(struct kvm_vcpu *vcpu);
 	int (*vcpu_setup)(struct kvm_vcpu *vcpu);
diff --git a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
index fa1f3cfbae8d..d68e685cde60 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
@@ -50,7 +50,6 @@
 #define cpu_has_mips32r2	0
 #define cpu_has_mips64r1	0
 #define cpu_has_mips64r2	1
-#define cpu_has_mips_r2_exec_hazard 0
 #define cpu_has_dsp		0
 #define cpu_has_dsp2		0
 #define cpu_has_mipsmt		0
diff --git a/arch/mips/include/asm/octeon/pci-octeon.h b/arch/mips/include/asm/octeon/pci-octeon.h
index 64ba56a02843..1884609741a8 100644
--- a/arch/mips/include/asm/octeon/pci-octeon.h
+++ b/arch/mips/include/asm/octeon/pci-octeon.h
@@ -11,9 +11,6 @@
 
 #include <linux/pci.h>
 
-/* Some PCI cards require delays when accessing config space. */
-#define PCI_CONFIG_SPACE_DELAY 10000
-
 /*
  * The physical memory base mapped by BAR1.  256MB at the end of the
  * first 4GB.
diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index 4353d323f017..39d682937d52 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -10,6 +10,7 @@
 
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
+#include <asm/compiler.h>
 #include <asm/regdef.h>
 #include <asm/mipsregs.h>
 #include <asm/stackframe.h>
@@ -166,7 +167,7 @@ syscall_exit_work:
  * For C code use the inline version named instruction_hazard().
  */
 LEAF(mips_ihb)
-	.set	mips32r2
+	.set	MIPS_ISA_LEVEL_RAW
 	jr.hb	ra
 	nop
 	END(mips_ihb)
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index e6e16a1d4add..0854f17829f3 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -88,6 +88,12 @@ static void __init cps_smp_setup(void)
 
 	/* Make core 0 coherent with everything */
 	write_gcr_cl_coherence(0xff);
+
+#ifdef CONFIG_MIPS_MT_FPAFF
+	/* If we have an FPU, enroll ourselves in the FPU-full mask */
+	if (cpu_has_fpu)
+		cpu_set(0, mt_fpu_cpumask);
+#endif /* CONFIG_MIPS_MT_FPAFF */
 }
 
 static void __init cps_prepare_cpus(unsigned int max_cpus)
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index fb3e8dfd1ff6..838d3a6a5b7d 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -2176,6 +2176,7 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause,
 		case T_SYSCALL:
 		case T_BREAK:
 		case T_RES_INST:
+		case T_MSADIS:
 			break;
 
 		case T_COP_UNUSABLE:
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 270bbd41769e..39074fb83bad 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -1119,6 +1119,10 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		ret = kvm_mips_callbacks->handle_break(vcpu);
 		break;
 
+	case T_MSADIS:
+		ret = kvm_mips_callbacks->handle_msa_disabled(vcpu);
+		break;
+
 	default:
 		kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x  BadVaddr: %#lx Status: %#lx\n",
 			exccode, opc, kvm_get_inst(opc, vcpu), badvaddr,
diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c
index fd7257b70e65..4372cc86650c 100644
--- a/arch/mips/kvm/trap_emul.c
+++ b/arch/mips/kvm/trap_emul.c
@@ -330,6 +330,33 @@ static int kvm_trap_emul_handle_break(struct kvm_vcpu *vcpu)
 	return ret;
 }
 
+static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
+	unsigned long cause = vcpu->arch.host_cp0_cause;
+	enum emulation_result er = EMULATE_DONE;
+	int ret = RESUME_GUEST;
+
+	/* No MSA supported in guest, guest reserved instruction exception */
+	er = kvm_mips_emulate_ri_exc(cause, opc, run, vcpu);
+
+	switch (er) {
+	case EMULATE_DONE:
+		ret = RESUME_GUEST;
+		break;
+
+	case EMULATE_FAIL:
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		ret = RESUME_HOST;
+		break;
+
+	default:
+		BUG();
+	}
+	return ret;
+}
+
 static int kvm_trap_emul_vm_init(struct kvm *kvm)
 {
 	return 0;
@@ -470,6 +497,7 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
 	.handle_syscall = kvm_trap_emul_handle_syscall,
 	.handle_res_inst = kvm_trap_emul_handle_res_inst,
 	.handle_break = kvm_trap_emul_handle_break,
+	.handle_msa_disabled = kvm_trap_emul_handle_msa_disabled,
 
 	.vm_init = kvm_trap_emul_vm_init,
 	.vcpu_init = kvm_trap_emul_vcpu_init,
diff --git a/arch/mips/loongson/loongson-3/irq.c b/arch/mips/loongson/loongson-3/irq.c
index ca1c62af5188..8f5209aff01e 100644
--- a/arch/mips/loongson/loongson-3/irq.c
+++ b/arch/mips/loongson/loongson-3/irq.c
@@ -44,6 +44,7 @@ void mach_irq_dispatch(unsigned int pending)
 
 static struct irqaction cascade_irqaction = {
 	.handler = no_action,
+	.flags = IRQF_NO_SUSPEND,
 	.name = "cascade",
 };
 
diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c
index 4ec8ee10d371..06e0f421b41b 100644
--- a/arch/mips/mm/hugetlbpage.c
+++ b/arch/mips/mm/hugetlbpage.c
@@ -68,12 +68,6 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
 	return 0;
 }
 
-struct page *
-follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
-{
-	return ERR_PTR(-EINVAL);
-}
-
 int pmd_huge(pmd_t pmd)
 {
 	return (pmd_val(pmd) & _PAGE_HUGE) != 0;
@@ -83,15 +77,3 @@ int pud_huge(pud_t pud)
 {
 	return (pud_val(pud) & _PAGE_HUGE) != 0;
 }
-
-struct page *
-follow_huge_pmd(struct mm_struct *mm, unsigned long address,
-		pmd_t *pmd, int write)
-{
-	struct page *page;
-
-	page = pte_page(*(pte_t *)pmd);
-	if (page)
-		page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
-	return page;
-}
diff --git a/arch/mips/mti-malta/malta-memory.c b/arch/mips/mti-malta/malta-memory.c
index 8fddd2cdbff7..efe366d618b1 100644
--- a/arch/mips/mti-malta/malta-memory.c
+++ b/arch/mips/mti-malta/malta-memory.c
@@ -53,6 +53,12 @@ fw_memblock_t * __init fw_getmdesc(int eva)
 		pr_warn("memsize not set in YAMON, set to default (32Mb)\n");
 		physical_memsize = 0x02000000;
 	} else {
+		if (memsize > (256 << 20)) { /* memsize should be capped to 256M */
+			pr_warn("Unsupported memsize value (0x%lx) detected! "
+				"Using 0x10000000 (256M) instead\n",
+				memsize);
+			memsize = 256 << 20;
+		}
 		/* If ememsize is set, then set physical_memsize to that */
 		physical_memsize = ememsize ? : memsize;
 	}
diff --git a/arch/mips/netlogic/xlp/ahci-init-xlp2.c b/arch/mips/netlogic/xlp/ahci-init-xlp2.c
index c83dbf3689e2..7b066a44e679 100644
--- a/arch/mips/netlogic/xlp/ahci-init-xlp2.c
+++ b/arch/mips/netlogic/xlp/ahci-init-xlp2.c
@@ -203,6 +203,7 @@ static u8 read_phy_reg(u64 regbase, u32 addr, u32 physel)
 static void config_sata_phy(u64 regbase)
 {
 	u32 port, i, reg;
+	u8 val;
 
 	for (port = 0; port < 2; port++) {
 		for (i = 0, reg = RXCDRCALFOSC0; reg <= CALDUTY; reg++, i++)
@@ -210,6 +211,18 @@ static void config_sata_phy(u64 regbase)
 
 		for (i = 0, reg = RXDPIF; reg <= PPMDRIFTMAX_HI; reg++, i++)
 			write_phy_reg(regbase, reg, port, sata_phy_config2[i]);
+
+		/* Fix for PHY link up failures at lower temperatures */
+		write_phy_reg(regbase, 0x800F, port, 0x1f);
+
+		val = read_phy_reg(regbase, 0x0029, port);
+		write_phy_reg(regbase, 0x0029, port, val | (0x7 << 1));
+
+		val = read_phy_reg(regbase, 0x0056, port);
+		write_phy_reg(regbase, 0x0056, port, val & ~(1 << 3));
+
+		val = read_phy_reg(regbase, 0x0018, port);
+		write_phy_reg(regbase, 0x0018, port, val & ~(0x7 << 0));
 	}
 }
 
diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c
index 59cccd95688b..14d3351227ef 100644
--- a/arch/mips/pci/pci-octeon.c
+++ b/arch/mips/pci/pci-octeon.c
@@ -214,6 +214,8 @@ const char *octeon_get_pci_interrupts(void)
 		return "AAABAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
 	case CVMX_BOARD_TYPE_BBGW_REF:
 		return "AABCD";
+	case CVMX_BOARD_TYPE_CUST_DSR1000N:
+		return "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC";
 	case CVMX_BOARD_TYPE_THUNDER:
 	case CVMX_BOARD_TYPE_EBH3000:
 	default:
@@ -271,9 +273,6 @@ static int octeon_read_config(struct pci_bus *bus, unsigned int devfn,
 	pci_addr.s.func = devfn & 0x7;
 	pci_addr.s.reg = reg;
 
-#if PCI_CONFIG_SPACE_DELAY
-	udelay(PCI_CONFIG_SPACE_DELAY);
-#endif
 	switch (size) {
 	case 4:
 		*val = le32_to_cpu(cvmx_read64_uint32(pci_addr.u64));
@@ -308,9 +307,6 @@ static int octeon_write_config(struct pci_bus *bus, unsigned int devfn,
 	pci_addr.s.func = devfn & 0x7;
 	pci_addr.s.reg = reg;
 
-#if PCI_CONFIG_SPACE_DELAY
-	udelay(PCI_CONFIG_SPACE_DELAY);
-#endif
 	switch (size) {
 	case 4:
 		cvmx_write64_uint32(pci_addr.u64, cpu_to_le32(val));
diff --git a/arch/mips/pci/pcie-octeon.c b/arch/mips/pci/pcie-octeon.c
index 5e36c33e5543..38335af0a7f3 100644
--- a/arch/mips/pci/pcie-octeon.c
+++ b/arch/mips/pci/pcie-octeon.c
@@ -1762,14 +1762,6 @@ static int octeon_pcie_write_config(unsigned int pcie_port, struct pci_bus *bus,
 	default:
 		return PCIBIOS_FUNC_NOT_SUPPORTED;
 	}
-#if PCI_CONFIG_SPACE_DELAY
-	/*
-	 * Delay on writes so that devices have time to come up. Some
-	 * bridges need this to allow time for the secondary busses to
-	 * work
-	 */
-	udelay(PCI_CONFIG_SPACE_DELAY);
-#endif
 	return PCIBIOS_SUCCESSFUL;
 }
 
diff --git a/arch/mips/power/hibernate.S b/arch/mips/power/hibernate.S
index 32a7c828f073..e7567c8a9e79 100644
--- a/arch/mips/power/hibernate.S
+++ b/arch/mips/power/hibernate.S
@@ -30,6 +30,8 @@ LEAF(swsusp_arch_suspend)
 END(swsusp_arch_suspend)
 
 LEAF(swsusp_arch_resume)
+	/* Avoid TLB mismatch during and after kernel resume */
+	jal local_flush_tlb_all
 	PTR_L t0, restore_pblist
 0:
 	PTR_L t1, PBE_ADDRESS(t0)   /* source */
@@ -43,7 +45,6 @@ LEAF(swsusp_arch_resume)
 	bne t1, t3, 1b
 	PTR_L t0, PBE_NEXT(t0)
 	bnez t0, 0b
-	jal local_flush_tlb_all /* Avoid TLB mismatch after kernel resume */
 	PTR_LA t0, saved_regs
 	PTR_L ra, PT_R31(t0)
 	PTR_L sp, PT_R29(t0)
diff --git a/arch/mips/ralink/Kconfig b/arch/mips/ralink/Kconfig
index 77e8a9620e18..d50914d5191e 100644
--- a/arch/mips/ralink/Kconfig
+++ b/arch/mips/ralink/Kconfig
@@ -7,6 +7,11 @@ config CLKEVT_RT3352
 	select CLKSRC_OF
 	select CLKSRC_MMIO
 
+config RALINK_ILL_ACC
+	bool
+	depends on SOC_RT305X
+	default y
+
 choice
 	prompt "Ralink SoC selection"
 	default SOC_RT305X
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 40198d50b4c2..8005b79ecbcf 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -61,12 +61,22 @@ struct cache_type_info {
 };
 
 /* These are used to index the cache_type_info array. */
-#define CACHE_TYPE_UNIFIED     0
-#define CACHE_TYPE_INSTRUCTION 1
-#define CACHE_TYPE_DATA        2
+#define CACHE_TYPE_UNIFIED     0 /* cache-size, cache-block-size, etc. */
+#define CACHE_TYPE_UNIFIED_D   1 /* d-cache-size, d-cache-block-size, etc */
+#define CACHE_TYPE_INSTRUCTION 2
+#define CACHE_TYPE_DATA        3
 
 static const struct cache_type_info cache_type_info[] = {
 	{
+		/* Embedded systems that use cache-size, cache-block-size,
+		 * etc. for the Unified (typically L2) cache. */
+		.name            = "Unified",
+		.size_prop       = "cache-size",
+		.line_size_props = { "cache-line-size",
+				     "cache-block-size", },
+		.nr_sets_prop    = "cache-sets",
+	},
+	{
 		/* PowerPC Processor binding says the [di]-cache-*
 		 * must be equal on unified caches, so just use
 		 * d-cache properties. */
@@ -293,7 +303,8 @@ static struct cache *cache_find_first_sibling(struct cache *cache)
 {
 	struct cache *iter;
 
-	if (cache->type == CACHE_TYPE_UNIFIED)
+	if (cache->type == CACHE_TYPE_UNIFIED ||
+	    cache->type == CACHE_TYPE_UNIFIED_D)
 		return cache;
 
 	list_for_each_entry(iter, &cache_list, list)
@@ -324,16 +335,29 @@ static bool cache_node_is_unified(const struct device_node *np)
 	return of_get_property(np, "cache-unified", NULL);
 }
 
-static struct cache *cache_do_one_devnode_unified(struct device_node *node,
-						  int level)
+/*
+ * Unified caches can have two different sets of tags.  Most embedded
+ * use cache-size, etc. for the unified cache size, but open firmware systems
+ * use d-cache-size, etc.   Check on initialization for which type we have, and
+ * return the appropriate structure type.  Assume it's embedded if it isn't
+ * open firmware.  If it's yet a 3rd type, then there will be missing entries
+ * in /sys/devices/system/cpu/cpu0/cache/index2/, and this code will need
+ * to be extended further.
+ */
+static int cache_is_unified_d(const struct device_node *np)
 {
-	struct cache *cache;
+	return of_get_property(np,
+		cache_type_info[CACHE_TYPE_UNIFIED_D].size_prop, NULL) ?
+		CACHE_TYPE_UNIFIED_D : CACHE_TYPE_UNIFIED;
+}
 
+/*
+ */
+static struct cache *cache_do_one_devnode_unified(struct device_node *node, int level)
+{
 	pr_debug("creating L%d ucache for %s\n", level, node->full_name);
 
-	cache = new_cache(CACHE_TYPE_UNIFIED, level, node);
-
-	return cache;
+	return new_cache(cache_is_unified_d(node), level, node);
 }
 
 static struct cache *cache_do_one_devnode_split(struct device_node *node,
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 6a4a5fcb9730..17e83a043bba 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -704,6 +704,14 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 	return NULL;
 }
 
+struct page *
+follow_huge_pud(struct mm_struct *mm, unsigned long address,
+		pud_t *pud, int write)
+{
+	BUG();
+	return NULL;
+}
+
 static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
 				      unsigned long sz)
 {
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index 2396dda282cd..ead55351b254 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -243,7 +243,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry,
 	sp = regs->gpr[1];
 	perf_callchain_store(entry, next_ip);
 
-	for (;;) {
+	while (entry->nr < PERF_MAX_STACK_DEPTH) {
 		fp = (unsigned long __user *) sp;
 		if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
 			return;
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 8a106b4172e0..28e558d3316d 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -163,7 +163,7 @@ static unsigned int iic_get_irq(void)
 
 void iic_setup_cpu(void)
 {
-	out_be64(&__get_cpu_var(cpu_iic).regs->prio, 0xff);
+	out_be64(&this_cpu_ptr(&cpu_iic)->regs->prio, 0xff);
 }
 
 u8 iic_get_target_id(int cpu)
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 2b90ff8a93be..59ef76c5f4f4 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -197,7 +197,7 @@ static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
 
 	io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset);
 
-	for (i = 0; i < npages; i++, uaddr += tbl->it_page_shift)
+	for (i = 0; i < npages; i++, uaddr += (1 << tbl->it_page_shift))
 		io_pte[i] = base_pte | (__pa(uaddr) & CBE_IOPTE_RPN_Mask);
 
 	mb();
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 3f596706a5b7..86a7256d84e5 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1645,7 +1645,8 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
 				region.start += phb->ioda.io_segsize;
 				index++;
 			}
-		} else if (res->flags & IORESOURCE_MEM) {
+		} else if ((res->flags & IORESOURCE_MEM) &&
+			   !pnv_pci_is_mem_pref_64(res->flags)) {
 			region.start = res->start -
 				       hose->mem_offset[0] -
 				       phb->ioda.m32_pci_base;
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
index 1c4c5accd220..d3236c9e226b 100644
--- a/arch/s390/kernel/suspend.c
+++ b/arch/s390/kernel/suspend.c
@@ -138,6 +138,8 @@ int pfn_is_nosave(unsigned long pfn)
 {
 	unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin));
 	unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end));
+	unsigned long eshared_pfn = PFN_DOWN(__pa(&_eshared)) - 1;
+	unsigned long stext_pfn = PFN_DOWN(__pa(&_stext));
 
 	/* Always save lowcore pages (LC protection might be enabled). */
 	if (pfn <= LC_PAGES)
@@ -145,6 +147,8 @@ int pfn_is_nosave(unsigned long pfn)
 	if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn)
 		return 1;
 	/* Skip memory holes and read-only pages (NSS, DCSS, ...). */
+	if (pfn >= stext_pfn && pfn <= eshared_pfn)
+		return ipl_info.type == IPL_TYPE_NSS ? 1 : 0;
 	if (tprot(PFN_PHYS(pfn)))
 		return 1;
 	return 0;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 29e2e5aa2111..cd6344d334cb 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -16,6 +16,7 @@
 #include <linux/mmu_context.h>
 #include <linux/signal.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 #include <asm/asm-offsets.h>
 #include <asm/uaccess.h>
 #include "kvm-s390.h"
@@ -784,7 +785,6 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 
 	if ((!schid && !cr6) || (schid && cr6))
 		return NULL;
-	mutex_lock(&kvm->lock);
 	fi = &kvm->arch.float_int;
 	spin_lock(&fi->lock);
 	inti = NULL;
@@ -812,7 +812,6 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 	if (list_empty(&fi->list))
 		atomic_set(&fi->active, 0);
 	spin_unlock(&fi->lock);
-	mutex_unlock(&kvm->lock);
 	return inti;
 }
 
@@ -825,7 +824,6 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 	int sigcpu;
 	int rc = 0;
 
-	mutex_lock(&kvm->lock);
 	fi = &kvm->arch.float_int;
 	spin_lock(&fi->lock);
 	if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) {
@@ -868,7 +866,6 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 	kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
 unlock_fi:
 	spin_unlock(&fi->lock);
-	mutex_unlock(&kvm->lock);
 	return rc;
 }
 
@@ -930,10 +927,10 @@ int kvm_s390_inject_vm(struct kvm *kvm,
 	return rc;
 }
 
-void kvm_s390_reinject_io_int(struct kvm *kvm,
+int kvm_s390_reinject_io_int(struct kvm *kvm,
 			      struct kvm_s390_interrupt_info *inti)
 {
-	__inject_vm(kvm, inti);
+	return __inject_vm(kvm, inti);
 }
 
 int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
@@ -1029,7 +1026,6 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm)
 	struct kvm_s390_float_interrupt *fi;
 	struct kvm_s390_interrupt_info	*n, *inti = NULL;
 
-	mutex_lock(&kvm->lock);
 	fi = &kvm->arch.float_int;
 	spin_lock(&fi->lock);
 	list_for_each_entry_safe(inti, n, &fi->list, list) {
@@ -1039,66 +1035,68 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm)
 	fi->irq_count = 0;
 	atomic_set(&fi->active, 0);
 	spin_unlock(&fi->lock);
-	mutex_unlock(&kvm->lock);
 }
 
-static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti,
-				   u8 *addr)
+static void inti_to_irq(struct kvm_s390_interrupt_info *inti,
+		       struct kvm_s390_irq *irq)
 {
-	struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
-	struct kvm_s390_irq irq = {0};
-
-	irq.type = inti->type;
+	irq->type = inti->type;
 	switch (inti->type) {
 	case KVM_S390_INT_PFAULT_INIT:
 	case KVM_S390_INT_PFAULT_DONE:
 	case KVM_S390_INT_VIRTIO:
 	case KVM_S390_INT_SERVICE:
-		irq.u.ext = inti->ext;
+		irq->u.ext = inti->ext;
 		break;
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-		irq.u.io = inti->io;
+		irq->u.io = inti->io;
 		break;
 	case KVM_S390_MCHK:
-		irq.u.mchk = inti->mchk;
+		irq->u.mchk = inti->mchk;
 		break;
-	default:
-		return -EINVAL;
 	}
-
-	if (copy_to_user(uptr, &irq, sizeof(irq)))
-		return -EFAULT;
-
-	return 0;
 }
 
-static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len)
+static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
 {
 	struct kvm_s390_interrupt_info *inti;
 	struct kvm_s390_float_interrupt *fi;
+	struct kvm_s390_irq *buf;
+	int max_irqs;
 	int ret = 0;
 	int n = 0;
 
-	mutex_lock(&kvm->lock);
+	if (len > KVM_S390_FLIC_MAX_BUFFER || len == 0)
+		return -EINVAL;
+
+	/*
+	 * We are already using -ENOMEM to signal
+	 * userspace it may retry with a bigger buffer,
+	 * so we need to use something else for this case
+	 */
+	buf = vzalloc(len);
+	if (!buf)
+		return -ENOBUFS;
+
+	max_irqs = len / sizeof(struct kvm_s390_irq);
+
 	fi = &kvm->arch.float_int;
 	spin_lock(&fi->lock);
-
 	list_for_each_entry(inti, &fi->list, list) {
-		if (len < sizeof(struct kvm_s390_irq)) {
+		if (n == max_irqs) {
 			/* signal userspace to try again */
 			ret = -ENOMEM;
 			break;
 		}
-		ret = copy_irq_to_user(inti, buf);
-		if (ret)
-			break;
-		buf += sizeof(struct kvm_s390_irq);
-		len -= sizeof(struct kvm_s390_irq);
+		inti_to_irq(inti, &buf[n]);
 		n++;
 	}
-
 	spin_unlock(&fi->lock);
-	mutex_unlock(&kvm->lock);
+	if (!ret && n > 0) {
+		if (copy_to_user(usrbuf, buf, sizeof(struct kvm_s390_irq) * n))
+			ret = -EFAULT;
+	}
+	vfree(buf);
 
 	return ret < 0 ? ret : n;
 }
@@ -1109,7 +1107,7 @@ static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 
 	switch (attr->group) {
 	case KVM_DEV_FLIC_GET_ALL_IRQS:
-		r = get_all_floating_irqs(dev->kvm, (u8 *) attr->addr,
+		r = get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr,
 					  attr->attr);
 		break;
 	default:
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 244d02303182..4c173474ecfe 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -148,8 +148,8 @@ int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 						    u64 cr6, u64 schid);
-void kvm_s390_reinject_io_int(struct kvm *kvm,
-			      struct kvm_s390_interrupt_info *inti);
+int kvm_s390_reinject_io_int(struct kvm *kvm,
+			     struct kvm_s390_interrupt_info *inti);
 int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
 
 /* implemented in priv.c */
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 9c565b6b4ccb..a30102c4d707 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -228,18 +228,19 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
 	struct kvm_s390_interrupt_info *inti;
 	unsigned long len;
 	u32 tpi_data[3];
-	int cc, rc;
+	int rc;
 	u64 addr;
 
-	rc = 0;
 	addr = kvm_s390_get_base_disp_s(vcpu);
 	if (addr & 3)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-	cc = 0;
+
 	inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0);
-	if (!inti)
-		goto no_interrupt;
-	cc = 1;
+	if (!inti) {
+		kvm_s390_set_psw_cc(vcpu, 0);
+		return 0;
+	}
+
 	tpi_data[0] = inti->io.subchannel_id << 16 | inti->io.subchannel_nr;
 	tpi_data[1] = inti->io.io_int_parm;
 	tpi_data[2] = inti->io.io_int_word;
@@ -250,30 +251,38 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
 		 */
 		len = sizeof(tpi_data) - 4;
 		rc = write_guest(vcpu, addr, &tpi_data, len);
-		if (rc)
-			return kvm_s390_inject_prog_cond(vcpu, rc);
+		if (rc) {
+			rc = kvm_s390_inject_prog_cond(vcpu, rc);
+			goto reinject_interrupt;
+		}
 	} else {
 		/*
 		 * Store the three-word I/O interruption code into
 		 * the appropriate lowcore area.
 		 */
 		len = sizeof(tpi_data);
-		if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len))
+		if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len)) {
+			/* failed writes to the low core are not recoverable */
 			rc = -EFAULT;
+			goto reinject_interrupt;
+		}
 	}
+
+	/* irq was successfully handed to the guest */
+	kfree(inti);
+	kvm_s390_set_psw_cc(vcpu, 1);
+	return 0;
+reinject_interrupt:
 	/*
 	 * If we encounter a problem storing the interruption code, the
 	 * instruction is suppressed from the guest's view: reinject the
 	 * interrupt.
 	 */
-	if (!rc)
+	if (kvm_s390_reinject_io_int(vcpu->kvm, inti)) {
 		kfree(inti);
-	else
-		kvm_s390_reinject_io_int(vcpu->kvm, inti);
-no_interrupt:
-	/* Set condition code and we're done. */
-	if (!rc)
-		kvm_s390_set_psw_cc(vcpu, cc);
+		rc = -EFAULT;
+	}
+	/* don't set the cc, a pgm irq was injected or we drop to user space */
 	return rc ? -EFAULT : 0;
 }
 
@@ -461,6 +470,7 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
 	for (n = mem->count - 1; n > 0 ; n--)
 		memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0]));
 
+	memset(&mem->vm[0], 0, sizeof(mem->vm[0]));
 	mem->vm[0].cpus_total = cpus;
 	mem->vm[0].cpus_configured = cpus;
 	mem->vm[0].cpus_standby = 0;
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 3c80d2e38f03..210ffede0153 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -192,12 +192,6 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 	return 0;
 }
 
-struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
-			      int write)
-{
-	return ERR_PTR(-EINVAL);
-}
-
 int pmd_huge(pmd_t pmd)
 {
 	if (!MACHINE_HAS_HPAGE)
@@ -210,17 +204,3 @@ int pud_huge(pud_t pud)
 {
 	return 0;
 }
-
-struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
-			     pmd_t *pmdp, int write)
-{
-	struct page *page;
-
-	if (!MACHINE_HAS_HPAGE)
-		return NULL;
-
-	page = pmd_page(*pmdp);
-	if (page)
-		page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
-	return page;
-}
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index d7762349ea48..534bc978af8a 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -67,12 +67,6 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 	return 0;
 }
 
-struct page *follow_huge_addr(struct mm_struct *mm,
-			      unsigned long address, int write)
-{
-	return ERR_PTR(-EINVAL);
-}
-
 int pmd_huge(pmd_t pmd)
 {
 	return 0;
@@ -82,9 +76,3 @@ int pud_huge(pud_t pud)
 {
 	return 0;
 }
-
-struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
-			     pmd_t *pmd, int write)
-{
-	return NULL;
-}
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index d329537739c6..4242eab12e10 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -215,12 +215,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 	return entry;
 }
 
-struct page *follow_huge_addr(struct mm_struct *mm,
-			      unsigned long address, int write)
-{
-	return ERR_PTR(-EINVAL);
-}
-
 int pmd_huge(pmd_t pmd)
 {
 	return 0;
@@ -230,9 +224,3 @@ int pud_huge(pud_t pud)
 {
 	return 0;
 }
-
-struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
-			     pmd_t *pmd, int write)
-{
-	return NULL;
-}
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
index e514899e1100..8a00c7b7b862 100644
--- a/arch/tile/mm/hugetlbpage.c
+++ b/arch/tile/mm/hugetlbpage.c
@@ -150,12 +150,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	return NULL;
 }
 
-struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
-			      int write)
-{
-	return ERR_PTR(-EINVAL);
-}
-
 int pmd_huge(pmd_t pmd)
 {
 	return !!(pmd_val(pmd) & _PAGE_HUGE_PAGE);
@@ -166,28 +160,6 @@ int pud_huge(pud_t pud)
 	return !!(pud_val(pud) & _PAGE_HUGE_PAGE);
 }
 
-struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
-			     pmd_t *pmd, int write)
-{
-	struct page *page;
-
-	page = pte_page(*(pte_t *)pmd);
-	if (page)
-		page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
-	return page;
-}
-
-struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
-			     pud_t *pud, int write)
-{
-	struct page *page;
-
-	page = pte_page(*(pte_t *)pud);
-	if (page)
-		page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
-	return page;
-}
-
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 {
 	return 0;
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index a1410db38a1a..653dfa7662e1 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -30,6 +30,14 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
 		     :: "a" (eax), "c" (ecx));
 }
 
+static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
+{
+	trace_hardirqs_on();
+	/* "mwait %eax, %ecx;" */
+	asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
+		     :: "a" (eax), "c" (ecx));
+}
+
 /*
  * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
  * which can obviate IPI to trigger checking of need_resched.
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e127ddaa2d5a..6ad8a6396b75 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -24,6 +24,7 @@
 #include <asm/syscalls.h>
 #include <asm/idle.h>
 #include <asm/uaccess.h>
+#include <asm/mwait.h>
 #include <asm/i387.h>
 #include <asm/fpu-internal.h>
 #include <asm/debugreg.h>
@@ -398,6 +399,53 @@ static void amd_e400_idle(void)
 		default_idle();
 }
 
+/*
+ * Intel Core2 and older machines prefer MWAIT over HALT for C1.
+ * We can't rely on cpuidle installing MWAIT, because it will not load
+ * on systems that support only C1 -- so the boot default must be MWAIT.
+ *
+ * Some AMD machines are the opposite, they depend on using HALT.
+ *
+ * So for default C1, which is used during boot until cpuidle loads,
+ * use MWAIT-C1 on Intel HW that has it, else use HALT.
+ */
+static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
+{
+	if (c->x86_vendor != X86_VENDOR_INTEL)
+		return 0;
+
+	if (!cpu_has(c, X86_FEATURE_MWAIT))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * MONITOR/MWAIT with no hints, used for default default C1 state.
+ * This invokes MWAIT with interrutps enabled and no flags,
+ * which is backwards compatible with the original MWAIT implementation.
+ */
+
+static void mwait_idle(void)
+{
+	if (!current_set_polling_and_test()) {
+		if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
+			smp_mb(); /* quirk */
+			clflush((void *)&current_thread_info()->flags);
+			smp_mb(); /* quirk */
+		}
+
+		__monitor((void *)&current_thread_info()->flags, 0, 0);
+		if (!need_resched())
+			__sti_mwait(0, 0);
+		else
+			local_irq_enable();
+	} else {
+		local_irq_enable();
+	}
+	__current_clr_polling();
+}
+
 void select_idle_routine(const struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
@@ -411,6 +459,9 @@ void select_idle_routine(const struct cpuinfo_x86 *c)
 		/* E400: APIC timer interrupt does not wake up CPU from C1e */
 		pr_info("using AMD E400 aware idle routine\n");
 		x86_idle = amd_e400_idle;
+	} else if (prefer_mwait_c1_over_halt(c)) {
+		pr_info("using mwait in idle threads\n");
+		x86_idle = mwait_idle;
 	} else
 		x86_idle = default_idle;
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ed7039465f16..47843b04d60f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2381,8 +2381,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 
 	if (enable_ept) {
 		/* nested EPT: emulate EPT also to L1 */
-		nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT |
-			SECONDARY_EXEC_UNRESTRICTED_GUEST;
+		nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT;
 		nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
 			 VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
 			 VMX_EPT_INVEPT_BIT;
@@ -2396,6 +2395,10 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 	} else
 		nested_vmx_ept_caps = 0;
 
+	if (enable_unrestricted_guest)
+		nested_vmx_secondary_ctls_high |=
+			SECONDARY_EXEC_UNRESTRICTED_GUEST;
+
 	/* miscellaneous data */
 	rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
 	nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a38dd816015b..5369ec6a8094 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5779,7 +5779,6 @@ int kvm_arch_init(void *opaque)
 	kvm_set_mmio_spte_mask();
 
 	kvm_x86_ops = ops;
-	kvm_init_msr_list();
 
 	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
 			PT_DIRTY_MASK, PT64_NX_MASK, 0);
@@ -7210,7 +7209,14 @@ void kvm_arch_hardware_disable(void)
 
 int kvm_arch_hardware_setup(void)
 {
-	return kvm_x86_ops->hardware_setup();
+	int r;
+
+	r = kvm_x86_ops->hardware_setup();
+	if (r != 0)
+		return r;
+
+	kvm_init_msr_list();
+	return 0;
 }
 
 void kvm_arch_hardware_unsetup(void)
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 006cc914994b..9161f764121e 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -52,20 +52,8 @@ int pud_huge(pud_t pud)
 	return 0;
 }
 
-struct page *
-follow_huge_pmd(struct mm_struct *mm, unsigned long address,
-		pmd_t *pmd, int write)
-{
-	return NULL;
-}
 #else
 
-struct page *
-follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
-{
-	return ERR_PTR(-EINVAL);
-}
-
 /*
  * pmd_huge() returns 1 if @pmd is hugetlb related entry, that is normal
  * hugetlb entry or non-present (migration or hwpoisoned) hugetlb entry.
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 81f57e8c8f1b..73d328f480e5 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -406,6 +406,36 @@ source "drivers/pcmcia/Kconfig"
 
 source "drivers/pci/hotplug/Kconfig"
 
+config XTFPGA_LCD
+	bool "Enable XTFPGA LCD driver"
+	depends on XTENSA_PLATFORM_XTFPGA
+	default n
+	help
+	  There's a 2x16 LCD on most of XTFPGA boards, kernel may output
+	  progress messages there during bootup/shutdown. It may be useful
+	  during board bringup.
+
+	  If unsure, say N.
+
+config XTFPGA_LCD_BASE_ADDR
+	hex "XTFPGA LCD base address"
+	depends on XTFPGA_LCD
+	default "0x0d0c0000"
+	help
+	  Base address of the LCD controller inside KIO region.
+	  Different boards from XTFPGA family have LCD controller at different
+	  addresses. Please consult prototyping user guide for your board for
+	  the correct address. Wrong address here may lead to hardware lockup.
+
+config XTFPGA_LCD_8BIT_ACCESS
+	bool "Use 8-bit access to XTFPGA LCD"
+	depends on XTFPGA_LCD
+	default n
+	help
+	  LCD may be connected with 4- or 8-bit interface, 8-bit access may
+	  only be used with 8-bit interface. Please consult prototyping user
+	  guide for your board for the correct interface width.
+
 endmenu
 
 menu "Executable file formats"
diff --git a/arch/xtensa/include/uapi/asm/unistd.h b/arch/xtensa/include/uapi/asm/unistd.h
index db5bb72e2f4e..62d84657c60b 100644
--- a/arch/xtensa/include/uapi/asm/unistd.h
+++ b/arch/xtensa/include/uapi/asm/unistd.h
@@ -715,7 +715,7 @@ __SYSCALL(323, sys_process_vm_writev, 6)
 __SYSCALL(324, sys_name_to_handle_at, 5)
 #define __NR_open_by_handle_at			325
 __SYSCALL(325, sys_open_by_handle_at, 3)
-#define __NR_sync_file_range			326
+#define __NR_sync_file_range2			326
 __SYSCALL(326, sys_sync_file_range2, 6)
 #define __NR_perf_event_open			327
 __SYSCALL(327, sys_perf_event_open, 5)
diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c
index d05f8feeb8d7..17b1ef3232e4 100644
--- a/arch/xtensa/platforms/iss/network.c
+++ b/arch/xtensa/platforms/iss/network.c
@@ -349,8 +349,8 @@ static void iss_net_timer(unsigned long priv)
 {
 	struct iss_net_private *lp = (struct iss_net_private *)priv;
 
-	spin_lock(&lp->lock);
 	iss_net_poll();
+	spin_lock(&lp->lock);
 	mod_timer(&lp->timer, jiffies + lp->timer_val);
 	spin_unlock(&lp->lock);
 }
@@ -361,7 +361,7 @@ static int iss_net_open(struct net_device *dev)
 	struct iss_net_private *lp = netdev_priv(dev);
 	int err;
 
-	spin_lock(&lp->lock);
+	spin_lock_bh(&lp->lock);
 
 	err = lp->tp.open(lp);
 	if (err < 0)
@@ -376,9 +376,11 @@ static int iss_net_open(struct net_device *dev)
 	while ((err = iss_net_rx(dev)) > 0)
 		;
 
-	spin_lock(&opened_lock);
+	spin_unlock_bh(&lp->lock);
+	spin_lock_bh(&opened_lock);
 	list_add(&lp->opened_list, &opened);
-	spin_unlock(&opened_lock);
+	spin_unlock_bh(&opened_lock);
+	spin_lock_bh(&lp->lock);
 
 	init_timer(&lp->timer);
 	lp->timer_val = ISS_NET_TIMER_VALUE;
@@ -387,7 +389,7 @@ static int iss_net_open(struct net_device *dev)
 	mod_timer(&lp->timer, jiffies + lp->timer_val);
 
 out:
-	spin_unlock(&lp->lock);
+	spin_unlock_bh(&lp->lock);
 	return err;
 }
 
@@ -395,7 +397,7 @@ static int iss_net_close(struct net_device *dev)
 {
 	struct iss_net_private *lp = netdev_priv(dev);
 	netif_stop_queue(dev);
-	spin_lock(&lp->lock);
+	spin_lock_bh(&lp->lock);
 
 	spin_lock(&opened_lock);
 	list_del(&opened);
@@ -405,18 +407,17 @@ static int iss_net_close(struct net_device *dev)
 
 	lp->tp.close(lp);
 
-	spin_unlock(&lp->lock);
+	spin_unlock_bh(&lp->lock);
 	return 0;
 }
 
 static int iss_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct iss_net_private *lp = netdev_priv(dev);
-	unsigned long flags;
 	int len;
 
 	netif_stop_queue(dev);
-	spin_lock_irqsave(&lp->lock, flags);
+	spin_lock_bh(&lp->lock);
 
 	len = lp->tp.write(lp, &skb);
 
@@ -438,7 +439,7 @@ static int iss_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		pr_err("%s: %s failed(%d)\n", dev->name, __func__, len);
 	}
 
-	spin_unlock_irqrestore(&lp->lock, flags);
+	spin_unlock_bh(&lp->lock);
 
 	dev_kfree_skb(skb);
 	return NETDEV_TX_OK;
@@ -466,9 +467,9 @@ static int iss_net_set_mac(struct net_device *dev, void *addr)
 
 	if (!is_valid_ether_addr(hwaddr->sa_data))
 		return -EADDRNOTAVAIL;
-	spin_lock(&lp->lock);
+	spin_lock_bh(&lp->lock);
 	memcpy(dev->dev_addr, hwaddr->sa_data, ETH_ALEN);
-	spin_unlock(&lp->lock);
+	spin_unlock_bh(&lp->lock);
 	return 0;
 }
 
@@ -520,11 +521,11 @@ static int iss_net_configure(int index, char *init)
 	*lp = (struct iss_net_private) {
 		.device_list		= LIST_HEAD_INIT(lp->device_list),
 		.opened_list		= LIST_HEAD_INIT(lp->opened_list),
-		.lock			= __SPIN_LOCK_UNLOCKED(lp.lock),
 		.dev			= dev,
 		.index			= index,
-		};
+	};
 
+	spin_lock_init(&lp->lock);
 	/*
 	 * If this name ends up conflicting with an existing registered
 	 * netdevice, that is OK, register_netdev{,ice}() will notice this
diff --git a/arch/xtensa/platforms/xtfpga/Makefile b/arch/xtensa/platforms/xtfpga/Makefile
index b9ae206340cd..7839d38b2337 100644
--- a/arch/xtensa/platforms/xtfpga/Makefile
+++ b/arch/xtensa/platforms/xtfpga/Makefile
@@ -6,4 +6,5 @@
 #
 # Note 2! The CFLAGS definitions are in the main makefile...
 
-obj-y			= setup.o lcd.o
+obj-y			+= setup.o
+obj-$(CONFIG_XTFPGA_LCD) += lcd.o
diff --git a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
index aeb316b7ff88..e8cc86fbba09 100644
--- a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
+++ b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h
@@ -40,9 +40,6 @@
 
 /* UART */
 #define DUART16552_PADDR	(XCHAL_KIO_PADDR + 0x0D050020)
-/* LCD instruction and data addresses. */
-#define LCD_INSTR_ADDR		((char *)IOADDR(0x0D040000))
-#define LCD_DATA_ADDR		((char *)IOADDR(0x0D040004))
 
 /* Misc. */
 #define XTFPGA_FPGAREGS_VADDR	IOADDR(0x0D020000)
diff --git a/arch/xtensa/platforms/xtfpga/include/platform/lcd.h b/arch/xtensa/platforms/xtfpga/include/platform/lcd.h
index 0e435645af5a..4c8541ed1139 100644
--- a/arch/xtensa/platforms/xtfpga/include/platform/lcd.h
+++ b/arch/xtensa/platforms/xtfpga/include/platform/lcd.h
@@ -11,10 +11,25 @@
 #ifndef __XTENSA_XTAVNET_LCD_H
 #define __XTENSA_XTAVNET_LCD_H
 
+#ifdef CONFIG_XTFPGA_LCD
 /* Display string STR at position POS on the LCD. */
 void lcd_disp_at_pos(char *str, unsigned char pos);
 
 /* Shift the contents of the LCD display left or right. */
 void lcd_shiftleft(void);
 void lcd_shiftright(void);
+#else
+static inline void lcd_disp_at_pos(char *str, unsigned char pos)
+{
+}
+
+static inline void lcd_shiftleft(void)
+{
+}
+
+static inline void lcd_shiftright(void)
+{
+}
+#endif
+
 #endif
diff --git a/arch/xtensa/platforms/xtfpga/lcd.c b/arch/xtensa/platforms/xtfpga/lcd.c
index 2872301598df..4dc0c1b43f4b 100644
--- a/arch/xtensa/platforms/xtfpga/lcd.c
+++ b/arch/xtensa/platforms/xtfpga/lcd.c
@@ -1,50 +1,63 @@
 /*
- * Driver for the LCD display on the Tensilica LX60 Board.
+ * Driver for the LCD display on the Tensilica XTFPGA board family.
+ * http://www.mytechcorp.com/cfdata/productFile/File1/MOC-16216B-B-A0A04.pdf
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
  * Copyright (C) 2001, 2006 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  */
 
-/*
- *
- * FIXME: this code is from the examples from the LX60 user guide.
- *
- * The lcd_pause function does busy waiting, which is probably not
- * great. Maybe the code could be changed to use kernel timers, or
- * change the hardware to not need to wait.
- */
-
+#include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/io.h>
 
 #include <platform/hardware.h>
 #include <platform/lcd.h>
-#include <linux/delay.h>
 
-#define LCD_PAUSE_ITERATIONS	4000
+/* LCD instruction and data addresses. */
+#define LCD_INSTR_ADDR		((char *)IOADDR(CONFIG_XTFPGA_LCD_BASE_ADDR))
+#define LCD_DATA_ADDR		(LCD_INSTR_ADDR + 4)
+
 #define LCD_CLEAR		0x1
 #define LCD_DISPLAY_ON		0xc
 
 /* 8bit and 2 lines display */
 #define LCD_DISPLAY_MODE8BIT	0x38
+#define LCD_DISPLAY_MODE4BIT	0x28
 #define LCD_DISPLAY_POS		0x80
 #define LCD_SHIFT_LEFT		0x18
 #define LCD_SHIFT_RIGHT		0x1c
 
+static void lcd_put_byte(u8 *addr, u8 data)
+{
+#ifdef CONFIG_XTFPGA_LCD_8BIT_ACCESS
+	ACCESS_ONCE(*addr) = data;
+#else
+	ACCESS_ONCE(*addr) = data & 0xf0;
+	ACCESS_ONCE(*addr) = (data << 4) & 0xf0;
+#endif
+}
+
 static int __init lcd_init(void)
 {
-	*LCD_INSTR_ADDR = LCD_DISPLAY_MODE8BIT;
+	ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE8BIT;
 	mdelay(5);
-	*LCD_INSTR_ADDR = LCD_DISPLAY_MODE8BIT;
+	ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE8BIT;
 	udelay(200);
-	*LCD_INSTR_ADDR = LCD_DISPLAY_MODE8BIT;
+	ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE8BIT;
+	udelay(50);
+#ifndef CONFIG_XTFPGA_LCD_8BIT_ACCESS
+	ACCESS_ONCE(*LCD_INSTR_ADDR) = LCD_DISPLAY_MODE4BIT;
+	udelay(50);
+	lcd_put_byte(LCD_INSTR_ADDR, LCD_DISPLAY_MODE4BIT);
 	udelay(50);
-	*LCD_INSTR_ADDR = LCD_DISPLAY_ON;
+#endif
+	lcd_put_byte(LCD_INSTR_ADDR, LCD_DISPLAY_ON);
 	udelay(50);
-	*LCD_INSTR_ADDR = LCD_CLEAR;
+	lcd_put_byte(LCD_INSTR_ADDR, LCD_CLEAR);
 	mdelay(10);
 	lcd_disp_at_pos("XTENSA LINUX", 0);
 	return 0;
@@ -52,10 +65,10 @@ static int __init lcd_init(void)
 
 void lcd_disp_at_pos(char *str, unsigned char pos)
 {
-	*LCD_INSTR_ADDR = LCD_DISPLAY_POS | pos;
+	lcd_put_byte(LCD_INSTR_ADDR, LCD_DISPLAY_POS | pos);
 	udelay(100);
 	while (*str != 0) {
-		*LCD_DATA_ADDR = *str;
+		lcd_put_byte(LCD_DATA_ADDR, *str);
 		udelay(200);
 		str++;
 	}
@@ -63,13 +76,13 @@ void lcd_disp_at_pos(char *str, unsigned char pos)
 
 void lcd_shiftleft(void)
 {
-	*LCD_INSTR_ADDR = LCD_SHIFT_LEFT;
+	lcd_put_byte(LCD_INSTR_ADDR, LCD_SHIFT_LEFT);
 	udelay(50);
 }
 
 void lcd_shiftright(void)
 {
-	*LCD_INSTR_ADDR = LCD_SHIFT_RIGHT;
+	lcd_put_byte(LCD_INSTR_ADDR, LCD_SHIFT_RIGHT);
 	udelay(50);
 }