diff options
Diffstat (limited to 'arch')
223 files changed, 7049 insertions, 4702 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index a65eafb24997..ecd8c3f5ea1b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -71,6 +71,12 @@ config JUMP_LABEL ( On 32-bit x86, the necessary options added to the compiler flags may increase the size of the kernel slightly. ) +config STATIC_KEYS_SELFTEST + bool "Static key selftest" + depends on JUMP_LABEL + help + Boot time self-test of the branch patching code. + config OPTPROBES def_bool y depends on KPROBES && HAVE_OPTPROBES diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 19f4cc634b0e..32909b4ef959 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -35,6 +35,7 @@ config ARM select HAVE_ARCH_KGDB select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) select HAVE_ARCH_TRACEHOOK + select HAVE_ARM_SMCCC if CPU_V7 select HAVE_BPF_JIT select HAVE_CC_STACKPROTECTOR select HAVE_CONTEXT_TRACKING @@ -1409,8 +1410,7 @@ config BIG_LITTLE config BL_SWITCHER bool "big.LITTLE switcher support" - depends on BIG_LITTLE && MCPM && HOTPLUG_CPU - select ARM_CPU_SUSPEND + depends on BIG_LITTLE && MCPM && HOTPLUG_CPU && ARM_GIC select CPU_PM help The big.LITTLE "switcher" provides the core functionality to @@ -1465,7 +1465,8 @@ config HOTPLUG_CPU config ARM_PSCI bool "Support for the ARM Power State Coordination Interface (PSCI)" - depends on CPU_V7 + depends on HAVE_ARM_SMCCC + select ARM_PSCI_FW help Say Y here if you want Linux to communicate with system firmware implementing the PSCI specification for CPU-centric power @@ -2091,7 +2092,8 @@ config ARCH_SUSPEND_POSSIBLE def_bool y config ARM_CPU_SUSPEND - def_bool PM_SLEEP + def_bool PM_SLEEP || BL_SWITCHER || ARM_PSCI_FW + depends on ARCH_SUSPEND_POSSIBLE config ARCH_HIBERNATION_POSSIBLE bool diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 2d46862e7bef..5797815727fe 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -488,7 +488,6 @@ int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); #ifdef CONFIG_DEBUG_RODATA -void mark_rodata_ro(void); void set_kernel_text_rw(void); void set_kernel_text_ro(void); #else diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h index 5f337dc5c108..34f7b6980d21 100644 --- a/arch/arm/include/asm/jump_label.h +++ b/arch/arm/include/asm/jump_label.h @@ -4,23 +4,32 @@ #ifndef __ASSEMBLY__ #include <linux/types.h> +#include <asm/unified.h> #define JUMP_LABEL_NOP_SIZE 4 -#ifdef CONFIG_THUMB2_KERNEL -#define JUMP_LABEL_NOP "nop.w" -#else -#define JUMP_LABEL_NOP "nop" -#endif +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:\n\t" + WASM(nop) "\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".word 1b, %l[l_yes], %c0\n\t" + ".popsection\n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { asm_volatile_goto("1:\n\t" - JUMP_LABEL_NOP "\n\t" + WASM(b) " %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" ".popsection\n\t" - : : "i" (key) : : l_yes); + : : "i" (&((char *)key)[branch]) : : l_yes); return false; l_yes: diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index d995821f1698..cba371cefc68 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -19,6 +19,7 @@ #ifndef __ARM_KVM_ARM_H__ #define __ARM_KVM_ARM_H__ +#include <linux/const.h> #include <linux/types.h> /* Hyp Configuration Register (HCR) bits */ @@ -132,10 +133,9 @@ * space. */ #define KVM_PHYS_SHIFT (40) -#define KVM_PHYS_SIZE (1ULL << KVM_PHYS_SHIFT) -#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1ULL) -#define PTRS_PER_S2_PGD (1ULL << (KVM_PHYS_SHIFT - 30)) -#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) +#define KVM_PHYS_SIZE (_AC(1, ULL) << KVM_PHYS_SHIFT) +#define KVM_PHYS_MASK (KVM_PHYS_SIZE - _AC(1, ULL)) +#define PTRS_PER_S2_PGD (_AC(1, ULL) << (KVM_PHYS_SHIFT - 30)) /* Virtualization Translation Control Register (VTCR) bits */ #define VTCR_SH0 (3 << 12) @@ -162,17 +162,17 @@ #define VTTBR_X (5 - KVM_T0SZ) #endif #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) -#define VTTBR_VMID_SHIFT (48LLU) -#define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT) +#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_VMID_SHIFT _AC(48, ULL) +#define VTTBR_VMID_MASK (_AC(0xff, ULL) << VTTBR_VMID_SHIFT) /* Hyp Syndrome Register (HSR) bits */ #define HSR_EC_SHIFT (26) -#define HSR_EC (0x3fU << HSR_EC_SHIFT) -#define HSR_IL (1U << 25) +#define HSR_EC (_AC(0x3f, UL) << HSR_EC_SHIFT) +#define HSR_IL (_AC(1, UL) << 25) #define HSR_ISS (HSR_IL - 1) #define HSR_ISV_SHIFT (24) -#define HSR_ISV (1U << HSR_ISV_SHIFT) +#define HSR_ISV (_AC(1, UL) << HSR_ISV_SHIFT) #define HSR_SRT_SHIFT (16) #define HSR_SRT_MASK (0xf << HSR_SRT_SHIFT) #define HSR_FSC (0x3f) @@ -180,9 +180,9 @@ #define HSR_SSE (1 << 21) #define HSR_WNR (1 << 6) #define HSR_CV_SHIFT (24) -#define HSR_CV (1U << HSR_CV_SHIFT) +#define HSR_CV (_AC(1, UL) << HSR_CV_SHIFT) #define HSR_COND_SHIFT (20) -#define HSR_COND (0xfU << HSR_COND_SHIFT) +#define HSR_COND (_AC(0xf, UL) << HSR_COND_SHIFT) #define FSC_FAULT (0x04) #define FSC_ACCESS (0x08) @@ -210,12 +210,12 @@ #define HSR_EC_DABT (0x24) #define HSR_EC_DABT_HYP (0x25) -#define HSR_WFI_IS_WFE (1U << 0) +#define HSR_WFI_IS_WFE (_AC(1, UL) << 0) -#define HSR_HVC_IMM_MASK ((1UL << 16) - 1) +#define HSR_HVC_IMM_MASK ((_AC(1, UL) << 16) - 1) -#define HSR_DABT_S1PTW (1U << 7) -#define HSR_DABT_CM (1U << 8) -#define HSR_DABT_EA (1U << 9) +#define HSR_DABT_S1PTW (_AC(1, UL) << 7) +#define HSR_DABT_CM (_AC(1, UL) << 8) +#define HSR_DABT_EA (_AC(1, UL) << 9) #endif /* __ARM_KVM_ARM_H__ */ diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 25410b2d8bc1..240a987df5d1 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -19,6 +19,8 @@ #ifndef __ARM_KVM_ASM_H__ #define __ARM_KVM_ASM_H__ +#include <asm/virt.h> + /* 0 is reserved as an invalid value. */ #define c0_MPIDR 1 /* MultiProcessor ID Register */ #define c0_CSSELR 2 /* Cache Size Selection Register */ @@ -79,6 +81,8 @@ #define rr_lo_hi(a1, a2) a1, a2 #endif +#define kvm_ksym_ref(kva) (kva) + #ifndef __ASSEMBLY__ struct kvm; struct kvm_vcpu; @@ -91,9 +95,6 @@ extern char __kvm_hyp_exit_end[]; extern char __kvm_hyp_vector[]; -extern char __kvm_hyp_code_start[]; -extern char __kvm_hyp_code_end[]; - extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); extern void __kvm_tlb_flush_vmid(struct kvm *kvm); diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index d71607c16601..ce15847ce33a 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -213,14 +213,22 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr); } -static inline int kvm_arch_dev_ioctl_check_extension(long ext) +static inline void __cpu_init_stage2(void) { - return 0; } -static inline void vgic_arch_setup(const struct vgic_params *vgic) +static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr, + phys_addr_t phys_idmap_start) +{ + /* + * TODO + * kvm_call_reset(boot_pgd_ptr, phys_idmap_start); + */ +} + +static inline int kvm_arch_dev_ioctl_check_extension(long ext) { - BUG_ON(vgic->type != VGIC_V2); + return 0; } int kvm_perf_init(void); @@ -230,10 +238,14 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); -static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +static inline void kvm_arm_init_debug(void) {} +static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} +static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {} +static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 405aa1883307..dc6fadfd0407 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -66,6 +66,7 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); phys_addr_t kvm_mmu_get_boot_httbr(void); phys_addr_t kvm_get_idmap_vector(void); +phys_addr_t kvm_get_idmap_start(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 184def0e1652..de038cc1f016 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -121,12 +121,6 @@ #endif /* - * Convert a physical address to a Page Frame Number and back - */ -#define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT)) -#define __pfn_to_phys(pfn) ((phys_addr_t)(pfn) << PAGE_SHIFT) - -/* * Convert a page to/from a physical address */ #define page_to_phys(page) (__pfn_to_phys(page_to_pfn(page))) diff --git a/arch/arm/include/asm/psci.h b/arch/arm/include/asm/psci.h index e3789fb02c9c..2512343932cb 100644 --- a/arch/arm/include/asm/psci.h +++ b/arch/arm/include/asm/psci.h @@ -14,34 +14,12 @@ #ifndef __ASM_ARM_PSCI_H #define __ASM_ARM_PSCI_H -#define PSCI_POWER_STATE_TYPE_STANDBY 0 -#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 - -struct psci_power_state { - u16 id; - u8 type; - u8 affinity_level; -}; - -struct psci_operations { - int (*cpu_suspend)(struct psci_power_state state, - unsigned long entry_point); - int (*cpu_off)(struct psci_power_state state); - int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); - int (*migrate)(unsigned long cpuid); - int (*affinity_info)(unsigned long target_affinity, - unsigned long lowest_affinity_level); - int (*migrate_info_type)(void); -}; - -extern struct psci_operations psci_ops; extern struct smp_operations psci_smp_ops; #if defined(CONFIG_SMP) && defined(CONFIG_ARM_PSCI) int psci_init(void); bool psci_smp_available(void); #else -static inline int psci_init(void) { return 0; } static inline bool psci_smp_available(void) { return false; } #endif diff --git a/arch/arm/include/asm/virt.h b/arch/arm/include/asm/virt.h index 4371f45c5784..d4ceaf5f299b 100644 --- a/arch/arm/include/asm/virt.h +++ b/arch/arm/include/asm/virt.h @@ -74,6 +74,15 @@ static inline bool is_hyp_mode_mismatched(void) { return !!(__boot_cpu_mode & BOOT_CPU_MODE_MISMATCH); } + +static inline bool is_kernel_in_hyp_mode(void) +{ + return false; +} + +/* The section containing the hypervisor text */ +extern char __hyp_text_start[]; +extern char __hyp_text_end[]; #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 752725dcbf42..740047f707d1 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -86,8 +86,9 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o ifeq ($(CONFIG_ARM_PSCI),y) -obj-y += psci.o psci-call.o obj-$(CONFIG_SMP) += psci_smp.o endif +obj-$(CONFIG_HAVE_ARM_SMCCC) += smccc-call.o + extra-y := $(head-y) vmlinux.lds diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index a88671cfe1ff..1d90bcd0c678 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -16,6 +16,7 @@ #include <linux/syscalls.h> #include <linux/uaccess.h> #include <linux/io.h> +#include <linux/arm-smccc.h> #include <asm/checksum.h> #include <asm/ftrace.h> @@ -169,3 +170,8 @@ EXPORT_SYMBOL(__gnu_mcount_nc); EXPORT_SYMBOL(__pv_phys_pfn_offset); EXPORT_SYMBOL(__pv_offset); #endif + +#ifdef CONFIG_HAVE_ARM_SMCCC +EXPORT_SYMBOL(arm_smccc_smc); +EXPORT_SYMBOL(arm_smccc_hvc); +#endif diff --git a/arch/arm/kernel/jump_label.c b/arch/arm/kernel/jump_label.c index e39cbf488cfe..845a5dd9c42b 100644 --- a/arch/arm/kernel/jump_label.c +++ b/arch/arm/kernel/jump_label.c @@ -12,7 +12,7 @@ static void __arch_jump_label_transform(struct jump_entry *entry, void *addr = (void *)entry->code; unsigned int insn; - if (type == JUMP_LABEL_ENABLE) + if (type == JUMP_LABEL_JMP) insn = arm_gen_branch(entry->code, entry->target); else insn = arm_gen_nop(); diff --git a/arch/arm/kernel/psci-call.S b/arch/arm/kernel/psci-call.S deleted file mode 100644 index a78e9e1e206d..000000000000 --- a/arch/arm/kernel/psci-call.S +++ /dev/null @@ -1,31 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Copyright (C) 2015 ARM Limited - * - * Author: Mark Rutland <mark.rutland@arm.com> - */ - -#include <linux/linkage.h> - -#include <asm/opcodes-sec.h> -#include <asm/opcodes-virt.h> - -/* int __invoke_psci_fn_hvc(u32 function_id, u32 arg0, u32 arg1, u32 arg2) */ -ENTRY(__invoke_psci_fn_hvc) - __HVC(0) - bx lr -ENDPROC(__invoke_psci_fn_hvc) - -/* int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1, u32 arg2) */ -ENTRY(__invoke_psci_fn_smc) - __SMC(0) - bx lr -ENDPROC(__invoke_psci_fn_smc) diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c deleted file mode 100644 index f90fdf4ce7c7..000000000000 --- a/arch/arm/kernel/psci.c +++ /dev/null @@ -1,299 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon <will.deacon@arm.com> - */ - -#define pr_fmt(fmt) "psci: " fmt - -#include <linux/init.h> -#include <linux/of.h> -#include <linux/reboot.h> -#include <linux/pm.h> -#include <uapi/linux/psci.h> - -#include <asm/compiler.h> -#include <asm/errno.h> -#include <asm/psci.h> -#include <asm/system_misc.h> - -struct psci_operations psci_ops; - -static int (*invoke_psci_fn)(u32, u32, u32, u32); -typedef int (*psci_initcall_t)(const struct device_node *); - -asmlinkage int __invoke_psci_fn_hvc(u32, u32, u32, u32); -asmlinkage int __invoke_psci_fn_smc(u32, u32, u32, u32); - -enum psci_function { - PSCI_FN_CPU_SUSPEND, - PSCI_FN_CPU_ON, - PSCI_FN_CPU_OFF, - PSCI_FN_MIGRATE, - PSCI_FN_AFFINITY_INFO, - PSCI_FN_MIGRATE_INFO_TYPE, - PSCI_FN_MAX, -}; - -static u32 psci_function_id[PSCI_FN_MAX]; - -static int psci_to_linux_errno(int errno) -{ - switch (errno) { - case PSCI_RET_SUCCESS: - return 0; - case PSCI_RET_NOT_SUPPORTED: - return -EOPNOTSUPP; - case PSCI_RET_INVALID_PARAMS: - return -EINVAL; - case PSCI_RET_DENIED: - return -EPERM; - }; - - return -EINVAL; -} - -static u32 psci_power_state_pack(struct psci_power_state state) -{ - return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT) - & PSCI_0_2_POWER_STATE_ID_MASK) | - ((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT) - & PSCI_0_2_POWER_STATE_TYPE_MASK) | - ((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT) - & PSCI_0_2_POWER_STATE_AFFL_MASK); -} - -static int psci_get_version(void) -{ - int err; - - err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0); - return err; -} - -static int psci_cpu_suspend(struct psci_power_state state, - unsigned long entry_point) -{ - int err; - u32 fn, power_state; - - fn = psci_function_id[PSCI_FN_CPU_SUSPEND]; - power_state = psci_power_state_pack(state); - err = invoke_psci_fn(fn, power_state, entry_point, 0); - return psci_to_linux_errno(err); -} - -static int psci_cpu_off(struct psci_power_state state) -{ - int err; - u32 fn, power_state; - - fn = psci_function_id[PSCI_FN_CPU_OFF]; - power_state = psci_power_state_pack(state); - err = invoke_psci_fn(fn, power_state, 0, 0); - return psci_to_linux_errno(err); -} - -static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_CPU_ON]; - err = invoke_psci_fn(fn, cpuid, entry_point, 0); - return psci_to_linux_errno(err); -} - -static int psci_migrate(unsigned long cpuid) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_MIGRATE]; - err = invoke_psci_fn(fn, cpuid, 0, 0); - return psci_to_linux_errno(err); -} - -static int psci_affinity_info(unsigned long target_affinity, - unsigned long lowest_affinity_level) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_AFFINITY_INFO]; - err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0); - return err; -} - -static int psci_migrate_info_type(void) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE]; - err = invoke_psci_fn(fn, 0, 0, 0); - return err; -} - -static int get_set_conduit_method(struct device_node *np) -{ - const char *method; - - pr_info("probing for conduit method from DT.\n"); - - if (of_property_read_string(np, "method", &method)) { - pr_warn("missing \"method\" property\n"); - return -ENXIO; - } - - if (!strcmp("hvc", method)) { - invoke_psci_fn = __invoke_psci_fn_hvc; - } else if (!strcmp("smc", method)) { - invoke_psci_fn = __invoke_psci_fn_smc; - } else { - pr_warn("invalid \"method\" property: %s\n", method); - return -EINVAL; - } - return 0; -} - -static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) -{ - invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); -} - -static void psci_sys_poweroff(void) -{ - invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0); -} - -/* - * PSCI Function IDs for v0.2+ are well defined so use - * standard values. - */ -static int psci_0_2_init(struct device_node *np) -{ - int err, ver; - - err = get_set_conduit_method(np); - - if (err) - goto out_put_node; - - ver = psci_get_version(); - - if (ver == PSCI_RET_NOT_SUPPORTED) { - /* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */ - pr_err("PSCI firmware does not comply with the v0.2 spec.\n"); - err = -EOPNOTSUPP; - goto out_put_node; - } else { - pr_info("PSCIv%d.%d detected in firmware.\n", - PSCI_VERSION_MAJOR(ver), - PSCI_VERSION_MINOR(ver)); - - if (PSCI_VERSION_MAJOR(ver) == 0 && - PSCI_VERSION_MINOR(ver) < 2) { - err = -EINVAL; - pr_err("Conflicting PSCI version detected.\n"); - goto out_put_node; - } - } - - pr_info("Using standard PSCI v0.2 function IDs\n"); - psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN_CPU_SUSPEND; - psci_ops.cpu_suspend = psci_cpu_suspend; - - psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF; - psci_ops.cpu_off = psci_cpu_off; - - psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN_CPU_ON; - psci_ops.cpu_on = psci_cpu_on; - - psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN_MIGRATE; - psci_ops.migrate = psci_migrate; - - psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN_AFFINITY_INFO; - psci_ops.affinity_info = psci_affinity_info; - - psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] = - PSCI_0_2_FN_MIGRATE_INFO_TYPE; - psci_ops.migrate_info_type = psci_migrate_info_type; - - arm_pm_restart = psci_sys_reset; - - pm_power_off = psci_sys_poweroff; - -out_put_node: - of_node_put(np); - return err; -} - -/* - * PSCI < v0.2 get PSCI Function IDs via DT. - */ -static int psci_0_1_init(struct device_node *np) -{ - u32 id; - int err; - - err = get_set_conduit_method(np); - - if (err) - goto out_put_node; - - pr_info("Using PSCI v0.1 Function IDs from DT\n"); - - if (!of_property_read_u32(np, "cpu_suspend", &id)) { - psci_function_id[PSCI_FN_CPU_SUSPEND] = id; - psci_ops.cpu_suspend = psci_cpu_suspend; - } - - if (!of_property_read_u32(np, "cpu_off", &id)) { - psci_function_id[PSCI_FN_CPU_OFF] = id; - psci_ops.cpu_off = psci_cpu_off; - } - - if (!of_property_read_u32(np, "cpu_on", &id)) { - psci_function_id[PSCI_FN_CPU_ON] = id; - psci_ops.cpu_on = psci_cpu_on; - } - - if (!of_property_read_u32(np, "migrate", &id)) { - psci_function_id[PSCI_FN_MIGRATE] = id; - psci_ops.migrate = psci_migrate; - } - -out_put_node: - of_node_put(np); - return err; -} - -static const struct of_device_id psci_of_match[] __initconst = { - { .compatible = "arm,psci", .data = psci_0_1_init}, - { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, - {}, -}; - -int __init psci_init(void) -{ - struct device_node *np; - const struct of_device_id *matched_np; - psci_initcall_t init_fn; - - np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); - if (!np) - return -ENODEV; - - init_fn = (psci_initcall_t)matched_np->data; - return init_fn(np); -} diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c index 28a1db4da704..9d479b2ea40d 100644 --- a/arch/arm/kernel/psci_smp.c +++ b/arch/arm/kernel/psci_smp.c @@ -17,6 +17,8 @@ #include <linux/smp.h> #include <linux/of.h> #include <linux/delay.h> +#include <linux/psci.h> + #include <uapi/linux/psci.h> #include <asm/psci.h> @@ -51,25 +53,37 @@ static int psci_boot_secondary(unsigned int cpu, struct task_struct *idle) { if (psci_ops.cpu_on) return psci_ops.cpu_on(cpu_logical_map(cpu), - __pa(secondary_startup)); + virt_to_idmap(&secondary_startup)); return -ENODEV; } #ifdef CONFIG_HOTPLUG_CPU -void __ref psci_cpu_die(unsigned int cpu) +int psci_cpu_disable(unsigned int cpu) +{ + /* Fail early if we don't have CPU_OFF support */ + if (!psci_ops.cpu_off) + return -EOPNOTSUPP; + + /* Trusted OS will deny CPU_OFF */ + if (psci_tos_resident_on(cpu)) + return -EPERM; + + return 0; +} + +void psci_cpu_die(unsigned int cpu) { - const struct psci_power_state ps = { - .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, - }; + u32 state = PSCI_POWER_STATE_TYPE_POWER_DOWN << + PSCI_0_2_POWER_STATE_TYPE_SHIFT; - if (psci_ops.cpu_off) - psci_ops.cpu_off(ps); + if (psci_ops.cpu_off) + psci_ops.cpu_off(state); - /* We should never return */ - panic("psci: cpu %d failed to shutdown\n", cpu); + /* We should never return */ + panic("psci: cpu %d failed to shutdown\n", cpu); } -int __ref psci_cpu_kill(unsigned int cpu) +int psci_cpu_kill(unsigned int cpu) { int err, i; @@ -109,6 +123,7 @@ bool __init psci_smp_available(void) struct smp_operations __initdata psci_smp_ops = { .smp_boot_secondary = psci_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU + .cpu_disable = psci_cpu_disable, .cpu_die = psci_cpu_die, .cpu_kill = psci_cpu_kill, #endif diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 6c777e908a24..3224680e44f4 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -31,6 +31,7 @@ #include <linux/bug.h> #include <linux/compiler.h> #include <linux/sort.h> +#include <linux/psci.h> #include <asm/unified.h> #include <asm/cp15.h> @@ -950,7 +951,7 @@ void __init setup_arch(char **cmdline_p) unflatten_device_tree(); arm_dt_init_cpu_maps(); - psci_init(); + psci_dt_init(); #ifdef CONFIG_SMP if (is_smp()) { if (!mdesc->smp_init || !mdesc->smp_init()) { diff --git a/arch/arm/kernel/smccc-call.S b/arch/arm/kernel/smccc-call.S new file mode 100644 index 000000000000..2e48b674aab1 --- /dev/null +++ b/arch/arm/kernel/smccc-call.S @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2015, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/linkage.h> + +#include <asm/opcodes-sec.h> +#include <asm/opcodes-virt.h> +#include <asm/unwind.h> + + /* + * Wrap c macros in asm macros to delay expansion until after the + * SMCCC asm macro is expanded. + */ + .macro SMCCC_SMC + __SMC(0) + .endm + + .macro SMCCC_HVC + __HVC(0) + .endm + + .macro SMCCC instr +UNWIND( .fnstart) + mov r12, sp + push {r4-r7} +UNWIND( .save {r4-r7}) + ldm r12, {r4-r7} + \instr + pop {r4-r7} + ldr r12, [sp, #(4 * 4)] + stm r12, {r0-r3} + bx lr +UNWIND( .fnend) + .endm + +/* + * void smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2, + * unsigned long a3, unsigned long a4, unsigned long a5, + * unsigned long a6, unsigned long a7, struct arm_smccc_res *res) + */ +ENTRY(arm_smccc_smc) + SMCCC SMCCC_SMC +ENDPROC(arm_smccc_smc) + +/* + * void smccc_hvc(unsigned long a0, unsigned long a1, unsigned long a2, + * unsigned long a3, unsigned long a4, unsigned long a5, + * unsigned long a6, unsigned long a7, struct arm_smccc_res *res) + */ +ENTRY(arm_smccc_hvc) + SMCCC SMCCC_HVC +ENDPROC(arm_smccc_hvc) diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 8b60fde5ce48..b4139cbbbdd9 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -18,6 +18,11 @@ *(.proc.info.init) \ VMLINUX_SYMBOL(__proc_info_end) = .; +#define HYPERVISOR_TEXT \ + VMLINUX_SYMBOL(__hyp_text_start) = .; \ + *(.hyp.text) \ + VMLINUX_SYMBOL(__hyp_text_end) = .; + #define IDMAP_TEXT \ ALIGN_FUNCTION(); \ VMLINUX_SYMBOL(__idmap_text_start) = .; \ @@ -108,6 +113,7 @@ SECTIONS TEXT_TEXT SCHED_TEXT LOCK_TEXT + HYPERVISOR_TEXT KPROBES_TEXT *(.gnu.warning) *(.glue_7) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 5414081c0bbf..77eb3fb4cc0a 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -16,7 +16,6 @@ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#include <linux/cpu.h> #include <linux/cpu_pm.h> #include <linux/errno.h> #include <linux/err.h> @@ -27,6 +26,7 @@ #include <linux/mman.h> #include <linux/sched.h> #include <linux/kvm.h> +#include <linux/cpu.h> #include <trace/events/kvm.h> #define CREATE_TRACE_POINTS @@ -44,6 +44,7 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_coproc.h> #include <asm/kvm_psci.h> +#include <asm/sections.h> #ifdef REQUIRES_VIRT __asm__(".arch_extension virt"); @@ -61,6 +62,10 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u8 kvm_next_vmid; static DEFINE_SPINLOCK(kvm_vmid_lock); +static bool vgic_present; + +static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); + static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) { BUG_ON(preemptible()); @@ -85,11 +90,6 @@ struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void) return &kvm_arm_running_vcpu; } -int kvm_arch_hardware_enable(void) -{ - return 0; -} - int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; @@ -131,7 +131,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.vmid_gen = 0; /* The maximum number of VCPUs is limited by the host's GIC model */ - kvm->arch.max_vcpus = kvm_vgic_get_max_vcpus(); + kvm->arch.max_vcpus = vgic_present ? + kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS; return ret; out_free_stage2_pgd: @@ -171,7 +172,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) int r; switch (ext) { case KVM_CAP_IRQCHIP: - case KVM_CAP_IRQFD: + r = vgic_present; + break; case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: @@ -280,6 +282,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) /* Set up the timer */ kvm_timer_vcpu_init(vcpu); + kvm_arm_reset_debug_ptr(vcpu); + return 0; } @@ -550,19 +554,27 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) continue; } + kvm_arm_setup_debug(vcpu); + /************************************************************** * Enter the guest */ trace_kvm_entry(*vcpu_pc(vcpu)); - kvm_guest_enter(); + __kvm_guest_enter(); vcpu->mode = IN_GUEST_MODE; ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; - kvm_guest_exit(); + __kvm_guest_exit(); trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); /* + * Back from guest + *************************************************************/ + + kvm_arm_clear_debug(vcpu); + + /* * We may have taken a host interrupt in HYP mode (ie * while executing the guest). This interrupt is still * pending, as we haven't serviced it yet! @@ -851,6 +863,8 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, switch (dev_id) { case KVM_ARM_DEVICE_VGIC_V2: + if (!vgic_present) + return -ENXIO; return kvm_vgic_addr(kvm, type, &dev_addr->addr, true); default: return -ENODEV; @@ -865,6 +879,8 @@ long kvm_arch_vm_ioctl(struct file *filp, switch (ioctl) { case KVM_CREATE_IRQCHIP: { + if (!vgic_present) + return -ENXIO; return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); } case KVM_ARM_SET_DEVICE_ADDR: { @@ -907,41 +923,99 @@ static void cpu_init_hyp_mode(void *dummy) pgd_ptr = kvm_mmu_get_httbr(); stack_page = __this_cpu_read(kvm_arm_hyp_stack_page); hyp_stack_ptr = stack_page + PAGE_SIZE; - vector_ptr = (unsigned long)__kvm_hyp_vector; + vector_ptr = (unsigned long)kvm_ksym_ref(__kvm_hyp_vector); __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr); + __cpu_init_stage2(); + + kvm_arm_init_debug(); } -static int hyp_init_cpu_notify(struct notifier_block *self, - unsigned long action, void *cpu) +static void cpu_hyp_reinit(void) { - switch (action) { - case CPU_STARTING: - case CPU_STARTING_FROZEN: + if (is_kernel_in_hyp_mode()) { + /* + * __cpu_init_stage2() is safe to call even if the PM + * event was cancelled before the CPU was reset. + */ + __cpu_init_stage2(); + } else { if (__hyp_get_vectors() == hyp_default_vectors) cpu_init_hyp_mode(NULL); - break; } +} + +static void cpu_hyp_reset(void) +{ + phys_addr_t boot_pgd_ptr; + phys_addr_t phys_idmap_start; - return NOTIFY_OK; + if (!is_kernel_in_hyp_mode()) { + boot_pgd_ptr = kvm_mmu_get_boot_httbr(); + phys_idmap_start = kvm_get_idmap_start(); + + __cpu_reset_hyp_mode(boot_pgd_ptr, phys_idmap_start); + } } -static struct notifier_block hyp_init_cpu_nb = { - .notifier_call = hyp_init_cpu_notify, -}; +static void _kvm_arch_hardware_enable(void *discard) +{ + if (!__this_cpu_read(kvm_arm_hardware_enabled)) { + cpu_hyp_reinit(); + __this_cpu_write(kvm_arm_hardware_enabled, 1); + } +} + +int kvm_arch_hardware_enable(void) +{ + _kvm_arch_hardware_enable(NULL); + return 0; +} + +static void _kvm_arch_hardware_disable(void *discard) +{ + if (__this_cpu_read(kvm_arm_hardware_enabled)) { + cpu_hyp_reset(); + __this_cpu_write(kvm_arm_hardware_enabled, 0); + } +} + +void kvm_arch_hardware_disable(void) +{ + _kvm_arch_hardware_disable(NULL); +} #ifdef CONFIG_CPU_PM static int hyp_init_cpu_pm_notifier(struct notifier_block *self, unsigned long cmd, void *v) { - if (cmd == CPU_PM_EXIT && - __hyp_get_vectors() == hyp_default_vectors) { - cpu_init_hyp_mode(NULL); + /* + * kvm_arm_hardware_enabled is left with its old value over + * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should + * re-enable hyp. + */ + switch (cmd) { + case CPU_PM_ENTER: + if (__this_cpu_read(kvm_arm_hardware_enabled)) + /* + * don't update kvm_arm_hardware_enabled here + * so that the hardware will be re-enabled + * when we resume. See below. + */ + cpu_hyp_reset(); + + return NOTIFY_OK; + case CPU_PM_EXIT: + if (__this_cpu_read(kvm_arm_hardware_enabled)) + /* The hardware was enabled before suspend. */ + cpu_hyp_reinit(); + return NOTIFY_OK; - } - return NOTIFY_DONE; + default: + return NOTIFY_DONE; + } } static struct notifier_block hyp_init_cpu_pm_nb = { @@ -952,12 +1026,102 @@ static void __init hyp_cpu_pm_init(void) { cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); } +static void __init hyp_cpu_pm_exit(void) +{ + cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb); +} #else static inline void hyp_cpu_pm_init(void) { } +static inline void hyp_cpu_pm_exit(void) +{ +} #endif +static void teardown_common_resources(void) +{ + free_percpu(kvm_host_cpu_state); +} + +static int init_common_resources(void) +{ + kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t); + if (!kvm_host_cpu_state) { + kvm_err("Cannot allocate host CPU state\n"); + return -ENOMEM; + } + + return 0; +} + +static int init_subsystems(void) +{ + int err = 0; + + /* + * Enable hardware so that subsystem initialisation can access EL2. + */ + on_each_cpu(_kvm_arch_hardware_enable, NULL, 1); + + /* + * Register CPU lower-power notifier + */ + hyp_cpu_pm_init(); + + /* + * Init HYP view of VGIC + */ + err = kvm_vgic_hyp_init(); + switch (err) { + case 0: + vgic_present = true; + break; + case -ENODEV: + case -ENXIO: + vgic_present = false; + err = 0; + break; + default: + goto out; + } + + /* + * Init HYP architected timer support + */ + err = kvm_timer_hyp_init(); + if (err) + goto out; + + kvm_perf_init(); + kvm_coproc_table_init(); + +out: + on_each_cpu(_kvm_arch_hardware_disable, NULL, 1); + + return err; +} + +static void teardown_hyp_mode(void) +{ + int cpu; + + if (is_kernel_in_hyp_mode()) + return; + + free_hyp_pgds(); + for_each_possible_cpu(cpu) + free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); + hyp_cpu_pm_exit(); +} + +static int init_vhe_mode(void) +{ + + kvm_info("VHE mode initialized successfully\n"); + return 0; +} + /** * Inits Hyp-mode on all online CPUs */ @@ -988,7 +1152,7 @@ static int init_hyp_mode(void) stack_page = __get_free_page(GFP_KERNEL); if (!stack_page) { err = -ENOMEM; - goto out_free_stack_pages; + goto out_err; } per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page; @@ -997,10 +1161,16 @@ static int init_hyp_mode(void) /* * Map the Hyp-code called directly from the host */ - err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end); + err = create_hyp_mappings(__hyp_text_start, __hyp_text_end); if (err) { kvm_err("Cannot map world-switch code\n"); - goto out_free_mappings; + goto out_err; + } + + err = create_hyp_mappings(__start_rodata, __end_rodata); + if (err) { + kvm_err("Cannot map rodata section\n"); + goto out_err; } /* @@ -1012,20 +1182,10 @@ static int init_hyp_mode(void) if (err) { kvm_err("Cannot map hyp stack\n"); - goto out_free_mappings; + goto out_err; } } - /* - * Map the host CPU structures - */ - kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t); - if (!kvm_host_cpu_state) { - err = -ENOMEM; - kvm_err("Cannot allocate host CPU state\n"); - goto out_free_mappings; - } - for_each_possible_cpu(cpu) { kvm_cpu_context_t *cpu_ctxt; @@ -1034,46 +1194,20 @@ static int init_hyp_mode(void) if (err) { kvm_err("Cannot map host CPU state: %d\n", err); - goto out_free_context; + goto out_err; } } - /* - * Execute the init code on each CPU. - */ - on_each_cpu(cpu_init_hyp_mode, NULL, 1); - - /* - * Init HYP view of VGIC - */ - err = kvm_vgic_hyp_init(); - if (err) - goto out_free_context; - - /* - * Init HYP architected timer support - */ - err = kvm_timer_hyp_init(); - if (err) - goto out_free_mappings; - #ifndef CONFIG_HOTPLUG_CPU free_boot_hyp_pgd(); #endif - kvm_perf_init(); - kvm_info("Hyp mode initialized successfully\n"); return 0; -out_free_context: - free_percpu(kvm_host_cpu_state); -out_free_mappings: - free_hyp_pgds(); -out_free_stack_pages: - for_each_possible_cpu(cpu) - free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); + out_err: + teardown_hyp_mode(); kvm_err("error initializing Hyp mode: %d\n", err); return err; } @@ -1117,26 +1251,27 @@ int kvm_arch_init(void *opaque) } } - cpu_notifier_register_begin(); - - err = init_hyp_mode(); + err = init_common_resources(); if (err) - goto out_err; + return err; - err = __register_cpu_notifier(&hyp_init_cpu_nb); - if (err) { - kvm_err("Cannot register HYP init CPU notifier (%d)\n", err); + if (is_kernel_in_hyp_mode()) + err = init_vhe_mode(); + else + err = init_hyp_mode(); + if (err) goto out_err; - } - cpu_notifier_register_done(); - - hyp_cpu_pm_init(); + err = init_subsystems(); + if (err) + goto out_hyp; - kvm_coproc_table_init(); return 0; + +out_hyp: + teardown_hyp_mode(); out_err: - cpu_notifier_register_done(); + teardown_common_resources(); return err; } diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index f7db3a5d80e3..b3cf99798824 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -28,9 +28,7 @@ #include "interrupts_head.S" .text - -__kvm_hyp_code_start: - .globl __kvm_hyp_code_start + .pushsection .hyp.text, "ax" /******************************************************************** * Flush per-VMID TLBs @@ -314,8 +312,6 @@ THUMB( orr r2, r2, #PSR_T_BIT ) eret .endm - .text - .align 5 __kvm_hyp_vector: .globl __kvm_hyp_vector @@ -509,10 +505,9 @@ hyp_fiq: .ltorg -__kvm_hyp_code_end: - .globl __kvm_hyp_code_end + .popsection - .section ".rodata" + .pushsection ".rodata" und_die_str: .ascii "unexpected undefined exception in Hyp mode at: %#08x\n" @@ -522,3 +517,5 @@ dabt_die_str: .ascii "unexpected data abort in Hyp mode at: %#08x\n" svc_die_str: .ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x\n" + + .popsection diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 691ea94897fd..d75591c2aafb 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -28,6 +28,7 @@ #include <asm/kvm_mmio.h> #include <asm/kvm_asm.h> #include <asm/kvm_emulate.h> +#include <asm/virt.h> #include "trace.h" @@ -598,6 +599,9 @@ int create_hyp_mappings(void *from, void *to) unsigned long start = KERN_TO_HYP((unsigned long)from); unsigned long end = KERN_TO_HYP((unsigned long)to); + if (is_kernel_in_hyp_mode()) + return 0; + start = start & PAGE_MASK; end = PAGE_ALIGN(end); @@ -630,6 +634,9 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) unsigned long start = KERN_TO_HYP((unsigned long)from); unsigned long end = KERN_TO_HYP((unsigned long)to); + if (is_kernel_in_hyp_mode()) + return 0; + /* Check for a valid kernel IO mapping */ if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)) return -EINVAL; @@ -656,9 +663,9 @@ static void *kvm_alloc_hwpgd(void) * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. * @kvm: The KVM struct pointer for the VM. * - * Allocates the 1st level table only of size defined by S2_PGD_ORDER (can - * support either full 40-bit input addresses or limited to 32-bit input - * addresses). Clears the allocated pages. + * Allocates only the stage-2 HW PGD level table(s) (can support either full + * 40-bit input addresses or limited to 32-bit input addresses). Clears the + * allocated pages. * * Note we don't need locking here as this is only called when the VM is * created, which can only be done once. @@ -1647,6 +1654,11 @@ phys_addr_t kvm_get_idmap_vector(void) return hyp_idmap_vector; } +phys_addr_t kvm_get_idmap_start(void) +{ + return hyp_idmap_start; +} + int kvm_mmu_init(void) { int err; diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 531e922486b2..ad6f6424f1d1 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -24,6 +24,8 @@ #include <asm/kvm_psci.h> #include <asm/kvm_host.h> +#include <uapi/linux/psci.h> + /* * This is an implementation of the Power State Coordination Interface * as described in ARM document number ARM DEN 0022A. @@ -124,7 +126,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) { - int i; + int i, matching_cpus = 0; unsigned long mpidr; unsigned long target_affinity; unsigned long target_affinity_mask; @@ -149,12 +151,16 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) */ kvm_for_each_vcpu(i, tmp, kvm) { mpidr = kvm_vcpu_get_mpidr_aff(tmp); - if (((mpidr & target_affinity_mask) == target_affinity) && - !tmp->arch.pause) { - return PSCI_0_2_AFFINITY_LEVEL_ON; + if ((mpidr & target_affinity_mask) == target_affinity) { + matching_cpus++; + if (!tmp->arch.pause) + return PSCI_0_2_AFFINITY_LEVEL_ON; } } + if (!matching_cpus) + return PSCI_RET_INVALID_PARAMS; + return PSCI_0_2_AFFINITY_LEVEL_OFF; } diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c index 231fba0d03e5..6050a14faee6 100644 --- a/arch/arm/mach-highbank/highbank.c +++ b/arch/arm/mach-highbank/highbank.c @@ -28,8 +28,8 @@ #include <linux/reboot.h> #include <linux/amba/bus.h> #include <linux/platform_device.h> +#include <linux/psci.h> -#include <asm/psci.h> #include <asm/hardware/cache-l2x0.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> diff --git a/arch/arm/mach-highbank/pm.c b/arch/arm/mach-highbank/pm.c index 7f2bd85eb935..400311695548 100644 --- a/arch/arm/mach-highbank/pm.c +++ b/arch/arm/mach-highbank/pm.c @@ -16,19 +16,21 @@ #include <linux/cpu_pm.h> #include <linux/init.h> +#include <linux/psci.h> #include <linux/suspend.h> #include <asm/suspend.h> -#include <asm/psci.h> + +#include <uapi/linux/psci.h> + +#define HIGHBANK_SUSPEND_PARAM \ + ((0 << PSCI_0_2_POWER_STATE_ID_SHIFT) | \ + (1 << PSCI_0_2_POWER_STATE_AFFL_SHIFT) | \ + (PSCI_POWER_STATE_TYPE_POWER_DOWN << PSCI_0_2_POWER_STATE_TYPE_SHIFT)) static int highbank_suspend_finish(unsigned long val) { - const struct psci_power_state ps = { - .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, - .affinity_level = 1, - }; - - return psci_ops.cpu_suspend(ps, __pa(cpu_resume)); + return psci_ops.cpu_suspend(HIGHBANK_SUSPEND_PARAM, __pa(cpu_resume)); } static int highbank_pm_enter(suspend_state_t state) diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c index 3ab61549ce0f..bb8a1306ac34 100644 --- a/arch/arm/mach-imx/mach-imx6q.c +++ b/arch/arm/mach-imx/mach-imx6q.c @@ -350,7 +350,7 @@ static void __init imx6q_opp_init(void) return; } - if (of_init_opp_table(cpu_dev)) { + if (dev_pm_opp_of_add_table(cpu_dev)) { pr_warn("failed to init OPP table\n"); goto put_node; } diff --git a/arch/arm/mach-omap2/omap-hotplug.c b/arch/arm/mach-omap2/omap-hotplug.c index 971791fe9a3f..593fec753b28 100644 --- a/arch/arm/mach-omap2/omap-hotplug.c +++ b/arch/arm/mach-omap2/omap-hotplug.c @@ -27,7 +27,7 @@ * platform-specific code to shutdown a CPU * Called with IRQs disabled */ -void __ref omap4_cpu_die(unsigned int cpu) +void omap4_cpu_die(unsigned int cpu) { unsigned int boot_cpu = 0; void __iomem *base = omap_get_wakeupgen_base(); diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c index 6833df45d7b1..3e059b215a1d 100644 --- a/arch/arm/mach-omap2/omap-wakeupgen.c +++ b/arch/arm/mach-omap2/omap-wakeupgen.c @@ -330,7 +330,7 @@ static int irq_cpu_hotplug_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __refdata irq_hotplug_notifier = { +static struct notifier_block irq_hotplug_notifier = { .notifier_call = irq_cpu_hotplug_notify, }; diff --git a/arch/arm/mach-prima2/hotplug.c b/arch/arm/mach-prima2/hotplug.c index 0ab2f8bae28e..a728c78b996f 100644 --- a/arch/arm/mach-prima2/hotplug.c +++ b/arch/arm/mach-prima2/hotplug.c @@ -32,7 +32,7 @@ static inline void platform_do_lowpower(unsigned int cpu) * * Called with IRQs disabled */ -void __ref sirfsoc_cpu_die(unsigned int cpu) +void sirfsoc_cpu_die(unsigned int cpu) { platform_do_lowpower(cpu); } diff --git a/arch/arm/mach-qcom/platsmp.c b/arch/arm/mach-qcom/platsmp.c index 5cde63a64b34..9b00123a315d 100644 --- a/arch/arm/mach-qcom/platsmp.c +++ b/arch/arm/mach-qcom/platsmp.c @@ -49,7 +49,7 @@ extern void secondary_startup_arm(void); static DEFINE_SPINLOCK(boot_lock); #ifdef CONFIG_HOTPLUG_CPU -static void __ref qcom_cpu_die(unsigned int cpu) +static void qcom_cpu_die(unsigned int cpu) { wfi(); } diff --git a/arch/arm/mach-realview/hotplug.c b/arch/arm/mach-realview/hotplug.c index ac22dd41b135..968e2d1964f6 100644 --- a/arch/arm/mach-realview/hotplug.c +++ b/arch/arm/mach-realview/hotplug.c @@ -90,7 +90,7 @@ static inline void platform_do_lowpower(unsigned int cpu, int *spurious) * * Called with IRQs disabled */ -void __ref realview_cpu_die(unsigned int cpu) +void realview_cpu_die(unsigned int cpu) { int spurious = 0; diff --git a/arch/arm/mach-spear/hotplug.c b/arch/arm/mach-spear/hotplug.c index d97749c642ce..12edd1cf8a12 100644 --- a/arch/arm/mach-spear/hotplug.c +++ b/arch/arm/mach-spear/hotplug.c @@ -80,7 +80,7 @@ static inline void spear13xx_do_lowpower(unsigned int cpu, int *spurious) * * Called with IRQs disabled */ -void __ref spear13xx_cpu_die(unsigned int cpu) +void spear13xx_cpu_die(unsigned int cpu) { int spurious = 0; diff --git a/arch/arm/mach-tegra/hotplug.c b/arch/arm/mach-tegra/hotplug.c index 6fc71f1534b0..1b129899a277 100644 --- a/arch/arm/mach-tegra/hotplug.c +++ b/arch/arm/mach-tegra/hotplug.c @@ -37,7 +37,7 @@ int tegra_cpu_kill(unsigned cpu) * * Called with IRQs disabled */ -void __ref tegra_cpu_die(unsigned int cpu) +void tegra_cpu_die(unsigned int cpu) { if (!tegra_hotplug_shutdown) { WARN(1, "hotplug is not yet initialized\n"); diff --git a/arch/arm/mach-ux500/hotplug.c b/arch/arm/mach-ux500/hotplug.c index 2bc00b085e38..1cbed0331fd3 100644 --- a/arch/arm/mach-ux500/hotplug.c +++ b/arch/arm/mach-ux500/hotplug.c @@ -21,7 +21,7 @@ * * Called with IRQs disabled */ -void __ref ux500_cpu_die(unsigned int cpu) +void ux500_cpu_die(unsigned int cpu) { /* directly enter low power state, skipping secure registers */ for (;;) { diff --git a/arch/arm/mach-vexpress/hotplug.c b/arch/arm/mach-vexpress/hotplug.c index f0ce6b8f5e71..f2fafc10a91d 100644 --- a/arch/arm/mach-vexpress/hotplug.c +++ b/arch/arm/mach-vexpress/hotplug.c @@ -85,7 +85,7 @@ static inline void platform_do_lowpower(unsigned int cpu, int *spurious) * * Called with IRQs disabled */ -void __ref vexpress_cpu_die(unsigned int cpu) +void vexpress_cpu_die(unsigned int cpu) { int spurious = 0; diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index be92fa0f2f35..8d32f7e9695f 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -22,6 +22,7 @@ #include <linux/memblock.h> #include <linux/dma-contiguous.h> #include <linux/sizes.h> +#include <linux/stop_machine.h> #include <asm/cp15.h> #include <asm/mach-types.h> @@ -626,12 +627,10 @@ static struct section_perm ro_perms[] = { * safe to be called with preemption disabled, as under stop_machine(). */ static inline void section_update(unsigned long addr, pmdval_t mask, - pmdval_t prot) + pmdval_t prot, struct mm_struct *mm) { - struct mm_struct *mm; pmd_t *pmd; - mm = current->active_mm; pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr); #ifdef CONFIG_ARM_LPAE @@ -655,49 +654,82 @@ static inline bool arch_has_strict_perms(void) return !!(get_cr() & CR_XP); } -#define set_section_perms(perms, field) { \ - size_t i; \ - unsigned long addr; \ - \ - if (!arch_has_strict_perms()) \ - return; \ - \ - for (i = 0; i < ARRAY_SIZE(perms); i++) { \ - if (!IS_ALIGNED(perms[i].start, SECTION_SIZE) || \ - !IS_ALIGNED(perms[i].end, SECTION_SIZE)) { \ - pr_err("BUG: section %lx-%lx not aligned to %lx\n", \ - perms[i].start, perms[i].end, \ - SECTION_SIZE); \ - continue; \ - } \ - \ - for (addr = perms[i].start; \ - addr < perms[i].end; \ - addr += SECTION_SIZE) \ - section_update(addr, perms[i].mask, \ - perms[i].field); \ - } \ +void set_section_perms(struct section_perm *perms, int n, bool set, + struct mm_struct *mm) +{ + size_t i; + unsigned long addr; + + if (!arch_has_strict_perms()) + return; + + for (i = 0; i < n; i++) { + if (!IS_ALIGNED(perms[i].start, SECTION_SIZE) || + !IS_ALIGNED(perms[i].end, SECTION_SIZE)) { + pr_err("BUG: section %lx-%lx not aligned to %lx\n", + perms[i].start, perms[i].end, + SECTION_SIZE); + continue; + } + + for (addr = perms[i].start; + addr < perms[i].end; + addr += SECTION_SIZE) + section_update(addr, perms[i].mask, + set ? perms[i].prot : perms[i].clear, mm); + } + } -static inline void fix_kernmem_perms(void) +static void update_sections_early(struct section_perm perms[], int n) { - set_section_perms(nx_perms, prot); + struct task_struct *t, *s; + + read_lock(&tasklist_lock); + for_each_process(t) { + if (t->flags & PF_KTHREAD) + continue; + for_each_thread(t, s) + set_section_perms(perms, n, true, s->mm); + } + read_unlock(&tasklist_lock); + set_section_perms(perms, n, true, current->active_mm); + set_section_perms(perms, n, true, &init_mm); +} + +int __fix_kernmem_perms(void *unused) +{ + update_sections_early(nx_perms, ARRAY_SIZE(nx_perms)); + return 0; +} + +void fix_kernmem_perms(void) +{ + stop_machine(__fix_kernmem_perms, NULL, NULL); } #ifdef CONFIG_DEBUG_RODATA +int __mark_rodata_ro(void *unused) +{ + update_sections_early(ro_perms, ARRAY_SIZE(ro_perms)); + return 0; +} + void mark_rodata_ro(void) { - set_section_perms(ro_perms, prot); + stop_machine(__mark_rodata_ro, NULL, NULL); } void set_kernel_text_rw(void) { - set_section_perms(ro_perms, clear); + set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), false, + current->active_mm); } void set_kernel_text_ro(void) { - set_section_perms(ro_perms, prot); + set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), true, + current->active_mm); } #endif /* CONFIG_DEBUG_RODATA */ diff --git a/arch/arm/vdso/vdso.S b/arch/arm/vdso/vdso.S index b2b97e3e7bab..a62a7b64f49c 100644 --- a/arch/arm/vdso/vdso.S +++ b/arch/arm/vdso/vdso.S @@ -23,9 +23,8 @@ #include <linux/const.h> #include <asm/page.h> - __PAGE_ALIGNED_DATA - .globl vdso_start, vdso_end + .section .data..ro_after_init .balign PAGE_SIZE vdso_start: .incbin "arch/arm/vdso/vdso.so" diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6f0a3b41b009..2b5b0c5ac670 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1,5 +1,6 @@ config ARM64 def_bool y + select ACPI_CCA_REQUIRED if ACPI select ACPI_GENERIC_GSI if ACPI select ACPI_REDUCED_HARDWARE_ONLY if ACPI select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE @@ -19,6 +20,7 @@ config ARM64 select ARM_GIC_V2M if PCI_MSI select ARM_GIC_V3 select ARM_GIC_V3_ITS if PCI_MSI + select ARM_PSCI_FW select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK @@ -26,7 +28,7 @@ config ARM64 select DCACHE_WORD_ACCESS select GENERIC_ALLOCATOR select GENERIC_CLOCKEVENTS - select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_CPU_AUTOPROBE select GENERIC_EARLY_IOREMAP select GENERIC_IRQ_PROBE @@ -44,6 +46,7 @@ config ARM64 select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_BITREVERSE select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48) select HAVE_ARCH_KGDB select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK @@ -70,6 +73,7 @@ config ARM64 select HAVE_PERF_USER_STACK_DUMP select HAVE_RCU_TABLE_FREE select HAVE_SYSCALL_TRACEPOINTS + select IOMMU_DMA if IOMMU_SUPPORT select IRQ_DOMAIN select MODULES_USE_ELF_RELA select NO_BOOTMEM @@ -83,6 +87,7 @@ config ARM64 select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE select HAVE_CONTEXT_TRACKING + select HAVE_ARM_SMCCC help ARM 64-bit (AArch64) Linux support. @@ -138,6 +143,9 @@ config NEED_DMA_MAP_STATE config NEED_SG_DMA_LENGTH def_bool y +config SMP + def_bool y + config SWIOTLB def_bool y @@ -392,6 +400,27 @@ config ARM64_ERRATUM_832075 If unsure, say Y. +config ARM64_ERRATUM_834220 + bool "Cortex-A57: 834220: Stage 2 translation fault might be incorrectly reported in presence of a Stage 1 fault" + depends on KVM + default y + help + This option adds an alternative code sequence to work around ARM + erratum 834220 on Cortex-A57 parts up to r1p2. + + Affected Cortex-A57 parts might report a Stage 2 translation + fault as the result of a Stage 1 fault for load crossing a + page boundary when there is a permission or device memory + alignment fault at Stage 1 and a translation fault at Stage 2. + + The workaround is to verify that the Stage 1 translation + doesn't generate a fault before handling the Stage 2 fault. + Please note that this does not necessarily enable the workaround, + as it depends on the alternative framework, which will only patch + the kernel if an affected CPU is detected. + + If unsure, say Y. + config ARM64_ERRATUM_845719 bool "Cortex-A53: 845719: a load might read incorrect data" depends on COMPAT @@ -486,22 +515,8 @@ config CPU_BIG_ENDIAN help Say Y if you plan on running a kernel in big-endian mode. -config SMP - bool "Symmetric Multi-Processing" - help - This enables support for systems with more than one CPU. If - you say N here, the kernel will run on single and - multiprocessor machines, but will use only one CPU of a - multiprocessor machine. If you say Y here, the kernel will run - on many, but not all, single processor machines. On a single - processor machine, the kernel will run faster if you say N - here. - - If you don't know what to do here, say N. - config SCHED_MC bool "Multi-core scheduler support" - depends on SMP help Multi-core scheduler support improves the CPU scheduler's decision making when dealing with multi-core CPU chips at a cost of slightly @@ -509,7 +524,6 @@ config SCHED_MC config SCHED_SMT bool "SMT scheduler support" - depends on SMP help Improves the CPU scheduler's decision making when dealing with MultiThreading at a cost of slightly increased overhead in some @@ -518,23 +532,17 @@ config SCHED_SMT config NR_CPUS int "Maximum number of CPUs (2-4096)" range 2 4096 - depends on SMP # These have to remain sorted largest to smallest default "64" config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" - depends on SMP help Say Y here to experiment with turning CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu. source kernel/Kconfig.preempt -config UP_LATE_INIT - def_bool y - depends on !SMP - config HZ int default 100 @@ -609,6 +617,20 @@ config FORCE_MAX_ZONEORDER default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE) default "11" +config ARM64_PAN + bool "Enable support for Privileged Access Never (PAN)" + default y + help + Privileged Access Never (PAN; part of the ARMv8.1 Extensions) + prevents the kernel or hypervisor from accessing user-space (EL0) + memory directly. + + Choosing this option will cause any unprotected (not using + copy_to_user et al) memory access to fail with a permission fault. + + The feature is detected at runtime, and will remain as a 'nop' + instruction if the cpu does not implement the feature. + menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" depends on COMPAT @@ -762,6 +784,14 @@ menu "Power management options" source "kernel/power/Kconfig" +config ARCH_HIBERNATION_POSSIBLE + def_bool y + depends on CPU_PM + +config ARCH_HIBERNATION_HEADER + def_bool y + depends on HIBERNATION + config ARCH_SUSPEND_POSSIBLE def_bool y diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index f462e6e4ce07..f4e253845a28 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -46,6 +46,13 @@ else TEXT_OFFSET := 0x00080000 endif +# KASAN_SHADOW_OFFSET = VA_START + (1 << (VA_BITS - 3)) - (1 << 61) +# in 32-bit arithmetic +KASAN_SHADOW_OFFSET := $(shell printf "0x%08x00000000\n" $$(( \ + (0xffffffff & (-1 << ($(CONFIG_ARM64_VA_BITS) - 32))) \ + + (1 << ($(CONFIG_ARM64_VA_BITS) - 32 - 3)) \ + - (1 << (64 - 32 - 3)) )) ) + export TEXT_OFFSET GZFLAGS core-y += arch/arm64/kernel/ arch/arm64/mm/ diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi index c8d3e0e86678..d8f3a1c65ecd 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -374,6 +374,28 @@ }; }; + msi: msi@79000000 { + compatible = "apm,xgene1-msi"; + msi-controller; + reg = <0x00 0x79000000 0x0 0x900000>; + interrupts = < 0x0 0x10 0x4 + 0x0 0x11 0x4 + 0x0 0x12 0x4 + 0x0 0x13 0x4 + 0x0 0x14 0x4 + 0x0 0x15 0x4 + 0x0 0x16 0x4 + 0x0 0x17 0x4 + 0x0 0x18 0x4 + 0x0 0x19 0x4 + 0x0 0x1a 0x4 + 0x0 0x1b 0x4 + 0x0 0x1c 0x4 + 0x0 0x1d 0x4 + 0x0 0x1e 0x4 + 0x0 0x1f 0x4>; + }; + pcie0: pcie@1f2b0000 { status = "disabled"; device_type = "pci"; @@ -395,6 +417,7 @@ 0x0 0x0 0x0 0x4 &gic 0x0 0xc5 0x1>; dma-coherent; clocks = <&pcie0clk 0>; + msi-parent = <&msi>; }; pcie1: pcie@1f2c0000 { @@ -418,6 +441,7 @@ 0x0 0x0 0x0 0x4 &gic 0x0 0xcb 0x1>; dma-coherent; clocks = <&pcie1clk 0>; + msi-parent = <&msi>; }; pcie2: pcie@1f2d0000 { @@ -441,6 +465,7 @@ 0x0 0x0 0x0 0x4 &gic 0x0 0xd1 0x1>; dma-coherent; clocks = <&pcie2clk 0>; + msi-parent = <&msi>; }; pcie3: pcie@1f500000 { @@ -464,6 +489,7 @@ 0x0 0x0 0x0 0x4 &gic 0x0 0xd7 0x1>; dma-coherent; clocks = <&pcie3clk 0>; + msi-parent = <&msi>; }; pcie4: pcie@1f510000 { @@ -487,6 +513,7 @@ 0x0 0x0 0x0 0x4 &gic 0x0 0xdd 0x1>; dma-coherent; clocks = <&pcie4clk 0>; + msi-parent = <&msi>; }; serial0: serial@1c020000 { diff --git a/arch/arm64/boot/dts/arm/Makefile b/arch/arm64/boot/dts/arm/Makefile index 301a0dada1fe..c5c98b91514e 100644 --- a/arch/arm64/boot/dts/arm/Makefile +++ b/arch/arm64/boot/dts/arm/Makefile @@ -1,5 +1,5 @@ dtb-$(CONFIG_ARCH_VEXPRESS) += foundation-v8.dtb -dtb-$(CONFIG_ARCH_VEXPRESS) += juno.dtb +dtb-$(CONFIG_ARCH_VEXPRESS) += juno.dtb juno-r1.dtb dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb always := $(dtb-y) diff --git a/arch/arm64/boot/dts/arm/juno-base.dtsi b/arch/arm64/boot/dts/arm/juno-base.dtsi new file mode 100644 index 000000000000..e3ee96036eca --- /dev/null +++ b/arch/arm64/boot/dts/arm/juno-base.dtsi @@ -0,0 +1,154 @@ + /* + * Devices shared by all Juno boards + */ + + memtimer: timer@2a810000 { + compatible = "arm,armv7-timer-mem"; + reg = <0x0 0x2a810000 0x0 0x10000>; + clock-frequency = <50000000>; + #address-cells = <2>; + #size-cells = <2>; + ranges; + status = "disabled"; + frame@2a830000 { + frame-number = <1>; + interrupts = <0 60 4>; + reg = <0x0 0x2a830000 0x0 0x10000>; + }; + }; + + gic: interrupt-controller@2c010000 { + compatible = "arm,gic-400", "arm,cortex-a15-gic"; + reg = <0x0 0x2c010000 0 0x1000>, + <0x0 0x2c02f000 0 0x2000>, + <0x0 0x2c04f000 0 0x2000>, + <0x0 0x2c06f000 0 0x2000>; + #address-cells = <2>; + #interrupt-cells = <3>; + #size-cells = <2>; + interrupt-controller; + interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(6) | IRQ_TYPE_LEVEL_HIGH)>; + ranges = <0 0 0 0x2c1c0000 0 0x40000>; + v2m_0: v2m@0 { + compatible = "arm,gic-v2m-frame"; + msi-controller; + reg = <0 0 0 0x1000>; + }; + }; + + timer { + compatible = "arm,armv8-timer"; + interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(6) | IRQ_TYPE_LEVEL_LOW)>, + <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(6) | IRQ_TYPE_LEVEL_LOW)>, + <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(6) | IRQ_TYPE_LEVEL_LOW)>, + <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(6) | IRQ_TYPE_LEVEL_LOW)>; + }; + + /include/ "juno-clocks.dtsi" + + dma@7ff00000 { + compatible = "arm,pl330", "arm,primecell"; + reg = <0x0 0x7ff00000 0 0x1000>; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <32>; + interrupts = <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 91 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&soc_faxiclk>; + clock-names = "apb_pclk"; + }; + + soc_uart0: uart@7ff80000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x0 0x7ff80000 0x0 0x1000>; + interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&soc_uartclk>, <&soc_refclk100mhz>; + clock-names = "uartclk", "apb_pclk"; + }; + + i2c@7ffa0000 { + compatible = "snps,designware-i2c"; + reg = <0x0 0x7ffa0000 0x0 0x1000>; + #address-cells = <1>; + #size-cells = <0>; + interrupts = <GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>; + clock-frequency = <400000>; + i2c-sda-hold-time-ns = <500>; + clocks = <&soc_smc50mhz>; + + dvi0: dvi-transmitter@70 { + compatible = "nxp,tda998x"; + reg = <0x70>; + }; + + dvi1: dvi-transmitter@71 { + compatible = "nxp,tda998x"; + reg = <0x71>; + }; + }; + + ohci@7ffb0000 { + compatible = "generic-ohci"; + reg = <0x0 0x7ffb0000 0x0 0x10000>; + interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&soc_usb48mhz>; + }; + + ehci@7ffc0000 { + compatible = "generic-ehci"; + reg = <0x0 0x7ffc0000 0x0 0x10000>; + interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&soc_usb48mhz>; + }; + + memory-controller@7ffd0000 { + compatible = "arm,pl354", "arm,primecell"; + reg = <0 0x7ffd0000 0 0x1000>; + interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&soc_smc50mhz>; + clock-names = "apb_pclk"; + }; + + memory@80000000 { + device_type = "memory"; + /* last 16MB of the first memory area is reserved for secure world use by firmware */ + reg = <0x00000000 0x80000000 0x0 0x7f000000>, + <0x00000008 0x80000000 0x1 0x80000000>; + }; + + smb { + compatible = "simple-bus"; + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 15>; + interrupt-map = <0 0 0 &gic 0 0 0 68 IRQ_TYPE_LEVEL_HIGH>, + <0 0 1 &gic 0 0 0 69 IRQ_TYPE_LEVEL_HIGH>, + <0 0 2 &gic 0 0 0 70 IRQ_TYPE_LEVEL_HIGH>, + <0 0 3 &gic 0 0 0 160 IRQ_TYPE_LEVEL_HIGH>, + <0 0 4 &gic 0 0 0 161 IRQ_TYPE_LEVEL_HIGH>, + <0 0 5 &gic 0 0 0 162 IRQ_TYPE_LEVEL_HIGH>, + <0 0 6 &gic 0 0 0 163 IRQ_TYPE_LEVEL_HIGH>, + <0 0 7 &gic 0 0 0 164 IRQ_TYPE_LEVEL_HIGH>, + <0 0 8 &gic 0 0 0 165 IRQ_TYPE_LEVEL_HIGH>, + <0 0 9 &gic 0 0 0 166 IRQ_TYPE_LEVEL_HIGH>, + <0 0 10 &gic 0 0 0 167 IRQ_TYPE_LEVEL_HIGH>, + <0 0 11 &gic 0 0 0 168 IRQ_TYPE_LEVEL_HIGH>, + <0 0 12 &gic 0 0 0 169 IRQ_TYPE_LEVEL_HIGH>; + + /include/ "juno-motherboard.dtsi" + }; diff --git a/arch/arm64/boot/dts/arm/juno-clocks.dtsi b/arch/arm64/boot/dts/arm/juno-clocks.dtsi index c9b89efe0f56..25352ed943e6 100644 --- a/arch/arm64/boot/dts/arm/juno-clocks.dtsi +++ b/arch/arm64/boot/dts/arm/juno-clocks.dtsi @@ -36,9 +36,9 @@ clock-output-names = "apb_pclk"; }; - soc_faxiclk: refclk533mhz { + soc_faxiclk: refclk400mhz { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <533000000>; + clock-frequency = <400000000>; clock-output-names = "faxi_clk"; }; diff --git a/arch/arm64/boot/dts/arm/juno-r1.dts b/arch/arm64/boot/dts/arm/juno-r1.dts new file mode 100644 index 000000000000..a25964d26bda --- /dev/null +++ b/arch/arm64/boot/dts/arm/juno-r1.dts @@ -0,0 +1,136 @@ +/* + * ARM Ltd. Juno Platform + * + * Copyright (c) 2015 ARM Ltd. + * + * This file is licensed under a dual GPLv2 or BSD license. + */ + +/dts-v1/; + +#include <dt-bindings/interrupt-controller/arm-gic.h> + +/ { + model = "ARM Juno development board (r1)"; + compatible = "arm,juno-r1", "arm,juno", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + aliases { + serial0 = &soc_uart0; + }; + + chosen { + stdout-path = "serial0:115200n8"; + }; + + psci { + compatible = "arm,psci-0.2"; + method = "smc"; + }; + + cpus { + #address-cells = <2>; + #size-cells = <0>; + + A57_0: cpu@0 { + compatible = "arm,cortex-a57","arm,armv8"; + reg = <0x0 0x0>; + device_type = "cpu"; + enable-method = "psci"; + next-level-cache = <&A57_L2>; + }; + + A57_1: cpu@1 { + compatible = "arm,cortex-a57","arm,armv8"; + reg = <0x0 0x1>; + device_type = "cpu"; + enable-method = "psci"; + next-level-cache = <&A57_L2>; + }; + + A53_0: cpu@100 { + compatible = "arm,cortex-a53","arm,armv8"; + reg = <0x0 0x100>; + device_type = "cpu"; + enable-method = "psci"; + next-level-cache = <&A53_L2>; + }; + + A53_1: cpu@101 { + compatible = "arm,cortex-a53","arm,armv8"; + reg = <0x0 0x101>; + device_type = "cpu"; + enable-method = "psci"; + next-level-cache = <&A53_L2>; + }; + + A53_2: cpu@102 { + compatible = "arm,cortex-a53","arm,armv8"; + reg = <0x0 0x102>; + device_type = "cpu"; + enable-method = "psci"; + next-level-cache = <&A53_L2>; + }; + + A53_3: cpu@103 { + compatible = "arm,cortex-a53","arm,armv8"; + reg = <0x0 0x103>; + device_type = "cpu"; + enable-method = "psci"; + next-level-cache = <&A53_L2>; + }; + + A57_L2: l2-cache0 { + compatible = "cache"; + }; + + A53_L2: l2-cache1 { + compatible = "cache"; + }; + }; + + pmu { + compatible = "arm,armv8-pmuv3"; + interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>; + interrupt-affinity = <&A57_0>, + <&A57_1>, + <&A53_0>, + <&A53_1>, + <&A53_2>, + <&A53_3>; + }; + + #include "juno-base.dtsi" + + pcie-controller@40000000 { + compatible = "arm,juno-r1-pcie", "plda,xpressrich3-axi", "pci-host-ecam-generic"; + device_type = "pci"; + reg = <0 0x40000000 0 0x10000000>; /* ECAM config space */ + bus-range = <0 255>; + linux,pci-domain = <0>; + #address-cells = <3>; + #size-cells = <2>; + dma-coherent; + ranges = <0x01000000 0x00 0x5f800000 0x00 0x5f800000 0x0 0x00800000>, + <0x02000000 0x00 0x50000000 0x00 0x50000000 0x0 0x08000000>, + <0x42000000 0x40 0x00000000 0x40 0x00000000 0x1 0x00000000>; + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 0 7>; + interrupt-map = <0 0 0 1 &gic 0 0 0 136 4>, + <0 0 0 2 &gic 0 0 0 137 4>, + <0 0 0 3 &gic 0 0 0 138 4>, + <0 0 0 4 &gic 0 0 0 139 4>; + msi-parent = <&v2m_0>; + }; +}; + +&memtimer { + status = "okay"; +}; diff --git a/arch/arm64/boot/dts/arm/juno.dts b/arch/arm64/boot/dts/arm/juno.dts index 5e9110a3353d..d7cbdd482a61 100644 --- a/arch/arm64/boot/dts/arm/juno.dts +++ b/arch/arm64/boot/dts/arm/juno.dts @@ -91,33 +91,6 @@ }; }; - memory@80000000 { - device_type = "memory"; - /* last 16MB of the first memory area is reserved for secure world use by firmware */ - reg = <0x00000000 0x80000000 0x0 0x7f000000>, - <0x00000008 0x80000000 0x1 0x80000000>; - }; - - gic: interrupt-controller@2c001000 { - compatible = "arm,gic-400", "arm,cortex-a15-gic"; - reg = <0x0 0x2c010000 0 0x1000>, - <0x0 0x2c02f000 0 0x2000>, - <0x0 0x2c04f000 0 0x2000>, - <0x0 0x2c06f000 0 0x2000>; - #address-cells = <0>; - #interrupt-cells = <3>; - interrupt-controller; - interrupts = <GIC_PPI 9 (GIC_CPU_MASK_SIMPLE(6) | IRQ_TYPE_LEVEL_HIGH)>; - }; - - timer { - compatible = "arm,armv8-timer"; - interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(6) | IRQ_TYPE_LEVEL_LOW)>, - <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(6) | IRQ_TYPE_LEVEL_LOW)>, - <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(6) | IRQ_TYPE_LEVEL_LOW)>, - <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(6) | IRQ_TYPE_LEVEL_LOW)>; - }; - pmu { compatible = "arm,armv8-pmuv3"; interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>, @@ -134,105 +107,5 @@ <&A53_3>; }; - /include/ "juno-clocks.dtsi" - - dma@7ff00000 { - compatible = "arm,pl330", "arm,primecell"; - reg = <0x0 0x7ff00000 0 0x1000>; - #dma-cells = <1>; - #dma-channels = <8>; - #dma-requests = <32>; - interrupts = <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 91 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&soc_faxiclk>; - clock-names = "apb_pclk"; - }; - - soc_uart0: uart@7ff80000 { - compatible = "arm,pl011", "arm,primecell"; - reg = <0x0 0x7ff80000 0x0 0x1000>; - interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&soc_uartclk>, <&soc_refclk100mhz>; - clock-names = "uartclk", "apb_pclk"; - }; - - i2c@7ffa0000 { - compatible = "snps,designware-i2c"; - reg = <0x0 0x7ffa0000 0x0 0x1000>; - #address-cells = <1>; - #size-cells = <0>; - interrupts = <GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>; - clock-frequency = <400000>; - i2c-sda-hold-time-ns = <500>; - clocks = <&soc_smc50mhz>; - - dvi0: dvi-transmitter@70 { - compatible = "nxp,tda998x"; - reg = <0x70>; - }; - - dvi1: dvi-transmitter@71 { - compatible = "nxp,tda998x"; - reg = <0x71>; - }; - }; - - ohci@7ffb0000 { - compatible = "generic-ohci"; - reg = <0x0 0x7ffb0000 0x0 0x10000>; - interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&soc_usb48mhz>; - }; - - ehci@7ffc0000 { - compatible = "generic-ehci"; - reg = <0x0 0x7ffc0000 0x0 0x10000>; - interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&soc_usb48mhz>; - }; - - memory-controller@7ffd0000 { - compatible = "arm,pl354", "arm,primecell"; - reg = <0 0x7ffd0000 0 0x1000>; - interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&soc_smc50mhz>; - clock-names = "apb_pclk"; - }; - - smb { - compatible = "simple-bus"; - #address-cells = <2>; - #size-cells = <1>; - ranges = <0 0 0 0x08000000 0x04000000>, - <1 0 0 0x14000000 0x04000000>, - <2 0 0 0x18000000 0x04000000>, - <3 0 0 0x1c000000 0x04000000>, - <4 0 0 0x0c000000 0x04000000>, - <5 0 0 0x10000000 0x04000000>; - - #interrupt-cells = <1>; - interrupt-map-mask = <0 0 15>; - interrupt-map = <0 0 0 &gic 0 68 IRQ_TYPE_LEVEL_HIGH>, - <0 0 1 &gic 0 69 IRQ_TYPE_LEVEL_HIGH>, - <0 0 2 &gic 0 70 IRQ_TYPE_LEVEL_HIGH>, - <0 0 3 &gic 0 160 IRQ_TYPE_LEVEL_HIGH>, - <0 0 4 &gic 0 161 IRQ_TYPE_LEVEL_HIGH>, - <0 0 5 &gic 0 162 IRQ_TYPE_LEVEL_HIGH>, - <0 0 6 &gic 0 163 IRQ_TYPE_LEVEL_HIGH>, - <0 0 7 &gic 0 164 IRQ_TYPE_LEVEL_HIGH>, - <0 0 8 &gic 0 165 IRQ_TYPE_LEVEL_HIGH>, - <0 0 9 &gic 0 166 IRQ_TYPE_LEVEL_HIGH>, - <0 0 10 &gic 0 167 IRQ_TYPE_LEVEL_HIGH>, - <0 0 11 &gic 0 168 IRQ_TYPE_LEVEL_HIGH>, - <0 0 12 &gic 0 169 IRQ_TYPE_LEVEL_HIGH>; - - /include/ "juno-motherboard.dtsi" - }; + #include "juno-base.dtsi" }; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 2ed7449d9273..e9e4ea5cd339 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -45,6 +45,7 @@ CONFIG_ARCH_XGENE=y CONFIG_ARCH_ZYNQMP=y CONFIG_PCI=y CONFIG_PCI_MSI=y +CONFIG_PCI_HOST_GENERIC=y CONFIG_PCI_XGENE=y CONFIG_SMP=y CONFIG_PREEMPT=y diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 59c05d8ea4a0..ed7e212c893c 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -12,8 +12,9 @@ #ifndef _ASM_ACPI_H #define _ASM_ACPI_H -#include <linux/mm.h> #include <linux/irqchip/arm-gic-acpi.h> +#include <linux/mm.h> +#include <linux/psci.h> #include <asm/cputype.h> #include <asm/smp_plat.h> @@ -39,18 +40,6 @@ extern int acpi_disabled; extern int acpi_noirq; extern int acpi_pci_disabled; -/* 1 to indicate PSCI 0.2+ is implemented */ -static inline bool acpi_psci_present(void) -{ - return acpi_gbl_FADT.arm_boot_flags & ACPI_FADT_PSCI_COMPLIANT; -} - -/* 1 to indicate HVC must be used instead of SMC as the PSCI conduit */ -static inline bool acpi_psci_use_hvc(void) -{ - return acpi_gbl_FADT.arm_boot_flags & ACPI_FADT_PSCI_USE_HVC; -} - static inline void disable_acpi(void) { acpi_disabled = 1; @@ -88,9 +77,11 @@ static inline void arch_fix_phys_package_id(int num, u32 slot) { } void __init acpi_init_cpus(void); #else -static inline bool acpi_psci_present(void) { return false; } -static inline bool acpi_psci_use_hvc(void) { return false; } static inline void acpi_init_cpus(void) { } #endif /* CONFIG_ACPI */ +static inline const char *acpi_get_enable_method(int cpu) +{ + return acpi_psci_present() ? "psci" : NULL; +} #endif /*_ASM_ACPI_H*/ diff --git a/arch/arm64/include/asm/alternative-asm.h b/arch/arm64/include/asm/alternative-asm.h deleted file mode 100644 index 919a67855b63..000000000000 --- a/arch/arm64/include/asm/alternative-asm.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef __ASM_ALTERNATIVE_ASM_H -#define __ASM_ALTERNATIVE_ASM_H - -#ifdef __ASSEMBLY__ - -.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len - .word \orig_offset - . - .word \alt_offset - . - .hword \feature - .byte \orig_len - .byte \alt_len -.endm - -.macro alternative_insn insn1 insn2 cap -661: \insn1 -662: .pushsection .altinstructions, "a" - altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f - .popsection - .pushsection .altinstr_replacement, "ax" -663: \insn2 -664: .popsection - .if ((664b-663b) != (662b-661b)) - .error "Alternatives instruction length mismatch" - .endif -.endm - -#endif /* __ASSEMBLY__ */ - -#endif /* __ASM_ALTERNATIVE_ASM_H */ diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index d261f01e2bae..c04fab5de325 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -1,6 +1,9 @@ #ifndef __ASM_ALTERNATIVE_H #define __ASM_ALTERNATIVE_H +#ifndef __ASSEMBLY__ + +#include <linux/kconfig.h> #include <linux/types.h> #include <linux/stddef.h> #include <linux/stringify.h> @@ -15,7 +18,6 @@ struct alt_instr { void apply_alternatives_all(void); void apply_alternatives(void *start, size_t length); -void free_alternatives_memory(void); #define ALTINSTR_ENTRY(feature) \ " .word 661b - .\n" /* label */ \ @@ -24,8 +26,22 @@ void free_alternatives_memory(void); " .byte 662b-661b\n" /* source len */ \ " .byte 664f-663f\n" /* replacement len */ -/* alternative assembly primitive: */ -#define ALTERNATIVE(oldinstr, newinstr, feature) \ +/* + * alternative assembly primitive: + * + * If any of these .org directive fail, it means that insn1 and insn2 + * don't have the same length. This used to be written as + * + * .if ((664b-663b) != (662b-661b)) + * .error "Alternatives instruction length mismatch" + * .endif + * + * but most assemblers die if insn1 or insn2 have a .inst. This should + * be fixed in a binutils release posterior to 2.25.51.0.2 (anything + * containing commit 4e4d08cf7399b606 or c1baaddf8861). + */ +#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \ + ".if "__stringify(cfg_enabled)" == 1\n" \ "661:\n\t" \ oldinstr "\n" \ "662:\n" \ @@ -37,8 +53,98 @@ void free_alternatives_memory(void); newinstr "\n" \ "664:\n\t" \ ".popsection\n\t" \ - ".if ((664b-663b) != (662b-661b))\n\t" \ - " .error \"Alternatives instruction length mismatch\"\n\t"\ + ".org . - (664b-663b) + (662b-661b)\n\t" \ + ".org . - (662b-661b) + (664b-663b)\n" \ ".endif\n" +#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \ + __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) + +#else + +.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len + .word \orig_offset - . + .word \alt_offset - . + .hword \feature + .byte \orig_len + .byte \alt_len +.endm + +.macro alternative_insn insn1, insn2, cap, enable = 1 + .if \enable +661: \insn1 +662: .pushsection .altinstructions, "a" + altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f + .popsection + .pushsection .altinstr_replacement, "ax" +663: \insn2 +664: .popsection + .org . - (664b-663b) + (662b-661b) + .org . - (662b-661b) + (664b-663b) + .endif +.endm + +/* + * Begin an alternative code sequence. + * + * The code that follows this macro will be assembled and linked as + * normal. There are no restrictions on this code. + */ +.macro alternative_if_not cap, enable = 1 + .if \enable + .pushsection .altinstructions, "a" + altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f + .popsection +661: + .endif +.endm + +/* + * Provide the alternative code sequence. + * + * The code that follows this macro is assembled into a special + * section to be used for dynamic patching. Code that follows this + * macro must: + * + * 1. Be exactly the same length (in bytes) as the default code + * sequence. + * + * 2. Not contain a branch target that is used outside of the + * alternative sequence it is defined in (branches into an + * alternative sequence are not fixed up). + */ +.macro alternative_else, enable = 1 + .if \enable +662: .pushsection .altinstr_replacement, "ax" +663: + .endif +.endm + +/* + * Complete an alternative code sequence. + */ +.macro alternative_endif, enable = 1 + .if \enable +664: .popsection + .org . - (664b-663b) + (662b-661b) + .org . - (662b-661b) + (664b-663b) + .endif +.endm + +#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \ + alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) + + +#endif /* __ASSEMBLY__ */ + +/* + * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature)); + * + * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO)); + * N.B. If CONFIG_FOO is specified, but not selected, the whole block + * will be omitted, including oldinstr. + */ +#define ALTERNATIVE(oldinstr, newinstr, ...) \ + _ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1) + #endif /* __ASM_ALTERNATIVE_H */ diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 144b64ad96c3..d8732b773bd1 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -1,5 +1,5 @@ /* - * Based on arch/arm/include/asm/assembler.h + * Based on arch/arm/include/asm/assembler.h, arch/arm/mm/proc-macros.S * * Copyright (C) 1996-2000 Russell King * Copyright (C) 2012 ARM Ltd. @@ -23,6 +23,9 @@ #ifndef __ASM_ASSEMBLER_H #define __ASM_ASSEMBLER_H +#include <asm/asm-offsets.h> +#include <asm/page.h> +#include <asm/pgtable-hwdef.h> #include <asm/ptrace.h> #include <asm/thread_info.h> @@ -103,9 +106,7 @@ * SMP data memory barrier */ .macro smp_dmb, opt -#ifdef CONFIG_SMP dmb \opt -#endif .endm #define USER(l, x...) \ @@ -207,4 +208,111 @@ lr .req x30 // link register str \src, [\tmp, :lo12:\sym] .endm +/* + * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm) + */ + .macro vma_vm_mm, rd, rn + ldr \rd, [\rn, #VMA_VM_MM] + .endm + +/* + * mmid - get context id from mm pointer (mm->context.id) + */ + .macro mmid, rd, rn + ldr \rd, [\rn, #MM_CONTEXT_ID] + .endm + +/* + * dcache_line_size - get the minimum D-cache line size from the CTR register. + */ + .macro dcache_line_size, reg, tmp + mrs \tmp, ctr_el0 // read CTR + ubfm \tmp, \tmp, #16, #19 // cache line size encoding + mov \reg, #4 // bytes per word + lsl \reg, \reg, \tmp // actual cache line size + .endm + +/* + * icache_line_size - get the minimum I-cache line size from the CTR register. + */ + .macro icache_line_size, reg, tmp + mrs \tmp, ctr_el0 // read CTR + and \tmp, \tmp, #0xf // cache line size encoding + mov \reg, #4 // bytes per word + lsl \reg, \reg, \tmp // actual cache line size + .endm + +/* + * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map + */ + .macro tcr_set_idmap_t0sz, valreg, tmpreg +#ifndef CONFIG_ARM64_VA_BITS_48 + ldr_l \tmpreg, idmap_t0sz + bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH +#endif + .endm + +/* + * Macro to perform a data cache maintenance for the interval + * [kaddr, kaddr + size) + * + * op: operation passed to dc instruction + * domain: domain used in dsb instruciton + * kaddr: starting virtual address of the region + * size: size of the region + * Corrupts: kaddr, size, tmp1, tmp2 + */ + .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2 + dcache_line_size \tmp1, \tmp2 + add \size, \kaddr, \size + sub \tmp2, \tmp1, #1 + bic \kaddr, \kaddr, \tmp2 +9998: dc \op, \kaddr + add \kaddr, \kaddr, \tmp1 + cmp \kaddr, \size + b.lo 9998b + dsb \domain + .endm + +/* + * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present + */ + .macro reset_pmuserenr_el0, tmpreg + mrs \tmpreg, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer + sbfx \tmpreg, \tmpreg, #8, #4 + cmp \tmpreg, #1 // Skip if no PMU present + b.lt 9000f + msr pmuserenr_el0, xzr // Disable PMU access from EL0 +9000: + .endm + +/* + * copy_page - copy src to dest using temp registers t1-t8 + */ + .macro copy_page dest:req src:req t1:req t2:req t3:req t4:req t5:req t6:req t7:req t8:req +9998: ldp \t1, \t2, [\src] + ldp \t3, \t4, [\src, #16] + ldp \t5, \t6, [\src, #32] + ldp \t7, \t8, [\src, #48] + add \src, \src, #64 + stnp \t1, \t2, [\dest] + stnp \t3, \t4, [\dest, #16] + stnp \t5, \t6, [\dest, #32] + stnp \t7, \t8, [\dest, #48] + add \dest, \dest, #64 + tst \src, #(PAGE_SIZE - 1) + b.ne 9998b + .endm + +/* + * Annotate a function as position independent, i.e., safe to be called before + * the kernel virtual mapping is activated. + */ +#define ENDPIPROC(x) \ + .globl __pi_##x; \ + .type __pi_##x, %function; \ + .set __pi_##x, x; \ + .size __pi_##x, . - x; \ + ENDPROC(x) + #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 71f19c4dc0de..2666c9b84ca0 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -35,28 +35,6 @@ #define dma_rmb() dmb(oshld) #define dma_wmb() dmb(oshst) -#ifndef CONFIG_SMP -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() - -#define smp_store_release(p, v) \ -do { \ - compiletime_assert_atomic_type(*p); \ - barrier(); \ - ACCESS_ONCE(*p) = (v); \ -} while (0) - -#define smp_load_acquire(p) \ -({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ - compiletime_assert_atomic_type(*p); \ - barrier(); \ - ___p1; \ -}) - -#else - #define smp_mb() dmb(ish) #define smp_rmb() dmb(ishld) #define smp_wmb() dmb(ishst) @@ -109,8 +87,6 @@ do { \ ___p1; \ }) -#endif - #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) diff --git a/arch/arm64/include/asm/boot.h b/arch/arm64/include/asm/boot.h new file mode 100644 index 000000000000..81151b67b26b --- /dev/null +++ b/arch/arm64/include/asm/boot.h @@ -0,0 +1,14 @@ + +#ifndef __ASM_BOOT_H +#define __ASM_BOOT_H + +#include <asm/sizes.h> + +/* + * arm64 requires the DTB to be 8 byte aligned and + * not exceed 2MB in size. + */ +#define MIN_FDT_ALIGN 8 +#define MAX_FDT_SIZE SZ_2M + +#endif diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 67d309cc3b6b..c64268dbff64 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -40,10 +40,6 @@ * the implementation assumes non-aliasing VIPT D-cache and (aliasing) * VIPT or ASID-tagged VIVT I-cache. * - * flush_cache_all() - * - * Unconditionally clean and invalidate the entire cache. - * * flush_cache_mm(mm) * * Clean and invalidate all user space cache entries @@ -69,10 +65,10 @@ * - kaddr - page address * - size - region size */ -extern void flush_cache_all(void); extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); +extern void __clean_dcache_area_pou(void *addr, size_t len); extern long __flush_cache_user_range(unsigned long start, unsigned long end); static inline void flush_cache_mm(struct mm_struct *mm) @@ -153,8 +149,4 @@ int set_memory_rw(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); -#ifdef CONFIG_DEBUG_RODATA -void mark_rodata_ro(void); -#endif - #endif diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index 5a31d6716914..8f03446cf89f 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -19,15 +19,15 @@ #include <linux/init.h> #include <linux/threads.h> -struct device_node; - /** * struct cpu_operations - Callback operations for hotplugging CPUs. * * @name: Name of the property as appears in a devicetree cpu node's - * enable-method property. - * @cpu_init: Reads any data necessary for a specific enable-method from the - * devicetree, for a given cpu node and proposed logical id. + * enable-method property. On systems booting with ACPI, @name + * identifies the struct cpu_operations entry corresponding to + * the boot protocol specified in the ACPI MADT table. + * @cpu_init: Reads any data necessary for a specific enable-method for a + * proposed logical id. * @cpu_prepare: Early one-time preparation step for a cpu. If there is a * mechanism for doing so, tests whether it is possible to boot * the given CPU. @@ -40,15 +40,15 @@ struct device_node; * @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the * cpu being killed. * @cpu_kill: Ensures a cpu has left the kernel. Called from another cpu. - * @cpu_init_idle: Reads any data necessary to initialize CPU idle states from - * devicetree, for a given cpu node and proposed logical id. + * @cpu_init_idle: Reads any data necessary to initialize CPU idle states for + * a proposed logical id. * @cpu_suspend: Suspends a cpu and saves the required context. May fail owing * to wrong parameters or error conditions. Called from the * CPU being suspended. Must be called with IRQs disabled. */ struct cpu_operations { const char *name; - int (*cpu_init)(struct device_node *, unsigned int); + int (*cpu_init)(unsigned int); int (*cpu_prepare)(unsigned int); int (*cpu_boot)(unsigned int); void (*cpu_postboot)(void); @@ -58,14 +58,17 @@ struct cpu_operations { int (*cpu_kill)(unsigned int cpu); #endif #ifdef CONFIG_CPU_IDLE - int (*cpu_init_idle)(struct device_node *, unsigned int); + int (*cpu_init_idle)(unsigned int); int (*cpu_suspend)(unsigned long); #endif }; extern const struct cpu_operations *cpu_ops[NR_CPUS]; -int __init cpu_read_ops(struct device_node *dn, int cpu); -void __init cpu_read_bootcpu_ops(void); -const struct cpu_operations *cpu_get_ops(const char *name); +int __init cpu_read_ops(int cpu); + +static inline void __init cpu_read_bootcpu_ops(void) +{ + cpu_read_ops(0); +} #endif /* ifndef __ASM_CPU_OPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 82cb9f98ba1a..d4030d8d9fa2 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -24,8 +24,18 @@ #define ARM64_WORKAROUND_CLEAN_CACHE 0 #define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE 1 #define ARM64_WORKAROUND_845719 2 +#define ARM64_HAS_SYSREG_GIC_CPUIF 3 +#define ARM64_HAS_PAN 4 -#define ARM64_NCAPS 3 +#define ARM64_HAS_LSE_ATOMICS 5 +#define ARM64_WORKAROUND_CAVIUM_23154 6 +#define ARM64_WORKAROUND_834220 7 +/* #define ARM64_HAS_NO_HW_PREFETCH 8 */ +/* #define ARM64_HAS_UAO 9 */ +/* #define ARM64_ALT_PAN_NOT_UAO 10 */ +#define ARM64_HAS_VIRT_HOST_EXTN 11 + +#define ARM64_NCAPS 12 #ifndef __ASSEMBLY__ @@ -33,11 +43,19 @@ struct arm64_cpu_capabilities { const char *desc; u16 capability; bool (*matches)(const struct arm64_cpu_capabilities *); + void (*enable)(void); union { struct { /* To be used for erratum handling only */ u32 midr_model; u32 midr_range_min, midr_range_max; }; + + struct { /* Feature register checking */ + int field_pos; + int min_field_value; + u64 register_mask; + u64 register_value; + }; }; }; @@ -64,6 +82,13 @@ static inline void cpus_set_cap(unsigned int num) __set_bit(num, cpu_hwcaps); } +static inline int __attribute_const__ cpuid_feature_extract_field(u64 features, + int field) +{ + return (s64)(features << (64 - 4 - field)) >> (64 - 4); +} + + void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info); void check_local_cpu_errata(void); diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index a84ec605bed8..ee6403df9fe4 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -81,9 +81,6 @@ #define ID_AA64MMFR0_BIGEND(mmfr0) \ (((mmfr0) & ID_AA64MMFR0_BIGEND_MASK) >> ID_AA64MMFR0_BIGEND_SHIFT) -#define SCTLR_EL1_CP15BEN (0x1 << 5) -#define SCTLR_EL1_SED (0x1 << 8) - #ifndef __ASSEMBLY__ /* diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 9437e3dc5833..c991d5b4c1b2 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -27,14 +27,18 @@ #include <asm/xen/hypervisor.h> #define DMA_ERROR_CODE (~(dma_addr_t)0) -extern struct dma_map_ops *dma_ops; +extern struct dma_map_ops dummy_dma_ops; static inline struct dma_map_ops *__generic_dma_ops(struct device *dev) { - if (unlikely(!dev) || !dev->archdata.dma_ops) - return dma_ops; - else + if (dev && dev->archdata.dma_ops) return dev->archdata.dma_ops; + + /* + * We expect no ISA devices, and all other DMA masters are expected to + * have someone call arch_setup_dma_ops at device creation time. + */ + return &dummy_dma_ops; } static inline struct dma_map_ops *get_dma_ops(struct device *dev) @@ -45,13 +49,15 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) return __generic_dma_ops(dev); } -static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - struct iommu_ops *iommu, bool coherent) -{ - dev->archdata.dma_coherent = coherent; -} +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + struct iommu_ops *iommu, bool coherent); #define arch_setup_dma_ops arch_setup_dma_ops +#ifdef CONFIG_IOMMU_DMA +void arch_teardown_dma_ops(struct device *dev); +#define arch_teardown_dma_ops arch_teardown_dma_ops +#endif + /* do not use this function in a driver */ static inline bool is_device_dma_coherent(struct device *dev) { diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 95e6b6dcbe37..c0739187a920 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -17,6 +17,7 @@ #ifndef __ASSEMBLY__ #include <linux/kernel.h> +#include <asm/boot.h> #include <asm/page.h> /* @@ -32,6 +33,20 @@ */ enum fixed_addresses { FIX_HOLE, + + /* + * Reserve a virtual window for the FDT that is 2 MB larger than the + * maximum supported size, and put it at the top of the fixmap region. + * The additional space ensures that any FDT that does not exceed + * MAX_FDT_SIZE can be mapped regardless of whether it crosses any + * 2 MB alignment boundaries. + * + * Keep this at the top so it remains 2 MB aligned. + */ +#define FIX_FDT_SIZE (MAX_FDT_SIZE + SZ_2M) + FIX_FDT_END, + FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1, + FIX_EARLYCON_MEM_BASE, FIX_TEXT_POKE0, __end_of_permanent_fixed_addresses, diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 5f750dc96e0f..667346273d9b 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -20,10 +20,16 @@ #include <linux/futex.h> #include <linux/uaccess.h> + +#include <asm/alternative.h> +#include <asm/cpufeature.h> #include <asm/errno.h> +#include <asm/sysreg.h> #define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \ asm volatile( \ + ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) \ "1: ldxr %w1, %2\n" \ insn "\n" \ "2: stlxr %w3, %w0, %2\n" \ @@ -39,6 +45,8 @@ " .align 3\n" \ " .quad 1b, 4b, 2b, 4b\n" \ " .popsection\n" \ + ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) \ : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \ : "r" (oparg), "Ir" (-EFAULT) \ : "memory") diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index 6aae421f4d73..2bb7009bdac7 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -24,9 +24,7 @@ typedef struct { unsigned int __softirq_pending; -#ifdef CONFIG_SMP unsigned int ipi_irqs[NR_IPI]; -#endif } ____cacheline_aligned irq_cpustat_t; #include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ @@ -34,10 +32,8 @@ typedef struct { #define __inc_irq_stat(cpu, member) __IRQ_STAT(cpu, member)++ #define __get_irq_stat(cpu, member) __IRQ_STAT(cpu, member) -#ifdef CONFIG_SMP u64 smp_irq_stat_cpu(unsigned int cpu); #define arch_irq_stat_cpu smp_irq_stat_cpu -#endif #define __ARCH_IRQ_EXIT_IRQS_DISABLED 1 diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h index b4f6b19a8a68..8e24ef3f7c82 100644 --- a/arch/arm64/include/asm/irq_work.h +++ b/arch/arm64/include/asm/irq_work.h @@ -1,8 +1,6 @@ #ifndef __ASM_IRQ_WORK_H #define __ASM_IRQ_WORK_H -#ifdef CONFIG_SMP - #include <asm/smp.h> static inline bool arch_irq_work_has_interrupt(void) @@ -10,13 +8,4 @@ static inline bool arch_irq_work_has_interrupt(void) return !!__smp_cross_call; } -#else - -static inline bool arch_irq_work_has_interrupt(void) -{ - return false; -} - -#endif - #endif /* __ASM_IRQ_WORK_H */ diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index c0e5165c2f76..1b5e0e843c3a 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -26,14 +26,28 @@ #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm goto("1: nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align 3\n\t" ".quad 1b, %l[l_yes], %c0\n\t" ".popsection\n\t" - : : "i"(key) : : l_yes); + : : "i"(&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm goto("1: b %l[l_yes]\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".align 3\n\t" + ".quad 1b, %l[l_yes], %c0\n\t" + ".popsection\n\t" + : : "i"(&((char *)key)[branch]) : : l_yes); return false; l_yes: diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h new file mode 100644 index 000000000000..2774fa384c47 --- /dev/null +++ b/arch/arm64/include/asm/kasan.h @@ -0,0 +1,38 @@ +#ifndef __ASM_KASAN_H +#define __ASM_KASAN_H + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_KASAN + +#include <linux/linkage.h> +#include <asm/memory.h> + +/* + * KASAN_SHADOW_START: beginning of the kernel virtual addresses. + * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses. + */ +#define KASAN_SHADOW_START (VA_START) +#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1UL << (VA_BITS - 3))) + +/* + * This value is used to map an address to the corresponding shadow + * address by the following formula: + * shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET; + * + * (1 << 61) shadow addresses - [KASAN_SHADOW_OFFSET,KASAN_SHADOW_END] + * cover all 64-bits of virtual addresses. So KASAN_SHADOW_OFFSET + * should satisfy the following equation: + * KASAN_SHADOW_OFFSET = KASAN_SHADOW_END - (1ULL << 61) + */ +#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << (64 - 3))) + +void kasan_init(void); +asmlinkage void kasan_early_init(void); + +#else +static inline void kasan_init(void) { } +#endif + +#endif +#endif diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h new file mode 100644 index 000000000000..4e08faabd9e4 --- /dev/null +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -0,0 +1,80 @@ +/* + * Kernel page table mapping + * + * Copyright (C) 2015 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ASM_KERNEL_PGTABLE_H +#define __ASM_KERNEL_PGTABLE_H + + +/* + * The linear mapping and the start of memory are both 2M aligned (per + * the arm64 booting.txt requirements). Hence we can use section mapping + * with 4K (section size = 2M) but not with 16K (section size = 32M) or + * 64K (section size = 512M). + */ +#ifdef CONFIG_ARM64_4K_PAGES +#define ARM64_SWAPPER_USES_SECTION_MAPS 1 +#else +#define ARM64_SWAPPER_USES_SECTION_MAPS 0 +#endif + +/* + * The idmap and swapper page tables need some space reserved in the kernel + * image. Both require pgd, pud (4 levels only) and pmd tables to (section) + * map the kernel. With the 64K page configuration, swapper and idmap need to + * map to pte level. The swapper also maps the FDT (see __create_page_tables + * for more information). Note that the number of ID map translation levels + * could be increased on the fly if system RAM is out of reach for the default + * VA range, so 3 pages are reserved in all cases. + */ +#if ARM64_SWAPPER_USES_SECTION_MAPS +#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1) +#else +#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS) +#endif + +#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE) +#define IDMAP_DIR_SIZE (3 * PAGE_SIZE) + +/* Initial memory map size */ +#if ARM64_SWAPPER_USES_SECTION_MAPS +#define SWAPPER_BLOCK_SHIFT SECTION_SHIFT +#define SWAPPER_BLOCK_SIZE SECTION_SIZE +#define SWAPPER_TABLE_SHIFT PUD_SHIFT +#else +#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT +#define SWAPPER_BLOCK_SIZE PAGE_SIZE +#define SWAPPER_TABLE_SHIFT PMD_SHIFT +#endif + +/* The size of the initial kernel direct mapping */ +#define SWAPPER_INIT_MAP_SIZE (_AC(1, UL) << SWAPPER_TABLE_SHIFT) + +/* + * Initial memory map attributes. + */ +#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) + +#if ARM64_SWAPPER_USES_SECTION_MAPS +#define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) +#else +#define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) +#endif + + +#endif /* __ASM_KERNEL_PGTABLE_H */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index ac6fafb95fe7..7ae1f3626acf 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -83,17 +83,6 @@ #define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO) -/* Hyp System Control Register (SCTLR_EL2) bits */ -#define SCTLR_EL2_EE (1 << 25) -#define SCTLR_EL2_WXN (1 << 19) -#define SCTLR_EL2_I (1 << 12) -#define SCTLR_EL2_SA (1 << 3) -#define SCTLR_EL2_C (1 << 2) -#define SCTLR_EL2_A (1 << 1) -#define SCTLR_EL2_M 1 -#define SCTLR_EL2_FLAGS (SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C | \ - SCTLR_EL2_SA | SCTLR_EL2_I) - /* TCR_EL2 Registers bits */ #define TCR_EL2_TBI (1 << 20) #define TCR_EL2_PS (7 << 16) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 4f7310fa77f0..57ca3f92cac4 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -46,24 +46,16 @@ #define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ #define PAR_EL1 21 /* Physical Address Register */ #define MDSCR_EL1 22 /* Monitor Debug System Control Register */ -#define DBGBCR0_EL1 23 /* Debug Breakpoint Control Registers (0-15) */ -#define DBGBCR15_EL1 38 -#define DBGBVR0_EL1 39 /* Debug Breakpoint Value Registers (0-15) */ -#define DBGBVR15_EL1 54 -#define DBGWCR0_EL1 55 /* Debug Watchpoint Control Registers (0-15) */ -#define DBGWCR15_EL1 70 -#define DBGWVR0_EL1 71 /* Debug Watchpoint Value Registers (0-15) */ -#define DBGWVR15_EL1 86 -#define MDCCINT_EL1 87 /* Monitor Debug Comms Channel Interrupt Enable Reg */ +#define MDCCINT_EL1 23 /* Monitor Debug Comms Channel Interrupt Enable Reg */ /* 32bit specific registers. Keep them at the end of the range */ -#define DACR32_EL2 88 /* Domain Access Control Register */ -#define IFSR32_EL2 89 /* Instruction Fault Status Register */ -#define FPEXC32_EL2 90 /* Floating-Point Exception Control Register */ -#define DBGVCR32_EL2 91 /* Debug Vector Catch Register */ -#define TEECR32_EL1 92 /* ThumbEE Configuration Register */ -#define TEEHBR32_EL1 93 /* ThumbEE Handler Base Register */ -#define NR_SYS_REGS 94 +#define DACR32_EL2 24 /* Domain Access Control Register */ +#define IFSR32_EL2 25 /* Instruction Fault Status Register */ +#define FPEXC32_EL2 26 /* Floating-Point Exception Control Register */ +#define DBGVCR32_EL2 27 /* Debug Vector Catch Register */ +#define TEECR32_EL1 28 /* ThumbEE Configuration Register */ +#define TEEHBR32_EL1 29 /* ThumbEE Handler Base Register */ +#define NR_SYS_REGS 30 /* 32bit mapping */ #define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ @@ -108,22 +100,39 @@ #define ARM_EXCEPTION_IRQ 0 #define ARM_EXCEPTION_TRAP 1 +/* The hyp-stub will return this for any kvm_call_hyp() call */ +#define ARM_EXCEPTION_HYP_GONE 2 #define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 #define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) +#define kvm_ksym_ref(sym) ((void *)&sym + kvm_ksym_shift) + #ifndef __ASSEMBLY__ +#if __GNUC__ > 4 +#define kvm_ksym_shift (PAGE_OFFSET - KIMAGE_VADDR) +#else +/* + * GCC versions 4.9 and older will fold the constant below into the addend of + * the reference to 'sym' above if kvm_ksym_shift is declared static or if the + * constant is used directly. However, since we use the small code model for + * the core kernel, the reference to 'sym' will be emitted as a adrp/add pair, + * with a +/- 4 GB range, resulting in linker relocation errors if the shift + * is sufficiently large. So prevent the compiler from folding the shift into + * the addend, by making the shift a variable with external linkage. + */ +__weak u64 kvm_ksym_shift = PAGE_OFFSET - KIMAGE_VADDR; +#endif + struct kvm; struct kvm_vcpu; extern char __kvm_hyp_init[]; extern char __kvm_hyp_init_end[]; +extern char __kvm_hyp_reset[]; extern char __kvm_hyp_vector[]; -#define __kvm_hyp_code_start __hyp_text_start -#define __kvm_hyp_code_end __hyp_text_end - extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); extern void __kvm_tlb_flush_vmid(struct kvm *kvm); @@ -132,10 +141,7 @@ extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern u64 __vgic_v3_get_ich_vtr_el2(void); -extern char __save_vgic_v2_state[]; -extern char __restore_vgic_v2_state[]; -extern char __save_vgic_v3_state[]; -extern char __restore_vgic_v3_state[]; +extern u32 __kvm_get_mdcr_el2(void); #endif diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f0f58c9beec0..580a5232d217 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -48,6 +48,8 @@ int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); int kvm_arch_dev_ioctl_check_extension(long ext); +unsigned long kvm_hyp_reset_entry(void); +void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start); struct kvm_arch { /* The VMID generation used for the virt. memory system */ @@ -103,15 +105,30 @@ struct kvm_vcpu_arch { /* HYP configuration */ u64 hcr_el2; + u32 mdcr_el2; /* Exception Information */ struct kvm_vcpu_fault_info fault; - /* Debug state */ + /* Guest debug state */ u64 debug_flags; + /* + * We maintain more than a single set of debug registers to support + * debugging the guest from the host and to maintain separate host and + * guest state during world switches. vcpu_debug_state are the debug + * registers of the vcpu as the guest sees them. host_debug_state are + * the host registers which are saved and restored during world switches. + * + * debug_ptr points to the set of debug registers that should be loaded + * onto the hardware when running the guest. + */ + struct kvm_guest_debug_arch *debug_ptr; + struct kvm_guest_debug_arch vcpu_debug_state; + /* Pointer to host CPU context */ kvm_cpu_context_t *host_cpu_context; + struct kvm_guest_debug_arch host_debug_state; /* VGIC state */ struct vgic_cpu vgic_cpu; @@ -216,38 +233,29 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, hyp_stack_ptr, vector_ptr); } -struct vgic_sr_vectors { - void *save_vgic; - void *restore_vgic; -}; - -static inline void vgic_arch_setup(const struct vgic_params *vgic) +static inline void __cpu_init_stage2(void) { - extern struct vgic_sr_vectors __vgic_sr_vectors; - - switch(vgic->type) - { - case VGIC_V2: - __vgic_sr_vectors.save_vgic = __save_vgic_v2_state; - __vgic_sr_vectors.restore_vgic = __restore_vgic_v2_state; - break; - -#ifdef CONFIG_ARM_GIC_V3 - case VGIC_V3: - __vgic_sr_vectors.save_vgic = __save_vgic_v3_state; - __vgic_sr_vectors.restore_vgic = __restore_vgic_v3_state; - break; -#endif +} - default: - BUG(); - } +static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr, + phys_addr_t phys_idmap_start) +{ + /* + * Call reset code, and switch back to stub hyp vectors. + * Uses kvm_call_hyp() to avoid kaslr's kvm_ksym_ref() translation. + */ + kvm_call_hyp((void *)kvm_hyp_reset_entry(), + boot_pgd_ptr, phys_idmap_start); } -static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +void kvm_arm_init_debug(void); +void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); +void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); +void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 61505676d085..61f1c0bfabd0 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -98,6 +98,7 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); phys_addr_t kvm_mmu_get_boot_httbr(void); phys_addr_t kvm_get_idmap_vector(void); +phys_addr_t kvm_get_idmap_start(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); @@ -158,7 +159,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd) #define PTRS_PER_S2_PGD_SHIFT (KVM_PHYS_SHIFT - PGDIR_SHIFT) #endif #define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT) -#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) #define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1)) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 44a59c20e773..c885bbcccf58 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -42,14 +42,17 @@ * PAGE_OFFSET - the virtual address of the start of the kernel image (top * (VA_BITS - 1)) * VA_BITS - the maximum number of bits for virtual addresses. + * VA_START - the first kernel virtual address. * TASK_SIZE - the maximum size of a user space task. * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. * The module space lives between the addresses given by TASK_SIZE * and PAGE_OFFSET - it must be within 128MB of the kernel text. */ #define VA_BITS (CONFIG_ARM64_VA_BITS) +#define VA_START (UL(0xffffffffffffffff) << VA_BITS) #define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) -#define MODULES_END (PAGE_OFFSET) +#define KIMAGE_VADDR (PAGE_OFFSET) +#define MODULES_END (KIMAGE_VADDR) #define MODULES_VADDR (MODULES_END - SZ_64M) #define PCI_IO_END (MODULES_VADDR - SZ_2M) #define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) @@ -77,14 +80,16 @@ * private definitions which should NOT be used outside memory.h * files. Use virt_to_phys/phys_to_virt/__pa/__va instead. */ -#define __virt_to_phys(x) (((phys_addr_t)(x) - PAGE_OFFSET + PHYS_OFFSET)) +#define __virt_to_phys(x) ({ \ + phys_addr_t __x = (phys_addr_t)(x); \ + __x >= PAGE_OFFSET ? (__x - PAGE_OFFSET + PHYS_OFFSET) : \ + (__x - KIMAGE_VADDR + PHYS_OFFSET); }) + #define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET)) +#define __phys_to_kimg(x) ((unsigned long)((x) - PHYS_OFFSET + KIMAGE_VADDR)) -/* - * Convert a physical address to a Page Frame Number and back - */ -#define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT)) -#define __pfn_to_phys(pfn) ((phys_addr_t)(pfn) << PAGE_SHIFT) +#define KERNEL_START _text +#define KERNEL_END _end /* * Convert a page to/from a physical address diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 3d311761e3c2..990124a67eeb 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -17,22 +17,23 @@ #define __ASM_MMU_H typedef struct { - unsigned int id; - raw_spinlock_t id_lock; - void *vdso; + atomic64_t id; + void *vdso; } mm_context_t; -#define INIT_MM_CONTEXT(name) \ - .context.id_lock = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock), - -#define ASID(mm) ((mm)->context.id & 0xffff) +/* + * This macro is only used by the TLBI code, which cannot race with an + * ASID change and therefore doesn't need to reload the counter using + * atomic64_read. + */ +#define ASID(mm) ((mm)->context.id.counter & 0xffff) extern void paging_init(void); -extern void setup_mm_for_reboot(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); extern void init_mem_pgprot(void); extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot); +extern void *fixmap_remap_fdt(phys_addr_t dt_phys); #endif diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 8ec41e5f56f0..a00f7cf35bbd 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -27,13 +27,7 @@ #include <asm-generic/mm_hooks.h> #include <asm/cputype.h> #include <asm/pgtable.h> - -#define MAX_ASID_BITS 16 - -extern unsigned int cpu_last_asid; - -void __init_new_context(struct task_struct *tsk, struct mm_struct *mm); -void __new_context(struct mm_struct *mm); +#include <asm/tlbflush.h> #ifdef CONFIG_PID_IN_CONTEXTIDR static inline void contextidr_thread_switch(struct task_struct *next) @@ -55,7 +49,7 @@ static inline void contextidr_thread_switch(struct task_struct *next) */ static inline void cpu_set_reserved_ttbr0(void) { - unsigned long ttbr = page_to_phys(empty_zero_page); + unsigned long ttbr = virt_to_phys(empty_zero_page); asm( " msr ttbr0_el1, %0 // set TTBR0\n" @@ -77,98 +71,95 @@ static inline bool __cpu_uses_extended_idmap(void) unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS))); } -static inline void __cpu_set_tcr_t0sz(u64 t0sz) +/* + * Set TCR.T0SZ to its default value (based on VA_BITS) + */ +static inline void __cpu_set_tcr_t0sz(unsigned long t0sz) { unsigned long tcr; - if (__cpu_uses_extended_idmap()) - asm volatile ( - " mrs %0, tcr_el1 ;" - " bfi %0, %1, %2, %3 ;" - " msr tcr_el1, %0 ;" - " isb" - : "=&r" (tcr) - : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); -} + if (!__cpu_uses_extended_idmap()) + return; -/* - * Set TCR.T0SZ to the value appropriate for activating the identity map. - */ -static inline void cpu_set_idmap_tcr_t0sz(void) -{ - __cpu_set_tcr_t0sz(idmap_t0sz); + asm volatile ( + " mrs %0, tcr_el1 ;" + " bfi %0, %1, %2, %3 ;" + " msr tcr_el1, %0 ;" + " isb" + : "=&r" (tcr) + : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); } +#define cpu_set_default_tcr_t0sz() __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS)) +#define cpu_set_idmap_tcr_t0sz() __cpu_set_tcr_t0sz(idmap_t0sz) + /* - * Set TCR.T0SZ to its default value (based on VA_BITS) + * Remove the idmap from TTBR0_EL1 and install the pgd of the active mm. + * + * The idmap lives in the same VA range as userspace, but uses global entries + * and may use a different TCR_EL1.T0SZ. To avoid issues resulting from + * speculative TLB fetches, we must temporarily install the reserved page + * tables while we invalidate the TLBs and set up the correct TCR_EL1.T0SZ. + * + * If current is a not a user task, the mm covers the TTBR1_EL1 page tables, + * which should not be installed in TTBR0_EL1. In this case we can leave the + * reserved page tables in place. */ -static inline void cpu_set_default_tcr_t0sz(void) -{ - __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS)); -} - -static inline void switch_new_context(struct mm_struct *mm) +static inline void cpu_uninstall_idmap(void) { - unsigned long flags; + struct mm_struct *mm = current->active_mm; - __new_context(mm); + cpu_set_reserved_ttbr0(); + local_flush_tlb_all(); + cpu_set_default_tcr_t0sz(); - local_irq_save(flags); - cpu_switch_mm(mm->pgd, mm); - local_irq_restore(flags); + if (mm != &init_mm) + cpu_switch_mm(mm->pgd, mm); } -static inline void check_and_switch_context(struct mm_struct *mm, - struct task_struct *tsk) +static inline void cpu_install_idmap(void) { - /* - * Required during context switch to avoid speculative page table - * walking with the wrong TTBR. - */ cpu_set_reserved_ttbr0(); + local_flush_tlb_all(); + cpu_set_idmap_tcr_t0sz(); - if (!((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) - /* - * The ASID is from the current generation, just switch to the - * new pgd. This condition is only true for calls from - * context_switch() and interrupts are already disabled. - */ - cpu_switch_mm(mm->pgd, mm); - else if (irqs_disabled()) - /* - * Defer the new ASID allocation until after the context - * switch critical region since __new_context() cannot be - * called with interrupts disabled. - */ - set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM); - else - /* - * That is a direct call to switch_mm() or activate_mm() with - * interrupts enabled and a new context. - */ - switch_new_context(mm); + cpu_switch_mm(idmap_pg_dir, &init_mm); } -#define init_new_context(tsk,mm) (__init_new_context(tsk,mm),0) -#define destroy_context(mm) do { } while(0) - -#define finish_arch_post_lock_switch \ - finish_arch_post_lock_switch -static inline void finish_arch_post_lock_switch(void) +/* + * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD, + * avoiding the possibility of conflicting TLB entries being allocated. + */ +static inline void cpu_replace_ttbr1(pgd_t *pgd) { - if (test_and_clear_thread_flag(TIF_SWITCH_MM)) { - struct mm_struct *mm = current->mm; - unsigned long flags; + typedef void (ttbr_replace_func)(phys_addr_t); + extern ttbr_replace_func idmap_cpu_replace_ttbr1; + ttbr_replace_func *replace_phys; - __new_context(mm); + phys_addr_t pgd_phys = virt_to_phys(pgd); - local_irq_save(flags); - cpu_switch_mm(mm->pgd, mm); - local_irq_restore(flags); - } + replace_phys = (void *)virt_to_phys(idmap_cpu_replace_ttbr1); + + cpu_install_idmap(); + replace_phys(pgd_phys); + cpu_uninstall_idmap(); } /* + * It would be nice to return ASIDs back to the allocator, but unfortunately + * that introduces a race with a generation rollover where we could erroneously + * free an ASID allocated in a future generation. We could workaround this by + * freeing the ASID from the context of the dying mm (e.g. in arch_exit_mmap), + * but we'd then need to make sure that we didn't dirty any TLBs afterwards. + * Setting a reserved TTBR0 or EPD0 would work, but it all gets ugly when you + * take CPU migration into account. + */ +#define destroy_context(mm) do { } while(0) +void check_and_switch_context(struct mm_struct *mm, unsigned int cpu); + +#define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; }) + +/* * This is called when "tsk" is about to enter lazy TLB mode. * * mm: describes the currently active mm context @@ -194,6 +185,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, { unsigned int cpu = smp_processor_id(); + if (prev == next) + return; + /* * init_mm.pgd does not contain any user mappings and it is always * active for kernel addresses in TTBR1. Just set the reserved TTBR0. @@ -203,8 +197,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, return; } - if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) - check_and_switch_context(next, tsk); + check_and_switch_context(next, cpu); } #define deactivate_mm(tsk,mm) do { } while (0) diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 7d9c7e4a424b..a324a38d9dcf 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -19,32 +19,22 @@ #ifndef __ASM_PAGE_H #define __ASM_PAGE_H +#include <linux/const.h> + /* PAGE_SHIFT determines the page size */ +/* CONT_SHIFT determines the number of pages which can be tracked together */ #ifdef CONFIG_ARM64_64K_PAGES #define PAGE_SHIFT 16 +#define CONT_SHIFT 5 #else #define PAGE_SHIFT 12 +#define CONT_SHIFT 4 #endif -#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) +#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) -/* - * The idmap and swapper page tables need some space reserved in the kernel - * image. Both require pgd, pud (4 levels only) and pmd tables to (section) - * map the kernel. With the 64K page configuration, swapper and idmap need to - * map to pte level. The swapper also maps the FDT (see __create_page_tables - * for more information). Note that the number of ID map translation levels - * could be increased on the fly if system RAM is out of reach for the default - * VA range, so 3 pages are reserved in all cases. - */ -#ifdef CONFIG_ARM64_64K_PAGES -#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS) -#else -#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1) -#endif - -#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE) -#define IDMAP_DIR_SIZE (3 * PAGE_SIZE) +#define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT)) +#define CONT_MASK (~(CONT_SIZE-1)) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 4fde8c1df97f..0a456bef8c79 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -16,8 +16,6 @@ #ifndef __ASM_PERCPU_H #define __ASM_PERCPU_H -#ifdef CONFIG_SMP - static inline void set_my_cpu_offset(unsigned long off) { asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory"); @@ -38,12 +36,6 @@ static inline unsigned long __my_cpu_offset(void) } #define __my_cpu_offset __my_cpu_offset() -#else /* !CONFIG_SMP */ - -#define set_my_cpu_offset(x) do { } while (0) - -#endif /* CONFIG_SMP */ - #define PERCPU_OP(op, asm_op) \ static inline unsigned long __percpu_##op(void *ptr, \ unsigned long val, int size) \ diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 76420568d66a..c15053902942 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -27,6 +27,7 @@ #define check_pgt_cache() do { } while (0) #define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) #if CONFIG_PGTABLE_LEVELS > 2 diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index f1fc3140dedb..2d99dfee1144 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -40,7 +40,14 @@ * fixed mappings and modules */ #define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE) -#define VMALLOC_START (UL(0xffffffffffffffff) << VA_BITS) + +#ifndef CONFIG_KASAN +#define VMALLOC_START (VA_START) +#else +#include <asm/kasan.h> +#define VMALLOC_START (KASAN_SHADOW_END + SZ_64K) +#endif + #define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K) #define VMEMMAP_START (VMALLOC_END + SZ_64K) @@ -55,13 +62,8 @@ extern void __pmd_error(const char *file, int line, unsigned long val); extern void __pud_error(const char *file, int line, unsigned long val); extern void __pgd_error(const char *file, int line, unsigned long val); -#ifdef CONFIG_SMP #define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) #define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) -#else -#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF) -#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF) -#endif #define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE)) #define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC)) @@ -112,8 +114,8 @@ extern void __pgd_error(const char *file, int line, unsigned long val); * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ -extern struct page *empty_zero_page; -#define ZERO_PAGE(vaddr) (empty_zero_page) +extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; +#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page) #define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) @@ -125,16 +127,6 @@ extern struct page *empty_zero_page; #define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0)) #define pte_page(pte) (pfn_to_page(pte_pfn(pte))) -/* Find an entry in the third-level page table. */ -#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) - -#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + pte_index(addr)) - -#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) -#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) -#define pte_unmap(pte) do { } while (0) -#define pte_unmap_nested(pte) do { } while (0) - /* * The following only work if pte_present(). Undefined behaviour otherwise. */ @@ -363,11 +355,22 @@ static inline void pmd_clear(pmd_t *pmdp) set_pmd(pmdp, __pmd(0)); } -static inline pte_t *pmd_page_vaddr(pmd_t pmd) +static inline phys_addr_t pmd_page_paddr(pmd_t pmd) { - return __va(pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK); + return pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK; } +/* Find an entry in the third-level page table. */ +#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + +#define pte_offset_phys(dir,addr) (pmd_page_paddr(*(dir)) + pte_index(addr) * sizeof(pte_t)) +#define pte_offset_kernel(dir,addr) ((pte_t *)__va(pte_offset_phys((dir), (addr)))) + +#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) +#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) +#define pte_unmap(pte) do { } while (0) +#define pte_unmap_nested(pte) do { } while (0) + #define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) /* @@ -396,21 +399,23 @@ static inline void pud_clear(pud_t *pudp) set_pud(pudp, __pud(0)); } -static inline pmd_t *pud_page_vaddr(pud_t pud) +static inline phys_addr_t pud_page_paddr(pud_t pud) { - return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK); + return pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK; } /* Find an entry in the second-level page table. */ #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) -static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) -{ - return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr); -} +#define pmd_offset_phys(dir, addr) (pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t)) +#define pmd_offset(dir, addr) ((pmd_t *)__va(pmd_offset_phys((dir), (addr)))) #define pud_page(pud) pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK)) +#else + +#define pud_page_paddr(pud) ({ BUILD_BUG(); 0; }) + #endif /* CONFIG_PGTABLE_LEVELS > 2 */ #if CONFIG_PGTABLE_LEVELS > 3 @@ -432,21 +437,23 @@ static inline void pgd_clear(pgd_t *pgdp) set_pgd(pgdp, __pgd(0)); } -static inline pud_t *pgd_page_vaddr(pgd_t pgd) +static inline phys_addr_t pgd_page_paddr(pgd_t pgd) { - return __va(pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK); + return pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK; } /* Find an entry in the frst-level page table. */ #define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) -static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) -{ - return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr); -} +#define pud_offset_phys(dir, addr) (pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t)) +#define pud_offset(dir, addr) ((pud_t *)__va(pud_offset_phys((dir), (addr)))) #define pgd_page(pgd) pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK)) +#else + +#define pgd_page_paddr(pgd) ({ BUILD_BUG(); 0;}) + #endif /* CONFIG_PGTABLE_LEVELS > 3 */ #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) @@ -454,7 +461,9 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) /* to find an entry in a page-table-directory */ #define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) -#define pgd_offset(mm, addr) ((mm)->pgd+pgd_index(addr)) +#define pgd_offset_raw(pgd, addr) ((pgd) + pgd_index(addr)) + +#define pgd_offset(mm, addr) (pgd_offset_raw((mm)->pgd, (addr))) /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(addr) pgd_offset(&init_mm, addr) diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h index 220633b791b8..14ad6e4e87d1 100644 --- a/arch/arm64/include/asm/proc-fns.h +++ b/arch/arm64/include/asm/proc-fns.h @@ -28,12 +28,8 @@ struct mm_struct; struct cpu_suspend_ctx; -extern void cpu_cache_off(void); extern void cpu_do_idle(void); extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm); -extern void cpu_reset(unsigned long addr) __attribute__((noreturn)); -void cpu_soft_restart(phys_addr_t cpu_reset, - unsigned long addr) __attribute__((noreturn)); extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr); extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr); diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index d2c37a1df0eb..6c2f5726fe0b 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -169,4 +169,6 @@ static inline void spin_lock_prefetch(const void *x) #endif +void cpu_enable_pan(void); + #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/psci.h b/arch/arm64/include/asm/psci.h deleted file mode 100644 index 2454bc59c916..000000000000 --- a/arch/arm64/include/asm/psci.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Copyright (C) 2013 ARM Limited - */ - -#ifndef __ASM_PSCI_H -#define __ASM_PSCI_H - -int psci_dt_init(void); -int psci_acpi_init(void); - -#endif /* __ASM_PSCI_H */ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 7cc281d94769..7f94755089e2 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -160,11 +160,7 @@ int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task); #define instruction_pointer(regs) ((unsigned long)(regs)->pc) -#ifdef CONFIG_SMP extern unsigned long profile_pc(struct pt_regs *regs); -#else -#define profile_pc(regs) instruction_pointer(regs) -#endif #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index bf22650b1a78..d9c3d6a6100a 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -20,10 +20,6 @@ #include <linux/cpumask.h> #include <linux/thread_info.h> -#ifndef CONFIG_SMP -# error "<asm/smp.h> included in non-SMP build" -#endif - #define raw_smp_processor_id() (current_thread_info()->cpu) struct seq_file; @@ -42,7 +38,7 @@ extern void handle_IPI(int ipinr, struct pt_regs *regs); * Discover the set of possible CPUs and determine their * SMP operations. */ -extern void of_smp_init_cpus(void); +extern void smp_init_cpus(void); /* * Provide a function to raise an IPI cross call on CPUs in callmap. diff --git a/arch/arm64/include/asm/smp_plat.h b/arch/arm64/include/asm/smp_plat.h index 8dcd61e32176..7abf7570c00f 100644 --- a/arch/arm64/include/asm/smp_plat.h +++ b/arch/arm64/include/asm/smp_plat.h @@ -19,6 +19,8 @@ #ifndef __ASM_SMP_PLAT_H #define __ASM_SMP_PLAT_H +#include <linux/cpumask.h> + #include <asm/types.h> struct mpidr_hash { @@ -39,6 +41,20 @@ static inline u32 mpidr_hash_size(void) */ extern u64 __cpu_logical_map[NR_CPUS]; #define cpu_logical_map(cpu) __cpu_logical_map[cpu] +/* + * Retrieve logical cpu index corresponding to a given MPIDR.Aff* + * - mpidr: MPIDR.Aff* bits to be used for the look-up + * + * Returns the cpu logical index or -EINVAL on look-up error + */ +static inline int get_logical_index(u64 mpidr) +{ + int cpu; + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (cpu_logical_map(cpu) == mpidr) + return cpu; + return -EINVAL; +} void __init do_post_cpus_up_work(void); diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h index 64d2d4884a9d..2eb714c4639f 100644 --- a/arch/arm64/include/asm/string.h +++ b/arch/arm64/include/asm/string.h @@ -36,17 +36,33 @@ extern __kernel_size_t strnlen(const char *, __kernel_size_t); #define __HAVE_ARCH_MEMCPY extern void *memcpy(void *, const void *, __kernel_size_t); +extern void *__memcpy(void *, const void *, __kernel_size_t); #define __HAVE_ARCH_MEMMOVE extern void *memmove(void *, const void *, __kernel_size_t); +extern void *__memmove(void *, const void *, __kernel_size_t); #define __HAVE_ARCH_MEMCHR extern void *memchr(const void *, int, __kernel_size_t); #define __HAVE_ARCH_MEMSET extern void *memset(void *, int, __kernel_size_t); +extern void *__memset(void *, int, __kernel_size_t); #define __HAVE_ARCH_MEMCMP extern int memcmp(const void *, const void *, size_t); + +#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) + +/* + * For files that are not instrumented (e.g. mm/slub.c) we + * should use not instrumented version of mem* functions. + */ + +#define memcpy(dst, src, len) __memcpy(dst, src, len) +#define memmove(dst, src, len) __memmove(dst, src, len) +#define memset(s, c, n) __memset(s, c, n) +#endif + #endif diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h index 59a5b0f1e81c..024d623f662e 100644 --- a/arch/arm64/include/asm/suspend.h +++ b/arch/arm64/include/asm/suspend.h @@ -1,7 +1,8 @@ #ifndef __ASM_SUSPEND_H #define __ASM_SUSPEND_H -#define NR_CTX_REGS 11 +#define NR_CTX_REGS 10 +#define NR_CALLEE_SAVED_REGS 12 /* * struct cpu_suspend_ctx must be 16-byte aligned since it is allocated on @@ -16,11 +17,34 @@ struct cpu_suspend_ctx { u64 sp; } __aligned(16); -struct sleep_save_sp { - phys_addr_t *save_ptr_stash; - phys_addr_t save_ptr_stash_phys; +/* + * Memory to save the cpu state is allocated on the stack by + * __cpu_suspend_enter()'s caller, and populated by __cpu_suspend_enter(). + * This data must survive until cpu_resume() is called. + * + * This struct desribes the size and the layout of the saved cpu state. + * The layout of the callee_saved_regs is defined by the implementation + * of __cpu_suspend_enter(), and cpu_resume(). This struct must be passed + * in by the caller as __cpu_suspend_enter()'s stack-frame is gone once it + * returns, and the data would be subsequently corrupted by the call to the + * finisher. + */ +struct sleep_stack_data { + struct cpu_suspend_ctx system_regs; + unsigned long callee_saved_regs[NR_CALLEE_SAVED_REGS]; }; +extern unsigned long *sleep_save_stash; + extern int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)); extern void cpu_resume(void); +int __cpu_suspend_enter(struct sleep_stack_data *state); +void __cpu_suspend_exit(void); +void _cpu_resume(void); + +int swsusp_arch_suspend(void); +int swsusp_arch_resume(void); +int arch_hibernation_header_save(void *addr, unsigned int max_size); +int arch_hibernation_header_restore(void *addr); + #endif diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 5c89df0acbcb..bc40ec0f4fe1 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -20,8 +20,62 @@ #ifndef __ASM_SYSREG_H #define __ASM_SYSREG_H +#include <asm/opcodes.h> +#include <linux/stringify.h> + +/* + * ARMv8 ARM reserves the following encoding for system registers: + * (Ref: ARMv8 ARM, Section: "System instruction class encoding overview", + * C5.2, version:ARM DDI 0487A.f) + * [20-19] : Op0 + * [18-16] : Op1 + * [15-12] : CRn + * [11-8] : CRm + * [7-5] : Op2 + */ #define sys_reg(op0, op1, crn, crm, op2) \ - ((((op0)-2)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5)) + ((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5)) + +#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) + +#define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\ + (!!x)<<8 | 0x1f) + +/* Common SCTLR_ELx flags. */ +#define SCTLR_ELx_EE (1 << 25) +#define SCTLR_ELx_I (1 << 12) +#define SCTLR_ELx_SA (1 << 3) +#define SCTLR_ELx_C (1 << 2) +#define SCTLR_ELx_A (1 << 1) +#define SCTLR_ELx_M 1 + +#define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ + SCTLR_ELx_SA | SCTLR_ELx_I) + +/* SCTLR_EL1 specific flags. */ +#define SCTLR_EL1_SPAN (1 << 23) +#define SCTLR_EL1_SED (1 << 8) +#define SCTLR_EL1_CP15BEN (1 << 5) + + +#define ID_AA64MMFR0_TGRAN4_SHIFT 28 +#define ID_AA64MMFR0_TGRAN64_SHIFT 24 +#define ID_AA64MMFR0_TGRAN16_SHIFT 20 + +#define ID_AA64MMFR0_TGRAN4_NI 0xf +#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0 +#define ID_AA64MMFR0_TGRAN64_NI 0xf +#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0 +#define ID_AA64MMFR0_TGRAN16_NI 0x0 +#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1 + +#if defined(CONFIG_ARM64_4K_PAGES) +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN4_SUPPORTED +#elif defined(CONFIG_ARM64_64K_PAGES) +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN64_SUPPORTED +#endif #ifdef __ASSEMBLY__ @@ -31,15 +85,17 @@ .equ __reg_num_xzr, 31 .macro mrs_s, rt, sreg - .inst 0xd5300000|(\sreg)|(__reg_num_\rt) + .inst 0xd5200000|(\sreg)|(__reg_num_\rt) .endm .macro msr_s, sreg, rt - .inst 0xd5100000|(\sreg)|(__reg_num_\rt) + .inst 0xd5000000|(\sreg)|(__reg_num_\rt) .endm #else +#include <linux/types.h> + asm( " .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" " .equ __reg_num_x\\num, \\num\n" @@ -47,14 +103,39 @@ asm( " .equ __reg_num_xzr, 31\n" "\n" " .macro mrs_s, rt, sreg\n" -" .inst 0xd5300000|(\\sreg)|(__reg_num_\\rt)\n" +" .inst 0xd5200000|(\\sreg)|(__reg_num_\\rt)\n" " .endm\n" "\n" " .macro msr_s, sreg, rt\n" -" .inst 0xd5100000|(\\sreg)|(__reg_num_\\rt)\n" +" .inst 0xd5000000|(\\sreg)|(__reg_num_\\rt)\n" " .endm\n" ); +static inline void config_sctlr_el1(u32 clear, u32 set) +{ + u32 val; + + asm volatile("mrs %0, sctlr_el1" : "=r" (val)); + val &= ~clear; + val |= set; + asm volatile("msr sctlr_el1, %0" : : "r" (val)); +} + +/* + * Unlike read_cpuid, calls to read_sysreg are never expected to be + * optimized away or replaced with synthetic values. + */ +#define read_sysreg(r) ({ \ + u64 __val; \ + asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \ + __val; \ +}) + +#define write_sysreg(v, r) do { \ + u64 __val = (u64)v; \ + asm volatile("msr " __stringify(r) ", %0" \ + : : "r" (__val)); \ +} while (0) #endif #endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index 7a18fabbe0f6..659fbf5925de 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -41,7 +41,6 @@ struct mm_struct; extern void show_pte(struct mm_struct *mm, unsigned long addr); extern void __show_regs(struct pt_regs *); -void soft_restart(unsigned long); extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); #define UDBG_UNDEFINED (1 << 0) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index dcd06d18a42a..7b2f2ecadfd1 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -71,10 +71,16 @@ register unsigned long current_stack_pointer asm ("sp"); */ static inline struct thread_info *current_thread_info(void) __attribute_const__; +/* + * struct thread_info can be accessed directly via sp_el0. + */ static inline struct thread_info *current_thread_info(void) { - return (struct thread_info *) - (current_stack_pointer & ~(THREAD_SIZE - 1)); + unsigned long sp_el0; + + asm ("mrs %0, sp_el0" : "=r" (sp_el0)); + + return (struct thread_info *)sp_el0; } #define thread_saved_pc(tsk) \ @@ -111,7 +117,6 @@ static inline struct thread_info *current_thread_info(void) #define TIF_RESTORE_SIGMASK 20 #define TIF_SINGLESTEP 21 #define TIF_32BIT 22 /* 32bit process */ -#define TIF_SWITCH_MM 23 /* deferred switch_mm */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index c3bb05b98616..fca74efe1fc5 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -65,6 +65,14 @@ * only require the D-TLB to be invalidated. * - kaddr - Kernel virtual memory address */ +static inline void local_flush_tlb_all(void) +{ + dsb(nshst); + asm("tlbi vmalle1"); + dsb(nsh); + isb(); +} + static inline void flush_tlb_all(void) { dsb(ishst); @@ -93,11 +101,23 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, dsb(ish); } -static inline void __flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) +/* + * This is meant to avoid soft lock-ups on large TLB flushing ranges and not + * necessarily a performance improvement. + */ +#define MAX_TLB_RANGE (1024UL << PAGE_SHIFT) + +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) { unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48; unsigned long addr; + + if ((end - start) > MAX_TLB_RANGE) { + flush_tlb_mm(vma->vm_mm); + return; + } + start = asid | (start >> 12); end = asid | (end >> 12); @@ -107,9 +127,15 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, dsb(ish); } -static inline void __flush_tlb_kernel_range(unsigned long start, unsigned long end) +static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { unsigned long addr; + + if ((end - start) > MAX_TLB_RANGE) { + flush_tlb_all(); + return; + } + start >>= 12; end >>= 12; @@ -121,29 +147,6 @@ static inline void __flush_tlb_kernel_range(unsigned long start, unsigned long e } /* - * This is meant to avoid soft lock-ups on large TLB flushing ranges and not - * necessarily a performance improvement. - */ -#define MAX_TLB_RANGE (1024UL << PAGE_SHIFT) - -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - if ((end - start) <= MAX_TLB_RANGE) - __flush_tlb_range(vma, start, end); - else - flush_tlb_mm(vma->vm_mm); -} - -static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - if ((end - start) <= MAX_TLB_RANGE) - __flush_tlb_kernel_range(start, end); - else - flush_tlb_all(); -} - -/* * Used to invalidate the TLB (walk caches) corresponding to intermediate page * table levels (pgd/pud/pmd). */ diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 7ebcd31ce51c..65e9c4615664 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -1,8 +1,6 @@ #ifndef __ASM_TOPOLOGY_H #define __ASM_TOPOLOGY_H -#ifdef CONFIG_SMP - #include <linux/cpumask.h> struct cpu_topology { @@ -24,13 +22,6 @@ void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); const struct cpumask *cpu_coregroup_mask(int cpu); -#else - -static inline void init_cpu_topology(void) { } -static inline void store_cpu_topology(unsigned int cpuid) { } - -#endif - #include <asm-generic/topology.h> #endif /* _ASM_ARM_TOPOLOGY_H */ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 07e1ba449bf1..b2ede967fe7d 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -24,7 +24,10 @@ #include <linux/string.h> #include <linux/thread_info.h> +#include <asm/alternative.h> +#include <asm/cpufeature.h> #include <asm/ptrace.h> +#include <asm/sysreg.h> #include <asm/errno.h> #include <asm/memory.h> #include <asm/compiler.h> @@ -131,6 +134,8 @@ static inline void set_fs(mm_segment_t fs) do { \ unsigned long __gu_val; \ __chk_user_ptr(ptr); \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN)); \ switch (sizeof(*(ptr))) { \ case 1: \ __get_user_asm("ldrb", "%w", __gu_val, (ptr), (err)); \ @@ -148,6 +153,8 @@ do { \ BUILD_BUG(); \ } \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN)); \ } while (0) #define __get_user(x, ptr) \ @@ -194,6 +201,8 @@ do { \ do { \ __typeof__(*(ptr)) __pu_val = (x); \ __chk_user_ptr(ptr); \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN)); \ switch (sizeof(*(ptr))) { \ case 1: \ __put_user_asm("strb", "%w", __pu_val, (ptr), (err)); \ @@ -210,6 +219,8 @@ do { \ default: \ BUILD_BUG(); \ } \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN)); \ } while (0) #define __put_user(x, ptr) \ diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 7a5df5252dd7..06e6a5238c4c 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -18,11 +18,29 @@ #ifndef __ASM__VIRT_H #define __ASM__VIRT_H +/* + * The arm64 hcall implementation uses x0 to specify the hcall type. A value + * less than 0xfff indicates a special hcall, such as get/set vector. + * Any other value is used as a pointer to the function to call. + */ + +/* HVC_GET_VECTORS - Return the value of the vbar_el2 register. */ +#define HVC_GET_VECTORS 0 + +/* + * HVC_SET_VECTORS - Set the value of the vbar_el2 register. + * + * @x1: Physical address of the new vector table. + */ +#define HVC_SET_VECTORS 1 + #define BOOT_CPU_MODE_EL1 (0xe11) #define BOOT_CPU_MODE_EL2 (0xe12) #ifndef __ASSEMBLY__ +#include <asm/ptrace.h> + /* * __boot_cpu_mode records what mode CPUs were booted in. * A correctly-implemented bootloader must start all CPUs in the same mode: @@ -50,6 +68,14 @@ static inline bool is_hyp_mode_mismatched(void) return __boot_cpu_mode[0] != __boot_cpu_mode[1]; } +static inline bool is_kernel_in_hyp_mode(void) +{ + u64 el; + + asm("mrs %0, CurrentEL" : "=r" (el)); + return el == CurrentEL_EL2; +} + /* The section containing the hypervisor text */ extern char __hyp_text_start[]; extern char __hyp_text_end[]; diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index d26832022127..d82f3f316ba4 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -100,12 +100,39 @@ struct kvm_sregs { struct kvm_fpu { }; +/* + * See v8 ARM ARM D7.3: Debug Registers + * + * The architectural limit is 16 debug registers of each type although + * in practice there are usually less (see ID_AA64DFR0_EL1). + * + * Although the control registers are architecturally defined as 32 + * bits wide we use a 64 bit structure here to keep parity with + * KVM_GET/SET_ONE_REG behaviour which treats all system registers as + * 64 bit values. It also allows for the possibility of the + * architecture expanding the control registers without having to + * change the userspace ABI. + */ +#define KVM_ARM_MAX_DBG_REGS 16 struct kvm_guest_debug_arch { + __u64 dbg_bcr[KVM_ARM_MAX_DBG_REGS]; + __u64 dbg_bvr[KVM_ARM_MAX_DBG_REGS]; + __u64 dbg_wcr[KVM_ARM_MAX_DBG_REGS]; + __u64 dbg_wvr[KVM_ARM_MAX_DBG_REGS]; }; struct kvm_debug_exit_arch { + __u32 hsr; + __u64 far; /* used for watchpoints */ }; +/* + * Architecture specific defines for kvm_guest_debug->control + */ + +#define KVM_GUESTDBG_USE_SW_BP (1 << 16) +#define KVM_GUESTDBG_USE_HW (1 << 17) + struct kvm_sync_regs { }; diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 6913643bbe54..208db3df135a 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -44,6 +44,7 @@ #define PSR_I_BIT 0x00000080 #define PSR_A_BIT 0x00000100 #define PSR_D_BIT 0x00000200 +#define PSR_PAN_BIT 0x00400000 #define PSR_Q_BIT 0x08000000 #define PSR_V_BIT 0x10000000 #define PSR_C_BIT 0x20000000 diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 426d0763c81b..3c1ba215c538 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -7,6 +7,8 @@ AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) CFLAGS_efi-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) CFLAGS_armv8_deprecated.o := -I$(src) +KASAN_SANITIZE_efi-stub.o := n + CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_insn.o = -pg CFLAGS_REMOVE_return_address.o = -pg @@ -15,16 +17,24 @@ CFLAGS_REMOVE_return_address.o = -pg arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ - hyp-stub.o psci.o psci-call.o cpu_ops.o insn.o \ + hyp-stub.o psci.o cpu_ops.o insn.o \ return_address.o cpuinfo.o cpu_errata.o \ - cpufeature.o alternative.o cacheinfo.o + cpufeature.o alternative.o cacheinfo.o \ + smp.o smp_spin_table.o topology.o smccc-call.o + +stub-obj := efi-stub.o efi-entry.o +extra-y := $(stub-obj) +stub-obj := $(patsubst %.o,%.stub.o,$(stub-obj)) + +OBJCOPYFLAGS := --prefix-symbols=__efistub_ +$(obj)/%.stub.o: $(obj)/%.o FORCE + $(call if_changed,objcopy) arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o entry32.o \ ../../arm/kernel/opcodes.o arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o -arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o topology.o arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o @@ -32,15 +42,16 @@ arm64-obj-$(CONFIG_CPU_PM) += sleep.o suspend.o arm64-obj-$(CONFIG_CPU_IDLE) += cpuidle.o arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o arm64-obj-$(CONFIG_KGDB) += kgdb.o -arm64-obj-$(CONFIG_EFI) += efi.o efi-stub.o efi-entry.o +arm64-obj-$(CONFIG_EFI) += efi.o $(stub-obj) arm64-obj-$(CONFIG_PCI) += pci.o arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o arm64-obj-$(CONFIG_ACPI) += acpi.o +arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) head-y := head.o -extra-y := $(head-y) vmlinux.lds +extra-y += $(head-y) vmlinux.lds # vDSO - this must be built first to generate the symbol offsets $(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 8b839558838e..19de7537e7d3 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -36,12 +36,6 @@ EXPORT_SYMBOL(acpi_disabled); int acpi_pci_disabled = 1; /* skip ACPI PCI scan and IRQ initialization */ EXPORT_SYMBOL(acpi_pci_disabled); -/* Processors with enabled flag and sane MPIDR */ -static int enabled_cpus; - -/* Boot CPU is valid or not in MADT */ -static bool bootcpu_valid __initdata; - static bool param_acpi_off __initdata; static bool param_acpi_force __initdata; @@ -95,122 +89,15 @@ void __init __acpi_unmap_table(char *map, unsigned long size) early_memunmap(map, size); } -/** - * acpi_map_gic_cpu_interface - generates a logical cpu number - * and map to MPIDR represented by GICC structure - */ -static void __init -acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) +bool __init acpi_psci_present(void) { - int i; - u64 mpidr = processor->arm_mpidr & MPIDR_HWID_BITMASK; - bool enabled = !!(processor->flags & ACPI_MADT_ENABLED); - - if (mpidr == INVALID_HWID) { - pr_info("Skip MADT cpu entry with invalid MPIDR\n"); - return; - } - - total_cpus++; - if (!enabled) - return; - - if (enabled_cpus >= NR_CPUS) { - pr_warn("NR_CPUS limit of %d reached, Processor %d/0x%llx ignored.\n", - NR_CPUS, total_cpus, mpidr); - return; - } - - /* Check if GICC structure of boot CPU is available in the MADT */ - if (cpu_logical_map(0) == mpidr) { - if (bootcpu_valid) { - pr_err("Firmware bug, duplicate CPU MPIDR: 0x%llx in MADT\n", - mpidr); - return; - } - - bootcpu_valid = true; - } - - /* - * Duplicate MPIDRs are a recipe for disaster. Scan - * all initialized entries and check for - * duplicates. If any is found just ignore the CPU. - */ - for (i = 1; i < enabled_cpus; i++) { - if (cpu_logical_map(i) == mpidr) { - pr_err("Firmware bug, duplicate CPU MPIDR: 0x%llx in MADT\n", - mpidr); - return; - } - } - - if (!acpi_psci_present()) - return; - - cpu_ops[enabled_cpus] = cpu_get_ops("psci"); - /* CPU 0 was already initialized */ - if (enabled_cpus) { - if (!cpu_ops[enabled_cpus]) - return; - - if (cpu_ops[enabled_cpus]->cpu_init(NULL, enabled_cpus)) - return; - - /* map the logical cpu id to cpu MPIDR */ - cpu_logical_map(enabled_cpus) = mpidr; - } - - enabled_cpus++; + return acpi_gbl_FADT.arm_boot_flags & ACPI_FADT_PSCI_COMPLIANT; } -static int __init -acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header, - const unsigned long end) +/* Whether HVC must be used instead of SMC as the PSCI conduit */ +bool __init acpi_psci_use_hvc(void) { - struct acpi_madt_generic_interrupt *processor; - - processor = (struct acpi_madt_generic_interrupt *)header; - - if (BAD_MADT_ENTRY(processor, end)) - return -EINVAL; - - acpi_table_print_madt_entry(header); - acpi_map_gic_cpu_interface(processor); - return 0; -} - -/* Parse GIC cpu interface entries in MADT for SMP init */ -void __init acpi_init_cpus(void) -{ - int count, i; - - /* - * do a partial walk of MADT to determine how many CPUs - * we have including disabled CPUs, and get information - * we need for SMP init - */ - count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, - acpi_parse_gic_cpu_interface, 0); - - if (!count) { - pr_err("No GIC CPU interface entries present\n"); - return; - } else if (count < 0) { - pr_err("Error parsing GIC CPU interface entry\n"); - return; - } - - if (!bootcpu_valid) { - pr_err("MADT missing boot CPU MPIDR, not enabling secondaries\n"); - return; - } - - for (i = 0; i < enabled_cpus; i++) - set_cpu_possible(i, true); - - /* Make boot-up look pretty */ - pr_info("%d CPUs enabled, %d CPUs total\n", enabled_cpus, total_cpus); + return acpi_gbl_FADT.arm_boot_flags & ACPI_FADT_PSCI_USE_HVC; } /* diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 28f8365edc4c..14effc6e590f 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -77,9 +77,3 @@ void apply_alternatives(void *start, size_t length) __apply_alternatives(®ion); } - -void free_alternatives_memory(void) -{ - free_reserved_area(__alt_instructions, __alt_instructions_end, - 0, "alternatives"); -} diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index a85843ddbde8..678f30b05a45 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -26,6 +26,7 @@ #include <linux/syscalls.h> #include <linux/uaccess.h> #include <linux/io.h> +#include <linux/arm-smccc.h> #include <asm/checksum.h> @@ -51,6 +52,9 @@ EXPORT_SYMBOL(strnlen); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(__memset); +EXPORT_SYMBOL(__memcpy); +EXPORT_SYMBOL(__memmove); EXPORT_SYMBOL(memchr); EXPORT_SYMBOL(memcmp); @@ -65,3 +69,7 @@ EXPORT_SYMBOL(test_and_change_bit); #ifdef CONFIG_FUNCTION_TRACER EXPORT_SYMBOL(_mcount); #endif + + /* arm-smccc */ +EXPORT_SYMBOL(arm_smccc_smc); +EXPORT_SYMBOL(arm_smccc_hvc); diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 7ac3920b1356..937f5e58a4d3 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -14,8 +14,11 @@ #include <linux/slab.h> #include <linux/sysctl.h> +#include <asm/alternative.h> +#include <asm/cpufeature.h> #include <asm/insn.h> #include <asm/opcodes.h> +#include <asm/sysreg.h> #include <asm/system_misc.h> #include <asm/traps.h> #include <asm/uaccess.h> @@ -279,6 +282,8 @@ static void register_insn_emulation_sysctl(struct ctl_table *table) */ #define __user_swpX_asm(data, addr, res, temp, B) \ __asm__ __volatile__( \ + ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) \ "0: ldxr"B" %w2, [%3]\n" \ "1: stxr"B" %w0, %w1, [%3]\n" \ " cbz %w0, 2f\n" \ @@ -297,6 +302,8 @@ static void register_insn_emulation_sysctl(struct ctl_table *table) " .quad 0b, 4b\n" \ " .quad 1b, 4b\n" \ " .popsection\n" \ + ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) \ : "=&r" (res), "+r" (data), "=&r" (temp) \ : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT) \ : "memory") @@ -506,16 +513,6 @@ ret: return 0; } -static inline void config_sctlr_el1(u32 clear, u32 set) -{ - u32 val; - - asm volatile("mrs %0, sctlr_el1" : "=r" (val)); - val &= ~clear; - val |= set; - asm volatile("msr sctlr_el1, %0" : : "r" (val)); -} - static int cp15_barrier_set_hw_mode(bool enable) { if (enable) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 4106ac64f95e..dc02084b36aa 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -22,12 +22,14 @@ #include <linux/mm.h> #include <linux/dma-mapping.h> #include <linux/kvm_host.h> +#include <linux/suspend.h> #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/smp_plat.h> #include <asm/suspend.h> #include <asm/vdso_datapage.h> #include <linux/kbuild.h> +#include <linux/arm-smccc.h> int main(void) { @@ -61,7 +63,7 @@ int main(void) DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); - DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id)); + DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); BLANK(); DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm)); DEFINE(VMA_VM_FLAGS, offsetof(struct vm_area_struct, vm_flags)); @@ -117,18 +119,22 @@ int main(void) DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags)); + DEFINE(VCPU_DEBUG_PTR, offsetof(struct kvm_vcpu, arch.debug_ptr)); + DEFINE(DEBUG_BCR, offsetof(struct kvm_guest_debug_arch, dbg_bcr)); + DEFINE(DEBUG_BVR, offsetof(struct kvm_guest_debug_arch, dbg_bvr)); + DEFINE(DEBUG_WCR, offsetof(struct kvm_guest_debug_arch, dbg_wcr)); + DEFINE(DEBUG_WVR, offsetof(struct kvm_guest_debug_arch, dbg_wvr)); DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); + DEFINE(VCPU_MDCR_EL2, offsetof(struct kvm_vcpu, arch.mdcr_el2)); DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); + DEFINE(VCPU_HOST_DEBUG_STATE, offsetof(struct kvm_vcpu, arch.host_debug_state)); DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); - DEFINE(VGIC_SAVE_FN, offsetof(struct vgic_sr_vectors, save_vgic)); - DEFINE(VGIC_RESTORE_FN, offsetof(struct vgic_sr_vectors, restore_vgic)); - DEFINE(VGIC_SR_VECTOR_SZ, sizeof(struct vgic_sr_vectors)); DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr)); @@ -154,9 +160,14 @@ int main(void) DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp)); DEFINE(MPIDR_HASH_MASK, offsetof(struct mpidr_hash, mask)); DEFINE(MPIDR_HASH_SHIFTS, offsetof(struct mpidr_hash, shift_aff)); - DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp)); - DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys)); - DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash)); + DEFINE(SLEEP_STACK_DATA_SYSTEM_REGS, offsetof(struct sleep_stack_data, system_regs)); + DEFINE(SLEEP_STACK_DATA_CALLEE_REGS, offsetof(struct sleep_stack_data, callee_saved_regs)); #endif + DEFINE(ARM_SMCCC_RES_X0_OFFS, offsetof(struct arm_smccc_res, a0)); + DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2)); + BLANK(); + DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address)); + DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address)); + DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next)); return 0; } diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 6ffd91438560..dc0df822def3 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -74,6 +74,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = { (1 << MIDR_VARIANT_SHIFT) | 2), }, #endif +#ifdef CONFIG_ARM64_ERRATUM_834220 + { + /* Cortex-A57 r0p0 - r1p2 */ + .desc = "ARM erratum 834220", + .capability = ARM64_WORKAROUND_834220, + MIDR_RANGE(MIDR_CORTEX_A57, 0x00, + (1 << MIDR_VARIANT_SHIFT) | 2), + }, +#endif #ifdef CONFIG_ARM64_ERRATUM_845719 { /* Cortex-A53 r0p[01234] */ diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index fb8ff9ba467a..b6bd7d447768 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -16,11 +16,13 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <asm/cpu_ops.h> -#include <asm/smp_plat.h> +#include <linux/acpi.h> #include <linux/errno.h> #include <linux/of.h> #include <linux/string.h> +#include <asm/acpi.h> +#include <asm/cpu_ops.h> +#include <asm/smp_plat.h> extern const struct cpu_operations smp_spin_table_ops; extern const struct cpu_operations cpu_psci_ops; @@ -28,14 +30,12 @@ extern const struct cpu_operations cpu_psci_ops; const struct cpu_operations *cpu_ops[NR_CPUS]; static const struct cpu_operations *supported_cpu_ops[] __initconst = { -#ifdef CONFIG_SMP &smp_spin_table_ops, -#endif &cpu_psci_ops, NULL, }; -const struct cpu_operations * __init cpu_get_ops(const char *name) +static const struct cpu_operations * __init cpu_get_ops(const char *name) { const struct cpu_operations **ops = supported_cpu_ops; @@ -49,39 +49,53 @@ const struct cpu_operations * __init cpu_get_ops(const char *name) return NULL; } +static const char *__init cpu_read_enable_method(int cpu) +{ + const char *enable_method; + + if (acpi_disabled) { + struct device_node *dn = of_get_cpu_node(cpu, NULL); + + if (!dn) { + if (!cpu) + pr_err("Failed to find device node for boot cpu\n"); + return NULL; + } + + enable_method = of_get_property(dn, "enable-method", NULL); + if (!enable_method) { + /* + * The boot CPU may not have an enable method (e.g. + * when spin-table is used for secondaries). + * Don't warn spuriously. + */ + if (cpu != 0) + pr_err("%s: missing enable-method property\n", + dn->full_name); + } + } else { + enable_method = acpi_get_enable_method(cpu); + if (!enable_method) + pr_err("Unsupported ACPI enable-method\n"); + } + + return enable_method; +} /* - * Read a cpu's enable method from the device tree and record it in cpu_ops. + * Read a cpu's enable method and record it in cpu_ops. */ -int __init cpu_read_ops(struct device_node *dn, int cpu) +int __init cpu_read_ops(int cpu) { - const char *enable_method = of_get_property(dn, "enable-method", NULL); - if (!enable_method) { - /* - * The boot CPU may not have an enable method (e.g. when - * spin-table is used for secondaries). Don't warn spuriously. - */ - if (cpu != 0) - pr_err("%s: missing enable-method property\n", - dn->full_name); - return -ENOENT; - } + const char *enable_method = cpu_read_enable_method(cpu); + + if (!enable_method) + return -ENODEV; cpu_ops[cpu] = cpu_get_ops(enable_method); if (!cpu_ops[cpu]) { - pr_warn("%s: unsupported enable-method property: %s\n", - dn->full_name, enable_method); + pr_warn("Unsupported enable-method: %s\n", enable_method); return -EOPNOTSUPP; } return 0; } - -void __init cpu_read_bootcpu_ops(void) -{ - struct device_node *dn = of_get_cpu_node(0, NULL); - if (!dn) { - pr_err("Failed to find device node for boot cpu\n"); - return; - } - cpu_read_ops(dn, 0); -} diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 3d9967e43d89..bd93fcb33521 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -21,8 +21,63 @@ #include <linux/types.h> #include <asm/cpu.h> #include <asm/cpufeature.h> +#include <asm/processor.h> + +static bool +feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) +{ + int val = cpuid_feature_extract_field(reg, entry->field_pos); + + return val >= entry->min_field_value; +} +#include <asm/virt.h> + +static bool +has_id_aa64pfr0_feature(const struct arm64_cpu_capabilities *entry) +{ + u64 val; + + val = read_cpuid(id_aa64pfr0_el1); + return feature_matches(val, entry); +} + +static bool __maybe_unused +has_id_aa64mmfr1_feature(const struct arm64_cpu_capabilities *entry) +{ + u64 val; + + val = read_cpuid(id_aa64mmfr1_el1); + return feature_matches(val, entry); +} + +static bool runs_at_el2(const struct arm64_cpu_capabilities *entry) +{ + return is_kernel_in_hyp_mode(); +} static const struct arm64_cpu_capabilities arm64_features[] = { + { + .desc = "GIC system register CPU interface", + .capability = ARM64_HAS_SYSREG_GIC_CPUIF, + .matches = has_id_aa64pfr0_feature, + .field_pos = 24, + .min_field_value = 1, + }, +#ifdef CONFIG_ARM64_PAN + { + .desc = "Privileged Access Never", + .capability = ARM64_HAS_PAN, + .matches = has_id_aa64mmfr1_feature, + .field_pos = 20, + .min_field_value = 1, + .enable = cpu_enable_pan, + }, +#endif /* CONFIG_ARM64_PAN */ + { + .desc = "Virtualization Host Extensions", + .capability = ARM64_HAS_VIRT_HOST_EXTN, + .matches = runs_at_el2, + }, {}, }; @@ -39,6 +94,12 @@ void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps, pr_info("%s %s\n", info, caps[i].desc); cpus_set_cap(caps[i].capability); } + + /* second pass allows enable() to consider interacting capabilities */ + for (i = 0; caps[i].desc; i++) { + if (cpus_have_cap(caps[i].capability) && caps[i].enable) + caps[i].enable(); + } } void check_local_cpu_features(void) diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 2bbd0fee084f..7ce589ca54a4 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -18,15 +18,10 @@ int arm_cpuidle_init(unsigned int cpu) { int ret = -EOPNOTSUPP; - struct device_node *cpu_node = of_cpu_device_node_get(cpu); - - if (!cpu_node) - return -ENODEV; if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_init_idle) - ret = cpu_ops[cpu]->cpu_init_idle(cpu_node, cpu); + ret = cpu_ops[cpu]->cpu_init_idle(cpu); - of_node_put(cpu_node); return ret; } diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 00ced919fa5a..5ea03d564743 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -58,7 +58,7 @@ static u32 mdscr_read(void) * Allow root to disable self-hosted debug from userspace. * This is useful if you want to connect an external JTAG debugger. */ -static u32 debug_enabled = 1; +static bool debug_enabled = true; static int create_debug_debugfs_entry(void) { @@ -69,7 +69,7 @@ fs_initcall(create_debug_debugfs_entry); static int __init early_debug_disable(char *buf) { - debug_enabled = 0; + debug_enabled = false; return 0; } diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index 8ce9b0577442..a773db92908b 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -29,7 +29,7 @@ * we want to be. The kernel image wants to be placed at TEXT_OFFSET * from start of RAM. */ -ENTRY(efi_stub_entry) +ENTRY(entry) /* * Create a stack frame to save FP/LR with extra space * for image_addr variable passed to efi_entry(). @@ -86,8 +86,8 @@ ENTRY(efi_stub_entry) * entries for the VA range of the current image, so no maintenance is * necessary. */ - adr x0, efi_stub_entry - adr x1, efi_stub_entry_end + adr x0, entry + adr x1, entry_end sub x1, x1, x0 bl __flush_dcache_area @@ -120,5 +120,5 @@ efi_load_fail: ldp x29, x30, [sp], #32 ret -efi_stub_entry_end: -ENDPROC(efi_stub_entry) +entry_end: +ENDPROC(entry) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 5170fd5c8e97..4abe5fbb1942 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -48,7 +48,6 @@ static struct mm_struct efi_mm = { .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), - INIT_MM_CONTEXT(efi_mm) }; static int uefi_debug __initdata; @@ -233,6 +232,8 @@ static bool __init efi_virtmap_init(void) { efi_memory_desc_t *md; + init_new_context(NULL, &efi_mm); + for_each_efi_memory_desc(&memmap, md) { u64 paddr, npages, size; pgprot_t prot; @@ -263,7 +264,8 @@ static bool __init efi_virtmap_init(void) else prot = PAGE_KERNEL; - create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, prot); + create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, + __pgprot(pgprot_val(prot) | PTE_NG)); } return true; } @@ -338,14 +340,7 @@ core_initcall(arm64_dmi_init); static void efi_set_pgd(struct mm_struct *mm) { - if (mm == &init_mm) - cpu_set_reserved_ttbr0(); - else - cpu_switch_mm(mm->pgd, mm); - - flush_tlb_all(); - if (icache_is_aivivt()) - __flush_icache_all(); + switch_mm(NULL, mm, NULL); } void efi_virtmap_load(void) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 05012cdb555f..b400bad28052 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -21,7 +21,7 @@ #include <linux/init.h> #include <linux/linkage.h> -#include <asm/alternative-asm.h> +#include <asm/alternative.h> #include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/cpufeature.h> @@ -89,7 +89,8 @@ .if \el == 0 mrs x21, sp_el0 - get_thread_info tsk // Ensure MDSCR_EL1.SS is clear, + mov tsk, sp + and tsk, tsk, #~(THREAD_SIZE - 1) // Ensure MDSCR_EL1.SS is clear, ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug disable_step_tsk x19, x20 // exceptions when scheduling. .else @@ -115,6 +116,13 @@ .endif /* + * Set sp_el0 to current thread_info. + */ + .if \el == 0 + msr sp_el0, tsk + .endif + + /* * Registers that may be useful after this macro is invoked: * * x21 - aborted SP @@ -181,8 +189,7 @@ .endm .macro get_thread_info, rd - mov \rd, sp - and \rd, \rd, #~(THREAD_SIZE - 1) // top of stack + mrs \rd, sp_el0 .endm /* @@ -613,6 +620,8 @@ ENTRY(cpu_switch_to) ldp x29, x9, [x8], #16 ldr lr, [x8] mov sp, x9 + and x9, x9, #~(THREAD_SIZE - 1) + msr sp_el0, x9 ret ENDPROC(cpu_switch_to) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index cc7435c9676e..f484684ff44a 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -29,11 +29,13 @@ #include <asm/asm-offsets.h> #include <asm/cache.h> #include <asm/cputype.h> +#include <asm/kernel-pgtable.h> #include <asm/memory.h> -#include <asm/thread_info.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/page.h> +#include <asm/sysreg.h> +#include <asm/thread_info.h> #include <asm/virt.h> #define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET) @@ -46,36 +48,6 @@ #error TEXT_OFFSET must be less than 2MB #endif -#ifdef CONFIG_ARM64_64K_PAGES -#define BLOCK_SHIFT PAGE_SHIFT -#define BLOCK_SIZE PAGE_SIZE -#define TABLE_SHIFT PMD_SHIFT -#else -#define BLOCK_SHIFT SECTION_SHIFT -#define BLOCK_SIZE SECTION_SIZE -#define TABLE_SHIFT PUD_SHIFT -#endif - -#define KERNEL_START _text -#define KERNEL_END _end - -/* - * Initial memory map attributes. - */ -#ifndef CONFIG_SMP -#define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF -#define PMD_FLAGS PMD_TYPE_SECT | PMD_SECT_AF -#else -#define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF | PTE_SHARED -#define PMD_FLAGS PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S -#endif - -#ifdef CONFIG_ARM64_64K_PAGES -#define MM_MMUFLAGS PTE_ATTRINDX(MT_NORMAL) | PTE_FLAGS -#else -#define MM_MMUFLAGS PMD_ATTRINDX(MT_NORMAL) | PMD_FLAGS -#endif - /* * Kernel startup entry point. * --------------------------- @@ -125,8 +97,8 @@ efi_head: #endif #ifdef CONFIG_EFI - .globl stext_offset - .set stext_offset, stext - efi_head + .globl __efistub_stext_offset + .set __efistub_stext_offset, stext - efi_head .align 3 pe_header: .ascii "PE" @@ -149,8 +121,8 @@ optional_header: .long _end - stext // SizeOfCode .long 0 // SizeOfInitializedData .long 0 // SizeOfUninitializedData - .long efi_stub_entry - efi_head // AddressOfEntryPoint - .long stext_offset // BaseOfCode + .long __efistub_entry - efi_head // AddressOfEntryPoint + .long __efistub_stext_offset // BaseOfCode extra_header_fields: .quad 0 // ImageBase @@ -167,7 +139,7 @@ extra_header_fields: .long _end - efi_head // SizeOfImage // Everything before the kernel image is considered part of the header - .long stext_offset // SizeOfHeaders + .long __efistub_stext_offset // SizeOfHeaders .long 0 // CheckSum .short 0xa // Subsystem (EFI application) .short 0 // DllCharacteristics @@ -212,9 +184,9 @@ section_table: .byte 0 .byte 0 // end of 0 padding of section name .long _end - stext // VirtualSize - .long stext_offset // VirtualAddress + .long __efistub_stext_offset // VirtualAddress .long _edata - stext // SizeOfRawData - .long stext_offset // PointerToRawData + .long __efistub_stext_offset // PointerToRawData .long 0 // PointerToRelocations (0 for executables) .long 0 // PointerToLineNumbers (0 for executables) @@ -237,8 +209,6 @@ ENTRY(stext) bl el2_setup // Drop to EL1, w20=cpu_boot_mode adrp x24, __PHYS_OFFSET bl set_cpu_boot_mode_flag - - bl __vet_fdt bl __create_page_tables // x25=TTBR0, x26=TTBR1 /* * The following calls CPU setup code, see arch/arm64/mm/proc.S for @@ -270,24 +240,6 @@ preserve_boot_args: ENDPROC(preserve_boot_args) /* - * Determine validity of the x21 FDT pointer. - * The dtb must be 8-byte aligned and live in the first 512M of memory. - */ -__vet_fdt: - tst x21, #0x7 - b.ne 1f - cmp x21, x24 - b.lt 1f - mov x0, #(1 << 29) - add x0, x0, x24 - cmp x21, x0 - b.ge 1f - ret -1: - mov x21, #0 - ret -ENDPROC(__vet_fdt) -/* * Macro to create a table entry to the next page. * * tbl: page table address @@ -318,7 +270,7 @@ ENDPROC(__vet_fdt) .macro create_pgd_entry, tbl, virt, tmp1, tmp2 create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 #if SWAPPER_PGTABLE_LEVELS == 3 - create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 + create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 #endif .endm @@ -330,15 +282,15 @@ ENDPROC(__vet_fdt) * Corrupts: phys, start, end, pstate */ .macro create_block_map, tbl, flags, phys, start, end - lsr \phys, \phys, #BLOCK_SHIFT - lsr \start, \start, #BLOCK_SHIFT + lsr \phys, \phys, #SWAPPER_BLOCK_SHIFT + lsr \start, \start, #SWAPPER_BLOCK_SHIFT and \start, \start, #PTRS_PER_PTE - 1 // table index - orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry - lsr \end, \end, #BLOCK_SHIFT + orr \phys, \flags, \phys, lsl #SWAPPER_BLOCK_SHIFT // table entry + lsr \end, \end, #SWAPPER_BLOCK_SHIFT and \end, \end, #PTRS_PER_PTE - 1 // table end index 9999: str \phys, [\tbl, \start, lsl #3] // store the entry add \start, \start, #1 // next entry - add \phys, \phys, #BLOCK_SIZE // next block + add \phys, \phys, #SWAPPER_BLOCK_SIZE // next block cmp \start, \end b.ls 9999b .endm @@ -348,8 +300,7 @@ ENDPROC(__vet_fdt) * required to get the kernel running. The following sections are required: * - identity mapping to enable the MMU (low address, TTBR0) * - first few MB of the kernel linear mapping to jump to once the MMU has - * been enabled, including the FDT blob (TTBR1) - * - pgd entry for fixed mappings (TTBR1) + * been enabled */ __create_page_tables: adrp x25, idmap_pg_dir @@ -376,13 +327,13 @@ __create_page_tables: cmp x0, x6 b.lo 1b - ldr x7, =MM_MMUFLAGS + ldr x7, =SWAPPER_MM_MMUFLAGS /* * Create the identity mapping. */ mov x0, x25 // idmap_pg_dir - adrp x3, KERNEL_START // __pa(KERNEL_START) + adrp x3, __idmap_text_start // __pa(__idmap_text_start) #ifndef CONFIG_ARM64_VA_BITS_48 #define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) @@ -405,11 +356,11 @@ __create_page_tables: /* * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the - * entire kernel image can be ID mapped. As T0SZ == (64 - #bits used), + * entire ID map region can be mapped. As T0SZ == (64 - #bits used), * this number conveniently equals the number of leading zeroes in - * the physical address of KERNEL_END. + * the physical address of __idmap_text_end. */ - adrp x5, KERNEL_END + adrp x5, __idmap_text_end clz x5, x5 cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough? b.ge 1f // .. then skip additional level @@ -424,37 +375,21 @@ __create_page_tables: #endif create_pgd_entry x0, x3, x5, x6 - mov x5, x3 // __pa(KERNEL_START) - adr_l x6, KERNEL_END // __pa(KERNEL_END) + mov x5, x3 // __pa(__idmap_text_start) + adr_l x6, __idmap_text_end // __pa(__idmap_text_end) create_block_map x0, x7, x3, x5, x6 /* * Map the kernel image (starting with PHYS_OFFSET). */ mov x0, x26 // swapper_pg_dir - mov x5, #PAGE_OFFSET + ldr x5, =KIMAGE_VADDR create_pgd_entry x0, x5, x3, x6 ldr x6, =KERNEL_END // __va(KERNEL_END) mov x3, x24 // phys offset create_block_map x0, x7, x3, x5, x6 /* - * Map the FDT blob (maximum 2MB; must be within 512MB of - * PHYS_OFFSET). - */ - mov x3, x21 // FDT phys address - and x3, x3, #~((1 << 21) - 1) // 2MB aligned - mov x6, #PAGE_OFFSET - sub x5, x3, x24 // subtract PHYS_OFFSET - tst x5, #~((1 << 29) - 1) // within 512MB? - csel x21, xzr, x21, ne // zero the FDT pointer - b.ne 1f - add x5, x5, x6 // __va(FDT blob) - add x6, x5, #1 << 21 // 2MB for the FDT blob - sub x6, x6, #1 // inclusive range - create_block_map x0, x7, x3, x5, x6 -1: - /* * Since the page tables have been populated with non-cacheable * accesses (MMU disabled), invalidate the idmap and swapper page * tables again to remove any speculatively loaded cache lines. @@ -474,18 +409,24 @@ ENDPROC(__create_page_tables) */ .set initial_sp, init_thread_union + THREAD_START_SP __mmap_switched: - adr_l x6, __bss_start - adr_l x7, __bss_stop - -1: cmp x6, x7 - b.hs 2f - str xzr, [x6], #8 // Clear BSS - b 1b -2: + // Clear BSS + adr_l x0, __bss_start + mov x1, xzr + adr_l x2, __bss_stop + sub x2, x2, x0 + bl __pi_memset + dsb ishst // Make zero page visible to PTW + adr_l sp, initial_sp, x4 + mov x4, sp + and x4, x4, #~(THREAD_SIZE - 1) + msr sp_el0, x4 // Save thread_info str_l x21, __fdt_pointer, x5 // Save FDT pointer str_l x24, memstart_addr, x6 // Save PHYS_OFFSET mov x29, #0 +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif b start_kernel ENDPROC(__mmap_switched) @@ -621,7 +562,6 @@ ENTRY(__boot_cpu_mode) .long BOOT_CPU_MODE_EL1 .popsection -#ifdef CONFIG_SMP /* * This provides a "holding pen" for platforms to hold all secondary * cores are held until we're ready for them to initialise. @@ -666,10 +606,11 @@ ENDPROC(secondary_startup) ENTRY(__secondary_switched) ldr x0, [x21] // get secondary_data.stack mov sp, x0 + and x0, x0, #~(THREAD_SIZE - 1) + msr sp_el0, x0 // save thread_info mov x29, #0 b secondary_start_kernel ENDPROC(__secondary_switched) -#endif /* CONFIG_SMP */ /* * Enable the MMU. @@ -677,9 +618,17 @@ ENDPROC(__secondary_switched) * x0 = SCTLR_EL1 value for turning on the MMU. * x27 = *virtual* address to jump to upon completion * - * other registers depend on the function called upon completion + * Other registers depend on the function called upon completion. + * + * Checks if the selected granule size is supported by the CPU. + * If it isn't, park the CPU */ -__enable_mmu: + .section ".idmap.text", "ax" +ENTRY(__enable_mmu) + mrs x1, ID_AA64MMFR0_EL1 + ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 + cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED + b.ne __no_granule_support ldr x5, =vectors msr vbar_el1, x5 msr ttbr0_el1, x25 // load TTBR0 @@ -687,5 +636,18 @@ __enable_mmu: isb msr sctlr_el1, x0 isb + /* + * Invalidate the local I-cache so that any instructions fetched + * speculatively from the PoC are discarded, since they may have + * been dynamically patched at the PoU. + */ + ic iallu + dsb nsh + isb br x27 ENDPROC(__enable_mmu) + +__no_granule_support: + wfe + b __no_granule_support +ENDPROC(__no_granule_support) diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S new file mode 100644 index 000000000000..46f29b6560ec --- /dev/null +++ b/arch/arm64/kernel/hibernate-asm.S @@ -0,0 +1,176 @@ +/* + * Hibernate low-level support + * + * Copyright (C) 2016 ARM Ltd. + * Author: James Morse <james.morse@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/linkage.h> +#include <linux/errno.h> + +#include <asm/asm-offsets.h> +#include <asm/assembler.h> +#include <asm/cputype.h> +#include <asm/memory.h> +#include <asm/page.h> +#include <asm/virt.h> + +/* + * To prevent the possibility of old and new partial table walks being visible + * in the tlb, switch the ttbr to a zero page when we invalidate the old + * records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i + * Even switching to our copied tables will cause a changed output address at + * each stage of the walk. + */ +.macro break_before_make_ttbr_switch zero_page, page_table + msr ttbr1_el1, \zero_page + isb + tlbi vmalle1is + dsb ish + msr ttbr1_el1, \page_table + isb +.endm + + +/* + * Resume from hibernate + * + * Loads temporary page tables then restores the memory image. + * Finally branches to cpu_resume() to restore the state saved by + * swsusp_arch_suspend(). + * + * Because this code has to be copied to a 'safe' page, it can't call out to + * other functions by PC-relative address. Also remember that it may be + * mid-way through over-writing other functions. For this reason it contains + * code from flush_icache_range() and uses the copy_page() macro. + * + * This 'safe' page is mapped via ttbr0, and executed from there. This function + * switches to a copy of the linear map in ttbr1, performs the restore, then + * switches ttbr1 to the original kernel's swapper_pg_dir. + * + * All of memory gets written to, including code. We need to clean the kernel + * text to the Point of Coherence (PoC) before secondary cores can be booted. + * Because the kernel modules and executable pages mapped to user space are + * also written as data, we clean all pages we touch to the Point of + * Unification (PoU). + * + * x0: physical address of temporary page tables + * x1: physical address of swapper page tables + * x2: address of cpu_resume + * x3: linear map address of restore_pblist in the current kernel + * x4: physical address of __hyp_stub_vectors, or 0 + * x5: physical address of a zero page that remains zero after resume + */ +.pushsection ".hibernate_exit.text", "ax" +ENTRY(swsusp_arch_suspend_exit) + /* + * We execute from ttbr0, change ttbr1 to our copied linear map tables + * with a break-before-make via the zero page + */ + break_before_make_ttbr_switch x5, x0 + + mov x21, x1 + mov x30, x2 + mov x24, x4 + mov x25, x5 + + /* walk the restore_pblist and use copy_page() to over-write memory */ + mov x19, x3 + +1: ldr x10, [x19, #HIBERN_PBE_ORIG] + mov x0, x10 + ldr x1, [x19, #HIBERN_PBE_ADDR] + + copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9 + + add x1, x10, #PAGE_SIZE + /* Clean the copied page to PoU - based on flush_icache_range() */ + dcache_line_size x2, x3 + sub x3, x2, #1 + bic x4, x10, x3 +2: dc cvau, x4 /* clean D line / unified line */ + add x4, x4, x2 + cmp x4, x1 + b.lo 2b + + ldr x19, [x19, #HIBERN_PBE_NEXT] + cbnz x19, 1b + dsb ish /* wait for PoU cleaning to finish */ + + /* switch to the restored kernels page tables */ + break_before_make_ttbr_switch x25, x21 + + ic ialluis + dsb ish + isb + + cbz x24, 3f /* Do we need to re-initialise EL2? */ + hvc #0 +3: ret + + .ltorg +ENDPROC(swsusp_arch_suspend_exit) + +/* + * Restore the hyp stub. + * This must be done before the hibernate page is unmapped by _cpu_resume(), + * but happens before any of the hyp-stub's code is cleaned to PoC. + * + * x24: The physical address of __hyp_stub_vectors + */ +el1_sync: + msr vbar_el2, x24 + eret +ENDPROC(el1_sync) + +.macro invalid_vector label +\label: + b \label +ENDPROC(\label) +.endm + + invalid_vector el2_sync_invalid + invalid_vector el2_irq_invalid + invalid_vector el2_fiq_invalid + invalid_vector el2_error_invalid + invalid_vector el1_sync_invalid + invalid_vector el1_irq_invalid + invalid_vector el1_fiq_invalid + invalid_vector el1_error_invalid + +/* el2 vectors - switch el2 here while we restore the memory image. */ + .align 11 +ENTRY(hibernate_el2_vectors) + ventry el2_sync_invalid // Synchronous EL2t + ventry el2_irq_invalid // IRQ EL2t + ventry el2_fiq_invalid // FIQ EL2t + ventry el2_error_invalid // Error EL2t + + ventry el2_sync_invalid // Synchronous EL2h + ventry el2_irq_invalid // IRQ EL2h + ventry el2_fiq_invalid // FIQ EL2h + ventry el2_error_invalid // Error EL2h + + ventry el1_sync // Synchronous 64-bit EL1 + ventry el1_irq_invalid // IRQ 64-bit EL1 + ventry el1_fiq_invalid // FIQ 64-bit EL1 + ventry el1_error_invalid // Error 64-bit EL1 + + ventry el1_sync_invalid // Synchronous 32-bit EL1 + ventry el1_irq_invalid // IRQ 32-bit EL1 + ventry el1_fiq_invalid // FIQ 32-bit EL1 + ventry el1_error_invalid // Error 32-bit EL1 +END(hibernate_el2_vectors) + +.popsection diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c new file mode 100644 index 000000000000..f8df75d740f4 --- /dev/null +++ b/arch/arm64/kernel/hibernate.c @@ -0,0 +1,487 @@ +/*: + * Hibernate support specific for ARM64 + * + * Derived from work on ARM hibernation support by: + * + * Ubuntu project, hibernation support for mach-dove + * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu) + * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.) + * https://lkml.org/lkml/2010/6/18/4 + * https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html + * https://patchwork.kernel.org/patch/96442/ + * + * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> + * + * License terms: GNU General Public License (GPL) version 2 + */ +#define pr_fmt(x) "hibernate: " x +#include <linux/kvm_host.h> +#include <linux/mm.h> +#include <linux/notifier.h> +#include <linux/pm.h> +#include <linux/sched.h> +#include <linux/suspend.h> +#include <linux/utsname.h> +#include <linux/version.h> + +#include <asm/barrier.h> +#include <asm/cacheflush.h> +#include <asm/irqflags.h> +#include <asm/memory.h> +#include <asm/mmu_context.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/pgtable-hwdef.h> +#include <asm/sections.h> +#include <asm/suspend.h> +#include <asm/virt.h> + +/* + * Hibernate core relies on this value being 0 on resume, and marks it + * __nosavedata assuming it will keep the resume kernel's '0' value. This + * doesn't happen with either KASLR. + * + * defined as "__visible int in_suspend __nosavedata" in + * kernel/power/hibernate.c + */ +extern int in_suspend; + +/* Find a symbols alias in the linear map */ +#define LMADDR(x) phys_to_virt(virt_to_phys(x)) + +/* Do we need to reset el2? */ +#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) + +/* + * Start/end of the hibernate exit code, this must be copied to a 'safe' + * location in memory, and executed from there. + */ +extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[]; + +/* temporary el2 vectors in the __hibernate_exit_text section. */ +extern char hibernate_el2_vectors[]; + +/* hyp-stub vectors, used to restore el2 during resume from hibernate. */ +extern char __hyp_stub_vectors[]; + +/* + * Values that may not change over hibernate/resume. We put the build number + * and date in here so that we guarantee not to resume with a different + * kernel. + */ +struct arch_hibernate_hdr_invariants { + char uts_version[__NEW_UTS_LEN + 1]; +}; + +/* These values need to be know across a hibernate/restore. */ +static struct arch_hibernate_hdr { + struct arch_hibernate_hdr_invariants invariants; + + /* These are needed to find the relocated kernel if built with kaslr */ + phys_addr_t ttbr1_el1; + void (*reenter_kernel)(void); + + /* + * We need to know where the __hyp_stub_vectors are after restore to + * re-configure el2. + */ + phys_addr_t __hyp_stub_vectors; +} resume_hdr; + +static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i) +{ + memset(i, 0, sizeof(*i)); + memcpy(i->uts_version, init_utsname()->version, sizeof(i->uts_version)); +} + +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin); + unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1); + + return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn); +} + +void notrace save_processor_state(void) +{ + WARN_ON(num_online_cpus() != 1); +} + +void notrace restore_processor_state(void) +{ +} + +int arch_hibernation_header_save(void *addr, unsigned int max_size) +{ + struct arch_hibernate_hdr *hdr = addr; + + if (max_size < sizeof(*hdr)) + return -EOVERFLOW; + + arch_hdr_invariants(&hdr->invariants); + hdr->ttbr1_el1 = virt_to_phys(swapper_pg_dir); + hdr->reenter_kernel = _cpu_resume; + + /* We can't use __hyp_get_vectors() because kvm may still be loaded */ + if (el2_reset_needed()) + hdr->__hyp_stub_vectors = virt_to_phys(__hyp_stub_vectors); + else + hdr->__hyp_stub_vectors = 0; + + return 0; +} +EXPORT_SYMBOL(arch_hibernation_header_save); + +int arch_hibernation_header_restore(void *addr) +{ + struct arch_hibernate_hdr_invariants invariants; + struct arch_hibernate_hdr *hdr = addr; + + arch_hdr_invariants(&invariants); + if (memcmp(&hdr->invariants, &invariants, sizeof(invariants))) { + pr_crit("Hibernate image not generated by this kernel!\n"); + return -EINVAL; + } + + resume_hdr = *hdr; + + return 0; +} +EXPORT_SYMBOL(arch_hibernation_header_restore); + +/* + * Copies length bytes, starting at src_start into an new page, + * perform cache maintentance, then maps it at the specified address low + * address as executable. + * + * This is used by hibernate to copy the code it needs to execute when + * overwriting the kernel text. This function generates a new set of page + * tables, which it loads into ttbr0. + * + * Length is provided as we probably only want 4K of data, even on a 64K + * page system. + */ +static int create_safe_exec_page(void *src_start, size_t length, + unsigned long dst_addr, + phys_addr_t *phys_dst_addr, + void *(*allocator)(gfp_t mask), + gfp_t mask) +{ + int rc = 0; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long dst = (unsigned long)allocator(mask); + + if (!dst) { + rc = -ENOMEM; + goto out; + } + + memcpy((void *)dst, src_start, length); + flush_icache_range(dst, dst + length); + + pgd = pgd_offset_raw(allocator(mask), dst_addr); + if (pgd_none(*pgd)) { + pud = allocator(mask); + if (!pud) { + rc = -ENOMEM; + goto out; + } + pgd_populate(&init_mm, pgd, pud); + } + + pud = pud_offset(pgd, dst_addr); + if (pud_none(*pud)) { + pmd = allocator(mask); + if (!pmd) { + rc = -ENOMEM; + goto out; + } + pud_populate(&init_mm, pud, pmd); + } + + pmd = pmd_offset(pud, dst_addr); + if (pmd_none(*pmd)) { + pte = allocator(mask); + if (!pte) { + rc = -ENOMEM; + goto out; + } + pmd_populate_kernel(&init_mm, pmd, pte); + } + + pte = pte_offset_kernel(pmd, dst_addr); + set_pte(pte, __pte(virt_to_phys((void *)dst) | + pgprot_val(PAGE_KERNEL_EXEC))); + + /* Load our new page tables */ + asm volatile("msr ttbr0_el1, %0;" + "isb;" + "tlbi vmalle1is;" + "dsb ish;" + "isb" : : "r"(virt_to_phys(pgd))); + + *phys_dst_addr = virt_to_phys((void *)dst); + +out: + return rc; +} + + +int swsusp_arch_suspend(void) +{ + int ret = 0; + unsigned long flags; + struct sleep_stack_data state; + + local_dbg_save(flags); + + if (__cpu_suspend_enter(&state)) { + ret = swsusp_save(); + } else { + /* Clean kernel to PoC for secondary core startup */ + __flush_dcache_area(LMADDR(KERNEL_START), KERNEL_END - KERNEL_START); + + /* + * Tell the hibernation core that we've just restored + * the memory + */ + in_suspend = 0; + + __cpu_suspend_exit(); + } + + local_dbg_restore(flags); + + return ret; +} + +static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start, + unsigned long end) +{ + pte_t *src_pte; + pte_t *dst_pte; + unsigned long addr = start; + + dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pte) + return -ENOMEM; + pmd_populate_kernel(&init_mm, dst_pmd, dst_pte); + dst_pte = pte_offset_kernel(dst_pmd, start); + + src_pte = pte_offset_kernel(src_pmd, start); + do { + if (!pte_none(*src_pte)) + /* + * Resume will overwrite areas that may be marked + * read only (code, rodata). Clear the RDONLY bit from + * the temporary mappings we use during restore. + */ + set_pte(dst_pte, __pte(pte_val(*src_pte) & ~PTE_RDONLY)); + } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); + + return 0; +} + +static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start, + unsigned long end) +{ + pmd_t *src_pmd; + pmd_t *dst_pmd; + unsigned long next; + unsigned long addr = start; + + if (pud_none(*dst_pud)) { + dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pmd) + return -ENOMEM; + pud_populate(&init_mm, dst_pud, dst_pmd); + } + dst_pmd = pmd_offset(dst_pud, start); + + src_pmd = pmd_offset(src_pud, start); + do { + next = pmd_addr_end(addr, end); + if (pmd_none(*src_pmd)) + continue; + if (pmd_table(*src_pmd)) { + if (copy_pte(dst_pmd, src_pmd, addr, next)) + return -ENOMEM; + } else { + set_pmd(dst_pmd, + __pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY)); + } + } while (dst_pmd++, src_pmd++, addr = next, addr != end); + + return 0; +} + +static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start, + unsigned long end) +{ + pud_t *dst_pud; + pud_t *src_pud; + unsigned long next; + unsigned long addr = start; + + if (pgd_none(*dst_pgd)) { + dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pud) + return -ENOMEM; + pgd_populate(&init_mm, dst_pgd, dst_pud); + } + dst_pud = pud_offset(dst_pgd, start); + + src_pud = pud_offset(src_pgd, start); + do { + next = pud_addr_end(addr, end); + if (pud_none(*src_pud)) + continue; + if (pud_table(*(src_pud))) { + if (copy_pmd(dst_pud, src_pud, addr, next)) + return -ENOMEM; + } else { + set_pud(dst_pud, + __pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY)); + } + } while (dst_pud++, src_pud++, addr = next, addr != end); + + return 0; +} + +static int copy_page_tables(pgd_t *dst_pgd, unsigned long start, + unsigned long end) +{ + unsigned long next; + unsigned long addr = start; + pgd_t *src_pgd = pgd_offset_k(start); + + dst_pgd = pgd_offset_raw(dst_pgd, start); + do { + next = pgd_addr_end(addr, end); + if (pgd_none(*src_pgd)) + continue; + if (copy_pud(dst_pgd, src_pgd, addr, next)) + return -ENOMEM; + } while (dst_pgd++, src_pgd++, addr = next, addr != end); + + return 0; +} + +/* + * Setup then Resume from the hibernate image using swsusp_arch_suspend_exit(). + * + * Memory allocated by get_safe_page() will be dealt with by the hibernate code, + * we don't need to free it here. + */ +int swsusp_arch_resume(void) +{ + int rc = 0; + void *zero_page; + size_t exit_size; + pgd_t *tmp_pg_dir; + void *lm_restore_pblist; + phys_addr_t phys_hibernate_exit; + void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *, + void *, phys_addr_t, phys_addr_t); + + /* + * Locate the exit code in the bottom-but-one page, so that *NULL + * still has disastrous affects. + */ + hibernate_exit = (void *)PAGE_SIZE; + exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start; + /* + * Copy swsusp_arch_suspend_exit() to a safe page. This will generate + * a new set of ttbr0 page tables and load them. + */ + rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size, + (unsigned long)hibernate_exit, + &phys_hibernate_exit, + (void *)get_safe_page, GFP_ATOMIC); + if (rc) { + pr_err("Failed to create safe executable page for hibernate_exit code."); + goto out; + } + + /* + * The hibernate exit text contains a set of el2 vectors, that will + * be executed at el2 with the mmu off in order to reload hyp-stub. + */ + __flush_dcache_area(hibernate_exit, exit_size); + + /* + * Restoring the memory image will overwrite the ttbr1 page tables. + * Create a second copy of just the linear map, and use this when + * restoring. + */ + tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC); + if (!tmp_pg_dir) { + pr_err("Failed to allocate memory for temporary page tables."); + rc = -ENOMEM; + goto out; + } + rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0); + if (rc) + goto out; + + /* + * Since we only copied the linear map, we need to find restore_pblist's + * linear map address. + */ + lm_restore_pblist = LMADDR(restore_pblist); + + /* + * KASLR will cause the el2 vectors to be in a different location in + * the resumed kernel. Load hibernate's temporary copy into el2. + * + * We can skip this step if we booted at EL1, or are running with VHE. + */ + if (el2_reset_needed()) { + phys_addr_t el2_vectors = phys_hibernate_exit; /* base */ + el2_vectors += hibernate_el2_vectors - + __hibernate_exit_text_start; /* offset */ + + __hyp_set_vectors(el2_vectors); + } + + /* + * We need a zero page that is zero before & after resume in order to + * to break before make on the ttbr1 page tables. + */ + zero_page = (void *)get_safe_page(GFP_ATOMIC); + + hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1, + resume_hdr.reenter_kernel, lm_restore_pblist, + resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page)); + +out: + return rc; +} + +static int check_boot_cpu_online_pm_callback(struct notifier_block *nb, + unsigned long action, void *ptr) +{ + if (action == PM_HIBERNATION_PREPARE && + cpumask_first(cpu_online_mask) != 0) { + pr_warn("CPU0 is offline.\n"); + return notifier_from_errno(-ENODEV); + } + + return NOTIFY_OK; +} + +static int __init check_boot_cpu_online_init(void) +{ + /* + * Set this pm_notifier callback with a lower priority than + * cpu_hotplug_pm_callback, so that cpu_hotplug_pm_callback will be + * called earlier to disable cpu hotplug before the cpu online check. + */ + pm_notifier(check_boot_cpu_online_pm_callback, -INT_MAX); + + return 0; +} +core_initcall(check_boot_cpu_online_init); diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index a272f335c289..8727f4490772 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -22,6 +22,8 @@ #include <linux/irqchip/arm-gic-v3.h> #include <asm/assembler.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> #include <asm/ptrace.h> #include <asm/virt.h> @@ -53,15 +55,26 @@ ENDPROC(__hyp_stub_vectors) .align 11 el1_sync: - mrs x1, esr_el2 - lsr x1, x1, #26 - cmp x1, #0x16 - b.ne 2f // Not an HVC trap - cbz x0, 1f - msr vbar_el2, x0 // Set vbar_el2 - b 2f -1: mrs x0, vbar_el2 // Return vbar_el2 -2: eret + mrs x30, esr_el2 + lsr x30, x30, #ESR_ELx_EC_SHIFT + + cmp x30, #ESR_ELx_EC_HVC64 + b.ne 9f // Not an HVC trap + + cmp x0, #HVC_GET_VECTORS + b.ne 1f + mrs x0, vbar_el2 + b 9f + +1: cmp x0, #HVC_SET_VECTORS + b.ne 2f + msr vbar_el2, x1 + b 9f + + /* Someone called kvm_call_hyp() against the hyp-stub... */ +2: mov x0, #ARM_EXCEPTION_HYP_GONE + +9: eret ENDPROC(el1_sync) .macro invalid_vector label @@ -101,10 +114,18 @@ ENDPROC(\label) */ ENTRY(__hyp_get_vectors) - mov x0, xzr - // fall through -ENTRY(__hyp_set_vectors) + str lr, [sp, #-16]! + mov x0, #HVC_GET_VECTORS hvc #0 + ldr lr, [sp], #16 ret ENDPROC(__hyp_get_vectors) + +ENTRY(__hyp_set_vectors) + str lr, [sp, #-16]! + mov x1, x0 + mov x0, #HVC_SET_VECTORS + hvc #0 + ldr lr, [sp], #16 + ret ENDPROC(__hyp_set_vectors) diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index 8fae0756e175..6eb8fee93321 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -59,4 +59,37 @@ _kernel_offset_le = DATA_LE64(TEXT_OFFSET); \ _kernel_flags_le = DATA_LE64(__HEAD_FLAGS); +#ifdef CONFIG_EFI + +/* + * The EFI stub has its own symbol namespace prefixed by __efistub_, to + * isolate it from the kernel proper. The following symbols are legally + * accessed by the stub, so provide some aliases to make them accessible. + * Only include data symbols here, or text symbols of functions that are + * guaranteed to be safe when executed at another offset than they were + * linked at. The routines below are all implemented in assembler in a + * position independent manner + */ +__efistub_memcmp = __pi_memcmp; +__efistub_memchr = __pi_memchr; +__efistub_memcpy = __pi_memcpy; +__efistub_memmove = __pi_memmove; +__efistub_memset = __pi_memset; +__efistub_strlen = __pi_strlen; +__efistub_strcmp = __pi_strcmp; +__efistub_strncmp = __pi_strncmp; +__efistub___flush_dcache_area = __pi___flush_dcache_area; + +#ifdef CONFIG_KASAN +__efistub___memcpy = __pi_memcpy; +__efistub___memmove = __pi_memmove; +__efistub___memset = __pi_memset; +#endif + +__efistub__text = _text; +__efistub__end = _end; +__efistub__edata = _edata; + +#endif + #endif /* __ASM_IMAGE_H */ diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 240b75c0e94f..de99d4bd31bb 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -33,9 +33,7 @@ unsigned long irq_err_count; int arch_show_interrupts(struct seq_file *p, int prec) { -#ifdef CONFIG_SMP show_ipi_list(p, prec); -#endif seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count); return 0; } diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c index 4f1fec7a46db..c2dd1ad3e648 100644 --- a/arch/arm64/kernel/jump_label.c +++ b/arch/arm64/kernel/jump_label.c @@ -28,7 +28,7 @@ void arch_jump_label_transform(struct jump_entry *entry, void *addr = (void *)entry->code; u32 insn; - if (type == JUMP_LABEL_ENABLE) { + if (type == JUMP_LABEL_JMP) { insn = aarch64_insn_gen_branch_imm(entry->code, entry->target, AARCH64_INSN_BRANCH_NOLINK); diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 876eb8df50bf..f4bc779e62e8 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -21,6 +21,7 @@ #include <linux/bitops.h> #include <linux/elf.h> #include <linux/gfp.h> +#include <linux/kasan.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/moduleloader.h> @@ -34,9 +35,18 @@ void *module_alloc(unsigned long size) { - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, 0, - NUMA_NO_NODE, __builtin_return_address(0)); + void *p; + + p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END, + GFP_KERNEL, PAGE_KERNEL_EXEC, 0, + NUMA_NO_NODE, __builtin_return_address(0)); + + if (p && (kasan_module_alloc(p, size) < 0)) { + vfree(p); + return NULL; + } + + return p; } enum aarch64_reloc_op { diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index c6b1f3b96f45..c506bee6b613 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -58,14 +58,6 @@ unsigned long __stack_chk_guard __read_mostly; EXPORT_SYMBOL(__stack_chk_guard); #endif -void soft_restart(unsigned long addr) -{ - setup_mm_for_reboot(); - cpu_soft_restart(virt_to_phys(cpu_reset), addr); - /* Should never get here */ - BUG(); -} - /* * Function pointers to optional machine specific functions */ @@ -136,9 +128,7 @@ void machine_power_off(void) /* * Restart requires that the secondary CPUs stop performing any activity - * while the primary CPU resets the system. Systems with a single CPU can - * use soft_restart() as their machine descriptor's .restart hook, since that - * will cause the only available CPU to reset. Systems with multiple CPUs must + * while the primary CPU resets the system. Systems with multiple CPUs must * provide a HW restart implementation, to ensure that all CPUs reset at once. * This is required so that any code running after reset on the primary CPU * doesn't have to co-ordinate with other CPUs to ensure they aren't still diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 24d4733b7e3c..42816bebb1e0 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -15,436 +15,20 @@ #define pr_fmt(fmt) "psci: " fmt -#include <linux/acpi.h> #include <linux/init.h> #include <linux/of.h> #include <linux/smp.h> -#include <linux/reboot.h> -#include <linux/pm.h> #include <linux/delay.h> -#include <linux/slab.h> +#include <linux/psci.h> + #include <uapi/linux/psci.h> -#include <asm/acpi.h> #include <asm/compiler.h> #include <asm/cpu_ops.h> #include <asm/errno.h> -#include <asm/psci.h> #include <asm/smp_plat.h> -#include <asm/suspend.h> -#include <asm/system_misc.h> - -#define PSCI_POWER_STATE_TYPE_STANDBY 0 -#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 - -struct psci_power_state { - u16 id; - u8 type; - u8 affinity_level; -}; - -struct psci_operations { - int (*cpu_suspend)(struct psci_power_state state, - unsigned long entry_point); - int (*cpu_off)(struct psci_power_state state); - int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); - int (*migrate)(unsigned long cpuid); - int (*affinity_info)(unsigned long target_affinity, - unsigned long lowest_affinity_level); - int (*migrate_info_type)(void); -}; - -static struct psci_operations psci_ops; - -static int (*invoke_psci_fn)(u64, u64, u64, u64); -typedef int (*psci_initcall_t)(const struct device_node *); - -asmlinkage int __invoke_psci_fn_hvc(u64, u64, u64, u64); -asmlinkage int __invoke_psci_fn_smc(u64, u64, u64, u64); - -enum psci_function { - PSCI_FN_CPU_SUSPEND, - PSCI_FN_CPU_ON, - PSCI_FN_CPU_OFF, - PSCI_FN_MIGRATE, - PSCI_FN_AFFINITY_INFO, - PSCI_FN_MIGRATE_INFO_TYPE, - PSCI_FN_MAX, -}; - -static DEFINE_PER_CPU_READ_MOSTLY(struct psci_power_state *, psci_power_state); - -static u32 psci_function_id[PSCI_FN_MAX]; - -static int psci_to_linux_errno(int errno) -{ - switch (errno) { - case PSCI_RET_SUCCESS: - return 0; - case PSCI_RET_NOT_SUPPORTED: - return -EOPNOTSUPP; - case PSCI_RET_INVALID_PARAMS: - return -EINVAL; - case PSCI_RET_DENIED: - return -EPERM; - }; - - return -EINVAL; -} - -static u32 psci_power_state_pack(struct psci_power_state state) -{ - return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT) - & PSCI_0_2_POWER_STATE_ID_MASK) | - ((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT) - & PSCI_0_2_POWER_STATE_TYPE_MASK) | - ((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT) - & PSCI_0_2_POWER_STATE_AFFL_MASK); -} - -static void psci_power_state_unpack(u32 power_state, - struct psci_power_state *state) -{ - state->id = (power_state & PSCI_0_2_POWER_STATE_ID_MASK) >> - PSCI_0_2_POWER_STATE_ID_SHIFT; - state->type = (power_state & PSCI_0_2_POWER_STATE_TYPE_MASK) >> - PSCI_0_2_POWER_STATE_TYPE_SHIFT; - state->affinity_level = - (power_state & PSCI_0_2_POWER_STATE_AFFL_MASK) >> - PSCI_0_2_POWER_STATE_AFFL_SHIFT; -} - -static int psci_get_version(void) -{ - int err; - - err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0); - return err; -} - -static int psci_cpu_suspend(struct psci_power_state state, - unsigned long entry_point) -{ - int err; - u32 fn, power_state; - - fn = psci_function_id[PSCI_FN_CPU_SUSPEND]; - power_state = psci_power_state_pack(state); - err = invoke_psci_fn(fn, power_state, entry_point, 0); - return psci_to_linux_errno(err); -} - -static int psci_cpu_off(struct psci_power_state state) -{ - int err; - u32 fn, power_state; - - fn = psci_function_id[PSCI_FN_CPU_OFF]; - power_state = psci_power_state_pack(state); - err = invoke_psci_fn(fn, power_state, 0, 0); - return psci_to_linux_errno(err); -} - -static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_CPU_ON]; - err = invoke_psci_fn(fn, cpuid, entry_point, 0); - return psci_to_linux_errno(err); -} - -static int psci_migrate(unsigned long cpuid) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_MIGRATE]; - err = invoke_psci_fn(fn, cpuid, 0, 0); - return psci_to_linux_errno(err); -} - -static int psci_affinity_info(unsigned long target_affinity, - unsigned long lowest_affinity_level) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_AFFINITY_INFO]; - err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0); - return err; -} - -static int psci_migrate_info_type(void) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE]; - err = invoke_psci_fn(fn, 0, 0, 0); - return err; -} - -static int __maybe_unused cpu_psci_cpu_init_idle(struct device_node *cpu_node, - unsigned int cpu) -{ - int i, ret, count = 0; - struct psci_power_state *psci_states; - struct device_node *state_node; - - /* - * If the PSCI cpu_suspend function hook has not been initialized - * idle states must not be enabled, so bail out - */ - if (!psci_ops.cpu_suspend) - return -EOPNOTSUPP; - - /* Count idle states */ - while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states", - count))) { - count++; - of_node_put(state_node); - } - - if (!count) - return -ENODEV; - - psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL); - if (!psci_states) - return -ENOMEM; - - for (i = 0; i < count; i++) { - u32 psci_power_state; - - state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i); - - ret = of_property_read_u32(state_node, - "arm,psci-suspend-param", - &psci_power_state); - if (ret) { - pr_warn(" * %s missing arm,psci-suspend-param property\n", - state_node->full_name); - of_node_put(state_node); - goto free_mem; - } - - of_node_put(state_node); - pr_debug("psci-power-state %#x index %d\n", psci_power_state, - i); - psci_power_state_unpack(psci_power_state, &psci_states[i]); - } - /* Idle states parsed correctly, initialize per-cpu pointer */ - per_cpu(psci_power_state, cpu) = psci_states; - return 0; - -free_mem: - kfree(psci_states); - return ret; -} - -static int get_set_conduit_method(struct device_node *np) -{ - const char *method; - - pr_info("probing for conduit method from DT.\n"); - - if (of_property_read_string(np, "method", &method)) { - pr_warn("missing \"method\" property\n"); - return -ENXIO; - } - - if (!strcmp("hvc", method)) { - invoke_psci_fn = __invoke_psci_fn_hvc; - } else if (!strcmp("smc", method)) { - invoke_psci_fn = __invoke_psci_fn_smc; - } else { - pr_warn("invalid \"method\" property: %s\n", method); - return -EINVAL; - } - return 0; -} - -static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) -{ - invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); -} - -static void psci_sys_poweroff(void) -{ - invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0); -} - -static void __init psci_0_2_set_functions(void) -{ - pr_info("Using standard PSCI v0.2 function IDs\n"); - psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN64_CPU_SUSPEND; - psci_ops.cpu_suspend = psci_cpu_suspend; - - psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF; - psci_ops.cpu_off = psci_cpu_off; - - psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN64_CPU_ON; - psci_ops.cpu_on = psci_cpu_on; - - psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN64_MIGRATE; - psci_ops.migrate = psci_migrate; - - psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN64_AFFINITY_INFO; - psci_ops.affinity_info = psci_affinity_info; - - psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] = - PSCI_0_2_FN_MIGRATE_INFO_TYPE; - psci_ops.migrate_info_type = psci_migrate_info_type; - arm_pm_restart = psci_sys_reset; - - pm_power_off = psci_sys_poweroff; -} - -/* - * Probe function for PSCI firmware versions >= 0.2 - */ -static int __init psci_probe(void) -{ - int ver = psci_get_version(); - - if (ver == PSCI_RET_NOT_SUPPORTED) { - /* - * PSCI versions >=0.2 mandates implementation of - * PSCI_VERSION. - */ - pr_err("PSCI firmware does not comply with the v0.2 spec.\n"); - return -EOPNOTSUPP; - } else { - pr_info("PSCIv%d.%d detected in firmware.\n", - PSCI_VERSION_MAJOR(ver), - PSCI_VERSION_MINOR(ver)); - - if (PSCI_VERSION_MAJOR(ver) == 0 && - PSCI_VERSION_MINOR(ver) < 2) { - pr_err("Conflicting PSCI version detected.\n"); - return -EINVAL; - } - } - - psci_0_2_set_functions(); - - return 0; -} - -/* - * PSCI init function for PSCI versions >=0.2 - * - * Probe based on PSCI PSCI_VERSION function - */ -static int __init psci_0_2_init(struct device_node *np) -{ - int err; - - err = get_set_conduit_method(np); - - if (err) - goto out_put_node; - /* - * Starting with v0.2, the PSCI specification introduced a call - * (PSCI_VERSION) that allows probing the firmware version, so - * that PSCI function IDs and version specific initialization - * can be carried out according to the specific version reported - * by firmware - */ - err = psci_probe(); - -out_put_node: - of_node_put(np); - return err; -} - -/* - * PSCI < v0.2 get PSCI Function IDs via DT. - */ -static int __init psci_0_1_init(struct device_node *np) -{ - u32 id; - int err; - - err = get_set_conduit_method(np); - - if (err) - goto out_put_node; - - pr_info("Using PSCI v0.1 Function IDs from DT\n"); - - if (!of_property_read_u32(np, "cpu_suspend", &id)) { - psci_function_id[PSCI_FN_CPU_SUSPEND] = id; - psci_ops.cpu_suspend = psci_cpu_suspend; - } - - if (!of_property_read_u32(np, "cpu_off", &id)) { - psci_function_id[PSCI_FN_CPU_OFF] = id; - psci_ops.cpu_off = psci_cpu_off; - } - - if (!of_property_read_u32(np, "cpu_on", &id)) { - psci_function_id[PSCI_FN_CPU_ON] = id; - psci_ops.cpu_on = psci_cpu_on; - } - - if (!of_property_read_u32(np, "migrate", &id)) { - psci_function_id[PSCI_FN_MIGRATE] = id; - psci_ops.migrate = psci_migrate; - } - -out_put_node: - of_node_put(np); - return err; -} - -static const struct of_device_id psci_of_match[] __initconst = { - { .compatible = "arm,psci", .data = psci_0_1_init}, - { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, - {}, -}; - -int __init psci_dt_init(void) -{ - struct device_node *np; - const struct of_device_id *matched_np; - psci_initcall_t init_fn; - - np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); - - if (!np) - return -ENODEV; - - init_fn = (psci_initcall_t)matched_np->data; - return init_fn(np); -} - -/* - * We use PSCI 0.2+ when ACPI is deployed on ARM64 and it's - * explicitly clarified in SBBR - */ -int __init psci_acpi_init(void) -{ - if (!acpi_psci_present()) { - pr_info("is not implemented in ACPI.\n"); - return -EOPNOTSUPP; - } - - pr_info("probing for conduit method from ACPI.\n"); - - if (acpi_psci_use_hvc()) - invoke_psci_fn = __invoke_psci_fn_hvc; - else - invoke_psci_fn = __invoke_psci_fn_smc; - - return psci_probe(); -} - -#ifdef CONFIG_SMP - -static int __init cpu_psci_cpu_init(struct device_node *dn, unsigned int cpu) +static int __init cpu_psci_cpu_init(unsigned int cpu) { return 0; } @@ -474,6 +58,11 @@ static int cpu_psci_cpu_disable(unsigned int cpu) /* Fail early if we don't have CPU_OFF support */ if (!psci_ops.cpu_off) return -EOPNOTSUPP; + + /* Trusted OS will deny CPU_OFF */ + if (psci_tos_resident_on(cpu)) + return -EPERM; + return 0; } @@ -484,9 +73,8 @@ static void cpu_psci_cpu_die(unsigned int cpu) * There are no known implementations of PSCI actually using the * power state field, pass a sensible default for now. */ - struct psci_power_state state = { - .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, - }; + u32 state = PSCI_POWER_STATE_TYPE_POWER_DOWN << + PSCI_0_2_POWER_STATE_TYPE_SHIFT; ret = psci_ops.cpu_off(state); @@ -498,7 +86,7 @@ static int cpu_psci_cpu_kill(unsigned int cpu) int err, i; if (!psci_ops.affinity_info) - return 1; + return 0; /* * cpu_kill could race with cpu_die and we can * potentially end up declaring this cpu undead @@ -509,7 +97,7 @@ static int cpu_psci_cpu_kill(unsigned int cpu) err = psci_ops.affinity_info(cpu_logical_map(cpu), 0); if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) { pr_info("CPU%d killed.\n", cpu); - return 1; + return 0; } msleep(10); @@ -518,46 +106,16 @@ static int cpu_psci_cpu_kill(unsigned int cpu) pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n", cpu, err); - /* Make op_cpu_kill() fail. */ - return 0; + return -ETIMEDOUT; } #endif -#endif - -static int psci_suspend_finisher(unsigned long index) -{ - struct psci_power_state *state = __this_cpu_read(psci_power_state); - - return psci_ops.cpu_suspend(state[index - 1], - virt_to_phys(cpu_resume)); -} - -static int __maybe_unused cpu_psci_cpu_suspend(unsigned long index) -{ - int ret; - struct psci_power_state *state = __this_cpu_read(psci_power_state); - /* - * idle state index 0 corresponds to wfi, should never be called - * from the cpu_suspend operations - */ - if (WARN_ON_ONCE(!index)) - return -EINVAL; - - if (state[index - 1].type == PSCI_POWER_STATE_TYPE_STANDBY) - ret = psci_ops.cpu_suspend(state[index - 1], 0); - else - ret = cpu_suspend(index, psci_suspend_finisher); - - return ret; -} const struct cpu_operations cpu_psci_ops = { .name = "psci", #ifdef CONFIG_CPU_IDLE - .cpu_init_idle = cpu_psci_cpu_init_idle, - .cpu_suspend = cpu_psci_cpu_suspend, + .cpu_init_idle = psci_cpu_init_idle, + .cpu_suspend = psci_cpu_suspend_enter, #endif -#ifdef CONFIG_SMP .cpu_init = cpu_psci_cpu_init, .cpu_prepare = cpu_psci_cpu_prepare, .cpu_boot = cpu_psci_cpu_boot, @@ -566,6 +124,5 @@ const struct cpu_operations cpu_psci_ops = { .cpu_die = cpu_psci_cpu_die, .cpu_kill = cpu_psci_cpu_kill, #endif -#endif }; diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index bbdb53b87e13..6dfbc51edb07 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -46,6 +46,7 @@ #include <linux/of_platform.h> #include <linux/efi.h> #include <linux/personality.h> +#include <linux/psci.h> #include <asm/acpi.h> #include <asm/fixmap.h> @@ -54,6 +55,7 @@ #include <asm/elf.h> #include <asm/cpufeature.h> #include <asm/cpu_ops.h> +#include <asm/kasan.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/smp_plat.h> @@ -61,7 +63,6 @@ #include <asm/tlbflush.h> #include <asm/traps.h> #include <asm/memblock.h> -#include <asm/psci.h> #include <asm/efi.h> #include <asm/virt.h> @@ -81,6 +82,8 @@ unsigned int compat_elf_hwcap2 __read_mostly; #endif DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); +#include <asm/xen/hypervisor.h> +#include <asm/mmu_context.h> phys_addr_t __fdt_pointer __initdata; @@ -105,18 +108,6 @@ static struct resource mem_res[] = { #define kernel_code mem_res[0] #define kernel_data mem_res[1] -void __init early_print(const char *str, ...) -{ - char buf[256]; - va_list ap; - - va_start(ap, str); - vsnprintf(buf, sizeof(buf), str, ap); - va_end(ap); - - printk("%s", buf); -} - /* * The recorded values of x0 .. x3 upon kernel entry. */ @@ -142,7 +133,6 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id) } struct mpidr_hash mpidr_hash; -#ifdef CONFIG_SMP /** * smp_build_mpidr_hash - Pre-compute shifts required at each affinity * level in order to build a linear index from an @@ -206,9 +196,7 @@ static void __init smp_build_mpidr_hash(void) */ if (mpidr_hash_size() > 4 * num_possible_cpus()) pr_warn("Large number of MPIDR hash buckets detected\n"); - __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash)); } -#endif static void __init hyp_mode_check(void) { @@ -240,8 +228,7 @@ static void __init setup_processor(void) u32 cwg; int cls; - printk("CPU: AArch64 Processor [%08x] revision %d\n", - read_cpuid_id(), read_cpuid_id() & 15); + pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id()); sprintf(init_utsname()->machine, ELF_PLATFORM); elf_hwcap = 0; @@ -326,12 +313,14 @@ static void __init setup_processor(void) static void __init setup_machine_fdt(phys_addr_t dt_phys) { - if (!dt_phys || !early_init_dt_scan(phys_to_virt(dt_phys))) { - early_print("\n" - "Error: invalid device tree blob at physical address 0x%p (virtual address 0x%p)\n" - "The dtb must be 8-byte aligned and passed in the first 512MB of memory\n" - "\nPlease check your bootloader.\n", - dt_phys, phys_to_virt(dt_phys)); + void *dt_virt = fixmap_remap_fdt(dt_phys); + + if (!dt_virt || !early_init_dt_scan(dt_virt)) { + pr_crit("\n" + "Error: invalid device tree blob at physical address %pa (virtual address 0x%p)\n" + "The dtb must be 8-byte aligned and must not exceed 2 MB in size\n" + "\nPlease check your bootloader.", + &dt_phys, dt_virt); while (true) cpu_relax(); @@ -368,14 +357,75 @@ static void __init request_standard_resources(void) } } +#ifdef CONFIG_BLK_DEV_INITRD +/* + * Relocate initrd if it is not completely within the linear mapping. + * This would be the case if mem= cuts out all or part of it. + */ +static void __init relocate_initrd(void) +{ + phys_addr_t orig_start = __virt_to_phys(initrd_start); + phys_addr_t orig_end = __virt_to_phys(initrd_end); + phys_addr_t ram_end = memblock_end_of_DRAM(); + phys_addr_t new_start; + unsigned long size, to_free = 0; + void *dest; + + if (orig_end <= ram_end) + return; + + /* + * Any of the original initrd which overlaps the linear map should + * be freed after relocating. + */ + if (orig_start < ram_end) + to_free = ram_end - orig_start; + + size = orig_end - orig_start; + if (!size) + return; + + /* initrd needs to be relocated completely inside linear mapping */ + new_start = memblock_find_in_range(0, PFN_PHYS(max_pfn), + size, PAGE_SIZE); + if (!new_start) + panic("Cannot relocate initrd of size %ld\n", size); + memblock_reserve(new_start, size); + + initrd_start = __phys_to_virt(new_start); + initrd_end = initrd_start + size; + + pr_info("Moving initrd from [%llx-%llx] to [%llx-%llx]\n", + orig_start, orig_start + size - 1, + new_start, new_start + size - 1); + + dest = (void *)initrd_start; + + if (to_free) { + memcpy(dest, (void *)__phys_to_virt(orig_start), to_free); + dest += to_free; + } + + copy_from_early_mem(dest, orig_start + to_free, size - to_free); + + if (to_free) { + pr_info("Freeing original RAMDISK from [%llx-%llx]\n", + orig_start, orig_start + to_free - 1); + memblock_free(orig_start, to_free); + } +} +#else +static inline void __init relocate_initrd(void) +{ +} +#endif + u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; void __init setup_arch(char **cmdline_p) { setup_processor(); - setup_machine_fdt(__fdt_pointer); - init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; @@ -386,6 +436,8 @@ void __init setup_arch(char **cmdline_p) early_fixmap_init(); early_ioremap_init(); + setup_machine_fdt(__fdt_pointer); + parse_early_param(); /* @@ -394,6 +446,12 @@ void __init setup_arch(char **cmdline_p) */ local_async_enable(); + /* + * TTBR0 is only used for the identity mapping at this stage. Make it + * point to zero page to avoid speculatively fetching new entries. + */ + cpu_uninstall_idmap(); + efi_init(); arm64_memblock_init(); @@ -401,6 +459,10 @@ void __init setup_arch(char **cmdline_p) acpi_boot_table_init(); paging_init(); + relocate_initrd(); + + kasan_init(); + request_standard_resources(); early_ioremap_reset(); @@ -408,18 +470,13 @@ void __init setup_arch(char **cmdline_p) if (acpi_disabled) { unflatten_device_tree(); psci_dt_init(); - cpu_read_bootcpu_ops(); -#ifdef CONFIG_SMP - of_smp_init_cpus(); -#endif } else { psci_acpi_init(); - acpi_init_cpus(); } -#ifdef CONFIG_SMP + cpu_read_bootcpu_ops(); + smp_init_cpus(); smp_build_mpidr_hash(); -#endif #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) @@ -519,9 +576,7 @@ static int c_show(struct seq_file *m, void *v) * online processors, looking for lines beginning with * "processor". Give glibc what it expects. */ -#ifdef CONFIG_SMP seq_printf(m, "processor\t: %d\n", i); -#endif seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", loops_per_jiffy / (500000UL/HZ), diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index ede186cdd452..5a7f5cefa85e 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -49,40 +49,32 @@ orr \dst, \dst, \mask // dst|=(aff3>>rs3) .endm /* - * Save CPU state for a suspend and execute the suspend finisher. - * On success it will return 0 through cpu_resume - ie through a CPU - * soft/hard reboot from the reset vector. - * On failure it returns the suspend finisher return value or force - * -EOPNOTSUPP if the finisher erroneously returns 0 (the suspend finisher - * is not allowed to return, if it does this must be considered failure). - * It saves callee registers, and allocates space on the kernel stack - * to save the CPU specific registers + some other data for resume. + * Save CPU state in the provided sleep_stack_data area, and publish its + * location for cpu_resume()'s use in sleep_save_stash. * - * x0 = suspend finisher argument - * x1 = suspend finisher function pointer + * cpu_resume() will restore this saved state, and return. Because the + * link-register is saved and restored, it will appear to return from this + * function. So that the caller can tell the suspend/resume paths apart, + * __cpu_suspend_enter() will always return a non-zero value, whereas the + * path through cpu_resume() will return 0. + * + * x0 = struct sleep_stack_data area */ ENTRY(__cpu_suspend_enter) - stp x29, lr, [sp, #-96]! - stp x19, x20, [sp,#16] - stp x21, x22, [sp,#32] - stp x23, x24, [sp,#48] - stp x25, x26, [sp,#64] - stp x27, x28, [sp,#80] - /* - * Stash suspend finisher and its argument in x20 and x19 - */ - mov x19, x0 - mov x20, x1 + stp x29, lr, [x0, #SLEEP_STACK_DATA_CALLEE_REGS] + stp x19, x20, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+16] + stp x21, x22, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+32] + stp x23, x24, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+48] + stp x25, x26, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+64] + stp x27, x28, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+80] + + /* save the sp in cpu_suspend_ctx */ mov x2, sp - sub sp, sp, #CPU_SUSPEND_SZ // allocate cpu_suspend_ctx - mov x0, sp - /* - * x0 now points to struct cpu_suspend_ctx allocated on the stack - */ - str x2, [x0, #CPU_CTX_SP] - ldr x1, =sleep_save_sp - ldr x1, [x1, #SLEEP_SAVE_SP_VIRT] -#ifdef CONFIG_SMP + str x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP] + + /* find the mpidr_hash */ + ldr x1, =sleep_save_stash + ldr x1, [x1] mrs x7, mpidr_el1 ldr x9, =mpidr_hash ldr x10, [x9, #MPIDR_HASH_MASK] @@ -94,35 +86,13 @@ ENTRY(__cpu_suspend_enter) ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)] compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10 add x1, x1, x8, lsl #3 -#endif - bl __cpu_suspend_save - /* - * Grab suspend finisher in x20 and its argument in x19 - */ - mov x0, x19 - mov x1, x20 - /* - * We are ready for power down, fire off the suspend finisher - * in x1, with argument in x0 - */ - blr x1 - /* - * Never gets here, unless suspend finisher fails. - * Successful cpu_suspend should return from cpu_resume, returning - * through this code path is considered an error - * If the return value is set to 0 force x0 = -EOPNOTSUPP - * to make sure a proper error condition is propagated - */ - cmp x0, #0 - mov x3, #-EOPNOTSUPP - csel x0, x3, x0, eq - add sp, sp, #CPU_SUSPEND_SZ // rewind stack pointer - ldp x19, x20, [sp, #16] - ldp x21, x22, [sp, #32] - ldp x23, x24, [sp, #48] - ldp x25, x26, [sp, #64] - ldp x27, x28, [sp, #80] - ldp x29, lr, [sp], #96 + + str x0, [x1] + add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS + stp x29, lr, [sp, #-16]! + bl cpu_do_suspend + ldp x29, lr, [sp], #16 + mov x0, #1 ret ENDPROC(__cpu_suspend_enter) .ltorg @@ -137,6 +107,10 @@ ENTRY(cpu_resume_mmu) br x3 // global jump to virtual address ENDPROC(cpu_resume_mmu) cpu_resume_after_mmu: +#ifdef CONFIG_KASAN + mov x0, sp + bl kasan_unpoison_remaining_stack +#endif mov x0, #0 // return zero on success ldp x19, x20, [sp, #16] ldp x21, x22, [sp, #32] @@ -149,7 +123,15 @@ ENDPROC(cpu_resume_after_mmu) ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly -#ifdef CONFIG_SMP + /* enable the MMU early - so we can access sleep_save_stash by va */ + adr_l lr, __enable_mmu /* __cpu_setup will return here */ + ldr x27, =_cpu_resume /* __enable_mmu will branch here */ + adrp x25, idmap_pg_dir + adrp x26, swapper_pg_dir + b __cpu_setup +ENDPROC(cpu_resume) + +ENTRY(_cpu_resume) mrs x1, mpidr_el1 adrp x8, mpidr_hash add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address @@ -159,23 +141,32 @@ ENTRY(cpu_resume) ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)] compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2 /* x7 contains hash index, let's use it to grab context pointer */ -#else - mov x7, xzr -#endif - adrp x0, sleep_save_sp - add x0, x0, #:lo12:sleep_save_sp - ldr x0, [x0, #SLEEP_SAVE_SP_PHYS] + ldr_l x0, sleep_save_stash ldr x0, [x0, x7, lsl #3] + add x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS + add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS /* load sp from context */ ldr x2, [x0, #CPU_CTX_SP] - adrp x1, sleep_idmap_phys - /* load physical address of identity map page table in x1 */ - ldr x1, [x1, #:lo12:sleep_idmap_phys] mov sp, x2 + /* save thread_info */ + and x2, x2, #~(THREAD_SIZE - 1) + msr sp_el0, x2 /* - * cpu_do_resume expects x0 to contain context physical address - * pointer and x1 to contain physical address of 1:1 page tables + * cpu_do_resume expects x0 to contain context address pointer */ - bl cpu_do_resume // PC relative jump, MMU off - b cpu_resume_mmu // Resume MMU, never returns -ENDPROC(cpu_resume) + bl cpu_do_resume + +#ifdef CONFIG_KASAN + mov x0, sp + bl kasan_unpoison_remaining_stack +#endif + + ldp x19, x20, [x29, #16] + ldp x21, x22, [x29, #32] + ldp x23, x24, [x29, #48] + ldp x25, x26, [x29, #64] + ldp x27, x28, [x29, #80] + ldp x29, lr, [x29] + mov x0, #0 + ret +ENDPROC(_cpu_resume) diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S new file mode 100644 index 000000000000..ae0496fa4235 --- /dev/null +++ b/arch/arm64/kernel/smccc-call.S @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2015, Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License Version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/linkage.h> +#include <asm/asm-offsets.h> + + .macro SMCCC instr + .cfi_startproc + \instr #0 + ldr x4, [sp] + stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS] + stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS] + ret + .cfi_endproc + .endm + +/* + * void arm_smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2, + * unsigned long a3, unsigned long a4, unsigned long a5, + * unsigned long a6, unsigned long a7, struct arm_smccc_res *res) + */ +ENTRY(arm_smccc_smc) + SMCCC smc +ENDPROC(arm_smccc_smc) + +/* + * void arm_smccc_hvc(unsigned long a0, unsigned long a1, unsigned long a2, + * unsigned long a3, unsigned long a4, unsigned long a5, + * unsigned long a6, unsigned long a7, struct arm_smccc_res *res) + */ +ENTRY(arm_smccc_hvc) + SMCCC hvc +ENDPROC(arm_smccc_hvc) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index edf73a6e16a8..54736b746e12 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -17,6 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/acpi.h> #include <linux/delay.h> #include <linux/init.h> #include <linux/spinlock.h> @@ -143,15 +144,12 @@ asmlinkage void secondary_start_kernel(void) cpumask_set_cpu(cpu, mm_cpumask(mm)); set_my_cpu_offset(per_cpu_offset(smp_processor_id())); - printk("CPU%u: Booted secondary processor\n", cpu); /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. */ - cpu_set_reserved_ttbr0(); - flush_tlb_all(); - cpu_set_default_tcr_t0sz(); + cpu_uninstall_idmap(); preempt_disable(); trace_hardirqs_off(); @@ -176,6 +174,8 @@ asmlinkage void secondary_start_kernel(void) * the CPU migration code to notice that the CPU is online * before we continue. */ + pr_info("CPU%u: Booted secondary processor [%08x]\n", + cpu, read_cpuid_id()); set_cpu_online(cpu, true); complete(&cpu_running); @@ -247,7 +247,7 @@ static int op_cpu_kill(unsigned int cpu) * time and hope that it's dead, so let's skip the wait and just hope. */ if (!cpu_ops[cpu]->cpu_kill) - return 1; + return 0; return cpu_ops[cpu]->cpu_kill(cpu); } @@ -260,6 +260,8 @@ static DECLARE_COMPLETION(cpu_died); */ void __cpu_die(unsigned int cpu) { + int err; + if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) { pr_crit("CPU%u: cpu didn't die\n", cpu); return; @@ -272,8 +274,10 @@ void __cpu_die(unsigned int cpu) * verify that it has really left the kernel before we consider * clobbering anything it might still be using. */ - if (!op_cpu_kill(cpu)) - pr_warn("CPU%d may not have shut down cleanly\n", cpu); + err = op_cpu_kill(cpu); + if (err) + pr_warn("CPU%d may not have shut down cleanly: %d\n", + cpu, err); } /* @@ -317,57 +321,158 @@ void __init smp_prepare_boot_cpu(void) set_my_cpu_offset(per_cpu_offset(smp_processor_id())); } +static u64 __init of_get_cpu_mpidr(struct device_node *dn) +{ + const __be32 *cell; + u64 hwid; + + /* + * A cpu node with missing "reg" property is + * considered invalid to build a cpu_logical_map + * entry. + */ + cell = of_get_property(dn, "reg", NULL); + if (!cell) { + pr_err("%s: missing reg property\n", dn->full_name); + return INVALID_HWID; + } + + hwid = of_read_number(cell, of_n_addr_cells(dn)); + /* + * Non affinity bits must be set to 0 in the DT + */ + if (hwid & ~MPIDR_HWID_BITMASK) { + pr_err("%s: invalid reg property\n", dn->full_name); + return INVALID_HWID; + } + return hwid; +} + +/* + * Duplicate MPIDRs are a recipe for disaster. Scan all initialized + * entries and check for duplicates. If any is found just ignore the + * cpu. cpu_logical_map was initialized to INVALID_HWID to avoid + * matching valid MPIDR values. + */ +static bool __init is_mpidr_duplicate(unsigned int cpu, u64 hwid) +{ + unsigned int i; + + for (i = 1; (i < cpu) && (i < NR_CPUS); i++) + if (cpu_logical_map(i) == hwid) + return true; + return false; +} + +/* + * Initialize cpu operations for a logical cpu and + * set it in the possible mask on success + */ +static int __init smp_cpu_setup(int cpu) +{ + if (cpu_read_ops(cpu)) + return -ENODEV; + + if (cpu_ops[cpu]->cpu_init(cpu)) + return -ENODEV; + + set_cpu_possible(cpu, true); + + return 0; +} + +static bool bootcpu_valid __initdata; +static unsigned int cpu_count = 1; + +#ifdef CONFIG_ACPI +/* + * acpi_map_gic_cpu_interface - parse processor MADT entry + * + * Carry out sanity checks on MADT processor entry and initialize + * cpu_logical_map on success + */ +static void __init +acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) +{ + u64 hwid = processor->arm_mpidr; + + if (hwid & ~MPIDR_HWID_BITMASK || hwid == INVALID_HWID) { + pr_err("skipping CPU entry with invalid MPIDR 0x%llx\n", hwid); + return; + } + + if (!(processor->flags & ACPI_MADT_ENABLED)) { + pr_err("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid); + return; + } + + if (is_mpidr_duplicate(cpu_count, hwid)) { + pr_err("duplicate CPU MPIDR 0x%llx in MADT\n", hwid); + return; + } + + /* Check if GICC structure of boot CPU is available in the MADT */ + if (cpu_logical_map(0) == hwid) { + if (bootcpu_valid) { + pr_err("duplicate boot CPU MPIDR: 0x%llx in MADT\n", + hwid); + return; + } + bootcpu_valid = true; + return; + } + + if (cpu_count >= NR_CPUS) + return; + + /* map the logical cpu id to cpu MPIDR */ + cpu_logical_map(cpu_count) = hwid; + + cpu_count++; +} + +static int __init +acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header, + const unsigned long end) +{ + struct acpi_madt_generic_interrupt *processor; + + processor = (struct acpi_madt_generic_interrupt *)header; + if (BAD_MADT_ENTRY(processor, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + acpi_map_gic_cpu_interface(processor); + + return 0; +} +#else +#define acpi_table_parse_madt(...) do { } while (0) +#endif + /* * Enumerate the possible CPU set from the device tree and build the * cpu logical map array containing MPIDR values related to logical * cpus. Assumes that cpu_logical_map(0) has already been initialized. */ -void __init of_smp_init_cpus(void) +void __init of_parse_and_init_cpus(void) { struct device_node *dn = NULL; - unsigned int i, cpu = 1; - bool bootcpu_valid = false; while ((dn = of_find_node_by_type(dn, "cpu"))) { - const u32 *cell; - u64 hwid; + u64 hwid = of_get_cpu_mpidr(dn); - /* - * A cpu node with missing "reg" property is - * considered invalid to build a cpu_logical_map - * entry. - */ - cell = of_get_property(dn, "reg", NULL); - if (!cell) { - pr_err("%s: missing reg property\n", dn->full_name); + if (hwid == INVALID_HWID) goto next; - } - hwid = of_read_number(cell, of_n_addr_cells(dn)); - /* - * Non affinity bits must be set to 0 in the DT - */ - if (hwid & ~MPIDR_HWID_BITMASK) { - pr_err("%s: invalid reg property\n", dn->full_name); + if (is_mpidr_duplicate(cpu_count, hwid)) { + pr_err("%s: duplicate cpu reg properties in the DT\n", + dn->full_name); goto next; } /* - * Duplicate MPIDRs are a recipe for disaster. Scan - * all initialized entries and check for - * duplicates. If any is found just ignore the cpu. - * cpu_logical_map was initialized to INVALID_HWID to - * avoid matching valid MPIDR values. - */ - for (i = 1; (i < cpu) && (i < NR_CPUS); i++) { - if (cpu_logical_map(i) == hwid) { - pr_err("%s: duplicate cpu reg properties in the DT\n", - dn->full_name); - goto next; - } - } - - /* * The numbering scheme requires that the boot CPU * must be assigned logical id 0. Record it so that * the logical map built from DT is validated and can @@ -391,38 +496,58 @@ void __init of_smp_init_cpus(void) continue; } - if (cpu >= NR_CPUS) - goto next; - - if (cpu_read_ops(dn, cpu) != 0) - goto next; - - if (cpu_ops[cpu]->cpu_init(dn, cpu)) + if (cpu_count >= NR_CPUS) goto next; pr_debug("cpu logical map 0x%llx\n", hwid); - cpu_logical_map(cpu) = hwid; + cpu_logical_map(cpu_count) = hwid; next: - cpu++; + cpu_count++; } +} + +/* + * Enumerate the possible CPU set from the device tree or ACPI and build the + * cpu logical map array containing MPIDR values related to logical + * cpus. Assumes that cpu_logical_map(0) has already been initialized. + */ +void __init smp_init_cpus(void) +{ + int i; - /* sanity check */ - if (cpu > NR_CPUS) - pr_warning("no. of cores (%d) greater than configured maximum of %d - clipping\n", - cpu, NR_CPUS); + if (acpi_disabled) + of_parse_and_init_cpus(); + else + /* + * do a walk of MADT to determine how many CPUs + * we have including disabled CPUs, and get information + * we need for SMP init + */ + acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, + acpi_parse_gic_cpu_interface, 0); + + if (cpu_count > NR_CPUS) + pr_warn("no. of cores (%d) greater than configured maximum of %d - clipping\n", + cpu_count, NR_CPUS); if (!bootcpu_valid) { - pr_err("DT missing boot CPU MPIDR, not enabling secondaries\n"); + pr_err("missing boot CPU MPIDR, not enabling secondaries\n"); return; } /* - * All the cpus that made it to the cpu_logical_map have been - * validated so set them as possible cpus. + * We need to set the cpu_logical_map entries before enabling + * the cpus so that cpu processor description entries (DT cpu nodes + * and ACPI MADT entries) can be retrieved by matching the cpu hwid + * with entries in cpu_logical_map while initializing the cpus. + * If the cpu set-up fails, invalidate the cpu_logical_map entry. */ - for (i = 0; i < NR_CPUS; i++) - if (cpu_logical_map(i) != INVALID_HWID) - set_cpu_possible(i, true); + for (i = 1; i < NR_CPUS; i++) { + if (cpu_logical_map(i) != INVALID_HWID) { + if (smp_cpu_setup(i)) + cpu_logical_map(i) = INVALID_HWID; + } + } } void __init smp_prepare_cpus(unsigned int max_cpus) diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 14944e5b28da..aef3605a8c47 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -49,8 +49,14 @@ static void write_pen_release(u64 val) } -static int smp_spin_table_cpu_init(struct device_node *dn, unsigned int cpu) +static int smp_spin_table_cpu_init(unsigned int cpu) { + struct device_node *dn; + + dn = of_get_cpu_node(cpu, NULL); + if (!dn) + return -ENODEV; + /* * Determine the address from which the CPU is polling. */ diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index ccb6078ed9f2..4f2369e36094 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -47,8 +47,8 @@ int notrace unwind_frame(struct stackframe *frame) return -EINVAL; frame->sp = fp + 0x10; - frame->fp = *(unsigned long *)(fp); - frame->pc = *(unsigned long *)(fp + 8); + frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); + frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); return 0; } diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 357418137db7..3e8e6ecf209d 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -10,30 +10,11 @@ #include <asm/suspend.h> #include <asm/tlbflush.h> -extern int __cpu_suspend_enter(unsigned long arg, int (*fn)(unsigned long)); /* - * This is called by __cpu_suspend_enter() to save the state, and do whatever - * flushing is required to ensure that when the CPU goes to sleep we have - * the necessary data available when the caches are not searched. - * - * ptr: CPU context virtual address - * save_ptr: address of the location where the context physical address - * must be saved + * This is allocated by cpu_suspend_init(), and used to store a pointer to + * the 'struct sleep_stack_data' the contains a particular CPUs state. */ -void notrace __cpu_suspend_save(struct cpu_suspend_ctx *ptr, - phys_addr_t *save_ptr) -{ - *save_ptr = virt_to_phys(ptr); - - cpu_do_suspend(ptr); - /* - * Only flush the context that must be retrieved with the MMU - * off. VA primitives ensure the flush is applied to all - * cache levels so context is pushed to DRAM. - */ - __flush_dcache_area(ptr, sizeof(*ptr)); - __flush_dcache_area(save_ptr, sizeof(*save_ptr)); -} +unsigned long *sleep_save_stash; /* * This hook is provided so that cpu_suspend code can restore HW @@ -51,6 +32,30 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) hw_breakpoint_restore = hw_bp_restore; } +void notrace __cpu_suspend_exit(void) +{ + /* + * We are resuming from reset with the idmap active in TTBR0_EL1. + * We must uninstall the idmap and restore the expected MMU + * state before we can possibly return to userspace. + */ + cpu_uninstall_idmap(); + + /* + * Restore per-cpu offset before any kernel + * subsystem relying on it has a chance to run. + */ + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); + + /* + * Restore HW breakpoint registers to sane values + * before debug exceptions are possibly reenabled + * through local_dbg_restore. + */ + if (hw_breakpoint_restore) + hw_breakpoint_restore(NULL); +} + /* * cpu_suspend * @@ -60,9 +65,9 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) */ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) { - struct mm_struct *mm = current->active_mm; - int ret; + int ret = 0; unsigned long flags; + struct sleep_stack_data state; /* * From this point debug exceptions are disabled to prevent @@ -78,45 +83,21 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) */ pause_graph_tracing(); - /* - * mm context saved on the stack, it will be restored when - * the cpu comes out of reset through the identity mapped - * page tables, so that the thread address space is properly - * set-up on function return. - */ - ret = __cpu_suspend_enter(arg, fn); - if (ret == 0) { - /* - * We are resuming from reset with TTBR0_EL1 set to the - * idmap to enable the MMU; set the TTBR0 to the reserved - * page tables to prevent speculative TLB allocations, flush - * the local tlb and set the default tcr_el1.t0sz so that - * the TTBR0 address space set-up is properly restored. - * If the current active_mm != &init_mm we entered cpu_suspend - * with mappings in TTBR0 that must be restored, so we switch - * them back to complete the address space configuration - * restoration before returning. - */ - cpu_set_reserved_ttbr0(); - flush_tlb_all(); - cpu_set_default_tcr_t0sz(); - - if (mm != &init_mm) - cpu_switch_mm(mm->pgd, mm); - - /* - * Restore per-cpu offset before any kernel - * subsystem relying on it has a chance to run. - */ - set_my_cpu_offset(per_cpu_offset(smp_processor_id())); + if (__cpu_suspend_enter(&state)) { + /* Call the suspend finisher */ + ret = fn(arg); /* - * Restore HW breakpoint registers to sane values - * before debug exceptions are possibly reenabled - * through local_dbg_restore. + * Never gets here, unless the suspend finisher fails. + * Successful cpu_suspend() should return from cpu_resume(), + * returning through this code path is considered an error + * If the return value is set to 0 force ret = -EOPNOTSUPP + * to make sure a proper error condition is propagated */ - if (hw_breakpoint_restore) - hw_breakpoint_restore(NULL); + if (!ret) + ret = -EOPNOTSUPP; + } else { + __cpu_suspend_exit(); } unpause_graph_tracing(); @@ -131,25 +112,15 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) return ret; } -struct sleep_save_sp sleep_save_sp; -phys_addr_t sleep_idmap_phys; - static int __init cpu_suspend_init(void) { - void *ctx_ptr; - /* ctx_ptr is an array of physical addresses */ - ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t), GFP_KERNEL); + sleep_save_stash = kcalloc(mpidr_hash_size(), sizeof(*sleep_save_stash), + GFP_KERNEL); - if (WARN_ON(!ctx_ptr)) + if (WARN_ON(!sleep_save_stash)) return -ENOMEM; - sleep_save_sp.save_ptr_stash = ctx_ptr; - sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr); - sleep_idmap_phys = virt_to_phys(idmap_pg_dir); - __flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp)); - __flush_dcache_area(&sleep_idmap_phys, sizeof(sleep_idmap_phys)); - return 0; } early_initcall(cpu_suspend_init); diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 42f9195cf2f8..149151fb42bb 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -42,7 +42,6 @@ #include <asm/thread_info.h> #include <asm/stacktrace.h> -#ifdef CONFIG_SMP unsigned long profile_pc(struct pt_regs *regs) { struct stackframe frame; @@ -62,7 +61,6 @@ unsigned long profile_pc(struct pt_regs *regs) return frame.pc; } EXPORT_SYMBOL(profile_pc); -#endif void __init time_init(void) { diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 1ef2940df13c..0a7bc087838e 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -179,11 +179,7 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) #else #define S_PREEMPT "" #endif -#ifdef CONFIG_SMP #define S_SMP " SMP" -#else -#define S_SMP "" -#endif static int __die(const char *str, int err, struct thread_info *thread, struct pt_regs *regs) diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso/vdso.S index 60c1db54b41a..82379a70ef03 100644 --- a/arch/arm64/kernel/vdso/vdso.S +++ b/arch/arm64/kernel/vdso/vdso.S @@ -21,9 +21,8 @@ #include <linux/const.h> #include <asm/page.h> - __PAGE_ALIGNED_DATA - .globl vdso_start, vdso_end + .section .rodata .balign PAGE_SIZE vdso_start: .incbin "arch/arm64/kernel/vdso/vdso.so" diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index aff07bcad882..56c2c276ea9f 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -5,6 +5,7 @@ */ #include <asm-generic/vmlinux.lds.h> +#include <asm/kernel-pgtable.h> #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/page.h> @@ -38,6 +39,22 @@ jiffies = jiffies_64; *(.hyp.text) \ VMLINUX_SYMBOL(__hyp_text_end) = .; +#define IDMAP_TEXT \ + . = ALIGN(SZ_4K); \ + VMLINUX_SYMBOL(__idmap_text_start) = .; \ + *(.idmap.text) \ + VMLINUX_SYMBOL(__idmap_text_end) = .; + +#ifdef CONFIG_HIBERNATION +#define HIBERNATE_TEXT \ + . = ALIGN(SZ_4K); \ + VMLINUX_SYMBOL(__hibernate_exit_text_start) = .;\ + *(.hibernate_exit.text) \ + VMLINUX_SYMBOL(__hibernate_exit_text_end) = .; +#else +#define HIBERNATE_TEXT +#endif + /* * The size of the PE/COFF section that covers the kernel image, which * runs from stext to _edata, must be a round multiple of the PE/COFF @@ -81,7 +98,7 @@ SECTIONS *(.discard.*) } - . = PAGE_OFFSET + TEXT_OFFSET; + . = KIMAGE_VADDR + TEXT_OFFSET; .head.text : { _text = .; @@ -98,6 +115,8 @@ SECTIONS SCHED_TEXT LOCK_TEXT HYPERVISOR_TEXT + IDMAP_TEXT + HIBERNATE_TEXT *(.fixup) *(.gnu.warning) . = ALIGN(16); @@ -134,9 +153,6 @@ SECTIONS PERCPU_SECTION(64) - . = ALIGN(PAGE_SIZE); - __init_end = .; - . = ALIGN(4); .altinstructions : { __alt_instructions = .; @@ -148,6 +164,8 @@ SECTIONS } . = ALIGN(PAGE_SIZE); + __init_end = .; + _data = .; _sdata = .; RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE) @@ -170,13 +188,19 @@ SECTIONS } /* - * The HYP init code can't be more than a page long, + * The HYP init code and ID map text can't be longer than a page each, * and should not cross a page boundary. */ ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, "HYP init code too big or misaligned") +ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, + "ID map text too big or misaligned") +#ifdef CONFIG_HIBERNATION +ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1)) + <= SZ_4K, "Hibernate exit text too big or misaligned") +#endif /* * If padding is applied before .head.text, virt<->phys conversions will fail. */ -ASSERT(_text == (PAGE_OFFSET + TEXT_OFFSET), "HEAD is misaligned") +ASSERT(_text == (KIMAGE_VADDR + TEXT_OFFSET), "HEAD is misaligned") diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index d5904f876cdb..76000c00b0f7 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -10,6 +10,7 @@ KVM=../../../virt/kvm ARM=../../../arch/arm/kvm obj-$(CONFIG_KVM_ARM_HOST) += kvm.o +obj-$(CONFIG_KVM_ARM_HOST) += hyp/ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o @@ -17,13 +18,11 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o -kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o +kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v2-switch.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c new file mode 100644 index 000000000000..30887810ff4b --- /dev/null +++ b/arch/arm64/kvm/debug.c @@ -0,0 +1,90 @@ +/* + * Debug and Guest Debug support + * + * Copyright (C) 2015 - Linaro Ltd + * Author: Alex Bennée <alex.bennee@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/kvm_host.h> + +#include <asm/kvm_arm.h> + +static DEFINE_PER_CPU(u32, mdcr_el2); + +/** + * kvm_arm_init_debug - grab what we need for debug + * + * Currently the sole task of this function is to retrieve the initial + * value of mdcr_el2 so we can preserve MDCR_EL2.HPMN which has + * presumably been set-up by some knowledgeable bootcode. + * + * It is called once per-cpu during CPU hyp initialisation. + */ + +void kvm_arm_init_debug(void) +{ + __this_cpu_write(mdcr_el2, kvm_call_hyp(__kvm_get_mdcr_el2)); +} + + +/** + * kvm_arm_reset_debug_ptr - reset the debug ptr to point to the vcpu state + */ + +void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) +{ + vcpu->arch.debug_ptr = &vcpu->arch.vcpu_debug_state; +} + +/** + * kvm_arm_setup_debug - set up debug related stuff + * + * @vcpu: the vcpu pointer + * + * This is called before each entry into the hypervisor to setup any + * debug related registers. Currently this just ensures we will trap + * access to: + * - Performance monitors (MDCR_EL2_TPM/MDCR_EL2_TPMCR) + * - Debug ROM Address (MDCR_EL2_TDRA) + * - OS related registers (MDCR_EL2_TDOSA) + * + * Additionally, KVM only traps guest accesses to the debug registers if + * the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY + * flag on vcpu->arch.debug_flags). Since the guest must not interfere + * with the hardware state when debugging the guest, we must ensure that + * trapping is enabled whenever we are debugging the guest using the + * debug registers. + */ + +void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) +{ + bool trap_debug = !(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY); + + vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK; + vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM | + MDCR_EL2_TPMCR | + MDCR_EL2_TDRA | + MDCR_EL2_TDOSA); + + /* Trap on access to debug registers? */ + if (trap_debug) + vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; + +} + +void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) +{ + /* Nothing to do yet */ +} diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 524fa25671fc..43c1a2bf5a04 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -138,6 +138,13 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, exit_handler = kvm_get_exit_handler(vcpu); return exit_handler(vcpu, run); + case ARM_EXCEPTION_HYP_GONE: + /* + * EL2 has been reset to the hyp-stub. This happens when a guest + * is pre-empted by kvm_reboot()'s shutdown call. + */ + run->exit_reason = KVM_EXIT_FAIL_ENTRY; + return 0; default: kvm_pr_unimpl("Unsupported exception type: %d", exception_index); diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 178ba2248a98..3270d392970d 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -21,6 +21,7 @@ #include <asm/kvm_arm.h> #include <asm/kvm_mmu.h> #include <asm/pgtable-hwdef.h> +#include <asm/sysreg.h> .text .pushsection .hyp.idmap.text, "ax" @@ -105,8 +106,8 @@ __do_hyp_init: dsb sy mrs x4, sctlr_el2 - and x4, x4, #SCTLR_EL2_EE // preserve endianness of EL2 - ldr x5, =SCTLR_EL2_FLAGS + and x4, x4, #SCTLR_ELx_EE // preserve endianness of EL2 + ldr x5, =SCTLR_ELx_FLAGS orr x4, x4, x5 msr sctlr_el2, x4 isb @@ -140,6 +141,49 @@ merged: eret ENDPROC(__kvm_hyp_init) + /* + * Reset kvm back to the hyp stub. This is the trampoline dance in + * reverse. If kvm used an extended idmap, __extended_idmap_trampoline + * calls this code directly in the idmap. In this case switching to the + * boot tables is a no-op. + * + * x0: HYP boot pgd + * x1: HYP phys_idmap_start + */ +ENTRY(__kvm_hyp_reset) + /* We're in trampoline code in VA, switch back to boot page tables */ + msr ttbr0_el2, x0 + isb + + /* Ensure the PA branch doesn't find a stale tlb entry or stale code. */ + ic iallu + tlbi alle2 + dsb sy + isb + + /* Branch into PA space */ + adr x0, 1f + bfi x1, x0, #0, #PAGE_SHIFT + br x1 + + /* We're now in idmap, disable MMU */ +1: mrs x0, sctlr_el2 + ldr x1, =SCTLR_ELx_FLAGS + bic x0, x0, x1 // Clear SCTL_M and etc + msr sctlr_el2, x0 + isb + + /* Invalidate the old TLBs */ + tlbi alle2 + dsb sy + + /* Install stub vectors */ + adr_l x0, __hyp_stub_vectors + msr vbar_el2, x0 + + eret +ENDPROC(__kvm_hyp_reset) + .ltorg .popsection diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 64f9e60b31da..8d231f19453e 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -17,1106 +17,9 @@ #include <linux/linkage.h> -#include <asm/asm-offsets.h> +#include <asm/alternative.h> #include <asm/assembler.h> -#include <asm/debug-monitors.h> -#include <asm/esr.h> -#include <asm/fpsimdmacros.h> -#include <asm/kvm.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_mmu.h> -#include <asm/memory.h> - -#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) -#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) -#define CPU_SPSR_OFFSET(x) CPU_GP_REG_OFFSET(CPU_SPSR + 8*x) -#define CPU_SYSREG_OFFSET(x) (CPU_SYSREGS + 8*x) - - .text - .pushsection .hyp.text, "ax" - .align PAGE_SHIFT - -.macro save_common_regs - // x2: base address for cpu context - // x3: tmp register - - add x3, x2, #CPU_XREG_OFFSET(19) - stp x19, x20, [x3] - stp x21, x22, [x3, #16] - stp x23, x24, [x3, #32] - stp x25, x26, [x3, #48] - stp x27, x28, [x3, #64] - stp x29, lr, [x3, #80] - - mrs x19, sp_el0 - mrs x20, elr_el2 // EL1 PC - mrs x21, spsr_el2 // EL1 pstate - - stp x19, x20, [x3, #96] - str x21, [x3, #112] - - mrs x22, sp_el1 - mrs x23, elr_el1 - mrs x24, spsr_el1 - - str x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] - str x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)] - str x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)] -.endm - -.macro restore_common_regs - // x2: base address for cpu context - // x3: tmp register - - ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] - ldr x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)] - ldr x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)] - - msr sp_el1, x22 - msr elr_el1, x23 - msr spsr_el1, x24 - - add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0 - ldp x19, x20, [x3] - ldr x21, [x3, #16] - - msr sp_el0, x19 - msr elr_el2, x20 // EL1 PC - msr spsr_el2, x21 // EL1 pstate - - add x3, x2, #CPU_XREG_OFFSET(19) - ldp x19, x20, [x3] - ldp x21, x22, [x3, #16] - ldp x23, x24, [x3, #32] - ldp x25, x26, [x3, #48] - ldp x27, x28, [x3, #64] - ldp x29, lr, [x3, #80] -.endm - -.macro save_host_regs - save_common_regs -.endm - -.macro restore_host_regs - restore_common_regs -.endm - -.macro save_fpsimd - // x2: cpu context address - // x3, x4: tmp regs - add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) - fpsimd_save x3, 4 -.endm - -.macro restore_fpsimd - // x2: cpu context address - // x3, x4: tmp regs - add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) - fpsimd_restore x3, 4 -.endm - -.macro save_guest_regs - // x0 is the vcpu address - // x1 is the return code, do not corrupt! - // x2 is the cpu context - // x3 is a tmp register - // Guest's x0-x3 are on the stack - - // Compute base to save registers - add x3, x2, #CPU_XREG_OFFSET(4) - stp x4, x5, [x3] - stp x6, x7, [x3, #16] - stp x8, x9, [x3, #32] - stp x10, x11, [x3, #48] - stp x12, x13, [x3, #64] - stp x14, x15, [x3, #80] - stp x16, x17, [x3, #96] - str x18, [x3, #112] - - pop x6, x7 // x2, x3 - pop x4, x5 // x0, x1 - - add x3, x2, #CPU_XREG_OFFSET(0) - stp x4, x5, [x3] - stp x6, x7, [x3, #16] - - save_common_regs -.endm - -.macro restore_guest_regs - // x0 is the vcpu address. - // x2 is the cpu context - // x3 is a tmp register - - // Prepare x0-x3 for later restore - add x3, x2, #CPU_XREG_OFFSET(0) - ldp x4, x5, [x3] - ldp x6, x7, [x3, #16] - push x4, x5 // Push x0-x3 on the stack - push x6, x7 - - // x4-x18 - ldp x4, x5, [x3, #32] - ldp x6, x7, [x3, #48] - ldp x8, x9, [x3, #64] - ldp x10, x11, [x3, #80] - ldp x12, x13, [x3, #96] - ldp x14, x15, [x3, #112] - ldp x16, x17, [x3, #128] - ldr x18, [x3, #144] - - // x19-x29, lr, sp*, elr*, spsr* - restore_common_regs - - // Last bits of the 64bit state - pop x2, x3 - pop x0, x1 - - // Do not touch any register after this! -.endm - -/* - * Macros to perform system register save/restore. - * - * Ordering here is absolutely critical, and must be kept consistent - * in {save,restore}_sysregs, {save,restore}_guest_32bit_state, - * and in kvm_asm.h. - * - * In other words, don't touch any of these unless you know what - * you are doing. - */ -.macro save_sysregs - // x2: base address for cpu context - // x3: tmp register - - add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1) - - mrs x4, vmpidr_el2 - mrs x5, csselr_el1 - mrs x6, sctlr_el1 - mrs x7, actlr_el1 - mrs x8, cpacr_el1 - mrs x9, ttbr0_el1 - mrs x10, ttbr1_el1 - mrs x11, tcr_el1 - mrs x12, esr_el1 - mrs x13, afsr0_el1 - mrs x14, afsr1_el1 - mrs x15, far_el1 - mrs x16, mair_el1 - mrs x17, vbar_el1 - mrs x18, contextidr_el1 - mrs x19, tpidr_el0 - mrs x20, tpidrro_el0 - mrs x21, tpidr_el1 - mrs x22, amair_el1 - mrs x23, cntkctl_el1 - mrs x24, par_el1 - mrs x25, mdscr_el1 - - stp x4, x5, [x3] - stp x6, x7, [x3, #16] - stp x8, x9, [x3, #32] - stp x10, x11, [x3, #48] - stp x12, x13, [x3, #64] - stp x14, x15, [x3, #80] - stp x16, x17, [x3, #96] - stp x18, x19, [x3, #112] - stp x20, x21, [x3, #128] - stp x22, x23, [x3, #144] - stp x24, x25, [x3, #160] -.endm - -.macro save_debug - // x2: base address for cpu context - // x3: tmp register - - mrs x26, id_aa64dfr0_el1 - ubfx x24, x26, #12, #4 // Extract BRPs - ubfx x25, x26, #20, #4 // Extract WRPs - mov w26, #15 - sub w24, w26, w24 // How many BPs to skip - sub w25, w26, w25 // How many WPs to skip - - add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1) - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 -1: - mrs x20, dbgbcr15_el1 - mrs x19, dbgbcr14_el1 - mrs x18, dbgbcr13_el1 - mrs x17, dbgbcr12_el1 - mrs x16, dbgbcr11_el1 - mrs x15, dbgbcr10_el1 - mrs x14, dbgbcr9_el1 - mrs x13, dbgbcr8_el1 - mrs x12, dbgbcr7_el1 - mrs x11, dbgbcr6_el1 - mrs x10, dbgbcr5_el1 - mrs x9, dbgbcr4_el1 - mrs x8, dbgbcr3_el1 - mrs x7, dbgbcr2_el1 - mrs x6, dbgbcr1_el1 - mrs x5, dbgbcr0_el1 - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 - -1: - str x20, [x3, #(15 * 8)] - str x19, [x3, #(14 * 8)] - str x18, [x3, #(13 * 8)] - str x17, [x3, #(12 * 8)] - str x16, [x3, #(11 * 8)] - str x15, [x3, #(10 * 8)] - str x14, [x3, #(9 * 8)] - str x13, [x3, #(8 * 8)] - str x12, [x3, #(7 * 8)] - str x11, [x3, #(6 * 8)] - str x10, [x3, #(5 * 8)] - str x9, [x3, #(4 * 8)] - str x8, [x3, #(3 * 8)] - str x7, [x3, #(2 * 8)] - str x6, [x3, #(1 * 8)] - str x5, [x3, #(0 * 8)] - - add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1) - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 -1: - mrs x20, dbgbvr15_el1 - mrs x19, dbgbvr14_el1 - mrs x18, dbgbvr13_el1 - mrs x17, dbgbvr12_el1 - mrs x16, dbgbvr11_el1 - mrs x15, dbgbvr10_el1 - mrs x14, dbgbvr9_el1 - mrs x13, dbgbvr8_el1 - mrs x12, dbgbvr7_el1 - mrs x11, dbgbvr6_el1 - mrs x10, dbgbvr5_el1 - mrs x9, dbgbvr4_el1 - mrs x8, dbgbvr3_el1 - mrs x7, dbgbvr2_el1 - mrs x6, dbgbvr1_el1 - mrs x5, dbgbvr0_el1 - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 - -1: - str x20, [x3, #(15 * 8)] - str x19, [x3, #(14 * 8)] - str x18, [x3, #(13 * 8)] - str x17, [x3, #(12 * 8)] - str x16, [x3, #(11 * 8)] - str x15, [x3, #(10 * 8)] - str x14, [x3, #(9 * 8)] - str x13, [x3, #(8 * 8)] - str x12, [x3, #(7 * 8)] - str x11, [x3, #(6 * 8)] - str x10, [x3, #(5 * 8)] - str x9, [x3, #(4 * 8)] - str x8, [x3, #(3 * 8)] - str x7, [x3, #(2 * 8)] - str x6, [x3, #(1 * 8)] - str x5, [x3, #(0 * 8)] - - add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1) - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 -1: - mrs x20, dbgwcr15_el1 - mrs x19, dbgwcr14_el1 - mrs x18, dbgwcr13_el1 - mrs x17, dbgwcr12_el1 - mrs x16, dbgwcr11_el1 - mrs x15, dbgwcr10_el1 - mrs x14, dbgwcr9_el1 - mrs x13, dbgwcr8_el1 - mrs x12, dbgwcr7_el1 - mrs x11, dbgwcr6_el1 - mrs x10, dbgwcr5_el1 - mrs x9, dbgwcr4_el1 - mrs x8, dbgwcr3_el1 - mrs x7, dbgwcr2_el1 - mrs x6, dbgwcr1_el1 - mrs x5, dbgwcr0_el1 - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 - -1: - str x20, [x3, #(15 * 8)] - str x19, [x3, #(14 * 8)] - str x18, [x3, #(13 * 8)] - str x17, [x3, #(12 * 8)] - str x16, [x3, #(11 * 8)] - str x15, [x3, #(10 * 8)] - str x14, [x3, #(9 * 8)] - str x13, [x3, #(8 * 8)] - str x12, [x3, #(7 * 8)] - str x11, [x3, #(6 * 8)] - str x10, [x3, #(5 * 8)] - str x9, [x3, #(4 * 8)] - str x8, [x3, #(3 * 8)] - str x7, [x3, #(2 * 8)] - str x6, [x3, #(1 * 8)] - str x5, [x3, #(0 * 8)] - - add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1) - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 -1: - mrs x20, dbgwvr15_el1 - mrs x19, dbgwvr14_el1 - mrs x18, dbgwvr13_el1 - mrs x17, dbgwvr12_el1 - mrs x16, dbgwvr11_el1 - mrs x15, dbgwvr10_el1 - mrs x14, dbgwvr9_el1 - mrs x13, dbgwvr8_el1 - mrs x12, dbgwvr7_el1 - mrs x11, dbgwvr6_el1 - mrs x10, dbgwvr5_el1 - mrs x9, dbgwvr4_el1 - mrs x8, dbgwvr3_el1 - mrs x7, dbgwvr2_el1 - mrs x6, dbgwvr1_el1 - mrs x5, dbgwvr0_el1 - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 - -1: - str x20, [x3, #(15 * 8)] - str x19, [x3, #(14 * 8)] - str x18, [x3, #(13 * 8)] - str x17, [x3, #(12 * 8)] - str x16, [x3, #(11 * 8)] - str x15, [x3, #(10 * 8)] - str x14, [x3, #(9 * 8)] - str x13, [x3, #(8 * 8)] - str x12, [x3, #(7 * 8)] - str x11, [x3, #(6 * 8)] - str x10, [x3, #(5 * 8)] - str x9, [x3, #(4 * 8)] - str x8, [x3, #(3 * 8)] - str x7, [x3, #(2 * 8)] - str x6, [x3, #(1 * 8)] - str x5, [x3, #(0 * 8)] - - mrs x21, mdccint_el1 - str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] -.endm - -.macro restore_sysregs - // x2: base address for cpu context - // x3: tmp register - - add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1) - - ldp x4, x5, [x3] - ldp x6, x7, [x3, #16] - ldp x8, x9, [x3, #32] - ldp x10, x11, [x3, #48] - ldp x12, x13, [x3, #64] - ldp x14, x15, [x3, #80] - ldp x16, x17, [x3, #96] - ldp x18, x19, [x3, #112] - ldp x20, x21, [x3, #128] - ldp x22, x23, [x3, #144] - ldp x24, x25, [x3, #160] - - msr vmpidr_el2, x4 - msr csselr_el1, x5 - msr sctlr_el1, x6 - msr actlr_el1, x7 - msr cpacr_el1, x8 - msr ttbr0_el1, x9 - msr ttbr1_el1, x10 - msr tcr_el1, x11 - msr esr_el1, x12 - msr afsr0_el1, x13 - msr afsr1_el1, x14 - msr far_el1, x15 - msr mair_el1, x16 - msr vbar_el1, x17 - msr contextidr_el1, x18 - msr tpidr_el0, x19 - msr tpidrro_el0, x20 - msr tpidr_el1, x21 - msr amair_el1, x22 - msr cntkctl_el1, x23 - msr par_el1, x24 - msr mdscr_el1, x25 -.endm - -.macro restore_debug - // x2: base address for cpu context - // x3: tmp register - - mrs x26, id_aa64dfr0_el1 - ubfx x24, x26, #12, #4 // Extract BRPs - ubfx x25, x26, #20, #4 // Extract WRPs - mov w26, #15 - sub w24, w26, w24 // How many BPs to skip - sub w25, w26, w25 // How many WPs to skip - - add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1) - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 -1: - ldr x20, [x3, #(15 * 8)] - ldr x19, [x3, #(14 * 8)] - ldr x18, [x3, #(13 * 8)] - ldr x17, [x3, #(12 * 8)] - ldr x16, [x3, #(11 * 8)] - ldr x15, [x3, #(10 * 8)] - ldr x14, [x3, #(9 * 8)] - ldr x13, [x3, #(8 * 8)] - ldr x12, [x3, #(7 * 8)] - ldr x11, [x3, #(6 * 8)] - ldr x10, [x3, #(5 * 8)] - ldr x9, [x3, #(4 * 8)] - ldr x8, [x3, #(3 * 8)] - ldr x7, [x3, #(2 * 8)] - ldr x6, [x3, #(1 * 8)] - ldr x5, [x3, #(0 * 8)] - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 -1: - msr dbgbcr15_el1, x20 - msr dbgbcr14_el1, x19 - msr dbgbcr13_el1, x18 - msr dbgbcr12_el1, x17 - msr dbgbcr11_el1, x16 - msr dbgbcr10_el1, x15 - msr dbgbcr9_el1, x14 - msr dbgbcr8_el1, x13 - msr dbgbcr7_el1, x12 - msr dbgbcr6_el1, x11 - msr dbgbcr5_el1, x10 - msr dbgbcr4_el1, x9 - msr dbgbcr3_el1, x8 - msr dbgbcr2_el1, x7 - msr dbgbcr1_el1, x6 - msr dbgbcr0_el1, x5 - - add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1) - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 -1: - ldr x20, [x3, #(15 * 8)] - ldr x19, [x3, #(14 * 8)] - ldr x18, [x3, #(13 * 8)] - ldr x17, [x3, #(12 * 8)] - ldr x16, [x3, #(11 * 8)] - ldr x15, [x3, #(10 * 8)] - ldr x14, [x3, #(9 * 8)] - ldr x13, [x3, #(8 * 8)] - ldr x12, [x3, #(7 * 8)] - ldr x11, [x3, #(6 * 8)] - ldr x10, [x3, #(5 * 8)] - ldr x9, [x3, #(4 * 8)] - ldr x8, [x3, #(3 * 8)] - ldr x7, [x3, #(2 * 8)] - ldr x6, [x3, #(1 * 8)] - ldr x5, [x3, #(0 * 8)] - - adr x26, 1f - add x26, x26, x24, lsl #2 - br x26 -1: - msr dbgbvr15_el1, x20 - msr dbgbvr14_el1, x19 - msr dbgbvr13_el1, x18 - msr dbgbvr12_el1, x17 - msr dbgbvr11_el1, x16 - msr dbgbvr10_el1, x15 - msr dbgbvr9_el1, x14 - msr dbgbvr8_el1, x13 - msr dbgbvr7_el1, x12 - msr dbgbvr6_el1, x11 - msr dbgbvr5_el1, x10 - msr dbgbvr4_el1, x9 - msr dbgbvr3_el1, x8 - msr dbgbvr2_el1, x7 - msr dbgbvr1_el1, x6 - msr dbgbvr0_el1, x5 - - add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1) - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 -1: - ldr x20, [x3, #(15 * 8)] - ldr x19, [x3, #(14 * 8)] - ldr x18, [x3, #(13 * 8)] - ldr x17, [x3, #(12 * 8)] - ldr x16, [x3, #(11 * 8)] - ldr x15, [x3, #(10 * 8)] - ldr x14, [x3, #(9 * 8)] - ldr x13, [x3, #(8 * 8)] - ldr x12, [x3, #(7 * 8)] - ldr x11, [x3, #(6 * 8)] - ldr x10, [x3, #(5 * 8)] - ldr x9, [x3, #(4 * 8)] - ldr x8, [x3, #(3 * 8)] - ldr x7, [x3, #(2 * 8)] - ldr x6, [x3, #(1 * 8)] - ldr x5, [x3, #(0 * 8)] - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 -1: - msr dbgwcr15_el1, x20 - msr dbgwcr14_el1, x19 - msr dbgwcr13_el1, x18 - msr dbgwcr12_el1, x17 - msr dbgwcr11_el1, x16 - msr dbgwcr10_el1, x15 - msr dbgwcr9_el1, x14 - msr dbgwcr8_el1, x13 - msr dbgwcr7_el1, x12 - msr dbgwcr6_el1, x11 - msr dbgwcr5_el1, x10 - msr dbgwcr4_el1, x9 - msr dbgwcr3_el1, x8 - msr dbgwcr2_el1, x7 - msr dbgwcr1_el1, x6 - msr dbgwcr0_el1, x5 - - add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1) - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 -1: - ldr x20, [x3, #(15 * 8)] - ldr x19, [x3, #(14 * 8)] - ldr x18, [x3, #(13 * 8)] - ldr x17, [x3, #(12 * 8)] - ldr x16, [x3, #(11 * 8)] - ldr x15, [x3, #(10 * 8)] - ldr x14, [x3, #(9 * 8)] - ldr x13, [x3, #(8 * 8)] - ldr x12, [x3, #(7 * 8)] - ldr x11, [x3, #(6 * 8)] - ldr x10, [x3, #(5 * 8)] - ldr x9, [x3, #(4 * 8)] - ldr x8, [x3, #(3 * 8)] - ldr x7, [x3, #(2 * 8)] - ldr x6, [x3, #(1 * 8)] - ldr x5, [x3, #(0 * 8)] - - adr x26, 1f - add x26, x26, x25, lsl #2 - br x26 -1: - msr dbgwvr15_el1, x20 - msr dbgwvr14_el1, x19 - msr dbgwvr13_el1, x18 - msr dbgwvr12_el1, x17 - msr dbgwvr11_el1, x16 - msr dbgwvr10_el1, x15 - msr dbgwvr9_el1, x14 - msr dbgwvr8_el1, x13 - msr dbgwvr7_el1, x12 - msr dbgwvr6_el1, x11 - msr dbgwvr5_el1, x10 - msr dbgwvr4_el1, x9 - msr dbgwvr3_el1, x8 - msr dbgwvr2_el1, x7 - msr dbgwvr1_el1, x6 - msr dbgwvr0_el1, x5 - - ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] - msr mdccint_el1, x21 -.endm - -.macro skip_32bit_state tmp, target - // Skip 32bit state if not needed - mrs \tmp, hcr_el2 - tbnz \tmp, #HCR_RW_SHIFT, \target -.endm - -.macro skip_tee_state tmp, target - // Skip ThumbEE state if not needed - mrs \tmp, id_pfr0_el1 - tbz \tmp, #12, \target -.endm - -.macro skip_debug_state tmp, target - ldr \tmp, [x0, #VCPU_DEBUG_FLAGS] - tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target -.endm - -.macro compute_debug_state target - // Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY - // is set, we do a full save/restore cycle and disable trapping. - add x25, x0, #VCPU_CONTEXT - - // Check the state of MDSCR_EL1 - ldr x25, [x25, #CPU_SYSREG_OFFSET(MDSCR_EL1)] - and x26, x25, #DBG_MDSCR_KDE - and x25, x25, #DBG_MDSCR_MDE - adds xzr, x25, x26 - b.eq 9998f // Nothing to see there - - // If any interesting bits was set, we must set the flag - mov x26, #KVM_ARM64_DEBUG_DIRTY - str x26, [x0, #VCPU_DEBUG_FLAGS] - b 9999f // Don't skip restore - -9998: - // Otherwise load the flags from memory in case we recently - // trapped - skip_debug_state x25, \target -9999: -.endm - -.macro save_guest_32bit_state - skip_32bit_state x3, 1f - - add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT) - mrs x4, spsr_abt - mrs x5, spsr_und - mrs x6, spsr_irq - mrs x7, spsr_fiq - stp x4, x5, [x3] - stp x6, x7, [x3, #16] - - add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) - mrs x4, dacr32_el2 - mrs x5, ifsr32_el2 - mrs x6, fpexc32_el2 - stp x4, x5, [x3] - str x6, [x3, #16] - - skip_debug_state x8, 2f - mrs x7, dbgvcr32_el2 - str x7, [x3, #24] -2: - skip_tee_state x8, 1f - - add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1) - mrs x4, teecr32_el1 - mrs x5, teehbr32_el1 - stp x4, x5, [x3] -1: -.endm - -.macro restore_guest_32bit_state - skip_32bit_state x3, 1f - - add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT) - ldp x4, x5, [x3] - ldp x6, x7, [x3, #16] - msr spsr_abt, x4 - msr spsr_und, x5 - msr spsr_irq, x6 - msr spsr_fiq, x7 - - add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) - ldp x4, x5, [x3] - ldr x6, [x3, #16] - msr dacr32_el2, x4 - msr ifsr32_el2, x5 - msr fpexc32_el2, x6 - - skip_debug_state x8, 2f - ldr x7, [x3, #24] - msr dbgvcr32_el2, x7 -2: - skip_tee_state x8, 1f - - add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1) - ldp x4, x5, [x3] - msr teecr32_el1, x4 - msr teehbr32_el1, x5 -1: -.endm - -.macro activate_traps - ldr x2, [x0, #VCPU_HCR_EL2] - msr hcr_el2, x2 - mov x2, #CPTR_EL2_TTA - msr cptr_el2, x2 - - mov x2, #(1 << 15) // Trap CP15 Cr=15 - msr hstr_el2, x2 - - mrs x2, mdcr_el2 - and x2, x2, #MDCR_EL2_HPMN_MASK - orr x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR) - orr x2, x2, #(MDCR_EL2_TDRA | MDCR_EL2_TDOSA) - - // Check for KVM_ARM64_DEBUG_DIRTY, and set debug to trap - // if not dirty. - ldr x3, [x0, #VCPU_DEBUG_FLAGS] - tbnz x3, #KVM_ARM64_DEBUG_DIRTY_SHIFT, 1f - orr x2, x2, #MDCR_EL2_TDA -1: - msr mdcr_el2, x2 -.endm - -.macro deactivate_traps - mov x2, #HCR_RW - msr hcr_el2, x2 - msr cptr_el2, xzr - msr hstr_el2, xzr - - mrs x2, mdcr_el2 - and x2, x2, #MDCR_EL2_HPMN_MASK - msr mdcr_el2, x2 -.endm - -.macro activate_vm - ldr x1, [x0, #VCPU_KVM] - kern_hyp_va x1 - ldr x2, [x1, #KVM_VTTBR] - msr vttbr_el2, x2 -.endm - -.macro deactivate_vm - msr vttbr_el2, xzr -.endm - -/* - * Call into the vgic backend for state saving - */ -.macro save_vgic_state - adr x24, __vgic_sr_vectors - ldr x24, [x24, VGIC_SAVE_FN] - kern_hyp_va x24 - blr x24 - mrs x24, hcr_el2 - mov x25, #HCR_INT_OVERRIDE - neg x25, x25 - and x24, x24, x25 - msr hcr_el2, x24 -.endm - -/* - * Call into the vgic backend for state restoring - */ -.macro restore_vgic_state - mrs x24, hcr_el2 - ldr x25, [x0, #VCPU_IRQ_LINES] - orr x24, x24, #HCR_INT_OVERRIDE - orr x24, x24, x25 - msr hcr_el2, x24 - adr x24, __vgic_sr_vectors - ldr x24, [x24, #VGIC_RESTORE_FN] - kern_hyp_va x24 - blr x24 -.endm - -.macro save_timer_state - // x0: vcpu pointer - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr w3, [x2, #KVM_TIMER_ENABLED] - cbz w3, 1f - - mrs x3, cntv_ctl_el0 - and x3, x3, #3 - str w3, [x0, #VCPU_TIMER_CNTV_CTL] - - isb - - mrs x3, cntv_cval_el0 - str x3, [x0, #VCPU_TIMER_CNTV_CVAL] - -1: - // Disable the virtual timer - msr cntv_ctl_el0, xzr - - // Allow physical timer/counter access for the host - mrs x2, cnthctl_el2 - orr x2, x2, #3 - msr cnthctl_el2, x2 - - // Clear cntvoff for the host - msr cntvoff_el2, xzr -.endm - -.macro restore_timer_state - // x0: vcpu pointer - // Disallow physical timer access for the guest - // Physical counter access is allowed - mrs x2, cnthctl_el2 - orr x2, x2, #1 - bic x2, x2, #2 - msr cnthctl_el2, x2 - - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr w3, [x2, #KVM_TIMER_ENABLED] - cbz w3, 1f - - ldr x3, [x2, #KVM_TIMER_CNTVOFF] - msr cntvoff_el2, x3 - ldr x2, [x0, #VCPU_TIMER_CNTV_CVAL] - msr cntv_cval_el0, x2 - isb - - ldr w2, [x0, #VCPU_TIMER_CNTV_CTL] - and x2, x2, #3 - msr cntv_ctl_el0, x2 -1: -.endm - -__save_sysregs: - save_sysregs - ret - -__restore_sysregs: - restore_sysregs - ret - -__save_debug: - save_debug - ret - -__restore_debug: - restore_debug - ret - -__save_fpsimd: - save_fpsimd - ret - -__restore_fpsimd: - restore_fpsimd - ret - -/* - * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu); - * - * This is the world switch. The first half of the function - * deals with entering the guest, and anything from __kvm_vcpu_return - * to the end of the function deals with reentering the host. - * On the enter path, only x0 (vcpu pointer) must be preserved until - * the last moment. On the exit path, x0 (vcpu pointer) and x1 (exception - * code) must both be preserved until the epilogue. - * In both cases, x2 points to the CPU context we're saving/restoring from/to. - */ -ENTRY(__kvm_vcpu_run) - kern_hyp_va x0 - msr tpidr_el2, x0 // Save the vcpu register - - // Host context - ldr x2, [x0, #VCPU_HOST_CONTEXT] - kern_hyp_va x2 - - save_host_regs - bl __save_fpsimd - bl __save_sysregs - - compute_debug_state 1f - bl __save_debug -1: - activate_traps - activate_vm - - restore_vgic_state - restore_timer_state - - // Guest context - add x2, x0, #VCPU_CONTEXT - - // We must restore the 32-bit state before the sysregs, thanks - // to Cortex-A57 erratum #852523. - restore_guest_32bit_state - bl __restore_sysregs - bl __restore_fpsimd - - skip_debug_state x3, 1f - bl __restore_debug -1: - restore_guest_regs - - // That's it, no more messing around. - eret - -__kvm_vcpu_return: - // Assume x0 is the vcpu pointer, x1 the return code - // Guest's x0-x3 are on the stack - - // Guest context - add x2, x0, #VCPU_CONTEXT - - save_guest_regs - bl __save_fpsimd - bl __save_sysregs - - skip_debug_state x3, 1f - bl __save_debug -1: - save_guest_32bit_state - - save_timer_state - save_vgic_state - - deactivate_traps - deactivate_vm - - // Host context - ldr x2, [x0, #VCPU_HOST_CONTEXT] - kern_hyp_va x2 - - bl __restore_sysregs - bl __restore_fpsimd - - skip_debug_state x3, 1f - // Clear the dirty flag for the next run, as all the state has - // already been saved. Note that we nuke the whole 64bit word. - // If we ever add more flags, we'll have to be more careful... - str xzr, [x0, #VCPU_DEBUG_FLAGS] - bl __restore_debug -1: - restore_host_regs - - mov x0, x1 - ret -END(__kvm_vcpu_run) - -// void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); -ENTRY(__kvm_tlb_flush_vmid_ipa) - dsb ishst - - kern_hyp_va x0 - ldr x2, [x0, #KVM_VTTBR] - msr vttbr_el2, x2 - isb - - /* - * We could do so much better if we had the VA as well. - * Instead, we invalidate Stage-2 for this IPA, and the - * whole of Stage-1. Weep... - */ - lsr x1, x1, #12 - tlbi ipas2e1is, x1 - /* - * We have to ensure completion of the invalidation at Stage-2, - * since a table walk on another CPU could refill a TLB with a - * complete (S1 + S2) walk based on the old Stage-2 mapping if - * the Stage-1 invalidation happened first. - */ - dsb ish - tlbi vmalle1is - dsb ish - isb - - msr vttbr_el2, xzr - ret -ENDPROC(__kvm_tlb_flush_vmid_ipa) - -/** - * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs - * @struct kvm *kvm - pointer to kvm structure - * - * Invalidates all Stage 1 and 2 TLB entries for current VMID. - */ -ENTRY(__kvm_tlb_flush_vmid) - dsb ishst - - kern_hyp_va x0 - ldr x2, [x0, #KVM_VTTBR] - msr vttbr_el2, x2 - isb - - tlbi vmalls12e1is - dsb ish - isb - - msr vttbr_el2, xzr - ret -ENDPROC(__kvm_tlb_flush_vmid) - -ENTRY(__kvm_flush_vm_context) - dsb ishst - tlbi alle1is - ic ialluis - dsb ish - ret -ENDPROC(__kvm_flush_vm_context) - - // struct vgic_sr_vectors __vgi_sr_vectors; - .align 3 -ENTRY(__vgic_sr_vectors) - .skip VGIC_SR_VECTOR_SZ -ENDPROC(__vgic_sr_vectors) - -__kvm_hyp_panic: - // Guess the context by looking at VTTBR: - // If zero, then we're already a host. - // Otherwise restore a minimal host context before panicing. - mrs x0, vttbr_el2 - cbz x0, 1f - - mrs x0, tpidr_el2 - - deactivate_traps - deactivate_vm - - ldr x2, [x0, #VCPU_HOST_CONTEXT] - kern_hyp_va x2 - - bl __restore_sysregs - -1: adr x0, __hyp_panic_str - adr x1, 2f - ldp x2, x3, [x1] - sub x0, x0, x2 - add x0, x0, x3 - mrs x1, spsr_el2 - mrs x2, elr_el2 - mrs x3, esr_el2 - mrs x4, far_el2 - mrs x5, hpfar_el2 - mrs x6, par_el1 - mrs x7, tpidr_el2 - - mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ - PSR_MODE_EL1h) - msr spsr_el2, lr - ldr lr, =panic - msr elr_el2, lr - eret - - .align 3 -2: .quad HYP_PAGE_OFFSET - .quad PAGE_OFFSET -ENDPROC(__kvm_hyp_panic) - -__hyp_panic_str: - .ascii "HYP panic:\nPS:%08x PC:%p ESR:%p\nFAR:%p HPFAR:%p PAR:%p\nVCPU:%p\n\0" - - .align 2 +#include <asm/cpufeature.h> /* * u64 kvm_call_hyp(void *hypfn, ...); @@ -1130,172 +33,24 @@ __hyp_panic_str: * passed as x0, x1, and x2 (a maximum of 3 arguments in addition to the * function pointer can be passed). The function being called must be mapped * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are - * passed in r0 and r1. + * passed in x0. * - * A function pointer with a value of 0 has a special meaning, and is - * used to implement __hyp_get_vectors in the same way as in + * A function pointer with a value less than 0xfff has a special meaning, + * and is used to implement __hyp_get_vectors in the same way as in * arch/arm64/kernel/hyp_stub.S. + * HVC behaves as a 'bl' call and will clobber lr. */ ENTRY(kvm_call_hyp) +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN + str lr, [sp, #-16]! hvc #0 - ret + ldr lr, [sp], #16 + ret +alternative_else + b __vhe_hyp_call + nop + nop + nop +alternative_endif ENDPROC(kvm_call_hyp) -.macro invalid_vector label, target - .align 2 -\label: - b \target -ENDPROC(\label) -.endm - - /* None of these should ever happen */ - invalid_vector el2t_sync_invalid, __kvm_hyp_panic - invalid_vector el2t_irq_invalid, __kvm_hyp_panic - invalid_vector el2t_fiq_invalid, __kvm_hyp_panic - invalid_vector el2t_error_invalid, __kvm_hyp_panic - invalid_vector el2h_sync_invalid, __kvm_hyp_panic - invalid_vector el2h_irq_invalid, __kvm_hyp_panic - invalid_vector el2h_fiq_invalid, __kvm_hyp_panic - invalid_vector el2h_error_invalid, __kvm_hyp_panic - invalid_vector el1_sync_invalid, __kvm_hyp_panic - invalid_vector el1_irq_invalid, __kvm_hyp_panic - invalid_vector el1_fiq_invalid, __kvm_hyp_panic - invalid_vector el1_error_invalid, __kvm_hyp_panic - -el1_sync: // Guest trapped into EL2 - push x0, x1 - push x2, x3 - - mrs x1, esr_el2 - lsr x2, x1, #ESR_ELx_EC_SHIFT - - cmp x2, #ESR_ELx_EC_HVC64 - b.ne el1_trap - - mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest - cbnz x3, el1_trap // called HVC - - /* Here, we're pretty sure the host called HVC. */ - pop x2, x3 - pop x0, x1 - - /* Check for __hyp_get_vectors */ - cbnz x0, 1f - mrs x0, vbar_el2 - b 2f - -1: push lr, xzr - - /* - * Compute the function address in EL2, and shuffle the parameters. - */ - kern_hyp_va x0 - mov lr, x0 - mov x0, x1 - mov x1, x2 - mov x2, x3 - blr lr - - pop lr, xzr -2: eret - -el1_trap: - /* - * x1: ESR - * x2: ESR_EC - */ - cmp x2, #ESR_ELx_EC_DABT_LOW - mov x0, #ESR_ELx_EC_IABT_LOW - ccmp x2, x0, #4, ne - b.ne 1f // Not an abort we care about - - /* This is an abort. Check for permission fault */ - and x2, x1, #ESR_ELx_FSC_TYPE - cmp x2, #FSC_PERM - b.ne 1f // Not a permission fault - - /* - * Check for Stage-1 page table walk, which is guaranteed - * to give a valid HPFAR_EL2. - */ - tbnz x1, #7, 1f // S1PTW is set - - /* Preserve PAR_EL1 */ - mrs x3, par_el1 - push x3, xzr - - /* - * Permission fault, HPFAR_EL2 is invalid. - * Resolve the IPA the hard way using the guest VA. - * Stage-1 translation already validated the memory access rights. - * As such, we can use the EL1 translation regime, and don't have - * to distinguish between EL0 and EL1 access. - */ - mrs x2, far_el2 - at s1e1r, x2 - isb - - /* Read result */ - mrs x3, par_el1 - pop x0, xzr // Restore PAR_EL1 from the stack - msr par_el1, x0 - tbnz x3, #0, 3f // Bail out if we failed the translation - ubfx x3, x3, #12, #36 // Extract IPA - lsl x3, x3, #4 // and present it like HPFAR - b 2f - -1: mrs x3, hpfar_el2 - mrs x2, far_el2 - -2: mrs x0, tpidr_el2 - str w1, [x0, #VCPU_ESR_EL2] - str x2, [x0, #VCPU_FAR_EL2] - str x3, [x0, #VCPU_HPFAR_EL2] - - mov x1, #ARM_EXCEPTION_TRAP - b __kvm_vcpu_return - - /* - * Translation failed. Just return to the guest and - * let it fault again. Another CPU is probably playing - * behind our back. - */ -3: pop x2, x3 - pop x0, x1 - - eret - -el1_irq: - push x0, x1 - push x2, x3 - mrs x0, tpidr_el2 - mov x1, #ARM_EXCEPTION_IRQ - b __kvm_vcpu_return - - .ltorg - - .align 11 - -ENTRY(__kvm_hyp_vector) - ventry el2t_sync_invalid // Synchronous EL2t - ventry el2t_irq_invalid // IRQ EL2t - ventry el2t_fiq_invalid // FIQ EL2t - ventry el2t_error_invalid // Error EL2t - - ventry el2h_sync_invalid // Synchronous EL2h - ventry el2h_irq_invalid // IRQ EL2h - ventry el2h_fiq_invalid // FIQ EL2h - ventry el2h_error_invalid // Error EL2h - - ventry el1_sync // Synchronous 64-bit EL1 - ventry el1_irq // IRQ 64-bit EL1 - ventry el1_fiq_invalid // FIQ 64-bit EL1 - ventry el1_error_invalid // Error 64-bit EL1 - - ventry el1_sync // Synchronous 32-bit EL1 - ventry el1_irq // IRQ 32-bit EL1 - ventry el1_fiq_invalid // FIQ 32-bit EL1 - ventry el1_error_invalid // Error 32-bit EL1 -ENDPROC(__kvm_hyp_vector) - - .popsection diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile new file mode 100644 index 000000000000..b3834fae3f0b --- /dev/null +++ b/arch/arm64/kvm/hyp/Makefile @@ -0,0 +1,14 @@ +# +# Makefile for Kernel-based Virtual Machine module, HYP part +# + +obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += entry.o +obj-$(CONFIG_KVM_ARM_HOST) += switch.o +obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o +obj-$(CONFIG_KVM_ARM_HOST) += tlb.o +obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o +obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c new file mode 100644 index 000000000000..9b7b9a9800d1 --- /dev/null +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_mmu.h> +#include <asm/debug-monitors.h> + +#include "hyp.h" + +#define read_debug(r,n) read_sysreg(r##n##_el1) +#define write_debug(v,r,n) write_sysreg(v, r##n##_el1) + +#define save_debug(ptr,reg,nr) \ + switch (nr) { \ + case 15: ptr[15] = read_debug(reg, 15); \ + case 14: ptr[14] = read_debug(reg, 14); \ + case 13: ptr[13] = read_debug(reg, 13); \ + case 12: ptr[12] = read_debug(reg, 12); \ + case 11: ptr[11] = read_debug(reg, 11); \ + case 10: ptr[10] = read_debug(reg, 10); \ + case 9: ptr[9] = read_debug(reg, 9); \ + case 8: ptr[8] = read_debug(reg, 8); \ + case 7: ptr[7] = read_debug(reg, 7); \ + case 6: ptr[6] = read_debug(reg, 6); \ + case 5: ptr[5] = read_debug(reg, 5); \ + case 4: ptr[4] = read_debug(reg, 4); \ + case 3: ptr[3] = read_debug(reg, 3); \ + case 2: ptr[2] = read_debug(reg, 2); \ + case 1: ptr[1] = read_debug(reg, 1); \ + default: ptr[0] = read_debug(reg, 0); \ + } + +#define restore_debug(ptr,reg,nr) \ + switch (nr) { \ + case 15: write_debug(ptr[15], reg, 15); \ + case 14: write_debug(ptr[14], reg, 14); \ + case 13: write_debug(ptr[13], reg, 13); \ + case 12: write_debug(ptr[12], reg, 12); \ + case 11: write_debug(ptr[11], reg, 11); \ + case 10: write_debug(ptr[10], reg, 10); \ + case 9: write_debug(ptr[9], reg, 9); \ + case 8: write_debug(ptr[8], reg, 8); \ + case 7: write_debug(ptr[7], reg, 7); \ + case 6: write_debug(ptr[6], reg, 6); \ + case 5: write_debug(ptr[5], reg, 5); \ + case 4: write_debug(ptr[4], reg, 4); \ + case 3: write_debug(ptr[3], reg, 3); \ + case 2: write_debug(ptr[2], reg, 2); \ + case 1: write_debug(ptr[1], reg, 1); \ + default: write_debug(ptr[0], reg, 0); \ + } + +void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) +{ + u64 aa64dfr0; + int brps, wrps; + + if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)) + return; + + aa64dfr0 = read_sysreg(id_aa64dfr0_el1); + brps = (aa64dfr0 >> 12) & 0xf; + wrps = (aa64dfr0 >> 20) & 0xf; + + save_debug(dbg->dbg_bcr, dbgbcr, brps); + save_debug(dbg->dbg_bvr, dbgbvr, brps); + save_debug(dbg->dbg_wcr, dbgwcr, wrps); + save_debug(dbg->dbg_wvr, dbgwvr, wrps); + + ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1); +} + +void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) +{ + u64 aa64dfr0; + int brps, wrps; + + if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)) + return; + + aa64dfr0 = read_sysreg(id_aa64dfr0_el1); + + brps = (aa64dfr0 >> 12) & 0xf; + wrps = (aa64dfr0 >> 20) & 0xf; + + restore_debug(dbg->dbg_bcr, dbgbcr, brps); + restore_debug(dbg->dbg_bvr, dbgbvr, brps); + restore_debug(dbg->dbg_wcr, dbgwcr, wrps); + restore_debug(dbg->dbg_wvr, dbgwvr, wrps); + + write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1); +} + +void __hyp_text __debug_cond_save_host_state(struct kvm_vcpu *vcpu) +{ + /* If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY is set, perform + * a full save/restore cycle. */ + if ((vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_KDE) || + (vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_MDE)) + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + + __debug_save_state(vcpu, &vcpu->arch.host_debug_state, + kern_hyp_va(vcpu->arch.host_cpu_context)); +} + +void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu) +{ + __debug_restore_state(vcpu, &vcpu->arch.host_debug_state, + kern_hyp_va(vcpu->arch.host_cpu_context)); + + if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) + vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY; +} + +u32 __hyp_text __debug_read_mdcr_el2(void) +{ + return read_sysreg(mdcr_el2); +} + +__alias(__debug_read_mdcr_el2) +u32 __weak __kvm_get_mdcr_el2(void); diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S new file mode 100644 index 000000000000..ac65c52d6275 --- /dev/null +++ b/arch/arm64/kvm/hyp/entry.S @@ -0,0 +1,180 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> + +#include <asm/asm-offsets.h> +#include <asm/assembler.h> +#include <asm/fpsimdmacros.h> +#include <asm/kvm.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_mmu.h> + +#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) +#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) +#define CPU_SYSREG_OFFSET(x) (CPU_SYSREGS + 8*x) + + .text + .pushsection .hyp.text, "ax" + +.macro save_callee_saved_regs ctxt + stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] + stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] + stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] + stp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] + stp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] + stp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] +.endm + +.macro restore_callee_saved_regs ctxt + ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] + ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] + ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] + ldp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] + ldp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] + ldp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] +.endm + +/* + * u64 __guest_enter(struct kvm_vcpu *vcpu, + * struct kvm_cpu_context *host_ctxt); + */ +ENTRY(__guest_enter) + // x0: vcpu + // x1: host/guest context + // x2-x18: clobbered by macros + + // Store the host regs + save_callee_saved_regs x1 + + // Preserve vcpu & host_ctxt for use at exit time + stp x0, x1, [sp, #-16]! + + add x1, x0, #VCPU_CONTEXT + + // Prepare x0-x1 for later restore by pushing them onto the stack + ldp x2, x3, [x1, #CPU_XREG_OFFSET(0)] + stp x2, x3, [sp, #-16]! + + // x2-x18 + ldp x2, x3, [x1, #CPU_XREG_OFFSET(2)] + ldp x4, x5, [x1, #CPU_XREG_OFFSET(4)] + ldp x6, x7, [x1, #CPU_XREG_OFFSET(6)] + ldp x8, x9, [x1, #CPU_XREG_OFFSET(8)] + ldp x10, x11, [x1, #CPU_XREG_OFFSET(10)] + ldp x12, x13, [x1, #CPU_XREG_OFFSET(12)] + ldp x14, x15, [x1, #CPU_XREG_OFFSET(14)] + ldp x16, x17, [x1, #CPU_XREG_OFFSET(16)] + ldr x18, [x1, #CPU_XREG_OFFSET(18)] + + // x19-x29, lr + restore_callee_saved_regs x1 + + // Last bits of the 64bit state + ldp x0, x1, [sp], #16 + + // Do not touch any register after this! + eret +ENDPROC(__guest_enter) + +ENTRY(__guest_exit) + // x0: vcpu + // x1: return code + // x2-x3: free + // x4-x29,lr: vcpu regs + // vcpu x0-x3 on the stack + + add x2, x0, #VCPU_CONTEXT + + stp x4, x5, [x2, #CPU_XREG_OFFSET(4)] + stp x6, x7, [x2, #CPU_XREG_OFFSET(6)] + stp x8, x9, [x2, #CPU_XREG_OFFSET(8)] + stp x10, x11, [x2, #CPU_XREG_OFFSET(10)] + stp x12, x13, [x2, #CPU_XREG_OFFSET(12)] + stp x14, x15, [x2, #CPU_XREG_OFFSET(14)] + stp x16, x17, [x2, #CPU_XREG_OFFSET(16)] + str x18, [x2, #CPU_XREG_OFFSET(18)] + + ldp x6, x7, [sp], #16 // x2, x3 + ldp x4, x5, [sp], #16 // x0, x1 + + stp x4, x5, [x2, #CPU_XREG_OFFSET(0)] + stp x6, x7, [x2, #CPU_XREG_OFFSET(2)] + + save_callee_saved_regs x2 + + // Restore vcpu & host_ctxt from the stack + // (preserving return code in x1) + ldp x0, x2, [sp], #16 + // Now restore the host regs + restore_callee_saved_regs x2 + + mov x0, x1 + ret +ENDPROC(__guest_exit) + +ENTRY(__fpsimd_guest_restore) + stp x4, lr, [sp, #-16]! + + mrs x2, cptr_el2 + bic x2, x2, #CPTR_EL2_TFP + msr cptr_el2, x2 + isb + + mrs x3, tpidr_el2 + + ldr x0, [x3, #VCPU_HOST_CONTEXT] + kern_hyp_va x0 + add x0, x0, #CPU_GP_REG_OFFSET(CPU_FP_REGS) + bl __fpsimd_save_state + + add x2, x3, #VCPU_CONTEXT + add x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) + bl __fpsimd_restore_state + + // Skip restoring fpexc32 for AArch64 guests + mrs x1, hcr_el2 + tbnz x1, #HCR_RW_SHIFT, 1f + ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)] + msr fpexc32_el2, x4 +1: + ldp x4, lr, [sp], #16 + ldp x2, x3, [sp], #16 + ldp x0, x1, [sp], #16 + + eret +ENDPROC(__fpsimd_guest_restore) + +/* + * When using the extended idmap, we don't have a trampoline page we can use + * while we switch pages tables during __kvm_hyp_reset. Accessing the idmap + * directly would be ideal, but if we're using the extended idmap then the + * idmap is located above HYP_PAGE_OFFSET, and the address will be masked by + * kvm_call_hyp using kern_hyp_va. + * + * x0: HYP boot pgd + * x1: HYP phys_idmap_start + */ +ENTRY(__extended_idmap_trampoline) + mov x4, x1 + adr_l x3, __kvm_hyp_reset + + /* insert __kvm_hyp_reset()s offset into phys_idmap_start */ + bfi x4, x3, #0, #PAGE_SHIFT + br x4 +ENDPROC(__extended_idmap_trampoline) diff --git a/arch/arm64/kernel/psci-call.S b/arch/arm64/kvm/hyp/fpsimd.S index cf83e61cd3b5..da3f22c7f14a 100644 --- a/arch/arm64/kernel/psci-call.S +++ b/arch/arm64/kvm/hyp/fpsimd.S @@ -1,4 +1,7 @@ /* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -8,21 +11,23 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * Copyright (C) 2015 ARM Limited - * - * Author: Will Deacon <will.deacon@arm.com> + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <linux/linkage.h> -/* int __invoke_psci_fn_hvc(u64 function_id, u64 arg0, u64 arg1, u64 arg2) */ -ENTRY(__invoke_psci_fn_hvc) - hvc #0 +#include <asm/fpsimdmacros.h> + + .text + .pushsection .hyp.text, "ax" + +ENTRY(__fpsimd_save_state) + fpsimd_save x0, 1 ret -ENDPROC(__invoke_psci_fn_hvc) +ENDPROC(__fpsimd_save_state) -/* int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1, u64 arg2) */ -ENTRY(__invoke_psci_fn_smc) - smc #0 +ENTRY(__fpsimd_restore_state) + fpsimd_restore x0, 1 ret -ENDPROC(__invoke_psci_fn_smc) +ENDPROC(__fpsimd_restore_state) diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S new file mode 100644 index 000000000000..af82312bc6e1 --- /dev/null +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -0,0 +1,235 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> + +#include <asm/alternative.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/cpufeature.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_mmu.h> + + .text + .pushsection .hyp.text, "ax" + +.macro save_x0_to_x3 + stp x0, x1, [sp, #-16]! + stp x2, x3, [sp, #-16]! +.endm + +.macro restore_x0_to_x3 + ldp x2, x3, [sp], #16 + ldp x0, x1, [sp], #16 +.endm + +.macro do_el2_call + /* + * Shuffle the parameters before calling the function + * pointed to in x0. Assumes parameters in x[1,2,3]. + */ + mov lr, x0 + mov x0, x1 + mov x1, x2 + mov x2, x3 + blr lr +.endm + +ENTRY(__vhe_hyp_call) + str lr, [sp, #-16]! + do_el2_call + ldr lr, [sp], #16 + /* + * We used to rely on having an exception return to get + * an implicit isb. In the E2H case, we don't have it anymore. + * rather than changing all the leaf functions, just do it here + * before returning to the rest of the kernel. + */ + isb + ret +ENDPROC(__vhe_hyp_call) + +el1_sync: // Guest trapped into EL2 + save_x0_to_x3 + + mrs x1, esr_el2 + lsr x2, x1, #ESR_ELx_EC_SHIFT + + cmp x2, #ESR_ELx_EC_HVC64 + b.ne el1_trap + + mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest + cbnz x3, el1_trap // called HVC + + /* Here, we're pretty sure the host called HVC. */ + restore_x0_to_x3 + + cmp x0, #HVC_GET_VECTORS + b.ne 1f + mrs x0, vbar_el2 + b 2f + +1: + /* + * Perform the EL2 call + */ + kern_hyp_va x0 + do_el2_call + +2: eret + +el1_trap: + /* + * x1: ESR + * x2: ESR_EC + */ + + /* Guest accessed VFP/SIMD registers, save host, restore Guest */ + cmp x2, #ESR_ELx_EC_FP_ASIMD + b.eq __fpsimd_guest_restore + + cmp x2, #ESR_ELx_EC_DABT_LOW + mov x0, #ESR_ELx_EC_IABT_LOW + ccmp x2, x0, #4, ne + b.ne 1f // Not an abort we care about + + /* This is an abort. Check for permission fault */ +alternative_if_not ARM64_WORKAROUND_834220 + and x2, x1, #ESR_ELx_FSC_TYPE + cmp x2, #FSC_PERM + b.ne 1f // Not a permission fault +alternative_else + nop // Use the permission fault path to + nop // check for a valid S1 translation, + nop // regardless of the ESR value. +alternative_endif + + /* + * Check for Stage-1 page table walk, which is guaranteed + * to give a valid HPFAR_EL2. + */ + tbnz x1, #7, 1f // S1PTW is set + + /* Preserve PAR_EL1 */ + mrs x3, par_el1 + stp x3, xzr, [sp, #-16]! + + /* + * Permission fault, HPFAR_EL2 is invalid. + * Resolve the IPA the hard way using the guest VA. + * Stage-1 translation already validated the memory access rights. + * As such, we can use the EL1 translation regime, and don't have + * to distinguish between EL0 and EL1 access. + */ + mrs x2, far_el2 + at s1e1r, x2 + isb + + /* Read result */ + mrs x3, par_el1 + ldp x0, xzr, [sp], #16 // Restore PAR_EL1 from the stack + msr par_el1, x0 + tbnz x3, #0, 3f // Bail out if we failed the translation + ubfx x3, x3, #12, #36 // Extract IPA + lsl x3, x3, #4 // and present it like HPFAR + b 2f + +1: mrs x3, hpfar_el2 + mrs x2, far_el2 + +2: mrs x0, tpidr_el2 + str w1, [x0, #VCPU_ESR_EL2] + str x2, [x0, #VCPU_FAR_EL2] + str x3, [x0, #VCPU_HPFAR_EL2] + + mov x1, #ARM_EXCEPTION_TRAP + b __guest_exit + + /* + * Translation failed. Just return to the guest and + * let it fault again. Another CPU is probably playing + * behind our back. + */ +3: restore_x0_to_x3 + + eret + +el1_irq: + save_x0_to_x3 + mrs x0, tpidr_el2 + mov x1, #ARM_EXCEPTION_IRQ + b __guest_exit + +ENTRY(__hyp_do_panic) + mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ + PSR_MODE_EL1h) + msr spsr_el2, lr + ldr lr, =panic + msr elr_el2, lr + eret +ENDPROC(__hyp_do_panic) + +.macro invalid_vector label, target = __hyp_panic + .align 2 +\label: + b \target +ENDPROC(\label) +.endm + + /* None of these should ever happen */ + invalid_vector el2t_sync_invalid + invalid_vector el2t_irq_invalid + invalid_vector el2t_fiq_invalid + invalid_vector el2t_error_invalid + invalid_vector el2h_sync_invalid + invalid_vector el2h_irq_invalid + invalid_vector el2h_fiq_invalid + invalid_vector el2h_error_invalid + invalid_vector el1_sync_invalid + invalid_vector el1_irq_invalid + invalid_vector el1_fiq_invalid + invalid_vector el1_error_invalid + + .ltorg + + .align 11 + + .weak __kvm_hyp_vector +ENTRY(__kvm_hyp_vector) +ENTRY(__hyp_vector) + ventry el2t_sync_invalid // Synchronous EL2t + ventry el2t_irq_invalid // IRQ EL2t + ventry el2t_fiq_invalid // FIQ EL2t + ventry el2t_error_invalid // Error EL2t + + ventry el2h_sync_invalid // Synchronous EL2h + ventry el2h_irq_invalid // IRQ EL2h + ventry el2h_fiq_invalid // FIQ EL2h + ventry el2h_error_invalid // Error EL2h + + ventry el1_sync // Synchronous 64-bit EL1 + ventry el1_irq // IRQ 64-bit EL1 + ventry el1_fiq_invalid // FIQ 64-bit EL1 + ventry el1_error_invalid // Error 64-bit EL1 + + ventry el1_sync // Synchronous 32-bit EL1 + ventry el1_irq // IRQ 32-bit EL1 + ventry el1_fiq_invalid // FIQ 32-bit EL1 + ventry el1_error_invalid // Error 32-bit EL1 +ENDPROC(__hyp_vector) +ENDPROC(__kvm_hyp_vector) diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h new file mode 100644 index 000000000000..e7986bb4b1e9 --- /dev/null +++ b/arch/arm64/kvm/hyp/hyp.h @@ -0,0 +1,89 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ARM64_KVM_HYP_H__ +#define __ARM64_KVM_HYP_H__ + +#include <linux/compiler.h> +#include <linux/kvm_host.h> +#include <asm/kvm_mmu.h> +#include <asm/sysreg.h> + +#define __hyp_text __section(.hyp.text) notrace + +#define kern_hyp_va(v) (typeof(v))((unsigned long)(v) & HYP_PAGE_OFFSET_MASK) +#define hyp_kern_va(v) (typeof(v))((unsigned long)(v) - HYP_PAGE_OFFSET \ + + PAGE_OFFSET) + +/** + * hyp_alternate_select - Generates patchable code sequences that are + * used to switch between two implementations of a function, depending + * on the availability of a feature. + * + * @fname: a symbol name that will be defined as a function returning a + * function pointer whose type will match @orig and @alt + * @orig: A pointer to the default function, as returned by @fname when + * @cond doesn't hold + * @alt: A pointer to the alternate function, as returned by @fname + * when @cond holds + * @cond: a CPU feature (as described in asm/cpufeature.h) + */ +#define hyp_alternate_select(fname, orig, alt, cond) \ +typeof(orig) * __hyp_text fname(void) \ +{ \ + typeof(alt) *val = orig; \ + asm volatile(ALTERNATIVE("nop \n", \ + "mov %0, %1 \n", \ + cond) \ + : "+r" (val) : "r" (alt)); \ + return val; \ +} + +void __vgic_v2_save_state(struct kvm_vcpu *vcpu); +void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); +void __vgic_v3_save_state(struct kvm_vcpu *vcpu); +void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); + +void __timer_save_state(struct kvm_vcpu *vcpu); +void __timer_restore_state(struct kvm_vcpu *vcpu); + +void __sysreg_save_state(struct kvm_cpu_context *ctxt); +void __sysreg_restore_state(struct kvm_cpu_context *ctxt); +void __sysreg32_save_state(struct kvm_vcpu *vcpu); +void __sysreg32_restore_state(struct kvm_vcpu *vcpu); + +void __debug_save_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt); +void __debug_restore_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt); +void __debug_cond_save_host_state(struct kvm_vcpu *vcpu); +void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu); + +void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); +void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); +static inline bool __fpsimd_enabled(void) +{ + return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP); +} + +u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); +void __noreturn __hyp_do_panic(unsigned long, ...); + +#endif /* __ARM64_KVM_HYP_H__ */ + diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c new file mode 100644 index 000000000000..7457ae4db86e --- /dev/null +++ b/arch/arm64/kvm/hyp/switch.c @@ -0,0 +1,176 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "hyp.h" + +static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) +{ + u64 val; + + /* + * We are about to set CPTR_EL2.TFP to trap all floating point + * register accesses to EL2, however, the ARM ARM clearly states that + * traps are only taken to EL2 if the operation would not otherwise + * trap to EL1. Therefore, always make sure that for 32-bit guests, + * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. + */ + val = vcpu->arch.hcr_el2; + if (!(val & HCR_RW)) { + write_sysreg(1 << 30, fpexc32_el2); + isb(); + } + write_sysreg(val, hcr_el2); + /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */ + write_sysreg(1 << 15, hstr_el2); + write_sysreg(CPTR_EL2_TTA | CPTR_EL2_TFP, cptr_el2); + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); +} + +static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) +{ + write_sysreg(HCR_RW, hcr_el2); + write_sysreg(0, hstr_el2); + write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2); + write_sysreg(0, cptr_el2); +} + +static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + write_sysreg(kvm->arch.vttbr, vttbr_el2); +} + +static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) +{ + write_sysreg(0, vttbr_el2); +} + +static hyp_alternate_select(__vgic_call_save_state, + __vgic_v2_save_state, __vgic_v3_save_state, + ARM64_HAS_SYSREG_GIC_CPUIF); + +static hyp_alternate_select(__vgic_call_restore_state, + __vgic_v2_restore_state, __vgic_v3_restore_state, + ARM64_HAS_SYSREG_GIC_CPUIF); + +static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu) +{ + __vgic_call_save_state()(vcpu); + write_sysreg(read_sysreg(hcr_el2) & ~HCR_INT_OVERRIDE, hcr_el2); +} + +static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) +{ + u64 val; + + val = read_sysreg(hcr_el2); + val |= HCR_INT_OVERRIDE; + val |= vcpu->arch.irq_lines; + write_sysreg(val, hcr_el2); + + __vgic_call_restore_state()(vcpu); +} + +int __hyp_text __guest_run(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + bool fp_enabled; + u64 exit_code; + + vcpu = kern_hyp_va(vcpu); + write_sysreg(vcpu, tpidr_el2); + + host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + guest_ctxt = &vcpu->arch.ctxt; + + __sysreg_save_state(host_ctxt); + __debug_cond_save_host_state(vcpu); + + __activate_traps(vcpu); + __activate_vm(vcpu); + + __vgic_restore_state(vcpu); + __timer_restore_state(vcpu); + + /* + * We must restore the 32-bit state before the sysregs, thanks + * to Cortex-A57 erratum #852523. + */ + __sysreg32_restore_state(vcpu); + __sysreg_restore_state(guest_ctxt); + __debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); + + /* Jump in the fire! */ + exit_code = __guest_enter(vcpu, host_ctxt); + /* And we're baaack! */ + + fp_enabled = __fpsimd_enabled(); + + __sysreg_save_state(guest_ctxt); + __sysreg32_save_state(vcpu); + __timer_save_state(vcpu); + __vgic_save_state(vcpu); + + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + + __sysreg_restore_state(host_ctxt); + + if (fp_enabled) { + __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); + __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); + } + + __debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); + __debug_cond_restore_host_state(vcpu); + + return exit_code; +} + +__alias(__guest_run) +int __weak __kvm_vcpu_run(struct kvm_vcpu *vcpu); + +static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; + +void __hyp_text __noreturn __hyp_panic(void) +{ + unsigned long str_va = (unsigned long)__hyp_panic_string; + u64 spsr = read_sysreg(spsr_el2); + u64 elr = read_sysreg(elr_el2); + u64 par = read_sysreg(par_el1); + + if (read_sysreg(vttbr_el2)) { + struct kvm_vcpu *vcpu; + struct kvm_cpu_context *host_ctxt; + + vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2); + host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + __sysreg_restore_state(host_ctxt); + } + + /* Call panic for real */ + __hyp_do_panic(hyp_kern_va(str_va), + spsr, elr, + read_sysreg(esr_el2), read_sysreg(far_el2), + read_sysreg(hpfar_el2), par, + (void *)read_sysreg(tpidr_el2)); + + unreachable(); +} diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c new file mode 100644 index 000000000000..36035417ec52 --- /dev/null +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_mmu.h> + +#include "hyp.h" + +/* ctxt is already in the HYP VA space */ +void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) +{ + ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2); + ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); + ctxt->sys_regs[SCTLR_EL1] = read_sysreg(sctlr_el1); + ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); + ctxt->sys_regs[CPACR_EL1] = read_sysreg(cpacr_el1); + ctxt->sys_regs[TTBR0_EL1] = read_sysreg(ttbr0_el1); + ctxt->sys_regs[TTBR1_EL1] = read_sysreg(ttbr1_el1); + ctxt->sys_regs[TCR_EL1] = read_sysreg(tcr_el1); + ctxt->sys_regs[ESR_EL1] = read_sysreg(esr_el1); + ctxt->sys_regs[AFSR0_EL1] = read_sysreg(afsr0_el1); + ctxt->sys_regs[AFSR1_EL1] = read_sysreg(afsr1_el1); + ctxt->sys_regs[FAR_EL1] = read_sysreg(far_el1); + ctxt->sys_regs[MAIR_EL1] = read_sysreg(mair_el1); + ctxt->sys_regs[VBAR_EL1] = read_sysreg(vbar_el1); + ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg(contextidr_el1); + ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); + ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); + ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); + ctxt->sys_regs[AMAIR_EL1] = read_sysreg(amair_el1); + ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg(cntkctl_el1); + ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); + ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); + + ctxt->gp_regs.regs.sp = read_sysreg(sp_el0); + ctxt->gp_regs.regs.pc = read_sysreg(elr_el2); + ctxt->gp_regs.regs.pstate = read_sysreg(spsr_el2); + ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); + ctxt->gp_regs.elr_el1 = read_sysreg(elr_el1); + ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg(spsr_el1); +} + +void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); + write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); + write_sysreg(ctxt->sys_regs[SCTLR_EL1], sctlr_el1); + write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); + write_sysreg(ctxt->sys_regs[CPACR_EL1], cpacr_el1); + write_sysreg(ctxt->sys_regs[TTBR0_EL1], ttbr0_el1); + write_sysreg(ctxt->sys_regs[TTBR1_EL1], ttbr1_el1); + write_sysreg(ctxt->sys_regs[TCR_EL1], tcr_el1); + write_sysreg(ctxt->sys_regs[ESR_EL1], esr_el1); + write_sysreg(ctxt->sys_regs[AFSR0_EL1], afsr0_el1); + write_sysreg(ctxt->sys_regs[AFSR1_EL1], afsr1_el1); + write_sysreg(ctxt->sys_regs[FAR_EL1], far_el1); + write_sysreg(ctxt->sys_regs[MAIR_EL1], mair_el1); + write_sysreg(ctxt->sys_regs[VBAR_EL1], vbar_el1); + write_sysreg(ctxt->sys_regs[CONTEXTIDR_EL1], contextidr_el1); + write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); + write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); + write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); + write_sysreg(ctxt->sys_regs[AMAIR_EL1], amair_el1); + write_sysreg(ctxt->sys_regs[CNTKCTL_EL1], cntkctl_el1); + write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); + write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); + + write_sysreg(ctxt->gp_regs.regs.sp, sp_el0); + write_sysreg(ctxt->gp_regs.regs.pc, elr_el2); + write_sysreg(ctxt->gp_regs.regs.pstate, spsr_el2); + write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); + write_sysreg(ctxt->gp_regs.elr_el1, elr_el1); + write_sysreg(ctxt->gp_regs.spsr[KVM_SPSR_EL1], spsr_el1); +} + +void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) +{ + u64 *spsr, *sysreg; + + if (read_sysreg(hcr_el2) & HCR_RW) + return; + + spsr = vcpu->arch.ctxt.gp_regs.spsr; + sysreg = vcpu->arch.ctxt.sys_regs; + + spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt); + spsr[KVM_SPSR_UND] = read_sysreg(spsr_und); + spsr[KVM_SPSR_IRQ] = read_sysreg(spsr_irq); + spsr[KVM_SPSR_FIQ] = read_sysreg(spsr_fiq); + + sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); + sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); + + if (__fpsimd_enabled()) + sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2); + + if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) + sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); +} + +void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu) +{ + u64 *spsr, *sysreg; + + if (read_sysreg(hcr_el2) & HCR_RW) + return; + + spsr = vcpu->arch.ctxt.gp_regs.spsr; + sysreg = vcpu->arch.ctxt.sys_regs; + + write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt); + write_sysreg(spsr[KVM_SPSR_UND], spsr_und); + write_sysreg(spsr[KVM_SPSR_IRQ], spsr_irq); + write_sysreg(spsr[KVM_SPSR_FIQ], spsr_fiq); + + write_sysreg(sysreg[DACR32_EL2], dacr32_el2); + write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2); + + if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) + write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2); +} diff --git a/arch/arm64/kvm/hyp/timer-sr.c b/arch/arm64/kvm/hyp/timer-sr.c new file mode 100644 index 000000000000..1051e5d7320f --- /dev/null +++ b/arch/arm64/kvm/hyp/timer-sr.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <clocksource/arm_arch_timer.h> +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_mmu.h> + +#include "hyp.h" + +/* vcpu is already in the HYP VA space */ +void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + u64 val; + + if (kvm->arch.timer.enabled) { + timer->cntv_ctl = read_sysreg(cntv_ctl_el0); + timer->cntv_cval = read_sysreg(cntv_cval_el0); + } + + /* Disable the virtual timer */ + write_sysreg(0, cntv_ctl_el0); + + /* Allow physical timer/counter access for the host */ + val = read_sysreg(cnthctl_el2); + val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; + write_sysreg(val, cnthctl_el2); + + /* Clear cntvoff for the host */ + write_sysreg(0, cntvoff_el2); +} + +void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + u64 val; + + /* + * Disallow physical timer access for the guest + * Physical counter access is allowed + */ + val = read_sysreg(cnthctl_el2); + val &= ~CNTHCTL_EL1PCEN; + val |= CNTHCTL_EL1PCTEN; + write_sysreg(val, cnthctl_el2); + + if (kvm->arch.timer.enabled) { + write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2); + write_sysreg(timer->cntv_cval, cntv_cval_el0); + isb(); + write_sysreg(timer->cntv_ctl, cntv_ctl_el0); + } +} diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c new file mode 100644 index 000000000000..5f815cf53a9a --- /dev/null +++ b/arch/arm64/kvm/hyp/tlb.c @@ -0,0 +1,82 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "hyp.h" + +void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) +{ + dsb(ishst); + + /* Switch to requested VMID */ + kvm = kern_hyp_va(kvm); + write_sysreg(kvm->arch.vttbr, vttbr_el2); + isb(); + + /* + * We could do so much better if we had the VA as well. + * Instead, we invalidate Stage-2 for this IPA, and the + * whole of Stage-1. Weep... + */ + ipa >>= 12; + asm volatile("tlbi ipas2e1is, %0" : : "r" (ipa)); + + /* + * We have to ensure completion of the invalidation at Stage-2, + * since a table walk on another CPU could refill a TLB with a + * complete (S1 + S2) walk based on the old Stage-2 mapping if + * the Stage-1 invalidation happened first. + */ + dsb(ish); + asm volatile("tlbi vmalle1is" : : ); + dsb(ish); + isb(); + + write_sysreg(0, vttbr_el2); +} + +__alias(__tlb_flush_vmid_ipa) +void __weak __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); + +void __hyp_text __tlb_flush_vmid(struct kvm *kvm) +{ + dsb(ishst); + + /* Switch to requested VMID */ + kvm = kern_hyp_va(kvm); + write_sysreg(kvm->arch.vttbr, vttbr_el2); + isb(); + + asm volatile("tlbi vmalls12e1is" : : ); + dsb(ish); + isb(); + + write_sysreg(0, vttbr_el2); +} + +__alias(__tlb_flush_vmid) +void __weak __kvm_tlb_flush_vmid(struct kvm *kvm); + +void __hyp_text __tlb_flush_vm_context(void) +{ + dsb(ishst); + asm volatile("tlbi alle1is \n" + "ic ialluis ": : ); + dsb(ish); +} + +__alias(__tlb_flush_vm_context) +void __weak __kvm_flush_vm_context(void); diff --git a/arch/arm64/kvm/hyp/vgic-v2-sr.c b/arch/arm64/kvm/hyp/vgic-v2-sr.c new file mode 100644 index 000000000000..e71761238cfc --- /dev/null +++ b/arch/arm64/kvm/hyp/vgic-v2-sr.c @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/compiler.h> +#include <linux/irqchip/arm-gic.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_mmu.h> + +#include "hyp.h" + +/* vcpu is already in the HYP VA space */ +void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + struct vgic_dist *vgic = &kvm->arch.vgic; + void __iomem *base = kern_hyp_va(vgic->vctrl_base); + u32 eisr0, eisr1, elrsr0, elrsr1; + int i, nr_lr; + + if (!base) + return; + + nr_lr = vcpu->arch.vgic_cpu.nr_lr; + cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR); + cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR); + eisr0 = readl_relaxed(base + GICH_EISR0); + elrsr0 = readl_relaxed(base + GICH_ELRSR0); + if (unlikely(nr_lr > 32)) { + eisr1 = readl_relaxed(base + GICH_EISR1); + elrsr1 = readl_relaxed(base + GICH_ELRSR1); + } else { + eisr1 = elrsr1 = 0; + } +#ifdef CONFIG_CPU_BIG_ENDIAN + cpu_if->vgic_eisr = ((u64)eisr0 << 32) | eisr1; + cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1; +#else + cpu_if->vgic_eisr = ((u64)eisr1 << 32) | eisr0; + cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0; +#endif + cpu_if->vgic_apr = readl_relaxed(base + GICH_APR); + + writel_relaxed(0, base + GICH_HCR); + + for (i = 0; i < nr_lr; i++) + cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); +} + +/* vcpu is already in the HYP VA space */ +void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + struct vgic_dist *vgic = &kvm->arch.vgic; + void __iomem *base = kern_hyp_va(vgic->vctrl_base); + int i, nr_lr; + + if (!base) + return; + + writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); + writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR); + writel_relaxed(cpu_if->vgic_apr, base + GICH_APR); + + nr_lr = vcpu->arch.vgic_cpu.nr_lr; + for (i = 0; i < nr_lr; i++) + writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4)); +} diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c new file mode 100644 index 000000000000..a8f5fd45ad8e --- /dev/null +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -0,0 +1,230 @@ +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/compiler.h> +#include <linux/of.h> +#include <linux/irqchip/arm-gic-v3.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_mmu.h> + +#include "hyp.h" + +#define vtr_to_max_lr_idx(v) ((v) & 0xf) +#define vtr_to_nr_pri_bits(v) (((u32)(v) >> 29) + 1) + +#define read_gicreg(r) \ + ({ \ + u64 reg; \ + asm volatile("mrs_s %0, " __stringify(r) : "=r" (reg)); \ + reg; \ + }) + +#define write_gicreg(v,r) \ + do { \ + u64 __val = (v); \ + asm volatile("msr_s " __stringify(r) ", %0" : : "r" (__val));\ + } while (0) + +/* vcpu is already in the HYP VA space */ +void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u64 val; + u32 max_lr_idx, nr_pri_bits; + + /* + * Make sure stores to the GIC via the memory mapped interface + * are now visible to the system register interface. + */ + dsb(st); + + cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); + cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2); + cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2); + cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2); + + write_gicreg(0, ICH_HCR_EL2); + val = read_gicreg(ICH_VTR_EL2); + max_lr_idx = vtr_to_max_lr_idx(val); + nr_pri_bits = vtr_to_nr_pri_bits(val); + + switch (max_lr_idx) { + case 15: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)] = read_gicreg(ICH_LR15_EL2); + case 14: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)] = read_gicreg(ICH_LR14_EL2); + case 13: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)] = read_gicreg(ICH_LR13_EL2); + case 12: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)] = read_gicreg(ICH_LR12_EL2); + case 11: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)] = read_gicreg(ICH_LR11_EL2); + case 10: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)] = read_gicreg(ICH_LR10_EL2); + case 9: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)] = read_gicreg(ICH_LR9_EL2); + case 8: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)] = read_gicreg(ICH_LR8_EL2); + case 7: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)] = read_gicreg(ICH_LR7_EL2); + case 6: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)] = read_gicreg(ICH_LR6_EL2); + case 5: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)] = read_gicreg(ICH_LR5_EL2); + case 4: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)] = read_gicreg(ICH_LR4_EL2); + case 3: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)] = read_gicreg(ICH_LR3_EL2); + case 2: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)] = read_gicreg(ICH_LR2_EL2); + case 1: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)] = read_gicreg(ICH_LR1_EL2); + case 0: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)] = read_gicreg(ICH_LR0_EL2); + } + + switch (nr_pri_bits) { + case 7: + cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2); + cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2); + case 6: + cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2); + default: + cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2); + } + + switch (nr_pri_bits) { + case 7: + cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2); + cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2); + case 6: + cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2); + default: + cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2); + } + + val = read_gicreg(ICC_SRE_EL2); + write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); + isb(); /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ + write_gicreg(1, ICC_SRE_EL1); +} + +void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u64 val; + u32 max_lr_idx, nr_pri_bits; + + /* + * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a + * Group0 interrupt (as generated in GICv2 mode) to be + * delivered as a FIQ to the guest, with potentially fatal + * consequences. So we must make sure that ICC_SRE_EL1 has + * been actually programmed with the value we want before + * starting to mess with the rest of the GIC. + */ + write_gicreg(cpu_if->vgic_sre, ICC_SRE_EL1); + isb(); + + write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); + write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); + + val = read_gicreg(ICH_VTR_EL2); + max_lr_idx = vtr_to_max_lr_idx(val); + nr_pri_bits = vtr_to_nr_pri_bits(val); + + switch (nr_pri_bits) { + case 7: + write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2); + write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2); + case 6: + write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2); + default: + write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2); + } + + switch (nr_pri_bits) { + case 7: + write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2); + write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2); + case 6: + write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2); + default: + write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2); + } + + switch (max_lr_idx) { + case 15: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2); + case 14: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)], ICH_LR14_EL2); + case 13: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)], ICH_LR13_EL2); + case 12: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)], ICH_LR12_EL2); + case 11: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)], ICH_LR11_EL2); + case 10: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)], ICH_LR10_EL2); + case 9: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)], ICH_LR9_EL2); + case 8: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)], ICH_LR8_EL2); + case 7: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)], ICH_LR7_EL2); + case 6: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)], ICH_LR6_EL2); + case 5: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)], ICH_LR5_EL2); + case 4: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)], ICH_LR4_EL2); + case 3: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)], ICH_LR3_EL2); + case 2: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)], ICH_LR2_EL2); + case 1: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)], ICH_LR1_EL2); + case 0: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)], ICH_LR0_EL2); + } + + /* + * Ensures that the above will have reached the + * (re)distributors. This ensure the guest will read the + * correct values from the memory-mapped interface. + */ + isb(); + dsb(sy); + + /* + * Prevent the guest from touching the GIC system registers if + * SRE isn't enabled for GICv3 emulation. + */ + if (!cpu_if->vgic_sre) { + write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, + ICC_SRE_EL2); + } +} + +u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void) +{ + return read_gicreg(ICH_VTR_EL2); +} + +__alias(__vgic_v3_read_ich_vtr_el2) +u64 __weak __vgic_v3_get_ich_vtr_el2(void); diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 0b4326578985..e515b85220b4 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -28,7 +28,9 @@ #include <asm/cputype.h> #include <asm/ptrace.h> #include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> #include <asm/kvm_coproc.h> +#include <asm/kvm_mmu.h> /* * ARMv8 Reset Values @@ -109,3 +111,31 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) return 0; } + +extern char __hyp_idmap_text_start[]; + +unsigned long kvm_hyp_reset_entry(void) +{ + if (!__kvm_cpu_uses_extended_idmap()) { + unsigned long offset; + + /* + * Find the address of __kvm_hyp_reset() in the trampoline page. + * This is present in the running page tables, and the boot page + * tables, so we call the code here to start the trampoline + * dance in reverse. + */ + offset = (unsigned long)__kvm_hyp_reset + - ((unsigned long)__hyp_idmap_text_start & PAGE_MASK); + + return TRAMPOLINE_VA + offset; + } else { + /* + * KVM is running with merged page tables, which don't have the + * trampoline page mapped. We know the idmap is still mapped, + * but can't be called into directly. Use + * __extended_idmap_trampoline to do the call. + */ + return (unsigned long)kvm_ksym_ref(__extended_idmap_trampoline); + } +} diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c370b4014799..158bae7c52cc 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -211,6 +211,203 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu, return true; } +/* + * reg_to_dbg/dbg_to_reg + * + * A 32 bit write to a debug register leave top bits alone + * A 32 bit read from a debug register only returns the bottom bits + * + * All writes will set the KVM_ARM64_DEBUG_DIRTY flag to ensure the + * hyp.S code switches between host and guest values in future. + */ +static inline void reg_to_dbg(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + u64 *dbg_reg) +{ + u64 val = *vcpu_reg(vcpu, p->Rt); + + if (p->is_32bit) { + val &= 0xffffffffUL; + val |= ((*dbg_reg >> 32) << 32); + } + + *dbg_reg = val; + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; +} + +static inline void dbg_to_reg(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + u64 *dbg_reg) +{ + u64 val = *dbg_reg; + + if (p->is_32bit) + val &= 0xffffffffUL; + + *vcpu_reg(vcpu, p->Rt) = val; +} + +static inline bool trap_bvr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *rd) +{ + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; + + if (p->is_write) + reg_to_dbg(vcpu, p, dbg_reg); + else + dbg_to_reg(vcpu, p, dbg_reg); + + return true; +} + +static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; + + if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + return 0; +} + +static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; + + if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + return 0; +} + +static inline void reset_bvr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val; +} + +static inline bool trap_bcr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *rd) +{ + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; + + if (p->is_write) + reg_to_dbg(vcpu, p, dbg_reg); + else + dbg_to_reg(vcpu, p, dbg_reg); + + return true; +} + +static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; + + if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + + return 0; +} + +static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; + + if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + return 0; +} + +static inline void reset_bcr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val; +} + +static inline bool trap_wvr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *rd) +{ + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; + + if (p->is_write) + reg_to_dbg(vcpu, p, dbg_reg); + else + dbg_to_reg(vcpu, p, dbg_reg); + + return true; +} + +static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; + + if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + return 0; +} + +static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; + + if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + return 0; +} + +static inline void reset_wvr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val; +} + +static inline bool trap_wcr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *rd) +{ + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; + + if (p->is_write) + reg_to_dbg(vcpu, p, dbg_reg); + else + dbg_to_reg(vcpu, p, dbg_reg); + + return true; +} + +static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; + + if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + return 0; +} + +static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; + + if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + return -EFAULT; + return 0; +} + +static inline void reset_wcr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) +{ + vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val; +} + static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 amair; @@ -240,16 +437,16 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ /* DBGBVRn_EL1 */ \ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100), \ - trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 }, \ + trap_bvr, reset_bvr, n, 0, get_bvr, set_bvr }, \ /* DBGBCRn_EL1 */ \ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101), \ - trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 }, \ + trap_bcr, reset_bcr, n, 0, get_bcr, set_bcr }, \ /* DBGWVRn_EL1 */ \ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110), \ - trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 }, \ + trap_wvr, reset_wvr, n, 0, get_wvr, set_wvr }, \ /* DBGWCRn_EL1 */ \ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \ - trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 } + trap_wcr, reset_wcr, n, 0, get_wcr, set_wcr } /* * Architected system registers. @@ -516,28 +713,55 @@ static bool trap_debug32(struct kvm_vcpu *vcpu, return true; } -#define DBG_BCR_BVR_WCR_WVR(n) \ - /* DBGBVRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_debug32, \ - NULL, (cp14_DBGBVR0 + (n) * 2) }, \ - /* DBGBCRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_debug32, \ - NULL, (cp14_DBGBCR0 + (n) * 2) }, \ - /* DBGWVRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_debug32, \ - NULL, (cp14_DBGWVR0 + (n) * 2) }, \ - /* DBGWCRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_debug32, \ - NULL, (cp14_DBGWCR0 + (n) * 2) } - -#define DBGBXVR(n) \ - { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_debug32, \ - NULL, cp14_DBGBXVR0 + n * 2 } +/* AArch32 debug register mappings + * + * AArch32 DBGBVRn is mapped to DBGBVRn_EL1[31:0] + * AArch32 DBGBXVRn is mapped to DBGBVRn_EL1[63:32] + * + * All control registers and watchpoint value registers are mapped to + * the lower 32 bits of their AArch64 equivalents. We share the trap + * handlers with the above AArch64 code which checks what mode the + * system is in. + */ + +static inline bool trap_xvr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *rd) +{ + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; + + if (p->is_write) { + u64 val = *dbg_reg; + + val &= 0xffffffffUL; + val |= *vcpu_reg(vcpu, p->Rt) << 32; + *dbg_reg = val; + + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + } else { + *vcpu_reg(vcpu, p->Rt) = *dbg_reg >> 32; + } + + return true; +} + +#define DBG_BCR_BVR_WCR_WVR(n) \ + /* DBGBVRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \ + /* DBGBCRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n }, \ + /* DBGWVRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n }, \ + /* DBGWCRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_wcr, NULL, n } + +#define DBGBXVR(n) \ + { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_xvr, NULL, n } /* * Trapped cp14 registers. We generally ignore most of the external * debug, on the principle that they don't really make sense to a - * guest. Revisit this one day, whould this principle change. + * guest. Revisit this one day, would this principle change. */ static const struct sys_reg_desc cp14_regs[] = { /* DBGIDR */ @@ -1303,6 +1527,9 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg if (!r) return get_invariant_sys_reg(reg->id, uaddr); + if (r->get_user) + return (r->get_user)(vcpu, r, reg, uaddr); + return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id); } @@ -1321,6 +1548,9 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg if (!r) return set_invariant_sys_reg(reg->id, uaddr); + if (r->set_user) + return (r->set_user)(vcpu, r, reg, uaddr); + return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id); } diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index d411e251412c..eaa324e4db4d 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -55,6 +55,12 @@ struct sys_reg_desc { /* Value (usually reset value) */ u64 val; + + /* Custom get/set_user functions, fallback to generic if NULL */ + int (*get_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr); + int (*set_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr); }; static inline void print_sys_reg_instr(const struct sys_reg_params *p) diff --git a/arch/arm64/kvm/vgic-v2-switch.S b/arch/arm64/kvm/vgic-v2-switch.S deleted file mode 100644 index f002fe1c3700..000000000000 --- a/arch/arm64/kvm/vgic-v2-switch.S +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/linkage.h> -#include <linux/irqchip/arm-gic.h> - -#include <asm/assembler.h> -#include <asm/memory.h> -#include <asm/asm-offsets.h> -#include <asm/kvm.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_mmu.h> - - .text - .pushsection .hyp.text, "ax" - -/* - * Save the VGIC CPU state into memory - * x0: Register pointing to VCPU struct - * Do not corrupt x1!!! - */ -ENTRY(__save_vgic_v2_state) -__save_vgic_v2_state: - /* Get VGIC VCTRL base into x2 */ - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr x2, [x2, #KVM_VGIC_VCTRL] - kern_hyp_va x2 - cbz x2, 2f // disabled - - /* Compute the address of struct vgic_cpu */ - add x3, x0, #VCPU_VGIC_CPU - - /* Save all interesting registers */ - ldr w4, [x2, #GICH_HCR] - ldr w5, [x2, #GICH_VMCR] - ldr w6, [x2, #GICH_MISR] - ldr w7, [x2, #GICH_EISR0] - ldr w8, [x2, #GICH_EISR1] - ldr w9, [x2, #GICH_ELRSR0] - ldr w10, [x2, #GICH_ELRSR1] - ldr w11, [x2, #GICH_APR] -CPU_BE( rev w4, w4 ) -CPU_BE( rev w5, w5 ) -CPU_BE( rev w6, w6 ) -CPU_BE( rev w7, w7 ) -CPU_BE( rev w8, w8 ) -CPU_BE( rev w9, w9 ) -CPU_BE( rev w10, w10 ) -CPU_BE( rev w11, w11 ) - - str w4, [x3, #VGIC_V2_CPU_HCR] - str w5, [x3, #VGIC_V2_CPU_VMCR] - str w6, [x3, #VGIC_V2_CPU_MISR] -CPU_LE( str w7, [x3, #VGIC_V2_CPU_EISR] ) -CPU_LE( str w8, [x3, #(VGIC_V2_CPU_EISR + 4)] ) -CPU_LE( str w9, [x3, #VGIC_V2_CPU_ELRSR] ) -CPU_LE( str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] ) -CPU_BE( str w7, [x3, #(VGIC_V2_CPU_EISR + 4)] ) -CPU_BE( str w8, [x3, #VGIC_V2_CPU_EISR] ) -CPU_BE( str w9, [x3, #(VGIC_V2_CPU_ELRSR + 4)] ) -CPU_BE( str w10, [x3, #VGIC_V2_CPU_ELRSR] ) - str w11, [x3, #VGIC_V2_CPU_APR] - - /* Clear GICH_HCR */ - str wzr, [x2, #GICH_HCR] - - /* Save list registers */ - add x2, x2, #GICH_LR0 - ldr w4, [x3, #VGIC_CPU_NR_LR] - add x3, x3, #VGIC_V2_CPU_LR -1: ldr w5, [x2], #4 -CPU_BE( rev w5, w5 ) - str w5, [x3], #4 - sub w4, w4, #1 - cbnz w4, 1b -2: - ret -ENDPROC(__save_vgic_v2_state) - -/* - * Restore the VGIC CPU state from memory - * x0: Register pointing to VCPU struct - */ -ENTRY(__restore_vgic_v2_state) -__restore_vgic_v2_state: - /* Get VGIC VCTRL base into x2 */ - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr x2, [x2, #KVM_VGIC_VCTRL] - kern_hyp_va x2 - cbz x2, 2f // disabled - - /* Compute the address of struct vgic_cpu */ - add x3, x0, #VCPU_VGIC_CPU - - /* We only restore a minimal set of registers */ - ldr w4, [x3, #VGIC_V2_CPU_HCR] - ldr w5, [x3, #VGIC_V2_CPU_VMCR] - ldr w6, [x3, #VGIC_V2_CPU_APR] -CPU_BE( rev w4, w4 ) -CPU_BE( rev w5, w5 ) -CPU_BE( rev w6, w6 ) - - str w4, [x2, #GICH_HCR] - str w5, [x2, #GICH_VMCR] - str w6, [x2, #GICH_APR] - - /* Restore list registers */ - add x2, x2, #GICH_LR0 - ldr w4, [x3, #VGIC_CPU_NR_LR] - add x3, x3, #VGIC_V2_CPU_LR -1: ldr w5, [x3], #4 -CPU_BE( rev w5, w5 ) - str w5, [x2], #4 - sub w4, w4, #1 - cbnz w4, 1b -2: - ret -ENDPROC(__restore_vgic_v2_state) - - .popsection diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S deleted file mode 100644 index 617a012a0107..000000000000 --- a/arch/arm64/kvm/vgic-v3-switch.S +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/linkage.h> -#include <linux/irqchip/arm-gic-v3.h> - -#include <asm/assembler.h> -#include <asm/memory.h> -#include <asm/asm-offsets.h> -#include <asm/kvm.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_arm.h> - - .text - .pushsection .hyp.text, "ax" - -/* - * We store LRs in reverse order to let the CPU deal with streaming - * access. Use this macro to make it look saner... - */ -#define LR_OFFSET(n) (VGIC_V3_CPU_LR + (15 - n) * 8) - -/* - * Save the VGIC CPU state into memory - * x0: Register pointing to VCPU struct - * Do not corrupt x1!!! - */ -.macro save_vgic_v3_state - // Compute the address of struct vgic_cpu - add x3, x0, #VCPU_VGIC_CPU - - // Make sure stores to the GIC via the memory mapped interface - // are now visible to the system register interface - dsb st - - // Save all interesting registers - mrs_s x4, ICH_HCR_EL2 - mrs_s x5, ICH_VMCR_EL2 - mrs_s x6, ICH_MISR_EL2 - mrs_s x7, ICH_EISR_EL2 - mrs_s x8, ICH_ELSR_EL2 - - str w4, [x3, #VGIC_V3_CPU_HCR] - str w5, [x3, #VGIC_V3_CPU_VMCR] - str w6, [x3, #VGIC_V3_CPU_MISR] - str w7, [x3, #VGIC_V3_CPU_EISR] - str w8, [x3, #VGIC_V3_CPU_ELRSR] - - msr_s ICH_HCR_EL2, xzr - - mrs_s x21, ICH_VTR_EL2 - mvn w22, w21 - ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 - - adr x24, 1f - add x24, x24, x23 - br x24 - -1: - mrs_s x20, ICH_LR15_EL2 - mrs_s x19, ICH_LR14_EL2 - mrs_s x18, ICH_LR13_EL2 - mrs_s x17, ICH_LR12_EL2 - mrs_s x16, ICH_LR11_EL2 - mrs_s x15, ICH_LR10_EL2 - mrs_s x14, ICH_LR9_EL2 - mrs_s x13, ICH_LR8_EL2 - mrs_s x12, ICH_LR7_EL2 - mrs_s x11, ICH_LR6_EL2 - mrs_s x10, ICH_LR5_EL2 - mrs_s x9, ICH_LR4_EL2 - mrs_s x8, ICH_LR3_EL2 - mrs_s x7, ICH_LR2_EL2 - mrs_s x6, ICH_LR1_EL2 - mrs_s x5, ICH_LR0_EL2 - - adr x24, 1f - add x24, x24, x23 - br x24 - -1: - str x20, [x3, #LR_OFFSET(15)] - str x19, [x3, #LR_OFFSET(14)] - str x18, [x3, #LR_OFFSET(13)] - str x17, [x3, #LR_OFFSET(12)] - str x16, [x3, #LR_OFFSET(11)] - str x15, [x3, #LR_OFFSET(10)] - str x14, [x3, #LR_OFFSET(9)] - str x13, [x3, #LR_OFFSET(8)] - str x12, [x3, #LR_OFFSET(7)] - str x11, [x3, #LR_OFFSET(6)] - str x10, [x3, #LR_OFFSET(5)] - str x9, [x3, #LR_OFFSET(4)] - str x8, [x3, #LR_OFFSET(3)] - str x7, [x3, #LR_OFFSET(2)] - str x6, [x3, #LR_OFFSET(1)] - str x5, [x3, #LR_OFFSET(0)] - - tbnz w21, #29, 6f // 6 bits - tbz w21, #30, 5f // 5 bits - // 7 bits - mrs_s x20, ICH_AP0R3_EL2 - str w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] - mrs_s x19, ICH_AP0R2_EL2 - str w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] -6: mrs_s x18, ICH_AP0R1_EL2 - str w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] -5: mrs_s x17, ICH_AP0R0_EL2 - str w17, [x3, #VGIC_V3_CPU_AP0R] - - tbnz w21, #29, 6f // 6 bits - tbz w21, #30, 5f // 5 bits - // 7 bits - mrs_s x20, ICH_AP1R3_EL2 - str w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] - mrs_s x19, ICH_AP1R2_EL2 - str w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] -6: mrs_s x18, ICH_AP1R1_EL2 - str w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] -5: mrs_s x17, ICH_AP1R0_EL2 - str w17, [x3, #VGIC_V3_CPU_AP1R] - - // Restore SRE_EL1 access and re-enable SRE at EL1. - mrs_s x5, ICC_SRE_EL2 - orr x5, x5, #ICC_SRE_EL2_ENABLE - msr_s ICC_SRE_EL2, x5 - isb - mov x5, #1 - msr_s ICC_SRE_EL1, x5 -.endm - -/* - * Restore the VGIC CPU state from memory - * x0: Register pointing to VCPU struct - */ -.macro restore_vgic_v3_state - // Compute the address of struct vgic_cpu - add x3, x0, #VCPU_VGIC_CPU - - // Restore all interesting registers - ldr w4, [x3, #VGIC_V3_CPU_HCR] - ldr w5, [x3, #VGIC_V3_CPU_VMCR] - ldr w25, [x3, #VGIC_V3_CPU_SRE] - - msr_s ICC_SRE_EL1, x25 - - // make sure SRE is valid before writing the other registers - isb - - msr_s ICH_HCR_EL2, x4 - msr_s ICH_VMCR_EL2, x5 - - mrs_s x21, ICH_VTR_EL2 - - tbnz w21, #29, 6f // 6 bits - tbz w21, #30, 5f // 5 bits - // 7 bits - ldr w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] - msr_s ICH_AP1R3_EL2, x20 - ldr w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] - msr_s ICH_AP1R2_EL2, x19 -6: ldr w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] - msr_s ICH_AP1R1_EL2, x18 -5: ldr w17, [x3, #VGIC_V3_CPU_AP1R] - msr_s ICH_AP1R0_EL2, x17 - - tbnz w21, #29, 6f // 6 bits - tbz w21, #30, 5f // 5 bits - // 7 bits - ldr w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] - msr_s ICH_AP0R3_EL2, x20 - ldr w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] - msr_s ICH_AP0R2_EL2, x19 -6: ldr w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] - msr_s ICH_AP0R1_EL2, x18 -5: ldr w17, [x3, #VGIC_V3_CPU_AP0R] - msr_s ICH_AP0R0_EL2, x17 - - and w22, w21, #0xf - mvn w22, w21 - ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 - - adr x24, 1f - add x24, x24, x23 - br x24 - -1: - ldr x20, [x3, #LR_OFFSET(15)] - ldr x19, [x3, #LR_OFFSET(14)] - ldr x18, [x3, #LR_OFFSET(13)] - ldr x17, [x3, #LR_OFFSET(12)] - ldr x16, [x3, #LR_OFFSET(11)] - ldr x15, [x3, #LR_OFFSET(10)] - ldr x14, [x3, #LR_OFFSET(9)] - ldr x13, [x3, #LR_OFFSET(8)] - ldr x12, [x3, #LR_OFFSET(7)] - ldr x11, [x3, #LR_OFFSET(6)] - ldr x10, [x3, #LR_OFFSET(5)] - ldr x9, [x3, #LR_OFFSET(4)] - ldr x8, [x3, #LR_OFFSET(3)] - ldr x7, [x3, #LR_OFFSET(2)] - ldr x6, [x3, #LR_OFFSET(1)] - ldr x5, [x3, #LR_OFFSET(0)] - - adr x24, 1f - add x24, x24, x23 - br x24 - -1: - msr_s ICH_LR15_EL2, x20 - msr_s ICH_LR14_EL2, x19 - msr_s ICH_LR13_EL2, x18 - msr_s ICH_LR12_EL2, x17 - msr_s ICH_LR11_EL2, x16 - msr_s ICH_LR10_EL2, x15 - msr_s ICH_LR9_EL2, x14 - msr_s ICH_LR8_EL2, x13 - msr_s ICH_LR7_EL2, x12 - msr_s ICH_LR6_EL2, x11 - msr_s ICH_LR5_EL2, x10 - msr_s ICH_LR4_EL2, x9 - msr_s ICH_LR3_EL2, x8 - msr_s ICH_LR2_EL2, x7 - msr_s ICH_LR1_EL2, x6 - msr_s ICH_LR0_EL2, x5 - - // Ensure that the above will have reached the - // (re)distributors. This ensure the guest will read - // the correct values from the memory-mapped interface. - isb - dsb sy - - // Prevent the guest from touching the GIC system registers - // if SRE isn't enabled for GICv3 emulation - cbnz x25, 1f - mrs_s x5, ICC_SRE_EL2 - and x5, x5, #~ICC_SRE_EL2_ENABLE - msr_s ICC_SRE_EL2, x5 -1: -.endm - -ENTRY(__save_vgic_v3_state) - save_vgic_v3_state - ret -ENDPROC(__save_vgic_v3_state) - -ENTRY(__restore_vgic_v3_state) - restore_vgic_v3_state - ret -ENDPROC(__restore_vgic_v3_state) - -ENTRY(__vgic_v3_get_ich_vtr_el2) - mrs_s x0, ICH_VTR_EL2 - ret -ENDPROC(__vgic_v3_get_ich_vtr_el2) - - .popsection diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index c17967fdf5f6..a9723c71c52b 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -16,7 +16,11 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <linux/linkage.h> + +#include <asm/alternative.h> #include <asm/assembler.h> +#include <asm/cpufeature.h> +#include <asm/sysreg.h> .text @@ -29,6 +33,8 @@ * Alignment fixed up by hardware. */ ENTRY(__clear_user) +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) mov x2, x1 // save the size for fixup return subs x1, x1, #8 b.mi 2f @@ -48,6 +54,8 @@ USER(9f, strh wzr, [x0], #2 ) b.mi 5f USER(9f, strb wzr, [x0] ) 5: mov x0, #0 +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) ret ENDPROC(__clear_user) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 5e27add9d362..1be9ef27be97 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -15,7 +15,11 @@ */ #include <linux/linkage.h> + +#include <asm/alternative.h> #include <asm/assembler.h> +#include <asm/cpufeature.h> +#include <asm/sysreg.h> /* * Copy from user space to a kernel buffer (alignment handled by the hardware) @@ -28,14 +32,21 @@ * x0 - bytes not copied */ ENTRY(__copy_from_user) - add x4, x1, x2 // upper user buffer boundary - subs x2, x2, #8 +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) + add x5, x1, x2 // upper user buffer boundary + subs x2, x2, #16 + b.mi 1f +0: +USER(9f, ldp x3, x4, [x1], #16) + subs x2, x2, #16 + stp x3, x4, [x0], #16 + b.pl 0b +1: adds x2, x2, #8 b.mi 2f -1: USER(9f, ldr x3, [x1], #8 ) - subs x2, x2, #8 + sub x2, x2, #8 str x3, [x0], #8 - b.pl 1b 2: adds x2, x2, #4 b.mi 3f USER(9f, ldr w3, [x1], #4 ) @@ -51,12 +62,14 @@ USER(9f, ldrh w3, [x1], #2 ) USER(9f, ldrb w3, [x1] ) strb w3, [x0] 5: mov x0, #0 +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) ret ENDPROC(__copy_from_user) .section .fixup,"ax" .align 2 -9: sub x2, x4, x1 +9: sub x2, x5, x1 mov x3, x2 10: strb wzr, [x0], #1 // zero remaining buffer space subs x3, x3, #1 diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 84b6c9bb9b93..1b94661e22b3 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -17,7 +17,11 @@ */ #include <linux/linkage.h> + +#include <asm/alternative.h> #include <asm/assembler.h> +#include <asm/cpufeature.h> +#include <asm/sysreg.h> /* * Copy from user space to user space (alignment handled by the hardware) @@ -30,14 +34,21 @@ * x0 - bytes not copied */ ENTRY(__copy_in_user) - add x4, x0, x2 // upper user buffer boundary - subs x2, x2, #8 +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) + add x5, x0, x2 // upper user buffer boundary + subs x2, x2, #16 + b.mi 1f +0: +USER(9f, ldp x3, x4, [x1], #16) + subs x2, x2, #16 +USER(9f, stp x3, x4, [x0], #16) + b.pl 0b +1: adds x2, x2, #8 b.mi 2f -1: USER(9f, ldr x3, [x1], #8 ) - subs x2, x2, #8 + sub x2, x2, #8 USER(9f, str x3, [x0], #8 ) - b.pl 1b 2: adds x2, x2, #4 b.mi 3f USER(9f, ldr w3, [x1], #4 ) @@ -53,11 +64,13 @@ USER(9f, strh w3, [x0], #2 ) USER(9f, ldrb w3, [x1] ) USER(9f, strb w3, [x0] ) 5: mov x0, #0 +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) ret ENDPROC(__copy_in_user) .section .fixup,"ax" .align 2 -9: sub x0, x4, x0 // bytes not copied +9: sub x0, x5, x0 // bytes not copied ret .previous diff --git a/arch/arm64/lib/copy_template.S b/arch/arm64/lib/copy_template.S new file mode 100644 index 000000000000..410fbdb8163f --- /dev/null +++ b/arch/arm64/lib/copy_template.S @@ -0,0 +1,193 @@ +/* + * Copyright (C) 2013 ARM Ltd. + * Copyright (C) 2013 Linaro. + * + * This code is based on glibc cortex strings work originally authored by Linaro + * and re-licensed under GPLv2 for the Linux kernel. The original code can + * be found @ + * + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ + * files/head:/src/aarch64/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +/* + * Copy a buffer from src to dest (alignment handled by the hardware) + * + * Parameters: + * x0 - dest + * x1 - src + * x2 - n + * Returns: + * x0 - dest + */ +dstin .req x0 +src .req x1 +count .req x2 +tmp1 .req x3 +tmp1w .req w3 +tmp2 .req x4 +tmp2w .req w4 +dst .req x6 + +A_l .req x7 +A_h .req x8 +B_l .req x9 +B_h .req x10 +C_l .req x11 +C_h .req x12 +D_l .req x13 +D_h .req x14 + + mov dst, dstin + cmp count, #16 + /*When memory length is less than 16, the accessed are not aligned.*/ + b.lo .Ltiny15 + + neg tmp2, src + ands tmp2, tmp2, #15/* Bytes to reach alignment. */ + b.eq .LSrcAligned + sub count, count, tmp2 + /* + * Copy the leading memory data from src to dst in an increasing + * address order.By this way,the risk of overwritting the source + * memory data is eliminated when the distance between src and + * dst is less than 16. The memory accesses here are alignment. + */ + tbz tmp2, #0, 1f + ldrb1 tmp1w, src, #1 + strb1 tmp1w, dst, #1 +1: + tbz tmp2, #1, 2f + ldrh1 tmp1w, src, #2 + strh1 tmp1w, dst, #2 +2: + tbz tmp2, #2, 3f + ldr1 tmp1w, src, #4 + str1 tmp1w, dst, #4 +3: + tbz tmp2, #3, .LSrcAligned + ldr1 tmp1, src, #8 + str1 tmp1, dst, #8 + +.LSrcAligned: + cmp count, #64 + b.ge .Lcpy_over64 + /* + * Deal with small copies quickly by dropping straight into the + * exit block. + */ +.Ltail63: + /* + * Copy up to 48 bytes of data. At this point we only need the + * bottom 6 bits of count to be accurate. + */ + ands tmp1, count, #0x30 + b.eq .Ltiny15 + cmp tmp1w, #0x20 + b.eq 1f + b.lt 2f + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 +1: + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 +2: + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 +.Ltiny15: + /* + * Prefer to break one ldp/stp into several load/store to access + * memory in an increasing address order,rather than to load/store 16 + * bytes from (src-16) to (dst-16) and to backward the src to aligned + * address,which way is used in original cortex memcpy. If keeping + * the original memcpy process here, memmove need to satisfy the + * precondition that src address is at least 16 bytes bigger than dst + * address,otherwise some source data will be overwritten when memove + * call memcpy directly. To make memmove simpler and decouple the + * memcpy's dependency on memmove, withdrew the original process. + */ + tbz count, #3, 1f + ldr1 tmp1, src, #8 + str1 tmp1, dst, #8 +1: + tbz count, #2, 2f + ldr1 tmp1w, src, #4 + str1 tmp1w, dst, #4 +2: + tbz count, #1, 3f + ldrh1 tmp1w, src, #2 + strh1 tmp1w, dst, #2 +3: + tbz count, #0, .Lexitfunc + ldrb1 tmp1w, src, #1 + strb1 tmp1w, dst, #1 + + b .Lexitfunc + +.Lcpy_over64: + subs count, count, #128 + b.ge .Lcpy_body_large + /* + * Less than 128 bytes to copy, so handle 64 here and then jump + * to the tail. + */ + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 + ldp1 B_l, B_h, src, #16 + ldp1 C_l, C_h, src, #16 + stp1 B_l, B_h, dst, #16 + stp1 C_l, C_h, dst, #16 + ldp1 D_l, D_h, src, #16 + stp1 D_l, D_h, dst, #16 + + tst count, #0x3f + b.ne .Ltail63 + b .Lexitfunc + + /* + * Critical loop. Start at a new cache line boundary. Assuming + * 64 bytes per line this ensures the entire loop is in one line. + */ + .p2align L1_CACHE_SHIFT +.Lcpy_body_large: + /* pre-get 64 bytes data. */ + ldp1 A_l, A_h, src, #16 + ldp1 B_l, B_h, src, #16 + ldp1 C_l, C_h, src, #16 + ldp1 D_l, D_h, src, #16 +1: + /* + * interlace the load of next 64 bytes data block with store of the last + * loaded 64 bytes data. + */ + stp1 A_l, A_h, dst, #16 + ldp1 A_l, A_h, src, #16 + stp1 B_l, B_h, dst, #16 + ldp1 B_l, B_h, src, #16 + stp1 C_l, C_h, dst, #16 + ldp1 C_l, C_h, src, #16 + stp1 D_l, D_h, dst, #16 + ldp1 D_l, D_h, src, #16 + subs count, count, #64 + b.ge 1b + stp1 A_l, A_h, dst, #16 + stp1 B_l, B_h, dst, #16 + stp1 C_l, C_h, dst, #16 + stp1 D_l, D_h, dst, #16 + + tst count, #0x3f + b.ne .Ltail63 +.Lexitfunc: diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index a0aeeb9b7a28..a257b47e2dc4 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -15,7 +15,11 @@ */ #include <linux/linkage.h> + +#include <asm/alternative.h> #include <asm/assembler.h> +#include <asm/cpufeature.h> +#include <asm/sysreg.h> /* * Copy to user space from a kernel buffer (alignment handled by the hardware) @@ -28,14 +32,21 @@ * x0 - bytes not copied */ ENTRY(__copy_to_user) - add x4, x0, x2 // upper user buffer boundary - subs x2, x2, #8 +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) + add x5, x0, x2 // upper user buffer boundary + subs x2, x2, #16 + b.mi 1f +0: + ldp x3, x4, [x1], #16 + subs x2, x2, #16 +USER(9f, stp x3, x4, [x0], #16) + b.pl 0b +1: adds x2, x2, #8 b.mi 2f -1: ldr x3, [x1], #8 - subs x2, x2, #8 + sub x2, x2, #8 USER(9f, str x3, [x0], #8 ) - b.pl 1b 2: adds x2, x2, #4 b.mi 3f ldr w3, [x1], #4 @@ -51,11 +62,13 @@ USER(9f, strh w3, [x0], #2 ) ldrb w3, [x1] USER(9f, strb w3, [x0] ) 5: mov x0, #0 +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) ret ENDPROC(__copy_to_user) .section .fixup,"ax" .align 2 -9: sub x0, x4, x0 // bytes not copied +9: sub x0, x5, x0 // bytes not copied ret .previous diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S index 8636b7549163..4444c1d25f4b 100644 --- a/arch/arm64/lib/memchr.S +++ b/arch/arm64/lib/memchr.S @@ -41,4 +41,4 @@ ENTRY(memchr) ret 2: mov x0, #0 ret -ENDPROC(memchr) +ENDPIPROC(memchr) diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S index 6ea0776ba6de..ffbdec00327d 100644 --- a/arch/arm64/lib/memcmp.S +++ b/arch/arm64/lib/memcmp.S @@ -255,4 +255,4 @@ CPU_LE( rev data2, data2 ) .Lret0: mov result, #0 ret -ENDPROC(memcmp) +ENDPIPROC(memcmp) diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index 8a9a96d3ddae..67613937711f 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -36,166 +36,42 @@ * Returns: * x0 - dest */ -dstin .req x0 -src .req x1 -count .req x2 -tmp1 .req x3 -tmp1w .req w3 -tmp2 .req x4 -tmp2w .req w4 -tmp3 .req x5 -tmp3w .req w5 -dst .req x6 + .macro ldrb1 ptr, regB, val + ldrb \ptr, [\regB], \val + .endm -A_l .req x7 -A_h .req x8 -B_l .req x9 -B_h .req x10 -C_l .req x11 -C_h .req x12 -D_l .req x13 -D_h .req x14 + .macro strb1 ptr, regB, val + strb \ptr, [\regB], \val + .endm -ENTRY(memcpy) - mov dst, dstin - cmp count, #16 - /*When memory length is less than 16, the accessed are not aligned.*/ - b.lo .Ltiny15 + .macro ldrh1 ptr, regB, val + ldrh \ptr, [\regB], \val + .endm - neg tmp2, src - ands tmp2, tmp2, #15/* Bytes to reach alignment. */ - b.eq .LSrcAligned - sub count, count, tmp2 - /* - * Copy the leading memory data from src to dst in an increasing - * address order.By this way,the risk of overwritting the source - * memory data is eliminated when the distance between src and - * dst is less than 16. The memory accesses here are alignment. - */ - tbz tmp2, #0, 1f - ldrb tmp1w, [src], #1 - strb tmp1w, [dst], #1 -1: - tbz tmp2, #1, 2f - ldrh tmp1w, [src], #2 - strh tmp1w, [dst], #2 -2: - tbz tmp2, #2, 3f - ldr tmp1w, [src], #4 - str tmp1w, [dst], #4 -3: - tbz tmp2, #3, .LSrcAligned - ldr tmp1, [src],#8 - str tmp1, [dst],#8 + .macro strh1 ptr, regB, val + strh \ptr, [\regB], \val + .endm -.LSrcAligned: - cmp count, #64 - b.ge .Lcpy_over64 - /* - * Deal with small copies quickly by dropping straight into the - * exit block. - */ -.Ltail63: - /* - * Copy up to 48 bytes of data. At this point we only need the - * bottom 6 bits of count to be accurate. - */ - ands tmp1, count, #0x30 - b.eq .Ltiny15 - cmp tmp1w, #0x20 - b.eq 1f - b.lt 2f - ldp A_l, A_h, [src], #16 - stp A_l, A_h, [dst], #16 -1: - ldp A_l, A_h, [src], #16 - stp A_l, A_h, [dst], #16 -2: - ldp A_l, A_h, [src], #16 - stp A_l, A_h, [dst], #16 -.Ltiny15: - /* - * Prefer to break one ldp/stp into several load/store to access - * memory in an increasing address order,rather than to load/store 16 - * bytes from (src-16) to (dst-16) and to backward the src to aligned - * address,which way is used in original cortex memcpy. If keeping - * the original memcpy process here, memmove need to satisfy the - * precondition that src address is at least 16 bytes bigger than dst - * address,otherwise some source data will be overwritten when memove - * call memcpy directly. To make memmove simpler and decouple the - * memcpy's dependency on memmove, withdrew the original process. - */ - tbz count, #3, 1f - ldr tmp1, [src], #8 - str tmp1, [dst], #8 -1: - tbz count, #2, 2f - ldr tmp1w, [src], #4 - str tmp1w, [dst], #4 -2: - tbz count, #1, 3f - ldrh tmp1w, [src], #2 - strh tmp1w, [dst], #2 -3: - tbz count, #0, .Lexitfunc - ldrb tmp1w, [src] - strb tmp1w, [dst] + .macro ldr1 ptr, regB, val + ldr \ptr, [\regB], \val + .endm -.Lexitfunc: - ret + .macro str1 ptr, regB, val + str \ptr, [\regB], \val + .endm -.Lcpy_over64: - subs count, count, #128 - b.ge .Lcpy_body_large - /* - * Less than 128 bytes to copy, so handle 64 here and then jump - * to the tail. - */ - ldp A_l, A_h, [src],#16 - stp A_l, A_h, [dst],#16 - ldp B_l, B_h, [src],#16 - ldp C_l, C_h, [src],#16 - stp B_l, B_h, [dst],#16 - stp C_l, C_h, [dst],#16 - ldp D_l, D_h, [src],#16 - stp D_l, D_h, [dst],#16 + .macro ldp1 ptr, regB, regC, val + ldp \ptr, \regB, [\regC], \val + .endm - tst count, #0x3f - b.ne .Ltail63 - ret + .macro stp1 ptr, regB, regC, val + stp \ptr, \regB, [\regC], \val + .endm - /* - * Critical loop. Start at a new cache line boundary. Assuming - * 64 bytes per line this ensures the entire loop is in one line. - */ - .p2align L1_CACHE_SHIFT -.Lcpy_body_large: - /* pre-get 64 bytes data. */ - ldp A_l, A_h, [src],#16 - ldp B_l, B_h, [src],#16 - ldp C_l, C_h, [src],#16 - ldp D_l, D_h, [src],#16 -1: - /* - * interlace the load of next 64 bytes data block with store of the last - * loaded 64 bytes data. - */ - stp A_l, A_h, [dst],#16 - ldp A_l, A_h, [src],#16 - stp B_l, B_h, [dst],#16 - ldp B_l, B_h, [src],#16 - stp C_l, C_h, [dst],#16 - ldp C_l, C_h, [src],#16 - stp D_l, D_h, [dst],#16 - ldp D_l, D_h, [src],#16 - subs count, count, #64 - b.ge 1b - stp A_l, A_h, [dst],#16 - stp B_l, B_h, [dst],#16 - stp C_l, C_h, [dst],#16 - stp D_l, D_h, [dst],#16 - - tst count, #0x3f - b.ne .Ltail63 + .weak memcpy +ENTRY(__memcpy) +ENTRY(memcpy) +#include "copy_template.S" ret -ENDPROC(memcpy) +ENDPIPROC(memcpy) +ENDPROC(__memcpy) diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S index 57b19ea2dad4..a5a4459013b1 100644 --- a/arch/arm64/lib/memmove.S +++ b/arch/arm64/lib/memmove.S @@ -57,12 +57,14 @@ C_h .req x12 D_l .req x13 D_h .req x14 + .weak memmove +ENTRY(__memmove) ENTRY(memmove) cmp dstin, src - b.lo memcpy + b.lo __memcpy add tmp1, src, count cmp dstin, tmp1 - b.hs memcpy /* No overlap. */ + b.hs __memcpy /* No overlap. */ add dst, dstin, count add src, src, count @@ -194,4 +196,5 @@ ENTRY(memmove) tst count, #0x3f b.ne .Ltail63 ret -ENDPROC(memmove) +ENDPIPROC(memmove) +ENDPROC(__memmove) diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S index 7c72dfd36b63..f2670a9f218c 100644 --- a/arch/arm64/lib/memset.S +++ b/arch/arm64/lib/memset.S @@ -54,6 +54,8 @@ dst .req x8 tmp3w .req w9 tmp3 .req x9 + .weak memset +ENTRY(__memset) ENTRY(memset) mov dst, dstin /* Preserve return value. */ and A_lw, val, #255 @@ -213,4 +215,5 @@ ENTRY(memset) ands count, count, zva_bits_x b.ne .Ltail_maybe_long ret -ENDPROC(memset) +ENDPIPROC(memset) +ENDPROC(__memset) diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S index 42f828b06c59..471fe61760ef 100644 --- a/arch/arm64/lib/strcmp.S +++ b/arch/arm64/lib/strcmp.S @@ -231,4 +231,4 @@ CPU_BE( orr syndrome, diff, has_nul ) lsr data1, data1, #56 sub result, data1, data2, lsr #56 ret -ENDPROC(strcmp) +ENDPIPROC(strcmp) diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S index 987b68b9ce44..55ccc8e24c08 100644 --- a/arch/arm64/lib/strlen.S +++ b/arch/arm64/lib/strlen.S @@ -123,4 +123,4 @@ CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ csinv data1, data1, xzr, le csel data2, data2, data2a, le b .Lrealigned -ENDPROC(strlen) +ENDPIPROC(strlen) diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S index 0224cf5a5533..e267044761c6 100644 --- a/arch/arm64/lib/strncmp.S +++ b/arch/arm64/lib/strncmp.S @@ -307,4 +307,4 @@ CPU_BE( orr syndrome, diff, has_nul ) .Lret0: mov result, #0 ret -ENDPROC(strncmp) +ENDPIPROC(strncmp) diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 773d37a14039..4e214e7202a2 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -4,3 +4,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_ARM64_PTDUMP) += dump.o + +obj-$(CONFIG_KASAN) += kasan_init.o +KASAN_SANITIZE_kasan_init.o := n +CFLAGS_mmu.o := -I$(srctree)/scripts/dtc/libfdt/ diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 2560e1e1562e..c97f7eec519a 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -22,9 +22,7 @@ #include <linux/init.h> #include <asm/assembler.h> #include <asm/cpufeature.h> -#include <asm/alternative-asm.h> - -#include "proc-macros.S" +#include <asm/alternative.h> /* * __flush_dcache_all() @@ -154,24 +152,30 @@ ENDPROC(__flush_cache_user_range) /* * __flush_dcache_area(kaddr, size) * - * Ensure that the data held in the page kaddr is written back to the - * page in question. + * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * are cleaned and invalidated to the PoC. * * - kaddr - kernel address * - size - size in question */ ENTRY(__flush_dcache_area) - dcache_line_size x2, x3 - add x1, x0, x1 - sub x3, x2, #1 - bic x0, x0, x3 -1: dc civac, x0 // clean & invalidate D line / unified line - add x0, x0, x2 - cmp x0, x1 - b.lo 1b - dsb sy + dcache_by_line_op civac, sy, x0, x1, x2, x3 + ret +ENDPIPROC(__flush_dcache_area) + +/* + * __clean_dcache_area_pou(kaddr, size) + * + * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * are cleaned to the PoU. + * + * - kaddr - kernel address + * - size - size in question + */ +ENTRY(__clean_dcache_area_pou) + dcache_by_line_op cvau, ish, x0, x1, x2, x3 ret -ENDPROC(__flush_dcache_area) +ENDPROC(__clean_dcache_area_pou) /* * __inval_cache_range(start, end) @@ -204,7 +208,7 @@ __dma_inv_range: b.lo 2b dsb sy ret -ENDPROC(__inval_cache_range) +ENDPIPROC(__inval_cache_range) ENDPROC(__dma_inv_range) /* @@ -239,7 +243,7 @@ ENTRY(__dma_flush_range) b.lo 1b dsb sy ret -ENDPROC(__dma_flush_range) +ENDPIPROC(__dma_flush_range) /* * __dma_map_area(start, size, dir) @@ -252,7 +256,7 @@ ENTRY(__dma_map_area) cmp w2, #DMA_FROM_DEVICE b.eq __dma_inv_range b __dma_clean_range -ENDPROC(__dma_map_area) +ENDPIPROC(__dma_map_area) /* * __dma_unmap_area(start, size, dir) @@ -265,4 +269,4 @@ ENTRY(__dma_unmap_area) cmp w2, #DMA_TO_DEVICE b.ne __dma_inv_range ret -ENDPROC(__dma_unmap_area) +ENDPIPROC(__dma_unmap_area) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 76c1e6cd36fc..4b9ec4484e3f 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -17,151 +17,187 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <linux/init.h> +#include <linux/bitops.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/mm.h> -#include <linux/smp.h> -#include <linux/percpu.h> +#include <asm/cpufeature.h> #include <asm/mmu_context.h> #include <asm/tlbflush.h> -#include <asm/cachetype.h> -#define asid_bits(reg) \ - (((read_cpuid(ID_AA64MMFR0_EL1) & 0xf0) >> 2) + 8) +static u32 asid_bits; +static DEFINE_RAW_SPINLOCK(cpu_asid_lock); -#define ASID_FIRST_VERSION (1 << MAX_ASID_BITS) +static atomic64_t asid_generation; +static unsigned long *asid_map; -static DEFINE_RAW_SPINLOCK(cpu_asid_lock); -unsigned int cpu_last_asid = ASID_FIRST_VERSION; +static DEFINE_PER_CPU(atomic64_t, active_asids); +static DEFINE_PER_CPU(u64, reserved_asids); +static cpumask_t tlb_flush_pending; -/* - * We fork()ed a process, and we need a new context for the child to run in. - */ -void __init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - mm->context.id = 0; - raw_spin_lock_init(&mm->context.id_lock); -} +#define ASID_MASK (~GENMASK(asid_bits - 1, 0)) +#define ASID_FIRST_VERSION (1UL << asid_bits) +#define NUM_USER_ASIDS ASID_FIRST_VERSION -static void flush_context(void) +static void flush_context(unsigned int cpu) { - /* set the reserved TTBR0 before flushing the TLB */ - cpu_set_reserved_ttbr0(); - flush_tlb_all(); + int i; + u64 asid; + + /* Update the list of reserved ASIDs and the ASID bitmap. */ + bitmap_clear(asid_map, 0, NUM_USER_ASIDS); + + /* + * Ensure the generation bump is observed before we xchg the + * active_asids. + */ + smp_wmb(); + + for_each_possible_cpu(i) { + asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0); + /* + * If this CPU has already been through a + * rollover, but hasn't run another task in + * the meantime, we must preserve its reserved + * ASID, as this is the only trace we have of + * the process it is still running. + */ + if (asid == 0) + asid = per_cpu(reserved_asids, i); + __set_bit(asid & ~ASID_MASK, asid_map); + per_cpu(reserved_asids, i) = asid; + } + + /* Queue a TLB invalidate and flush the I-cache if necessary. */ + cpumask_setall(&tlb_flush_pending); + if (icache_is_aivivt()) __flush_icache_all(); } -#ifdef CONFIG_SMP +static int is_reserved_asid(u64 asid) +{ + int cpu; + for_each_possible_cpu(cpu) + if (per_cpu(reserved_asids, cpu) == asid) + return 1; + return 0; +} -static void set_mm_context(struct mm_struct *mm, unsigned int asid) +static u64 new_context(struct mm_struct *mm, unsigned int cpu) { - unsigned long flags; + static u32 cur_idx = 1; + u64 asid = atomic64_read(&mm->context.id); + u64 generation = atomic64_read(&asid_generation); - /* - * Locking needed for multi-threaded applications where the same - * mm->context.id could be set from different CPUs during the - * broadcast. This function is also called via IPI so the - * mm->context.id_lock has to be IRQ-safe. - */ - raw_spin_lock_irqsave(&mm->context.id_lock, flags); - if (likely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) { + if (asid != 0) { /* - * Old version of ASID found. Set the new one and reset - * mm_cpumask(mm). + * If our current ASID was active during a rollover, we + * can continue to use it and this was just a false alarm. */ - mm->context.id = asid; - cpumask_clear(mm_cpumask(mm)); + if (is_reserved_asid(asid)) + return generation | (asid & ~ASID_MASK); + + /* + * We had a valid ASID in a previous life, so try to re-use + * it if possible. + */ + asid &= ~ASID_MASK; + if (!__test_and_set_bit(asid, asid_map)) + goto bump_gen; } - raw_spin_unlock_irqrestore(&mm->context.id_lock, flags); /* - * Set the mm_cpumask(mm) bit for the current CPU. + * Allocate a free ASID. If we can't find one, take a note of the + * currently active ASIDs and mark the TLBs as requiring flushes. + * We always count from ASID #1, as we use ASID #0 when setting a + * reserved TTBR0 for the init_mm. */ - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx); + if (asid != NUM_USER_ASIDS) + goto set_asid; + + /* We're out of ASIDs, so increment the global generation count */ + generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION, + &asid_generation); + flush_context(cpu); + + /* We have at least 1 ASID per CPU, so this will always succeed */ + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); + +set_asid: + __set_bit(asid, asid_map); + cur_idx = asid; + +bump_gen: + asid |= generation; + cpumask_clear(mm_cpumask(mm)); + return asid; } -/* - * Reset the ASID on the current CPU. This function call is broadcast from the - * CPU handling the ASID rollover and holding cpu_asid_lock. - */ -static void reset_context(void *info) +void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) { - unsigned int asid; - unsigned int cpu = smp_processor_id(); - struct mm_struct *mm = current->active_mm; + unsigned long flags; + u64 asid; + + asid = atomic64_read(&mm->context.id); /* - * current->active_mm could be init_mm for the idle thread immediately - * after secondary CPU boot or hotplug. TTBR0_EL1 is already set to - * the reserved value, so no need to reset any context. + * The memory ordering here is subtle. We rely on the control + * dependency between the generation read and the update of + * active_asids to ensure that we are synchronised with a + * parallel rollover (i.e. this pairs with the smp_wmb() in + * flush_context). */ - if (mm == &init_mm) - return; + if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits) + && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid)) + goto switch_mm_fastpath; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + /* Check that our ASID belongs to the current generation. */ + asid = atomic64_read(&mm->context.id); + if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) { + asid = new_context(mm, cpu); + atomic64_set(&mm->context.id, asid); + } - smp_rmb(); - asid = cpu_last_asid + cpu; + if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) + local_flush_tlb_all(); - flush_context(); - set_mm_context(mm, asid); + atomic64_set(&per_cpu(active_asids, cpu), asid); + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); - /* set the new ASID */ +switch_mm_fastpath: + cpumask_set_cpu(cpu, mm_cpumask(mm)); cpu_switch_mm(mm->pgd, mm); } -#else - -static inline void set_mm_context(struct mm_struct *mm, unsigned int asid) +static int asids_init(void) { - mm->context.id = asid; - cpumask_copy(mm_cpumask(mm), cpumask_of(smp_processor_id())); -} - -#endif - -void __new_context(struct mm_struct *mm) -{ - unsigned int asid; - unsigned int bits = asid_bits(); - - raw_spin_lock(&cpu_asid_lock); -#ifdef CONFIG_SMP - /* - * Check the ASID again, in case the change was broadcast from another - * CPU before we acquired the lock. - */ - if (!unlikely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) { - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); - raw_spin_unlock(&cpu_asid_lock); - return; - } -#endif - /* - * At this point, it is guaranteed that the current mm (with an old - * ASID) isn't active on any other CPU since the ASIDs are changed - * simultaneously via IPI. - */ - asid = ++cpu_last_asid; - - /* - * If we've used up all our ASIDs, we need to start a new version and - * flush the TLB. - */ - if (unlikely((asid & ((1 << bits) - 1)) == 0)) { - /* increment the ASID version */ - cpu_last_asid += (1 << MAX_ASID_BITS) - (1 << bits); - if (cpu_last_asid == 0) - cpu_last_asid = ASID_FIRST_VERSION; - asid = cpu_last_asid + smp_processor_id(); - flush_context(); -#ifdef CONFIG_SMP - smp_wmb(); - smp_call_function(reset_context, NULL, 1); -#endif - cpu_last_asid += NR_CPUS - 1; + int fld = cpuid_feature_extract_field(read_cpuid(ID_AA64MMFR0_EL1), 4); + + switch (fld) { + default: + pr_warn("Unknown ASID size (%d); assuming 8-bit\n", fld); + /* Fallthrough */ + case 0: + asid_bits = 8; + break; + case 2: + asid_bits = 16; } - set_mm_context(mm, asid); - raw_spin_unlock(&cpu_asid_lock); + /* If we end up with more CPUs than ASIDs, expect things to crash */ + WARN_ON(NUM_USER_ASIDS < num_possible_cpus()); + atomic64_set(&asid_generation, ASID_FIRST_VERSION); + asid_map = kzalloc(BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(*asid_map), + GFP_KERNEL); + if (!asid_map) + panic("Failed to allocate bitmap for %lu ASIDs\n", + NUM_USER_ASIDS); + + pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS); + return 0; } +early_initcall(asids_init); diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index b0bd4e5fd5cf..dbc71e8c37f3 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -18,6 +18,7 @@ */ #include <linux/gfp.h> +#include <linux/acpi.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/genalloc.h> @@ -28,9 +29,6 @@ #include <asm/cacheflush.h> -struct dma_map_ops *dma_ops; -EXPORT_SYMBOL(dma_ops); - static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot, bool coherent) { @@ -414,15 +412,101 @@ out: return -ENOMEM; } -static int __init arm64_dma_init(void) +/******************************************** + * The following APIs are for dummy DMA ops * + ********************************************/ + +static void *__dummy_alloc(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) { - int ret; + return NULL; +} - dma_ops = &swiotlb_dma_ops; +static void __dummy_free(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ +} - ret = atomic_pool_init(); +static int __dummy_mmap(struct device *dev, + struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) +{ + return -ENXIO; +} - return ret; +static dma_addr_t __dummy_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + return DMA_ERROR_CODE; +} + +static void __dummy_unmap_page(struct device *dev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ +} + +static int __dummy_map_sg(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + return 0; +} + +static void __dummy_unmap_sg(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ +} + +static void __dummy_sync_single(struct device *dev, + dma_addr_t dev_addr, size_t size, + enum dma_data_direction dir) +{ +} + +static void __dummy_sync_sg(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ +} + +static int __dummy_mapping_error(struct device *hwdev, dma_addr_t dma_addr) +{ + return 1; +} + +static int __dummy_dma_supported(struct device *hwdev, u64 mask) +{ + return 0; +} + +struct dma_map_ops dummy_dma_ops = { + .alloc = __dummy_alloc, + .free = __dummy_free, + .mmap = __dummy_mmap, + .map_page = __dummy_map_page, + .unmap_page = __dummy_unmap_page, + .map_sg = __dummy_map_sg, + .unmap_sg = __dummy_unmap_sg, + .sync_single_for_cpu = __dummy_sync_single, + .sync_single_for_device = __dummy_sync_single, + .sync_sg_for_cpu = __dummy_sync_sg, + .sync_sg_for_device = __dummy_sync_sg, + .mapping_error = __dummy_mapping_error, + .dma_supported = __dummy_dma_supported, +}; +EXPORT_SYMBOL(dummy_dma_ops); + +static int __init arm64_dma_init(void) +{ + return atomic_pool_init(); } arch_initcall(arm64_dma_init); @@ -434,3 +518,464 @@ static int __init dma_debug_do_init(void) return 0; } fs_initcall(dma_debug_do_init); + + +#ifdef CONFIG_IOMMU_DMA +#include <linux/dma-iommu.h> +#include <linux/platform_device.h> +#include <linux/amba/bus.h> + +/* Thankfully, all cache ops are by VA so we can ignore phys here */ +static void flush_page(struct device *dev, const void *virt, phys_addr_t phys) +{ + __dma_flush_range(virt, virt + PAGE_SIZE); +} + +static void *__iommu_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, + struct dma_attrs *attrs) +{ + bool coherent = is_device_dma_coherent(dev); + int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent); + void *addr; + + if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n")) + return NULL; + /* + * Some drivers rely on this, and we probably don't want the + * possibility of stale kernel data being read by devices anyway. + */ + gfp |= __GFP_ZERO; + + if (gfp & __GFP_WAIT) { + struct page **pages; + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); + + pages = iommu_dma_alloc(dev, size, gfp, ioprot, handle, + flush_page); + if (!pages) + return NULL; + + addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot, + __builtin_return_address(0)); + if (!addr) + iommu_dma_free(dev, pages, size, handle); + } else { + struct page *page; + /* + * In atomic context we can't remap anything, so we'll only + * get the virtually contiguous buffer we need by way of a + * physically contiguous allocation. + */ + if (coherent) { + page = alloc_pages(gfp, get_order(size)); + addr = page ? page_address(page) : NULL; + } else { + addr = __alloc_from_pool(size, &page, gfp); + } + if (!addr) + return NULL; + + *handle = iommu_dma_map_page(dev, page, 0, size, ioprot); + if (iommu_dma_mapping_error(dev, *handle)) { + if (coherent) + __free_pages(page, get_order(size)); + else + __free_from_pool(addr, size); + addr = NULL; + } + } + return addr; +} + +static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs) +{ + /* + * @cpu_addr will be one of 3 things depending on how it was allocated: + * - A remapped array of pages from iommu_dma_alloc(), for all + * non-atomic allocations. + * - A non-cacheable alias from the atomic pool, for atomic + * allocations by non-coherent devices. + * - A normal lowmem address, for atomic allocations by + * coherent devices. + * Hence how dodgy the below logic looks... + */ + if (__in_atomic_pool(cpu_addr, size)) { + iommu_dma_unmap_page(dev, handle, size, 0, NULL); + __free_from_pool(cpu_addr, size); + } else if (is_vmalloc_addr(cpu_addr)){ + struct vm_struct *area = find_vm_area(cpu_addr); + + if (WARN_ON(!area || !area->pages)) + return; + iommu_dma_free(dev, area->pages, size, &handle); + dma_common_free_remap(cpu_addr, size, VM_USERMAP); + } else { + iommu_dma_unmap_page(dev, handle, size, 0, NULL); + __free_pages(virt_to_page(cpu_addr), get_order(size)); + } +} + +static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) +{ + struct vm_struct *area; + int ret; + + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, + is_device_dma_coherent(dev)); + + if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + return ret; + + area = find_vm_area(cpu_addr); + if (WARN_ON(!area || !area->pages)) + return -ENXIO; + + return iommu_dma_mmap(area->pages, size, vma); +} + +static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, + size_t size, struct dma_attrs *attrs) +{ + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct vm_struct *area = find_vm_area(cpu_addr); + + if (WARN_ON(!area || !area->pages)) + return -ENXIO; + + return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size, + GFP_KERNEL); +} + +static void __iommu_sync_single_for_cpu(struct device *dev, + dma_addr_t dev_addr, size_t size, + enum dma_data_direction dir) +{ + phys_addr_t phys; + + if (is_device_dma_coherent(dev)) + return; + + phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr); + __dma_unmap_area(phys_to_virt(phys), size, dir); +} + +static void __iommu_sync_single_for_device(struct device *dev, + dma_addr_t dev_addr, size_t size, + enum dma_data_direction dir) +{ + phys_addr_t phys; + + if (is_device_dma_coherent(dev)) + return; + + phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr); + __dma_map_area(phys_to_virt(phys), size, dir); +} + +static dma_addr_t __iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + bool coherent = is_device_dma_coherent(dev); + int prot = dma_direction_to_prot(dir, coherent); + dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot); + + if (!iommu_dma_mapping_error(dev, dev_addr) && + !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __iommu_sync_single_for_device(dev, dev_addr, size, dir); + + return dev_addr; +} + +static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __iommu_sync_single_for_cpu(dev, dev_addr, size, dir); + + iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs); +} + +static void __iommu_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + if (is_device_dma_coherent(dev)) + return; + + for_each_sg(sgl, sg, nelems, i) + __dma_unmap_area(sg_virt(sg), sg->length, dir); +} + +static void __iommu_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + if (is_device_dma_coherent(dev)) + return; + + for_each_sg(sgl, sg, nelems, i) + __dma_map_area(sg_virt(sg), sg->length, dir); +} + +static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + bool coherent = is_device_dma_coherent(dev); + + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __iommu_sync_sg_for_device(dev, sgl, nelems, dir); + + return iommu_dma_map_sg(dev, sgl, nelems, + dma_direction_to_prot(dir, coherent)); +} + +static void __iommu_unmap_sg_attrs(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __iommu_sync_sg_for_cpu(dev, sgl, nelems, dir); + + iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs); +} + +static struct dma_map_ops iommu_dma_ops = { + .alloc = __iommu_alloc_attrs, + .free = __iommu_free_attrs, + .mmap = __iommu_mmap_attrs, + .get_sgtable = __iommu_get_sgtable, + .map_page = __iommu_map_page, + .unmap_page = __iommu_unmap_page, + .map_sg = __iommu_map_sg_attrs, + .unmap_sg = __iommu_unmap_sg_attrs, + .sync_single_for_cpu = __iommu_sync_single_for_cpu, + .sync_single_for_device = __iommu_sync_single_for_device, + .sync_sg_for_cpu = __iommu_sync_sg_for_cpu, + .sync_sg_for_device = __iommu_sync_sg_for_device, + .dma_supported = iommu_dma_supported, + .mapping_error = iommu_dma_mapping_error, +}; + +/* + * TODO: Right now __iommu_setup_dma_ops() gets called too early to do + * everything it needs to - the device is only partially created and the + * IOMMU driver hasn't seen it yet, so it can't have a group. Thus we + * need this delayed attachment dance. Once IOMMU probe ordering is sorted + * to move the arch_setup_dma_ops() call later, all the notifier bits below + * become unnecessary, and will go away. + */ +struct iommu_dma_notifier_data { + struct list_head list; + struct device *dev; + const struct iommu_ops *ops; + u64 dma_base; + u64 size; +}; +static LIST_HEAD(iommu_dma_masters); +static DEFINE_MUTEX(iommu_dma_notifier_lock); + +/* + * Temporarily "borrow" a domain feature flag to to tell if we had to resort + * to creating our own domain here, in case we need to clean it up again. + */ +#define __IOMMU_DOMAIN_FAKE_DEFAULT (1U << 31) + +static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops, + u64 dma_base, u64 size) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + + /* + * Best case: The device is either part of a group which was + * already attached to a domain in a previous call, or it's + * been put in a default DMA domain by the IOMMU core. + */ + if (!domain) { + /* + * Urgh. The IOMMU core isn't going to do default domains + * for non-PCI devices anyway, until it has some means of + * abstracting the entirely implementation-specific + * sideband data/SoC topology/unicorn dust that may or + * may not differentiate upstream masters. + * So until then, HORRIBLE HACKS! + */ + domain = ops->domain_alloc(IOMMU_DOMAIN_DMA); + if (!domain) + goto out_no_domain; + + domain->ops = ops; + domain->type = IOMMU_DOMAIN_DMA | __IOMMU_DOMAIN_FAKE_DEFAULT; + + if (iommu_attach_device(domain, dev)) + goto out_put_domain; + } + + if (iommu_dma_init_domain(domain, dma_base, size)) + goto out_detach; + + dev->archdata.dma_ops = &iommu_dma_ops; + return true; + +out_detach: + iommu_detach_device(domain, dev); +out_put_domain: + if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT) + iommu_domain_free(domain); +out_no_domain: + pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", + dev_name(dev)); + return false; +} + +static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops, + u64 dma_base, u64 size) +{ + struct iommu_dma_notifier_data *iommudata; + + iommudata = kzalloc(sizeof(*iommudata), GFP_KERNEL); + if (!iommudata) + return; + + iommudata->dev = dev; + iommudata->ops = ops; + iommudata->dma_base = dma_base; + iommudata->size = size; + + mutex_lock(&iommu_dma_notifier_lock); + list_add(&iommudata->list, &iommu_dma_masters); + mutex_unlock(&iommu_dma_notifier_lock); +} + +static int __iommu_attach_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct iommu_dma_notifier_data *master, *tmp; + + if (action != BUS_NOTIFY_ADD_DEVICE) + return 0; + + mutex_lock(&iommu_dma_notifier_lock); + list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) { + if (do_iommu_attach(master->dev, master->ops, + master->dma_base, master->size)) { + list_del(&master->list); + kfree(master); + } + } + mutex_unlock(&iommu_dma_notifier_lock); + return 0; +} + +static int register_iommu_dma_ops_notifier(struct bus_type *bus) +{ + struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL); + int ret; + + if (!nb) + return -ENOMEM; + /* + * The device must be attached to a domain before the driver probe + * routine gets a chance to start allocating DMA buffers. However, + * the IOMMU driver also needs a chance to configure the iommu_group + * via its add_device callback first, so we need to make the attach + * happen between those two points. Since the IOMMU core uses a bus + * notifier with default priority for add_device, do the same but + * with a lower priority to ensure the appropriate ordering. + */ + nb->notifier_call = __iommu_attach_notifier; + nb->priority = -100; + + ret = bus_register_notifier(bus, nb); + if (ret) { + pr_warn("Failed to register DMA domain notifier; IOMMU DMA ops unavailable on bus '%s'\n", + bus->name); + kfree(nb); + } + return ret; +} + +static int __init __iommu_dma_init(void) +{ + int ret; + + ret = iommu_dma_init(); + if (!ret) + ret = register_iommu_dma_ops_notifier(&platform_bus_type); + if (!ret) + ret = register_iommu_dma_ops_notifier(&amba_bustype); + + /* handle devices queued before this arch_initcall */ + if (!ret) + __iommu_attach_notifier(NULL, BUS_NOTIFY_ADD_DEVICE, NULL); + return ret; +} +arch_initcall(__iommu_dma_init); + +static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + const struct iommu_ops *ops) +{ + struct iommu_group *group; + + if (!ops) + return; + /* + * TODO: As a concession to the future, we're ready to handle being + * called both early and late (i.e. after bus_add_device). Once all + * the platform bus code is reworked to call us late and the notifier + * junk above goes away, move the body of do_iommu_attach here. + */ + group = iommu_group_get(dev); + if (group) { + do_iommu_attach(dev, ops, dma_base, size); + iommu_group_put(group); + } else { + queue_iommu_attach(dev, ops, dma_base, size); + } +} + +void arch_teardown_dma_ops(struct device *dev) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + + if (domain) { + iommu_detach_device(domain, dev); + if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT) + iommu_domain_free(domain); + } + + dev->archdata.dma_ops = NULL; +} + +#else + +static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + struct iommu_ops *iommu) +{ } + +#endif /* CONFIG_IOMMU_DMA */ + +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + struct iommu_ops *iommu, bool coherent) +{ + if (!dev->archdata.dma_ops) + dev->archdata.dma_ops = &swiotlb_dma_ops; + + dev->archdata.dma_coherent = coherent; + __iommu_setup_dma_ops(dev, dma_base, size, iommu); +} diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index fa5efaa5c3ac..10a1fc5004dc 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -30,9 +30,11 @@ #include <linux/highmem.h> #include <linux/perf_event.h> +#include <asm/cpufeature.h> #include <asm/exception.h> #include <asm/debug-monitors.h> #include <asm/esr.h> +#include <asm/sysreg.h> #include <asm/system_misc.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> @@ -225,6 +227,13 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, } /* + * PAN bit set implies the fault happened in kernel space, but not + * in the arch's user access functions. + */ + if (IS_ENABLED(CONFIG_ARM64_PAN) && (regs->pstate & PSR_PAN_BIT)) + goto no_context; + + /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, * we can bug out early if this is from code which shouldn't. @@ -531,3 +540,10 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, return 0; } + +#ifdef CONFIG_ARM64_PAN +void cpu_enable_pan(void) +{ + config_sctlr_el1(SCTLR_EL1_SPAN, 0); +} +#endif /* CONFIG_ARM64_PAN */ diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index bfb8eb168f2d..46649d6e6c5a 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -34,19 +34,24 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start, __flush_icache_all(); } +static void sync_icache_aliases(void *kaddr, unsigned long len) +{ + unsigned long addr = (unsigned long)kaddr; + + if (icache_is_aliasing()) { + __clean_dcache_area_pou(kaddr, len); + __flush_icache_all(); + } else { + flush_icache_range(addr, addr + len); + } +} + static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *kaddr, unsigned long len) { - if (vma->vm_flags & VM_EXEC) { - unsigned long addr = (unsigned long)kaddr; - if (icache_is_aliasing()) { - __flush_dcache_area(kaddr, len); - __flush_icache_all(); - } else { - flush_icache_range(addr, addr + len); - } - } + if (vma->vm_flags & VM_EXEC) + sync_icache_aliases(kaddr, len); } /* @@ -60,27 +65,25 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len) { -#ifdef CONFIG_SMP preempt_disable(); -#endif memcpy(dst, src, len); flush_ptrace_access(vma, page, uaddr, dst, len); -#ifdef CONFIG_SMP preempt_enable(); -#endif } void __sync_icache_dcache(pte_t pte, unsigned long addr) { struct page *page = pte_page(pte); - if (!test_and_set_bit(PG_dcache_clean, &page->flags)) { - __flush_dcache_area(page_address(page), - PAGE_SIZE << compound_order(page)); - __flush_icache_all(); - } else if (icache_is_aivivt()) { + /* no flushing needed for anonymous pages */ + if (!page_mapping(page)) + return; + + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + sync_icache_aliases(page_address(page), + PAGE_SIZE << compound_order(page)); + else if (icache_is_aivivt()) __flush_icache_all(); - } } /* @@ -98,7 +101,6 @@ EXPORT_SYMBOL(flush_dcache_page); /* * Additional functions defined in assembly. */ -EXPORT_SYMBOL(flush_cache_all); EXPORT_SYMBOL(flush_icache_range); #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index ae8f940152aa..f81d89fa75b6 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -298,6 +298,9 @@ void __init mem_init(void) #define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K) pr_notice("Virtual kernel memory layout:\n" +#ifdef CONFIG_KASAN + " kasan : 0x%16lx - 0x%16lx (%6ld GB)\n" +#endif " vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n" #ifdef CONFIG_SPARSEMEM_VMEMMAP " vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n" @@ -310,6 +313,9 @@ void __init mem_init(void) " .init : 0x%p" " - 0x%p" " (%6ld KB)\n" " .text : 0x%p" " - 0x%p" " (%6ld KB)\n" " .data : 0x%p" " - 0x%p" " (%6ld KB)\n", +#ifdef CONFIG_KASAN + MLG(KASAN_SHADOW_START, KASAN_SHADOW_END), +#endif MLG(VMALLOC_START, VMALLOC_END), #ifdef CONFIG_SPARSEMEM_VMEMMAP MLG(VMEMMAP_START, @@ -353,7 +359,6 @@ void free_initmem(void) { fixup_init(); free_initmem_default(0); - free_alternatives_memory(); } #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c new file mode 100644 index 000000000000..cf038c7d9fa9 --- /dev/null +++ b/arch/arm64/mm/kasan_init.c @@ -0,0 +1,165 @@ +/* + * This file contains kasan initialization code for ARM64. + * + * Copyright (c) 2015 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#define pr_fmt(fmt) "kasan: " fmt +#include <linux/kasan.h> +#include <linux/kernel.h> +#include <linux/memblock.h> +#include <linux/start_kernel.h> + +#include <asm/page.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> + +static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE); + +static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr, + unsigned long end) +{ + pte_t *pte; + unsigned long next; + + if (pmd_none(*pmd)) + pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte); + + pte = pte_offset_kernel(pmd, addr); + do { + next = addr + PAGE_SIZE; + set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page), + PAGE_KERNEL)); + } while (pte++, addr = next, addr != end && pte_none(*pte)); +} + +static void __init kasan_early_pmd_populate(pud_t *pud, + unsigned long addr, + unsigned long end) +{ + pmd_t *pmd; + unsigned long next; + + if (pud_none(*pud)) + pud_populate(&init_mm, pud, kasan_zero_pmd); + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + kasan_early_pte_populate(pmd, addr, next); + } while (pmd++, addr = next, addr != end && pmd_none(*pmd)); +} + +static void __init kasan_early_pud_populate(pgd_t *pgd, + unsigned long addr, + unsigned long end) +{ + pud_t *pud; + unsigned long next; + + if (pgd_none(*pgd)) + pgd_populate(&init_mm, pgd, kasan_zero_pud); + + pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + kasan_early_pmd_populate(pud, addr, next); + } while (pud++, addr = next, addr != end && pud_none(*pud)); +} + +static void __init kasan_map_early_shadow(void) +{ + unsigned long addr = KASAN_SHADOW_START; + unsigned long end = KASAN_SHADOW_END; + unsigned long next; + pgd_t *pgd; + + pgd = pgd_offset_k(addr); + do { + next = pgd_addr_end(addr, end); + kasan_early_pud_populate(pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + +asmlinkage void __init kasan_early_init(void) +{ + BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61)); + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)); + kasan_map_early_shadow(); +} + +static void __init clear_pgds(unsigned long start, + unsigned long end) +{ + /* + * Remove references to kasan page tables from + * swapper_pg_dir. pgd_clear() can't be used + * here because it's nop on 2,3-level pagetable setups + */ + for (; start < end; start += PGDIR_SIZE) + set_pgd(pgd_offset_k(start), __pgd(0)); +} + +static void __init cpu_set_ttbr1(unsigned long ttbr1) +{ + asm( + " msr ttbr1_el1, %0\n" + " isb" + : + : "r" (ttbr1)); +} + +void __init kasan_init(void) +{ + struct memblock_region *reg; + + /* + * We are going to perform proper setup of shadow memory. + * At first we should unmap early shadow (clear_pgds() call bellow). + * However, instrumented code couldn't execute without shadow memory. + * tmp_pg_dir used to keep early shadow mapped until full shadow + * setup will be finished. + */ + memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir)); + cpu_set_ttbr1(__pa(tmp_pg_dir)); + flush_tlb_all(); + + clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); + + kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, + kasan_mem_to_shadow((void *)MODULES_VADDR)); + + for_each_memblock(memory, reg) { + void *start = (void *)__phys_to_virt(reg->base); + void *end = (void *)__phys_to_virt(reg->base + reg->size); + + if (start >= end) + break; + + /* + * end + 1 here is intentional. We check several shadow bytes in + * advance to slightly speed up fastpath. In some rare cases + * we could cross boundary of mapped shadow, so we just map + * some more here. + */ + vmemmap_populate((unsigned long)kasan_mem_to_shadow(start), + (unsigned long)kasan_mem_to_shadow(end) + 1, + pfn_to_nid(virt_to_pfn(start))); + } + + memset(kasan_zero_page, 0, PAGE_SIZE); + cpu_set_ttbr1(__pa(swapper_pg_dir)); + flush_tlb_all(); + + /* At this point kasan is fully initialized. Enable error messages */ + init_task.kasan_depth = 0; + pr_info("KernelAddressSanitizer initialized\n"); +} diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index cb34eb8bbb9d..1d99f950d8ea 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -21,6 +21,7 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/init.h> +#include <linux/libfdt.h> #include <linux/mman.h> #include <linux/nodemask.h> #include <linux/memblock.h> @@ -29,8 +30,10 @@ #include <linux/slab.h> #include <linux/stop_machine.h> +#include <asm/barrier.h> #include <asm/cputype.h> #include <asm/fixmap.h> +#include <asm/kernel-pgtable.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/sizes.h> @@ -46,7 +49,7 @@ u64 idmap_t0sz = TCR_T0SZ(VA_BITS); * Empty_zero_page is a special page that is used for zero-initialized data * and COW. */ -struct page *empty_zero_page; +unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, @@ -60,11 +63,18 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, } EXPORT_SYMBOL(phys_mem_access_prot); -static void __init *early_alloc(unsigned long sz) +static void __init *early_pgtable_alloc(void) { - void *ptr = __va(memblock_alloc(sz, sz)); - BUG_ON(!ptr); - memset(ptr, 0, sz); + phys_addr_t phys; + void *ptr; + + phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + BUG_ON(!phys); + ptr = __va(phys); + memset(ptr, 0, PAGE_SIZE); + + /* Ensure the zeroed page is visible to the page table walker */ + dsb(ishst); return ptr; } @@ -89,12 +99,12 @@ static void split_pmd(pmd_t *pmd, pte_t *pte) static void alloc_init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot, - void *(*alloc)(unsigned long size)) + void *(*pgtable_alloc)(void)) { pte_t *pte; if (pmd_none(*pmd) || pmd_sect(*pmd)) { - pte = alloc(PTRS_PER_PTE * sizeof(pte_t)); + pte = pgtable_alloc(); if (pmd_sect(*pmd)) split_pmd(pmd, pte); __pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE); @@ -116,7 +126,7 @@ void split_pud(pud_t *old_pud, pmd_t *pmd) int i = 0; do { - set_pmd(pmd, __pmd(addr | prot)); + set_pmd(pmd, __pmd(addr | pgprot_val(prot))); addr += PMD_SIZE; } while (pmd++, i++, i < PTRS_PER_PMD); } @@ -124,7 +134,7 @@ void split_pud(pud_t *old_pud, pmd_t *pmd) static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - void *(*alloc)(unsigned long size)) + void *(*pgtable_alloc)(void)) { pmd_t *pmd; unsigned long next; @@ -133,7 +143,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, * Check for initial section mappings in the pgd/pud and remove them. */ if (pud_none(*pud) || pud_sect(*pud)) { - pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t)); + pmd = pgtable_alloc(); if (pud_sect(*pud)) { /* * need to have the 1G of mappings continue to be @@ -168,7 +178,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, } } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), - prot, alloc); + prot, pgtable_alloc); } phys += next - addr; } while (pmd++, addr = next, addr != end); @@ -189,13 +199,13 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next, static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - void *(*alloc)(unsigned long size)) + void *(*pgtable_alloc)(void)) { pud_t *pud; unsigned long next; if (pgd_none(*pgd)) { - pud = alloc(PTRS_PER_PUD * sizeof(pud_t)); + pud = pgtable_alloc(); pgd_populate(mm, pgd, pud); } BUG_ON(pgd_bad(*pgd)); @@ -228,7 +238,8 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, } } } else { - alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc); + alloc_init_pmd(mm, pud, addr, next, phys, prot, + pgtable_alloc); } phys += next - addr; } while (pud++, addr = next, addr != end); @@ -241,7 +252,7 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, - void *(*alloc)(unsigned long size)) + void *(*pgtable_alloc)(void)) { unsigned long addr, length, end, next; @@ -251,22 +262,22 @@ static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc); + alloc_init_pud(mm, pgd, addr, next, phys, prot, pgtable_alloc); phys += next - addr; } while (pgd++, addr = next, addr != end); } -static void *late_alloc(unsigned long size) +static void *late_pgtable_alloc(void) { - void *ptr; - - BUG_ON(size > PAGE_SIZE); - ptr = (void *)__get_free_page(PGALLOC_GFP); + void *ptr = (void *)__get_free_page(PGALLOC_GFP); BUG_ON(!ptr); + + /* Ensure the zeroed page is visible to the page table walker */ + dsb(ishst); return ptr; } -static void __ref create_mapping(phys_addr_t phys, unsigned long virt, +static void __init create_mapping(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { if (virt < VMALLOC_START) { @@ -274,8 +285,8 @@ static void __ref create_mapping(phys_addr_t phys, unsigned long virt, &phys, virt); return; } - __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt, - size, prot, early_alloc); + __create_mapping(&init_mm, pgd_offset_k(virt), phys, virt, + size, prot, early_pgtable_alloc); } void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, @@ -283,7 +294,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, pgprot_t prot) { __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot, - late_alloc); + late_pgtable_alloc); } static void create_mapping_late(phys_addr_t phys, unsigned long virt, @@ -295,8 +306,8 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, return; } - return __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), - phys, virt, size, prot, late_alloc); + return __create_mapping(&init_mm, pgd_offset_k(virt), + phys, virt, size, prot, late_pgtable_alloc); } #ifdef CONFIG_DEBUG_RODATA @@ -352,14 +363,11 @@ static void __init map_mem(void) * memory addressable from the initial direct kernel mapping. * * The initial direct kernel mapping, located at swapper_pg_dir, gives - * us PUD_SIZE (4K pages) or PMD_SIZE (64K pages) memory starting from - * PHYS_OFFSET (which must be aligned to 2MB as per - * Documentation/arm64/booting.txt). + * us PUD_SIZE (with SECTION maps) or PMD_SIZE (without SECTION maps, + * memory starting from PHYS_OFFSET (which must be aligned to 2MB as + * per Documentation/arm64/booting.txt). */ - if (IS_ENABLED(CONFIG_ARM64_64K_PAGES)) - limit = PHYS_OFFSET + PMD_SIZE; - else - limit = PHYS_OFFSET + PUD_SIZE; + limit = PHYS_OFFSET + SWAPPER_INIT_MAP_SIZE; memblock_set_current_limit(limit); /* map all the memory banks */ @@ -370,21 +378,24 @@ static void __init map_mem(void) if (start >= end) break; -#ifndef CONFIG_ARM64_64K_PAGES - /* - * For the first memory bank align the start address and - * current memblock limit to prevent create_mapping() from - * allocating pte page tables from unmapped memory. - * When 64K pages are enabled, the pte page table for the - * first PGDIR_SIZE is already present in swapper_pg_dir. - */ - if (start < limit) - start = ALIGN(start, PMD_SIZE); - if (end < limit) { - limit = end & PMD_MASK; - memblock_set_current_limit(limit); + if (ARM64_SWAPPER_USES_SECTION_MAPS) { + /* + * For the first memory bank align the start address and + * current memblock limit to prevent create_mapping() from + * allocating pte page tables from unmapped memory. With + * the section maps, if the first block doesn't end on section + * size boundary, create_mapping() will try to allocate a pte + * page, which may be returned from an unmapped area. + * When section maps are not used, the pte page table for the + * current limit is already present in swapper_pg_dir. + */ + if (start < limit) + start = ALIGN(start, SECTION_SIZE); + if (end < limit) { + limit = end & SECTION_MASK; + memblock_set_current_limit(limit); + } } -#endif __map_memblock(start, end); } @@ -438,39 +449,16 @@ void fixup_init(void) */ void __init paging_init(void) { - void *zero_page; - map_mem(); fixup_executable(); - /* allocate the zero page. */ - zero_page = early_alloc(PAGE_SIZE); - bootmem_init(); - empty_zero_page = virt_to_page(zero_page); - - /* Ensure the zero page is visible to the page table walker */ - dsb(ishst); - /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. */ - cpu_set_reserved_ttbr0(); - flush_tlb_all(); - cpu_set_default_tcr_t0sz(); -} - -/* - * Enable the identity mapping to allow the MMU disabling. - */ -void setup_mm_for_reboot(void) -{ - cpu_set_reserved_ttbr0(); - flush_tlb_all(); - cpu_set_idmap_tcr_t0sz(); - cpu_switch_mm(idmap_pg_dir, &init_mm); + cpu_uninstall_idmap(); } /* @@ -511,12 +499,12 @@ int kern_addr_valid(unsigned long addr) return pfn_valid(pte_pfn(*pte)); } #ifdef CONFIG_SPARSEMEM_VMEMMAP -#ifdef CONFIG_ARM64_64K_PAGES +#if !ARM64_SWAPPER_USES_SECTION_MAPS int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) { return vmemmap_populate_basepages(start, end, node); } -#else /* !CONFIG_ARM64_64K_PAGES */ +#else /* !ARM64_SWAPPER_USES_SECTION_MAPS */ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) { unsigned long addr = start; @@ -646,3 +634,59 @@ void __set_fixmap(enum fixed_addresses idx, flush_tlb_kernel_range(addr, addr+PAGE_SIZE); } } + +void *__init fixmap_remap_fdt(phys_addr_t dt_phys) +{ + const u64 dt_virt_base = __fix_to_virt(FIX_FDT); + pgprot_t prot = PAGE_KERNEL | PTE_RDONLY; + int size, offset; + void *dt_virt; + + /* + * Check whether the physical FDT address is set and meets the minimum + * alignment requirement. Since we are relying on MIN_FDT_ALIGN to be + * at least 8 bytes so that we can always access the size field of the + * FDT header after mapping the first chunk, double check here if that + * is indeed the case. + */ + BUILD_BUG_ON(MIN_FDT_ALIGN < 8); + if (!dt_phys || dt_phys % MIN_FDT_ALIGN) + return NULL; + + /* + * Make sure that the FDT region can be mapped without the need to + * allocate additional translation table pages, so that it is safe + * to call create_mapping() this early. + * + * On 64k pages, the FDT will be mapped using PTEs, so we need to + * be in the same PMD as the rest of the fixmap. + * On 4k pages, we'll use section mappings for the FDT so we only + * have to be in the same PUD. + */ + BUILD_BUG_ON(dt_virt_base % SZ_2M); + + BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> SWAPPER_TABLE_SHIFT != + __fix_to_virt(FIX_BTMAP_BEGIN) >> SWAPPER_TABLE_SHIFT); + + offset = dt_phys % SWAPPER_BLOCK_SIZE; + dt_virt = (void *)dt_virt_base + offset; + + /* map the first chunk so we can read the size from the header */ + create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, + SWAPPER_BLOCK_SIZE, prot); + + if (fdt_check_header(dt_virt) != 0) + return NULL; + + size = fdt_totalsize(dt_virt); + if (size > MAX_FDT_SIZE) + return NULL; + + if (offset + size > SWAPPER_BLOCK_SIZE) + create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, + round_up(offset + size, SWAPPER_BLOCK_SIZE), prot); + + memblock_reserve(dt_phys, size); + + return dt_virt; +} diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 71ca104f97bd..cb3ba1b812e7 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -28,8 +28,6 @@ #include "mm.h" -#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) - static struct kmem_cache *pgd_cache; pgd_t *pgd_alloc(struct mm_struct *mm) diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S deleted file mode 100644 index d69dffffaa89..000000000000 --- a/arch/arm64/mm/proc-macros.S +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Based on arch/arm/mm/proc-macros.S - * - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <asm/asm-offsets.h> -#include <asm/thread_info.h> - -/* - * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm) - */ - .macro vma_vm_mm, rd, rn - ldr \rd, [\rn, #VMA_VM_MM] - .endm - -/* - * mmid - get context id from mm pointer (mm->context.id) - */ - .macro mmid, rd, rn - ldr \rd, [\rn, #MM_CONTEXT_ID] - .endm - -/* - * dcache_line_size - get the minimum D-cache line size from the CTR register. - */ - .macro dcache_line_size, reg, tmp - mrs \tmp, ctr_el0 // read CTR - ubfm \tmp, \tmp, #16, #19 // cache line size encoding - mov \reg, #4 // bytes per word - lsl \reg, \reg, \tmp // actual cache line size - .endm - -/* - * icache_line_size - get the minimum I-cache line size from the CTR register. - */ - .macro icache_line_size, reg, tmp - mrs \tmp, ctr_el0 // read CTR - and \tmp, \tmp, #0xf // cache line size encoding - mov \reg, #4 // bytes per word - lsl \reg, \reg, \tmp // actual cache line size - .endm - -/* - * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map - */ - .macro tcr_set_idmap_t0sz, valreg, tmpreg -#ifndef CONFIG_ARM64_VA_BITS_48 - ldr_l \tmpreg, idmap_t0sz - bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH -#endif - .endm - -/* - * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present - */ - .macro reset_pmuserenr_el0, tmpreg - mrs \tmpreg, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer - sbfx \tmpreg, \tmpreg, #8, #4 - cmp \tmpreg, #1 // Skip if no PMU present - b.lt 9000f - msr pmuserenr_el0, xzr // Disable PMU access from EL0 -9000: - .endm diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 262c8ec55790..4c58844afedf 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -23,10 +23,10 @@ #include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/hwcap.h> -#include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> - -#include "proc-macros.S" +#include <asm/pgtable-hwdef.h> +#include <asm/cpufeature.h> +#include <asm/alternative.h> #ifdef CONFIG_ARM64_64K_PAGES #define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K @@ -34,11 +34,7 @@ #define TCR_TG_FLAGS TCR_TG0_4K | TCR_TG1_4K #endif -#ifdef CONFIG_SMP #define TCR_SMP_FLAGS TCR_SHARED -#else -#define TCR_SMP_FLAGS 0 -#endif /* PTWs cacheable, inner/outer WBWA */ #define TCR_CACHE_FLAGS TCR_IRGN_WBWA | TCR_ORGN_WBWA @@ -46,52 +42,6 @@ #define MAIR(attr, mt) ((attr) << ((mt) * 8)) /* - * cpu_cache_off() - * - * Turn the CPU D-cache off. - */ -ENTRY(cpu_cache_off) - mrs x0, sctlr_el1 - bic x0, x0, #1 << 2 // clear SCTLR.C - msr sctlr_el1, x0 - isb - ret -ENDPROC(cpu_cache_off) - -/* - * cpu_reset(loc) - * - * Perform a soft reset of the system. Put the CPU into the same state - * as it would be if it had been reset, and branch to what would be the - * reset vector. It must be executed with the flat identity mapping. - * - * - loc - location to jump to for soft reset - */ - .align 5 -ENTRY(cpu_reset) - mrs x1, sctlr_el1 - bic x1, x1, #1 - msr sctlr_el1, x1 // disable the MMU - isb - ret x0 -ENDPROC(cpu_reset) - -ENTRY(cpu_soft_restart) - /* Save address of cpu_reset() and reset address */ - mov x19, x0 - mov x20, x1 - - /* Turn D-cache off */ - bl cpu_cache_off - - /* Push out all dirty data, and ensure cache is empty */ - bl flush_cache_all - - mov x0, x20 - ret x19 -ENDPROC(cpu_soft_restart) - -/* * cpu_do_idle() * * Idle the processor (wait for interrupt). @@ -112,62 +62,50 @@ ENTRY(cpu_do_suspend) mrs x2, tpidr_el0 mrs x3, tpidrro_el0 mrs x4, contextidr_el1 - mrs x5, mair_el1 - mrs x6, cpacr_el1 - mrs x7, ttbr1_el1 - mrs x8, tcr_el1 - mrs x9, vbar_el1 - mrs x10, mdscr_el1 - mrs x11, oslsr_el1 - mrs x12, sctlr_el1 + mrs x5, cpacr_el1 + mrs x6, tcr_el1 + mrs x7, vbar_el1 + mrs x8, mdscr_el1 + mrs x9, oslsr_el1 + mrs x10, sctlr_el1 stp x2, x3, [x0] - stp x4, x5, [x0, #16] - stp x6, x7, [x0, #32] - stp x8, x9, [x0, #48] - stp x10, x11, [x0, #64] - str x12, [x0, #80] + stp x4, xzr, [x0, #16] + stp x5, x6, [x0, #32] + stp x7, x8, [x0, #48] + stp x9, x10, [x0, #64] ret ENDPROC(cpu_do_suspend) /** * cpu_do_resume - restore CPU register context * - * x0: Physical address of context pointer - * x1: ttbr0_el1 to be restored - * - * Returns: - * sctlr_el1 value in x0 + * x0: Address of context pointer */ ENTRY(cpu_do_resume) - /* - * Invalidate local tlb entries before turning on MMU - */ - tlbi vmalle1 ldp x2, x3, [x0] ldp x4, x5, [x0, #16] - ldp x6, x7, [x0, #32] - ldp x8, x9, [x0, #48] - ldp x10, x11, [x0, #64] - ldr x12, [x0, #80] + ldp x6, x8, [x0, #32] + ldp x9, x10, [x0, #48] + ldp x11, x12, [x0, #64] msr tpidr_el0, x2 msr tpidrro_el0, x3 msr contextidr_el1, x4 - msr mair_el1, x5 msr cpacr_el1, x6 - msr ttbr0_el1, x1 - msr ttbr1_el1, x7 - tcr_set_idmap_t0sz x8, x7 + + /* Don't change t0sz here, mask those bits when restoring */ + mrs x5, tcr_el1 + bfi x8, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH + msr tcr_el1, x8 msr vbar_el1, x9 msr mdscr_el1, x10 + msr sctlr_el1, x12 /* * Restore oslsr_el1 by writing oslar_el1 */ ubfx x11, x11, #1, #1 msr oslar_el1, x11 reset_pmuserenr_el0 x0 // Disable PMU access from EL0 - mov x0, x12 - dsb nsh // Make sure local tlb invalidation completed isb ret ENDPROC(cpu_do_resume) @@ -181,14 +119,40 @@ ENDPROC(cpu_do_resume) * - pgd_phys - physical address of new TTB */ ENTRY(cpu_do_switch_mm) - mmid w1, x1 // get mm->context.id + mmid x1, x1 // get mm->context.id bfi x0, x1, #48, #16 // set the ASID msr ttbr0_el1, x0 // set TTBR0 isb ret ENDPROC(cpu_do_switch_mm) - .section ".text.init", #alloc, #execinstr + .pushsection ".idmap.text", "ax" +/* + * void idmap_cpu_replace_ttbr1(phys_addr_t new_pgd) + * + * This is the low-level counterpart to cpu_replace_ttbr1, and should not be + * called by anything else. It can only be executed from a TTBR0 mapping. + */ +ENTRY(idmap_cpu_replace_ttbr1) + mrs x2, daif + msr daifset, #0xf + + adrp x1, empty_zero_page + msr ttbr1_el1, x1 + isb + + tlbi vmalle1 + dsb nsh + isb + + msr ttbr1_el1, x0 + isb + + msr daif, x2 + + ret +ENDPROC(idmap_cpu_replace_ttbr1) + .popsection /* * __cpu_setup @@ -197,7 +161,6 @@ ENDPROC(cpu_do_switch_mm) * value of the SCTLR_EL1 register. */ ENTRY(__cpu_setup) - ic iallu // I+BTB cache invalidate tlbi vmalle1is // invalidate I + D TLBs dsb ish diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index 608aa57799c8..e77672539e8e 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -26,14 +26,29 @@ #define NOP_INSN "nop" #endif -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("1:\t" NOP_INSN "\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" ".popsection\n\t" - : : "i" (key) : : l_yes); + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:\tj %l[l_yes]\n\t" + "nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + WORD_INSN " 1b, %l[l_yes], %0\n\t" + ".popsection\n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + return false; l_yes: return true; diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c index dda800e9e731..3e586daa3a32 100644 --- a/arch/mips/kernel/jump_label.c +++ b/arch/mips/kernel/jump_label.c @@ -51,7 +51,7 @@ void arch_jump_label_transform(struct jump_entry *e, /* Target must have the right alignment and ISA must be preserved. */ BUG_ON((e->target & J_ALIGN_MASK) != J_ISA_BIT); - if (type == JUMP_LABEL_ENABLE) { + if (type == JUMP_LABEL_JMP) { insn.j_format.opcode = J_ISA_BIT ? mm_j32_op : j_op; insn.j_format.target = e->target >> J_RANGE_SHIFT; } else { diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 485fdc462243..69311b9c551e 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -405,7 +405,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_mips_deliver_interrupts(vcpu, kvm_read_c0_guest_cause(vcpu->arch.cop0)); - kvm_guest_enter(); + __kvm_guest_enter(); /* Disable hardware page table walking while in guest */ htw_stop(); @@ -415,7 +415,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) /* Re-enable HTW before enabling interrupts */ htw_start(); - kvm_guest_exit(); + __kvm_guest_exit(); local_irq_enable(); if (vcpu->sigset_active) diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h index 47f11c707b65..03d4d5afb120 100644 --- a/arch/parisc/include/asm/cache.h +++ b/arch/parisc/include/asm/cache.h @@ -30,6 +30,9 @@ #define __read_mostly __attribute__((__section__(".data..read_mostly"))) +/* Read-only memory is marked before mark_rodata_ro() is called. */ +#define __ro_after_init __read_mostly + void parisc_cache_init(void); /* initializes cache-flushing */ void disable_sr_hashing_asm(int); /* low level support for above */ void disable_sr_hashing(void); /* turns off space register hashing */ diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index de65f66ea64e..b188d1f26027 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -121,10 +121,6 @@ flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vma } } -#ifdef CONFIG_DEBUG_RODATA -void mark_rodata_ro(void); -#endif - #include <asm/kmap_types.h> #define ARCH_HAS_KMAP diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index efbf9a322a23..47e155f15433 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -18,14 +18,29 @@ #define JUMP_ENTRY_TYPE stringify_in_c(FTR_ENTRY_LONG) #define JUMP_LABEL_NOP_SIZE 4 -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("1:\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" ".popsection \n\t" - : : "i" (key) : : l_yes); + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:\n\t" + "b %l[l_yes]\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" + ".popsection \n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + return false; l_yes: return true; diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index a1ed8a8c7cb4..6472472093d0 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -17,7 +17,7 @@ void arch_jump_label_transform(struct jump_entry *entry, { u32 *addr = (u32 *)(unsigned long)entry->code; - if (type == JUMP_LABEL_ENABLE) + if (type == JUMP_LABEL_JMP) patch_branch(addr, entry->target, 0); else patch_instruction(addr, PPC_INST_NOP); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index c8fe9ab10792..3220bace1480 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -115,7 +115,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) continue; } - kvm_guest_enter(); + __kvm_guest_enter(); return 1; } diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c index ca3a062ed1b9..11090ab4bf59 100644 --- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c +++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c @@ -123,7 +123,8 @@ cpld_pic_cascade(unsigned int irq, struct irq_desc *desc) } static int -cpld_pic_host_match(struct irq_domain *h, struct device_node *node) +cpld_pic_host_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) { return cpld_pic_node == node; } diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 3af8324c122e..a15f1efc295f 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -222,7 +222,8 @@ void iic_request_IPIs(void) #endif /* CONFIG_SMP */ -static int iic_host_match(struct irq_domain *h, struct device_node *node) +static int iic_host_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) { return of_device_is_compatible(node, "IBM,CBEA-Internal-Interrupt-Controller"); diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c index 4cde8e7da4b8..b7866e01483d 100644 --- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c +++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c @@ -108,7 +108,8 @@ static int flipper_pic_map(struct irq_domain *h, unsigned int virq, return 0; } -static int flipper_pic_match(struct irq_domain *h, struct device_node *np) +static int flipper_pic_match(struct irq_domain *h, struct device_node *np, + enum irq_domain_bus_token bus_token) { return 1; } diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 59cfc9d63c2d..6f4f8b060def 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -268,7 +268,8 @@ static struct irqaction gatwick_cascade_action = { .name = "cascade", }; -static int pmac_pic_host_match(struct irq_domain *h, struct device_node *node) +static int pmac_pic_host_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) { /* We match all, we don't always have a node anyway */ return 1; diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c new file mode 100644 index 000000000000..2c91ee7800b9 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -0,0 +1,254 @@ +/* + * This file implements an irqchip for OPAL events. Whenever there is + * an interrupt that is handled by OPAL we get passed a list of events + * that Linux needs to do something about. These basically look like + * interrupts to Linux so we implement an irqchip to handle them. + * + * Copyright Alistair Popple, IBM Corporation 2014. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include <linux/bitops.h> +#include <linux/irq.h> +#include <linux/irqchip.h> +#include <linux/irqdomain.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/kthread.h> +#include <linux/delay.h> +#include <linux/slab.h> +#include <linux/irq_work.h> + +#include <asm/machdep.h> +#include <asm/opal.h> + +#include "powernv.h" + +/* Maximum number of events supported by OPAL firmware */ +#define MAX_NUM_EVENTS 64 + +struct opal_event_irqchip { + struct irq_chip irqchip; + struct irq_domain *domain; + unsigned long mask; +}; +static struct opal_event_irqchip opal_event_irqchip; + +static unsigned int opal_irq_count; +static unsigned int *opal_irqs; + +static void opal_handle_irq_work(struct irq_work *work); +static __be64 last_outstanding_events; +static struct irq_work opal_event_irq_work = { + .func = opal_handle_irq_work, +}; + +static void opal_event_mask(struct irq_data *d) +{ + clear_bit(d->hwirq, &opal_event_irqchip.mask); +} + +static void opal_event_unmask(struct irq_data *d) +{ + set_bit(d->hwirq, &opal_event_irqchip.mask); + + opal_poll_events(&last_outstanding_events); + if (last_outstanding_events & opal_event_irqchip.mask) + /* Need to retrigger the interrupt */ + irq_work_queue(&opal_event_irq_work); +} + +static int opal_event_set_type(struct irq_data *d, unsigned int flow_type) +{ + /* + * For now we only support level triggered events. The irq + * handler will be called continuously until the event has + * been cleared in OPAL. + */ + if (flow_type != IRQ_TYPE_LEVEL_HIGH) + return -EINVAL; + + return 0; +} + +static struct opal_event_irqchip opal_event_irqchip = { + .irqchip = { + .name = "OPAL EVT", + .irq_mask = opal_event_mask, + .irq_unmask = opal_event_unmask, + .irq_set_type = opal_event_set_type, + }, + .mask = 0, +}; + +static int opal_event_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + irq_set_chip_data(irq, &opal_event_irqchip); + irq_set_chip_and_handler(irq, &opal_event_irqchip.irqchip, + handle_level_irq); + + return 0; +} + +void opal_handle_events(uint64_t events) +{ + int virq, hwirq = 0; + u64 mask = opal_event_irqchip.mask; + + if (!in_irq() && (events & mask)) { + last_outstanding_events = events; + irq_work_queue(&opal_event_irq_work); + return; + } + + while (events & mask) { + hwirq = fls64(events) - 1; + if (BIT_ULL(hwirq) & mask) { + virq = irq_find_mapping(opal_event_irqchip.domain, + hwirq); + if (virq) + generic_handle_irq(virq); + } + events &= ~BIT_ULL(hwirq); + } +} + +static irqreturn_t opal_interrupt(int irq, void *data) +{ + __be64 events; + + opal_handle_interrupt(virq_to_hw(irq), &events); + opal_handle_events(be64_to_cpu(events)); + + return IRQ_HANDLED; +} + +static void opal_handle_irq_work(struct irq_work *work) +{ + opal_handle_events(be64_to_cpu(last_outstanding_events)); +} + +static int opal_event_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) +{ + return h->of_node == node; +} + +static int opal_event_xlate(struct irq_domain *h, struct device_node *np, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) +{ + *out_hwirq = intspec[0]; + *out_flags = IRQ_TYPE_LEVEL_HIGH; + + return 0; +} + +static const struct irq_domain_ops opal_event_domain_ops = { + .match = opal_event_match, + .map = opal_event_map, + .xlate = opal_event_xlate, +}; + +void opal_event_shutdown(void) +{ + unsigned int i; + + /* First free interrupts, which will also mask them */ + for (i = 0; i < opal_irq_count; i++) { + if (opal_irqs[i]) + free_irq(opal_irqs[i], NULL); + opal_irqs[i] = 0; + } +} + +int __init opal_event_init(void) +{ + struct device_node *dn, *opal_node; + const __be32 *irqs; + int i, irqlen, rc = 0; + + opal_node = of_find_node_by_path("/ibm,opal"); + if (!opal_node) { + pr_warn("opal: Node not found\n"); + return -ENODEV; + } + + /* If dn is NULL it means the domain won't be linked to a DT + * node so therefore irq_of_parse_and_map(...) wont work. But + * that shouldn't be problem because if we're running a + * version of skiboot that doesn't have the dn then the + * devices won't have the correct properties and will have to + * fall back to the legacy method (opal_event_request(...)) + * anyway. */ + dn = of_find_compatible_node(NULL, NULL, "ibm,opal-event"); + opal_event_irqchip.domain = irq_domain_add_linear(dn, MAX_NUM_EVENTS, + &opal_event_domain_ops, &opal_event_irqchip); + of_node_put(dn); + if (!opal_event_irqchip.domain) { + pr_warn("opal: Unable to create irq domain\n"); + rc = -ENOMEM; + goto out; + } + + /* Get interrupt property */ + irqs = of_get_property(opal_node, "opal-interrupts", &irqlen); + opal_irq_count = irqs ? (irqlen / 4) : 0; + pr_debug("Found %d interrupts reserved for OPAL\n", opal_irq_count); + + /* Install interrupt handlers */ + opal_irqs = kcalloc(opal_irq_count, sizeof(*opal_irqs), GFP_KERNEL); + for (i = 0; irqs && i < opal_irq_count; i++, irqs++) { + unsigned int irq, virq; + + /* Get hardware and virtual IRQ */ + irq = be32_to_cpup(irqs); + virq = irq_create_mapping(NULL, irq); + if (virq == NO_IRQ) { + pr_warn("Failed to map irq 0x%x\n", irq); + continue; + } + + /* Install interrupt handler */ + rc = request_irq(virq, opal_interrupt, 0, "opal", NULL); + if (rc) { + irq_dispose_mapping(virq); + pr_warn("Error %d requesting irq %d (0x%x)\n", + rc, virq, irq); + continue; + } + + /* Cache IRQ */ + opal_irqs[i] = virq; + } + +out: + of_node_put(opal_node); + return rc; +} +machine_arch_initcall(powernv, opal_event_init); + +/** + * opal_event_request(unsigned int opal_event_nr) - Request an event + * @opal_event_nr: the opal event number to request + * + * This routine can be used to find the linux virq number which can + * then be passed to request_irq to assign a handler for a particular + * opal event. This should only be used by legacy devices which don't + * have proper device tree bindings. Most devices should use + * irq_of_parse_and_map() instead. + */ +int opal_event_request(unsigned int opal_event_nr) +{ + if (WARN_ON_ONCE(!opal_event_irqchip.domain)) + return NO_IRQ; + + return irq_create_mapping(opal_event_irqchip.domain, opal_event_nr); +} +EXPORT_SYMBOL(opal_event_request); diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index a6c42f34303a..638c4060938e 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -678,7 +678,8 @@ static int ps3_host_map(struct irq_domain *h, unsigned int virq, return 0; } -static int ps3_host_match(struct irq_domain *h, struct device_node *np) +static int ps3_host_match(struct irq_domain *h, struct device_node *np, + enum irq_domain_bus_token bus_token) { /* Match all */ return 1; diff --git a/arch/powerpc/sysdev/ehv_pic.c b/arch/powerpc/sysdev/ehv_pic.c index 2d20f10a4203..eca0b00794fa 100644 --- a/arch/powerpc/sysdev/ehv_pic.c +++ b/arch/powerpc/sysdev/ehv_pic.c @@ -177,7 +177,8 @@ unsigned int ehv_pic_get_irq(void) return irq_linear_revmap(global_ehv_pic->irqhost, irq); } -static int ehv_pic_host_match(struct irq_domain *h, struct device_node *node) +static int ehv_pic_host_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) { /* Exact match, unless ehv_pic node is NULL */ return h->of_node == NULL || h->of_node == node; diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c index 45598da0b321..8c3756cbc4f9 100644 --- a/arch/powerpc/sysdev/i8259.c +++ b/arch/powerpc/sysdev/i8259.c @@ -162,7 +162,8 @@ static struct resource pic_edgectrl_iores = { .flags = IORESOURCE_BUSY, }; -static int i8259_host_match(struct irq_domain *h, struct device_node *node) +static int i8259_host_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) { return h->of_node == NULL || h->of_node == node; } diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index b28733727ed3..27d317a4856d 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -671,7 +671,8 @@ static struct irq_chip ipic_edge_irq_chip = { .irq_set_type = ipic_set_irq_type, }; -static int ipic_host_match(struct irq_domain *h, struct device_node *node) +static int ipic_host_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) { /* Exact match, unless ipic node is NULL */ return h->of_node == NULL || h->of_node == node; diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index b2b8447a227a..8b6e282479b8 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1007,7 +1007,8 @@ static struct irq_chip mpic_irq_ht_chip = { #endif /* CONFIG_MPIC_U3_HT_IRQS */ -static int mpic_host_match(struct irq_domain *h, struct device_node *node) +static int mpic_host_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) { /* Exact match, unless mpic node is NULL */ return h->of_node == NULL || h->of_node == node; diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c index 543765e1ef14..da00c9e761e1 100644 --- a/arch/powerpc/sysdev/qe_lib/qe_ic.c +++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c @@ -244,7 +244,8 @@ static struct irq_chip qe_ic_irq_chip = { .irq_mask_ack = qe_ic_mask_irq, }; -static int qe_ic_host_match(struct irq_domain *h, struct device_node *node) +static int qe_ic_host_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) { /* Exact match, unless qe_ic node is NULL */ return h->of_node == NULL || h->of_node == node; diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 878a54036a25..41e334db8847 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -298,7 +298,8 @@ int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask, } #endif /* CONFIG_SMP */ -static int xics_host_match(struct irq_domain *h, struct device_node *node) +static int xics_host_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) { struct ics *ics; diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 69972b7957ee..7f9fd5e3f1bf 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -12,14 +12,29 @@ * We use a brcl 0,2 instruction for jump labels at compile time so it * can be easily distinguished from a hotpatch generated instruction. */ -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("0: brcl 0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n" ".pushsection __jump_table, \"aw\"\n" ".balign 8\n" ".quad 0b, %l[label], %0\n" ".popsection\n" - : : "X" (key) : : label); + : : "X" (&((char *)key)[branch]) : : label); + + return false; +label: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("0: brcl 15, %l[label]\n" + ".pushsection __jump_table, \"aw\"\n" + ".balign 8\n" + ".quad 0b, %l[label], %0\n" + ".popsection\n" + : : "X" (&((char *)key)[branch]) : : label); + return false; label: return true; diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c index a90299600483..a83d2248fea9 100644 --- a/arch/s390/kernel/jump_label.c +++ b/arch/s390/kernel/jump_label.c @@ -64,7 +64,7 @@ static void __jump_label_transform(struct jump_entry *entry, { struct insn old, new; - if (type == JUMP_LABEL_ENABLE) { + if (type == JUMP_LABEL_JMP) { jump_label_make_nop(entry, &old); jump_label_make_branch(entry, &new); } else { diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c3805cf4b982..2e840c64aa5c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1999,12 +1999,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) * As PF_VCPU will be used in fault handler, between * guest_enter and guest_exit should be no uaccess. */ - preempt_disable(); - kvm_guest_enter(); - preempt_enable(); + local_irq_disable(); + __kvm_guest_enter(); + local_irq_enable(); exit_reason = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); - kvm_guest_exit(); + local_irq_disable(); + __kvm_guest_exit(); + local_irq_enable(); vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); rc = vcpu_post_run(vcpu, exit_reason); diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index cc9b04a2b11b..62d0354d1727 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -7,16 +7,33 @@ #define JUMP_LABEL_NOP_SIZE 4 -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" - "nop\n\t" - "nop\n\t" - ".pushsection __jump_table, \"aw\"\n\t" - ".align 4\n\t" - ".word 1b, %l[l_yes], %c0\n\t" - ".popsection \n\t" - : : "i" (key) : : l_yes); + asm_volatile_goto("1:\n\t" + "nop\n\t" + "nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".align 4\n\t" + ".word 1b, %l[l_yes], %c0\n\t" + ".popsection \n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:\n\t" + "b %l[l_yes]\n\t" + "nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".align 4\n\t" + ".word 1b, %l[l_yes], %c0\n\t" + ".popsection \n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + return false; l_yes: return true; diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c index 48565c11e82a..59bbeff55024 100644 --- a/arch/sparc/kernel/jump_label.c +++ b/arch/sparc/kernel/jump_label.c @@ -16,7 +16,7 @@ void arch_jump_label_transform(struct jump_entry *entry, u32 val; u32 *insn = (u32 *) (unsigned long) entry->code; - if (type == JUMP_LABEL_ENABLE) { + if (type == JUMP_LABEL_JMP) { s32 off = (s32)entry->target - (s32)entry->code; #ifdef CONFIG_SPARC64 diff --git a/arch/unicore32/include/asm/memory.h b/arch/unicore32/include/asm/memory.h index debafc40200a..3bb0a29fd2d7 100644 --- a/arch/unicore32/include/asm/memory.h +++ b/arch/unicore32/include/asm/memory.h @@ -61,12 +61,6 @@ #endif /* - * Convert a physical address to a Page Frame Number and back - */ -#define __phys_to_pfn(paddr) ((paddr) >> PAGE_SHIFT) -#define __pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT) - -/* * Convert a page to/from a physical address */ #define page_to_phys(page) (__pfn_to_phys(page_to_pfn(page))) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 226d5696e1d1..983983c390ea 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -248,6 +248,11 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y +config KASAN_SHADOW_OFFSET + hex + depends on KASAN + default 0xdffffc0000000000 + config HAVE_INTEL_TXT def_bool y depends on INTEL_IOMMU && ACPI @@ -279,6 +284,9 @@ config ARCH_SUPPORTS_UPROBES config FIX_EARLYCON_MEM def_bool y +config DEBUG_RODATA + def_bool y + config PGTABLE_LEVELS int default 4 if X86_64 diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 72484a645f05..467ccb4d13ea 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -86,23 +86,12 @@ config EFI_PGT_DUMP issues with the mapping of the EFI runtime regions into that table. -config DEBUG_RODATA - bool "Write protect kernel read-only data structures" - default y - depends on DEBUG_KERNEL - ---help--- - Mark the kernel read-only data as write-protected in the pagetables, - in order to catch accidental (and incorrect) writes to such const - data. This is recommended so that we can catch kernel bugs sooner. - If in doubt, say "Y". - config DEBUG_RODATA_TEST - bool "Testcase for the DEBUG_RODATA feature" - depends on DEBUG_RODATA + bool "Testcase for the marking rodata read-only" default y ---help--- - This option enables a testcase for the DEBUG_RODATA - feature as well as for the change_page_attr() infrastructure. + This option enables a testcase for the setting rodata read-only + as well as for the change_page_attr() infrastructure. If in doubt, say "N" config DEBUG_SET_MODULE_RONX diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 47c8e32f621a..5aa976c6648a 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -84,16 +84,12 @@ int set_pages_rw(struct page *page, int numpages); void clflush_cache_range(void *addr, unsigned int size); -#ifdef CONFIG_DEBUG_RODATA -void mark_rodata_ro(void); +#define mmio_flush_range(addr, size) clflush_cache_range(addr, size) + extern const int rodata_test_data; extern int kernel_set_to_readonly; void set_kernel_text_rw(void); void set_kernel_text_ro(void); -#else -static inline void set_kernel_text_rw(void) { } -static inline void set_kernel_text_ro(void) { } -#endif #ifdef CONFIG_DEBUG_RODATA_TEST int rodata_test(void); diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 3738b138b843..a7c9206b5bea 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -86,6 +86,18 @@ extern u64 asmlinkage efi_call(void *fp, ...); extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, u32 type, u64 attribute); +#ifdef CONFIG_KASAN +/* + * CONFIG_KASAN may redefine memset to __memset. __memset function is present + * only in kernel binary. Since the EFI stub linked into a separate binary it + * doesn't have __memset(). So we should use standard memset from + * arch/x86/boot/compressed/string.c. The same applies to memcpy and memmove. + */ +#undef memcpy +#undef memset +#undef memmove +#endif + #endif /* CONFIG_X86_32 */ extern struct efi_scratch efi_scratch; diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index a4c1cf7e93f8..5daeca3d0f9e 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -16,15 +16,32 @@ # define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC #endif -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("1:" ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t" ".pushsection __jump_table, \"aw\" \n\t" _ASM_ALIGN "\n\t" - _ASM_PTR "1b, %l[l_yes], %c0 \n\t" + _ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t" ".popsection \n\t" - : : "i" (key) : : l_yes); + : : "i" (key), "i" (branch) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:" + ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t" + "2:\n\t" + ".pushsection __jump_table, \"aw\" \n\t" + _ASM_ALIGN "\n\t" + _ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t" + ".popsection \n\t" + : : "i" (key), "i" (branch) : : l_yes); + return false; l_yes: return true; diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h index 74a2a8dc9908..1410b567ecde 100644 --- a/arch/x86/include/asm/kasan.h +++ b/arch/x86/include/asm/kasan.h @@ -1,6 +1,9 @@ #ifndef _ASM_X86_KASAN_H #define _ASM_X86_KASAN_H +#include <linux/const.h> +#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) + /* * Compiler uses shadow offset assuming that addresses start * from 0. Kernel addresses don't start from 0, so shadow diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index c1adf33fdd0d..bc62e7cbf1b1 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -17,15 +17,8 @@ static inline bool kvm_check_and_clear_guest_paused(void) } #endif /* CONFIG_KVM_GUEST */ -#ifdef CONFIG_DEBUG_RODATA #define KVM_HYPERCALL \ ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL) -#else -/* On AMD processors, vmcall will generate a trap that we will - * then rewrite to the appropriate instruction. - */ -#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" -#endif /* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall * instruction. The hypervisor may replace it with something else but only the diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 23ba6765b718..b3928c13f359 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -902,7 +902,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); * Return saved PC of a blocked thread. * What is this good for? it will be always the scheduler or ret_from_fork. */ -#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8)) +#define thread_saved_pc(t) READ_ONCE_NOCHECK(*(unsigned long *)((t)->thread.sp - 8)) #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) extern unsigned long KSTK_ESP(struct task_struct *task); diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 0a5242428659..13b6cdd0af57 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -7,7 +7,7 @@ extern char __brk_base[], __brk_limit[]; extern struct exception_table_entry __stop___ex_table[]; -#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) +#if defined(CONFIG_X86_64) extern char __end_rodata_hpage_align[]; #endif diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index e4661196994e..ff8b9a17dc4b 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -27,12 +27,11 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t function. */ #define __HAVE_ARCH_MEMCPY 1 +extern void *memcpy(void *to, const void *from, size_t len); extern void *__memcpy(void *to, const void *from, size_t len); #ifndef CONFIG_KMEMCHECK -#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4 -extern void *memcpy(void *to, const void *from, size_t len); -#else +#if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4 #define memcpy(dst, src, len) \ ({ \ size_t __len = (len); \ diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8b7b0a51e742..f74416dc3fc6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -81,9 +81,9 @@ within(unsigned long addr, unsigned long start, unsigned long end) static unsigned long text_ip_addr(unsigned long ip) { /* - * On x86_64, kernel text mappings are mapped read-only with - * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead - * of the kernel text mapping to modify the kernel text. + * On x86_64, kernel text mappings are mapped read-only, so we use + * the kernel identity mapping instead of the kernel text mapping + * to modify the kernel text. * * For 32bit kernels, these mappings are same and we can use * kernel identity mapping to modify code. diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 26d5a55a2736..e565e0e4d216 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -45,7 +45,7 @@ static void __jump_label_transform(struct jump_entry *entry, const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP }; const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5]; - if (type == JUMP_LABEL_ENABLE) { + if (type == JUMP_LABEL_JMP) { if (init) { /* * Jump label is enabled for the first time. diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index d6178d9791db..543dcb88988f 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -745,9 +745,7 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { int err; -#ifdef CONFIG_DEBUG_RODATA char opc[BREAK_INSTR_SIZE]; -#endif /* CONFIG_DEBUG_RODATA */ bpt->type = BP_BREAKPOINT; err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, @@ -756,7 +754,6 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) return err; err = probe_kernel_write((char *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); -#ifdef CONFIG_DEBUG_RODATA if (!err) return err; /* @@ -773,13 +770,12 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE)) return -EINVAL; bpt->type = BP_POKE_BREAKPOINT; -#endif /* CONFIG_DEBUG_RODATA */ + return err; } int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { -#ifdef CONFIG_DEBUG_RODATA int err; char opc[BREAK_INSTR_SIZE]; @@ -796,8 +792,8 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE)) goto knl_write; return err; + knl_write: -#endif /* CONFIG_DEBUG_RODATA */ return probe_kernel_write((char *)bpt->bpt_addr, (char *)bpt->saved_instr, BREAK_INSTR_SIZE); } diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c index 3f92ce07e525..27538f183c3b 100644 --- a/arch/x86/kernel/test_nx.c +++ b/arch/x86/kernel/test_nx.c @@ -142,7 +142,6 @@ static int test_NX(void) * by the error message */ -#ifdef CONFIG_DEBUG_RODATA /* Test 3: Check if the .rodata section is executable */ if (rodata_test_data != 0xC3) { printk(KERN_ERR "test_nx: .rodata marker has invalid value\n"); @@ -151,7 +150,6 @@ static int test_NX(void) printk(KERN_ERR "test_nx: .rodata section is executable\n"); ret = -ENODEV; } -#endif #if 0 /* Test 4: Check if the .data section of a module is executable */ diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c index 5ecbfe5099da..cb4a01b41e27 100644 --- a/arch/x86/kernel/test_rodata.c +++ b/arch/x86/kernel/test_rodata.c @@ -76,5 +76,5 @@ int rodata_test(void) } MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Testcase for the DEBUG_RODATA infrastructure"); +MODULE_DESCRIPTION("Testcase for marking rodata as read-only"); MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 00bf300fd846..0384a734b417 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -41,29 +41,28 @@ ENTRY(phys_startup_64) jiffies_64 = jiffies; #endif -#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) +#if defined(CONFIG_X86_64) /* - * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA - * we retain large page mappings for boundaries spanning kernel text, rodata - * and data sections. + * On 64-bit, align RODATA to 2MB so we retain large page mappings for + * boundaries spanning kernel text, rodata and data sections. * * However, kernel identity mappings will have different RWX permissions * to the pages mapping to text and to the pages padding (which are freed) the * text section. Hence kernel identity mappings will be broken to smaller * pages. For 64-bit, kernel text and kernel identity mappings are different, - * so we can enable protection checks that come with CONFIG_DEBUG_RODATA, - * as well as retain 2MB large page mappings for kernel text. + * so we can enable protection checks as well as retain 2MB large page + * mappings for kernel text. */ -#define X64_ALIGN_DEBUG_RODATA_BEGIN . = ALIGN(HPAGE_SIZE); +#define X64_ALIGN_RODATA_BEGIN . = ALIGN(HPAGE_SIZE); -#define X64_ALIGN_DEBUG_RODATA_END \ +#define X64_ALIGN_RODATA_END \ . = ALIGN(HPAGE_SIZE); \ __end_rodata_hpage_align = .; #else -#define X64_ALIGN_DEBUG_RODATA_BEGIN -#define X64_ALIGN_DEBUG_RODATA_END +#define X64_ALIGN_RODATA_BEGIN +#define X64_ALIGN_RODATA_END #endif @@ -112,13 +111,11 @@ SECTIONS EXCEPTION_TABLE(16) :text = 0x9090 -#if defined(CONFIG_DEBUG_RODATA) /* .text should occupy whole number of pages */ . = ALIGN(PAGE_SIZE); -#endif - X64_ALIGN_DEBUG_RODATA_BEGIN + X64_ALIGN_RODATA_BEGIN RO_DATA(PAGE_SIZE) - X64_ALIGN_DEBUG_RODATA_END + X64_ALIGN_RODATA_END /* Data */ .data : AT(ADDR(.data) - LOAD_OFFSET) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bd84d2226ca1..c6c09e241885 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6349,7 +6349,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (req_immediate_exit) smp_send_reschedule(vcpu->cpu); - kvm_guest_enter(); + __kvm_guest_enter(); if (unlikely(vcpu->arch.switch_db_regs)) { set_debugreg(0, 7); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index c23ab1ee3a9a..0f98553f995b 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -871,7 +871,6 @@ static noinline int do_test_wp_bit(void) return flag; } -#ifdef CONFIG_DEBUG_RODATA const int rodata_test_data = 0xC3; EXPORT_SYMBOL_GPL(rodata_test_data); @@ -958,5 +957,3 @@ void mark_rodata_ro(void) #endif mark_nxdata_nx(); } -#endif - diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index f9977a7a9444..e10635a19727 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1062,7 +1062,6 @@ void __init mem_init(void) mem_init_print_info(NULL); } -#ifdef CONFIG_DEBUG_RODATA const int rodata_test_data = 0xC3; EXPORT_SYMBOL_GPL(rodata_test_data); @@ -1152,8 +1151,6 @@ void mark_rodata_ro(void) (unsigned long) __va(__pa_symbol(_sdata))); } -#endif - int kern_addr_valid(unsigned long addr) { unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT; diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 9a54dbe98064..caf175745141 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -11,20 +11,6 @@ extern pgd_t early_level4_pgt[PTRS_PER_PGD]; extern struct range pfn_mapped[E820_X_MAX]; -static pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss; -static pmd_t kasan_zero_pmd[PTRS_PER_PMD] __page_aligned_bss; -static pte_t kasan_zero_pte[PTRS_PER_PTE] __page_aligned_bss; - -/* - * This page used as early shadow. We don't use empty_zero_page - * at early stages, stack instrumentation could write some garbage - * to this page. - * Latter we reuse it as zero shadow for large ranges of memory - * that allowed to access, but not instrumented by kasan - * (vmalloc/vmemmap ...). - */ -static unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss; - static int __init map_range(struct range *range) { unsigned long start; @@ -61,106 +47,6 @@ static void __init kasan_map_early_shadow(pgd_t *pgd) } } -static int __init zero_pte_populate(pmd_t *pmd, unsigned long addr, - unsigned long end) -{ - pte_t *pte = pte_offset_kernel(pmd, addr); - - while (addr + PAGE_SIZE <= end) { - WARN_ON(!pte_none(*pte)); - set_pte(pte, __pte(__pa_nodebug(kasan_zero_page) - | __PAGE_KERNEL_RO)); - addr += PAGE_SIZE; - pte = pte_offset_kernel(pmd, addr); - } - return 0; -} - -static int __init zero_pmd_populate(pud_t *pud, unsigned long addr, - unsigned long end) -{ - int ret = 0; - pmd_t *pmd = pmd_offset(pud, addr); - - while (IS_ALIGNED(addr, PMD_SIZE) && addr + PMD_SIZE <= end) { - WARN_ON(!pmd_none(*pmd)); - set_pmd(pmd, __pmd(__pa_nodebug(kasan_zero_pte) - | _KERNPG_TABLE)); - addr += PMD_SIZE; - pmd = pmd_offset(pud, addr); - } - if (addr < end) { - if (pmd_none(*pmd)) { - void *p = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE); - if (!p) - return -ENOMEM; - set_pmd(pmd, __pmd(__pa_nodebug(p) | _KERNPG_TABLE)); - } - ret = zero_pte_populate(pmd, addr, end); - } - return ret; -} - - -static int __init zero_pud_populate(pgd_t *pgd, unsigned long addr, - unsigned long end) -{ - int ret = 0; - pud_t *pud = pud_offset(pgd, addr); - - while (IS_ALIGNED(addr, PUD_SIZE) && addr + PUD_SIZE <= end) { - WARN_ON(!pud_none(*pud)); - set_pud(pud, __pud(__pa_nodebug(kasan_zero_pmd) - | _KERNPG_TABLE)); - addr += PUD_SIZE; - pud = pud_offset(pgd, addr); - } - - if (addr < end) { - if (pud_none(*pud)) { - void *p = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE); - if (!p) - return -ENOMEM; - set_pud(pud, __pud(__pa_nodebug(p) | _KERNPG_TABLE)); - } - ret = zero_pmd_populate(pud, addr, end); - } - return ret; -} - -static int __init zero_pgd_populate(unsigned long addr, unsigned long end) -{ - int ret = 0; - pgd_t *pgd = pgd_offset_k(addr); - - while (IS_ALIGNED(addr, PGDIR_SIZE) && addr + PGDIR_SIZE <= end) { - WARN_ON(!pgd_none(*pgd)); - set_pgd(pgd, __pgd(__pa_nodebug(kasan_zero_pud) - | _KERNPG_TABLE)); - addr += PGDIR_SIZE; - pgd = pgd_offset_k(addr); - } - - if (addr < end) { - if (pgd_none(*pgd)) { - void *p = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE); - if (!p) - return -ENOMEM; - set_pgd(pgd, __pgd(__pa_nodebug(p) | _KERNPG_TABLE)); - } - ret = zero_pud_populate(pgd, addr, end); - } - return ret; -} - - -static void __init populate_zero_shadow(const void *start, const void *end) -{ - if (zero_pgd_populate((unsigned long)start, (unsigned long)end)) - panic("kasan: unable to map zero shadow!"); -} - - #ifdef CONFIG_KASAN_INLINE static int kasan_die_handler(struct notifier_block *self, unsigned long val, @@ -212,7 +98,7 @@ void __init kasan_init(void) clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); - populate_zero_shadow((void *)KASAN_SHADOW_START, + kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, kasan_mem_to_shadow((void *)PAGE_OFFSET)); for (i = 0; i < E820_X_MAX; i++) { @@ -222,14 +108,15 @@ void __init kasan_init(void) if (map_range(&pfn_mapped[i])) panic("kasan: unable to allocate shadow!"); } - populate_zero_shadow(kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), - kasan_mem_to_shadow((void *)__START_KERNEL_map)); + kasan_populate_zero_shadow( + kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), + kasan_mem_to_shadow((void *)__START_KERNEL_map)); vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext), (unsigned long)kasan_mem_to_shadow(_end), NUMA_NO_NODE); - populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), + kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), (void *)KASAN_SHADOW_END); memset(kasan_zero_page, 0, PAGE_SIZE); @@ -237,4 +124,6 @@ void __init kasan_init(void) load_cr3(init_level4_pgt); __flush_tlb_all(); init_task.kasan_depth = 0; + + pr_info("KernelAddressSanitizer initialized\n"); } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 2dd9b3ad3bb5..ccb4b86eb1cc 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -279,7 +279,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, __pa_symbol(__end_rodata) >> PAGE_SHIFT)) pgprot_val(forbidden) |= _PAGE_RW; -#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) +#if defined(CONFIG_X86_64) /* * Once the kernel maps the text as RO (kernel_set_to_readonly is set), * kernel text mappings for the large page aligned text, rodata sections diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h index 0224987556ce..3f69326ed545 100644 --- a/arch/x86/vdso/vdso2c.h +++ b/arch/x86/vdso/vdso2c.h @@ -140,7 +140,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, fprintf(outfile, "#include <asm/vdso.h>\n"); fprintf(outfile, "\n"); fprintf(outfile, - "static unsigned char raw_data[%lu] __page_aligned_data = {", + "static unsigned char raw_data[%lu] __ro_after_init __aligned(PAGE_SIZE) = {", mapping_size); for (j = 0; j < stripped_len; j++) { if (j % 10 == 0) |