aboutsummaryrefslogtreecommitdiff
path: root/arch/arm64/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r--arch/arm64/kernel/Makefile4
-rw-r--r--arch/arm64/kernel/alternative.c7
-rw-r--r--arch/arm64/kernel/armv8_deprecated.c11
-rw-r--r--arch/arm64/kernel/asm-offsets.c23
-rw-r--r--arch/arm64/kernel/cpufeature.c1
-rw-r--r--arch/arm64/kernel/efi.c334
-rw-r--r--arch/arm64/kernel/entry.S138
-rw-r--r--arch/arm64/kernel/head.S37
-rw-r--r--arch/arm64/kernel/hibernate.c51
-rw-r--r--arch/arm64/kernel/hw_breakpoint.c153
-rw-r--r--arch/arm64/kernel/io.c12
-rw-r--r--arch/arm64/kernel/module-plts.c108
-rw-r--r--arch/arm64/kernel/module.c2
-rw-r--r--arch/arm64/kernel/module.lds1
-rw-r--r--arch/arm64/kernel/probes/decode-insn.c4
-rw-r--r--arch/arm64/kernel/probes/kprobes.c4
-rw-r--r--arch/arm64/kernel/process.c111
-rw-r--r--arch/arm64/kernel/ptrace.c7
-rw-r--r--arch/arm64/kernel/return_address.c1
-rw-r--r--arch/arm64/kernel/setup.c15
-rw-r--r--arch/arm64/kernel/sleep.S4
-rw-r--r--arch/arm64/kernel/smp.c18
-rw-r--r--arch/arm64/kernel/smp_spin_table.c3
-rw-r--r--arch/arm64/kernel/stacktrace.c7
-rw-r--r--arch/arm64/kernel/suspend.c8
-rw-r--r--arch/arm64/kernel/topology.c86
-rw-r--r--arch/arm64/kernel/traps.c26
-rw-r--r--arch/arm64/kernel/vdso.c12
-rw-r--r--arch/arm64/kernel/vdso/Makefile7
-rw-r--r--arch/arm64/kernel/vdso/gettimeofday.S331
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S31
31 files changed, 926 insertions, 631 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 96ae961ada72..01c0b3881f88 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -52,7 +52,3 @@ obj-y += $(arm64-obj-y) vdso/ probes/
obj-m += $(arm64-obj-m)
head-y := head.o
extra-y += $(head-y) vmlinux.lds
-
-# vDSO - this must be built first to generate the symbol offsets
-$(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h
-$(obj)/vdso/vdso-offsets.h: $(obj)/vdso
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index d2ee1b21a10d..4434dabde898 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -25,14 +25,13 @@
#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/insn.h>
+#include <asm/sections.h>
#include <linux/stop_machine.h>
#define __ALT_PTR(a,f) (u32 *)((void *)&(a)->f + (a)->f)
#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset)
#define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset)
-extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-
struct alt_region {
struct alt_instr *begin;
struct alt_instr *end;
@@ -124,8 +123,8 @@ static int __apply_alternatives_multi_stop(void *unused)
{
static int patched = 0;
struct alt_region region = {
- .begin = __alt_instructions,
- .end = __alt_instructions_end,
+ .begin = (struct alt_instr *)__alt_instructions,
+ .end = (struct alt_instr *)__alt_instructions_end,
};
/* We always have a CPU 0 at this point (__init) */
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index d03e311b6f3c..10d3642deb7c 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -14,7 +14,6 @@
#include <linux/slab.h>
#include <linux/sysctl.h>
-#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/insn.h>
#include <asm/opcodes.h>
@@ -281,9 +280,9 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table)
* Error-checking SWP macros implemented using ldxr{b}/stxr{b}
*/
#define __user_swpX_asm(data, addr, res, temp, B) \
+do { \
+ uaccess_enable(); \
__asm__ __volatile__( \
- ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \
- CONFIG_ARM64_PAN) \
"0: ldxr"B" %w2, [%3]\n" \
"1: stxr"B" %w0, %w1, [%3]\n" \
" cbz %w0, 2f\n" \
@@ -299,12 +298,12 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table)
" .popsection" \
_ASM_EXTABLE(0b, 4b) \
_ASM_EXTABLE(1b, 4b) \
- ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \
- CONFIG_ARM64_PAN) \
: "=&r" (res), "+r" (data), "=&r" (temp) \
: "r" ((unsigned long)addr), "i" (-EAGAIN), \
"i" (-EFAULT) \
- : "memory")
+ : "memory"); \
+ uaccess_disable(); \
+} while (0)
#define __user_swp_asm(data, addr, res, temp) \
__user_swpX_asm(data, addr, res, temp, "")
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index a506ddc270c0..67ebe708e30c 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -35,11 +35,19 @@ int main(void)
{
DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm));
BLANK();
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
+ DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count));
+ DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
+ DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
+#else
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
- DEFINE(TI_TASK, offsetof(struct thread_info, task));
- DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
+#endif
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ DEFINE(TSK_TI_TTBR0, offsetof(struct thread_info, ttbr0));
+#endif
BLANK();
DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context));
BLANK();
@@ -89,6 +97,7 @@ int main(void)
BLANK();
DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
+ DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW);
DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE);
@@ -96,6 +105,8 @@ int main(void)
DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
BLANK();
DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last));
+ DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec));
+ DEFINE(VDSO_RAW_TIME_NSEC, offsetof(struct vdso_data, raw_time_nsec));
DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec));
DEFINE(VDSO_XTIME_CLK_NSEC, offsetof(struct vdso_data, xtime_clock_nsec));
DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
@@ -103,7 +114,8 @@ int main(void)
DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec));
DEFINE(VDSO_WTM_CLK_NSEC, offsetof(struct vdso_data, wtm_clock_nsec));
DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count));
- DEFINE(VDSO_CS_MULT, offsetof(struct vdso_data, cs_mult));
+ DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult));
+ DEFINE(VDSO_CS_RAW_MULT, offsetof(struct vdso_data, cs_raw_mult));
DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift));
DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest));
DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
@@ -119,6 +131,11 @@ int main(void)
BLANK();
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
BLANK();
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
+ DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
+ BLANK();
+#endif
#ifdef CONFIG_KVM_ARM_HOST
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 476fb5caa361..53fab76d3c39 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -46,6 +46,7 @@ unsigned int compat_elf_hwcap2 __read_mostly;
#endif
DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
+EXPORT_SYMBOL(cpu_hwcaps);
#define __ARM64_FTR_BITS(SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
{ \
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 40270c1fa947..b60da856fc19 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -11,317 +11,34 @@
*
*/
-#include <linux/atomic.h>
#include <linux/dmi.h>
#include <linux/efi.h>
-#include <linux/export.h>
-#include <linux/memblock.h>
-#include <linux/mm_types.h>
-#include <linux/bootmem.h>
-#include <linux/of.h>
-#include <linux/of_fdt.h>
-#include <linux/preempt.h>
-#include <linux/rbtree.h>
-#include <linux/rwsem.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
+#include <linux/init.h>
-#include <asm/cacheflush.h>
#include <asm/efi.h>
-#include <asm/tlbflush.h>
-#include <asm/mmu_context.h>
-#include <asm/mmu.h>
-#include <asm/pgtable.h>
-struct efi_memory_map memmap;
-
-static u64 efi_system_table;
-
-static pgd_t efi_pgd[PTRS_PER_PGD] __page_aligned_bss;
-
-static struct mm_struct efi_mm = {
- .mm_rb = RB_ROOT,
- .pgd = efi_pgd,
- .mm_users = ATOMIC_INIT(2),
- .mm_count = ATOMIC_INIT(1),
- .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem),
- .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
- .mmlist = LIST_HEAD_INIT(efi_mm.mmlist),
-};
-
-static int __init is_normal_ram(efi_memory_desc_t *md)
-{
- if (md->attribute & EFI_MEMORY_WB)
- return 1;
- return 0;
-}
-
-/*
- * Translate a EFI virtual address into a physical address: this is necessary,
- * as some data members of the EFI system table are virtually remapped after
- * SetVirtualAddressMap() has been called.
- */
-static phys_addr_t efi_to_phys(unsigned long addr)
+int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
{
- efi_memory_desc_t *md;
-
- for_each_efi_memory_desc(&memmap, md) {
- if (!(md->attribute & EFI_MEMORY_RUNTIME))
- continue;
- if (md->virt_addr == 0)
- /* no virtual mapping has been installed by the stub */
- break;
- if (md->virt_addr <= addr &&
- (addr - md->virt_addr) < (md->num_pages << EFI_PAGE_SHIFT))
- return md->phys_addr + addr - md->virt_addr;
- }
- return addr;
-}
-
-static int __init uefi_init(void)
-{
- efi_char16_t *c16;
- void *config_tables;
- u64 table_size;
- char vendor[100] = "unknown";
- int i, retval;
-
- efi.systab = early_memremap(efi_system_table,
- sizeof(efi_system_table_t));
- if (efi.systab == NULL) {
- pr_warn("Unable to map EFI system table.\n");
- return -ENOMEM;
- }
-
- set_bit(EFI_BOOT, &efi.flags);
- set_bit(EFI_64BIT, &efi.flags);
+ pteval_t prot_val;
/*
- * Verify the EFI Table
+ * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
+ * executable, everything else can be mapped with the XN bits
+ * set.
*/
- if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) {
- pr_err("System table signature incorrect\n");
- retval = -EINVAL;
- goto out;
- }
- if ((efi.systab->hdr.revision >> 16) < 2)
- pr_warn("Warning: EFI system table version %d.%02d, expected 2.00 or greater\n",
- efi.systab->hdr.revision >> 16,
- efi.systab->hdr.revision & 0xffff);
-
- /* Show what we know for posterity */
- c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor),
- sizeof(vendor) * sizeof(efi_char16_t));
- if (c16) {
- for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i)
- vendor[i] = c16[i];
- vendor[i] = '\0';
- early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t));
- }
-
- pr_info("EFI v%u.%.02u by %s\n",
- efi.systab->hdr.revision >> 16,
- efi.systab->hdr.revision & 0xffff, vendor);
-
- table_size = sizeof(efi_config_table_64_t) * efi.systab->nr_tables;
- config_tables = early_memremap(efi_to_phys(efi.systab->tables),
- table_size);
- if (config_tables == NULL) {
- pr_warn("Unable to map EFI config table array.\n");
- retval = -ENOMEM;
- goto out;
- }
- retval = efi_config_parse_tables(config_tables, efi.systab->nr_tables,
- sizeof(efi_config_table_64_t), NULL);
-
- early_memunmap(config_tables, table_size);
-out:
- early_memunmap(efi.systab, sizeof(efi_system_table_t));
- return retval;
-}
-
-/*
- * Return true for RAM regions we want to permanently reserve.
- */
-static __init int is_reserve_region(efi_memory_desc_t *md)
-{
- switch (md->type) {
- case EFI_LOADER_CODE:
- case EFI_LOADER_DATA:
- case EFI_BOOT_SERVICES_CODE:
- case EFI_BOOT_SERVICES_DATA:
- case EFI_CONVENTIONAL_MEMORY:
- case EFI_PERSISTENT_MEMORY:
- return 0;
- default:
- break;
- }
- return is_normal_ram(md);
-}
-
-static __init void reserve_regions(void)
-{
- efi_memory_desc_t *md;
- u64 paddr, npages, size;
-
- if (efi_enabled(EFI_DBG))
- pr_info("Processing EFI memory map:\n");
-
- for_each_efi_memory_desc(&memmap, md) {
- paddr = md->phys_addr;
- npages = md->num_pages;
-
- if (efi_enabled(EFI_DBG)) {
- char buf[64];
-
- pr_info(" 0x%012llx-0x%012llx %s",
- paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1,
- efi_md_typeattr_format(buf, sizeof(buf), md));
- }
-
- memrange_efi_to_native(&paddr, &npages);
- size = npages << PAGE_SHIFT;
-
- if (is_normal_ram(md))
- early_init_dt_add_memory_arch(paddr, size);
-
- if (is_reserve_region(md)) {
- memblock_reserve(paddr, size);
- if (efi_enabled(EFI_DBG))
- pr_cont("*");
- }
-
- if (efi_enabled(EFI_DBG))
- pr_cont("\n");
- }
-
- set_bit(EFI_MEMMAP, &efi.flags);
-}
-
-void __init efi_init(void)
-{
- struct efi_fdt_params params;
-
- /* Grab UEFI information placed in FDT by stub */
- if (!efi_get_fdt_params(&params))
- return;
-
- efi_system_table = params.system_table;
-
- memblock_reserve(params.mmap & PAGE_MASK,
- PAGE_ALIGN(params.mmap_size + (params.mmap & ~PAGE_MASK)));
- memmap.phys_map = params.mmap;
- memmap.map = early_memremap(params.mmap, params.mmap_size);
- if (memmap.map == NULL) {
- /*
- * If we are booting via UEFI, the UEFI memory map is the only
- * description of memory we have, so there is little point in
- * proceeding if we cannot access it.
- */
- panic("Unable to map EFI memory map.\n");
- }
- memmap.map_end = memmap.map + params.mmap_size;
- memmap.desc_size = params.desc_size;
- memmap.desc_version = params.desc_ver;
-
- if (uefi_init() < 0)
- return;
-
- reserve_regions();
- early_memunmap(memmap.map, params.mmap_size);
-}
-
-static bool __init efi_virtmap_init(void)
-{
- efi_memory_desc_t *md;
-
- init_new_context(NULL, &efi_mm);
-
- for_each_efi_memory_desc(&memmap, md) {
- pgprot_t prot;
-
- if (!(md->attribute & EFI_MEMORY_RUNTIME))
- continue;
- if (md->virt_addr == 0)
- return false;
-
- pr_info(" EFI remap 0x%016llx => %p\n",
- md->phys_addr, (void *)md->virt_addr);
-
- /*
- * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
- * executable, everything else can be mapped with the XN bits
- * set.
- */
- if (!is_normal_ram(md))
- prot = __pgprot(PROT_DEVICE_nGnRE);
- else if (md->type == EFI_RUNTIME_SERVICES_CODE ||
- !PAGE_ALIGNED(md->phys_addr))
- prot = PAGE_KERNEL_EXEC;
- else
- prot = PAGE_KERNEL;
-
- create_pgd_mapping(&efi_mm, md->phys_addr, md->virt_addr,
- md->num_pages << EFI_PAGE_SHIFT,
- __pgprot(pgprot_val(prot) | PTE_NG), true);
- }
- return true;
-}
-
-/*
- * Enable the UEFI Runtime Services if all prerequisites are in place, i.e.,
- * non-early mapping of the UEFI system table and virtual mappings for all
- * EFI_MEMORY_RUNTIME regions.
- */
-static int __init arm64_enable_runtime_services(void)
-{
- u64 mapsize;
-
- if (!efi_enabled(EFI_BOOT)) {
- pr_info("EFI services will not be available.\n");
- return 0;
- }
-
- if (efi_runtime_disabled()) {
- pr_info("EFI runtime services will be disabled.\n");
- return 0;
- }
-
- pr_info("Remapping and enabling EFI services.\n");
-
- mapsize = memmap.map_end - memmap.map;
- memmap.map = (__force void *)ioremap_cache(memmap.phys_map,
- mapsize);
- if (!memmap.map) {
- pr_err("Failed to remap EFI memory map\n");
- return -ENOMEM;
- }
- memmap.map_end = memmap.map + mapsize;
- efi.memmap = &memmap;
-
- efi.systab = (__force void *)ioremap_cache(efi_system_table,
- sizeof(efi_system_table_t));
- if (!efi.systab) {
- pr_err("Failed to remap EFI System Table\n");
- return -ENOMEM;
- }
- set_bit(EFI_SYSTEM_TABLES, &efi.flags);
-
- if (!efi_virtmap_init()) {
- pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n");
- return -ENOMEM;
- }
-
- /* Set up runtime services function pointers */
- efi_native_runtime_setup();
- set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
-
- efi.runtime_version = efi.systab->hdr.revision;
-
+ if ((md->attribute & EFI_MEMORY_WB) == 0)
+ prot_val = PROT_DEVICE_nGnRE;
+ else if (md->type == EFI_RUNTIME_SERVICES_CODE ||
+ !PAGE_ALIGNED(md->phys_addr))
+ prot_val = pgprot_val(PAGE_KERNEL_EXEC);
+ else
+ prot_val = pgprot_val(PAGE_KERNEL);
+
+ create_pgd_mapping(mm, md->phys_addr, md->virt_addr,
+ md->num_pages << EFI_PAGE_SHIFT,
+ __pgprot(prot_val | PTE_NG), true);
return 0;
}
-early_initcall(arm64_enable_runtime_services);
static int __init arm64_dmi_init(void)
{
@@ -337,23 +54,6 @@ static int __init arm64_dmi_init(void)
}
core_initcall(arm64_dmi_init);
-static void efi_set_pgd(struct mm_struct *mm)
-{
- switch_mm(NULL, mm, NULL);
-}
-
-void efi_virtmap_load(void)
-{
- preempt_disable();
- efi_set_pgd(&efi_mm);
-}
-
-void efi_virtmap_unload(void)
-{
- efi_set_pgd(current->active_mm);
- preempt_enable();
-}
-
/*
* UpdateCapsule() depends on the system being shutdown via
* ResetSystem().
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 6f69650c8b7b..b5119f7f5cd6 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -29,7 +29,9 @@
#include <asm/esr.h>
#include <asm/irq.h>
#include <asm/memory.h>
+#include <asm/ptrace.h>
#include <asm/thread_info.h>
+#include <asm/uaccess.h>
#include <asm/asm-uaccess.h>
#include <asm/unistd.h>
@@ -91,9 +93,14 @@
.if \el == 0
mrs x21, sp_el0
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ ldr_this_cpu tsk, __entry_task, x20 // Ensure MDSCR_EL1.SS is clear,
+ ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug
+#else
mov tsk, sp
and tsk, tsk, #~(THREAD_SIZE - 1) // Ensure MDSCR_EL1.SS is clear,
ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug
+#endif
disable_step_tsk x19, x20 // exceptions when scheduling.
mov x29, xzr // fp pointed to user-space
@@ -101,14 +108,48 @@
add x21, sp, #S_FRAME_SIZE
get_thread_info tsk
/* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ ldr x20, [tsk, #TSK_TI_ADDR_LIMIT]
+#else
ldr x20, [tsk, #TI_ADDR_LIMIT]
+#endif
str x20, [sp, #S_ORIG_ADDR_LIMIT]
mov x20, #TASK_SIZE_64
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ str x20, [tsk, #TSK_TI_ADDR_LIMIT]
+#else
str x20, [tsk, #TI_ADDR_LIMIT]
+#endif
.endif /* \el == 0 */
mrs x22, elr_el1
mrs x23, spsr_el1
stp lr, x21, [sp, #S_LR]
+
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ /*
+ * Set the TTBR0 PAN bit in SPSR. When the exception is taken from
+ * EL0, there is no need to check the state of TTBR0_EL1 since
+ * accesses are always enabled.
+ * Note that the meaning of this bit differs from the ARMv8.1 PAN
+ * feature as all TTBR0_EL1 accesses are disabled, not just those to
+ * user mappings.
+ */
+alternative_if ARM64_HAS_PAN
+ b 1f // skip TTBR0 PAN
+alternative_else_nop_endif
+
+ .if \el != 0
+ mrs x21, ttbr0_el1
+ tst x21, #0xffff << 48 // Check for the reserved ASID
+ orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR
+ b.eq 1f // TTBR0 access already disabled
+ and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR
+ .endif
+
+ __uaccess_ttbr0_disable x21
+1:
+#endif
+
stp x22, x23, [sp, #S_PC]
/*
@@ -139,12 +180,50 @@
.if \el != 0
/* Restore the task's original addr_limit. */
ldr x20, [sp, #S_ORIG_ADDR_LIMIT]
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ str x20, [tsk, #TSK_TI_ADDR_LIMIT]
+#else
str x20, [tsk, #TI_ADDR_LIMIT]
+#endif
.endif
ldp x21, x22, [sp, #S_PC] // load ELR, SPSR
.if \el == 0
ct_user_enter
+ .endif
+
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ /*
+ * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
+ * PAN bit checking.
+ */
+alternative_if ARM64_HAS_PAN
+ b 2f // skip TTBR0 PAN
+alternative_else_nop_endif
+
+ .if \el != 0
+ tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set
+ .endif
+
+ __uaccess_ttbr0_enable x0
+
+ .if \el == 0
+ /*
+ * Enable errata workarounds only if returning to user. The only
+ * workaround currently required for TTBR0_EL1 changes are for the
+ * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
+ * corruption).
+ */
+ post_ttbr0_update_workaround
+ .endif
+1:
+ .if \el != 0
+ and x22, x22, #~PSR_PAN_BIT // ARMv8.0 CPUs do not understand this bit
+ .endif
+2:
+#endif
+
+ .if \el == 0
ldr x23, [sp, #S_SP] // load return stack pointer
msr sp_el0, x23
#ifdef CONFIG_ARM64_ERRATUM_845719
@@ -166,6 +245,7 @@ alternative_else
alternative_endif
#endif
.endif
+
msr elr_el1, x21 // set up the return data
msr spsr_el1, x22
ldp x0, x1, [sp, #16 * 0]
@@ -188,23 +268,26 @@ alternative_endif
eret // return to kernel
.endm
- .macro get_thread_info, rd
- mrs \rd, sp_el0
- .endm
-
.macro irq_stack_entry
mov x19, sp // preserve the original sp
/*
- * Compare sp with the current thread_info, if the top
- * ~(THREAD_SIZE - 1) bits match, we are on a task stack, and
- * should switch to the irq stack.
+ * Compare sp with the base of the task stack.
+ * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack,
+ * and should switch to the irq stack.
*/
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ ldr x25, [tsk, TSK_STACK]
+ eor x25, x25, x19
+ and x25, x25, #~(THREAD_SIZE - 1)
+ cbnz x25, 9998f
+#else
and x25, x19, #~(THREAD_SIZE - 1)
cmp x25, tsk
b.ne 9998f
+#endif
- this_cpu_ptr irq_stack, x25, x26
+ adr_this_cpu x25, irq_stack, x26
mov x26, #IRQ_STACK_START_SP
add x26, x25, x26
@@ -292,7 +375,7 @@ END(vectors)
* Invalid mode handlers
*/
.macro inv_entry, el, reason, regsize = 64
- kernel_entry el, \regsize
+ kernel_entry \el, \regsize
mov x0, sp
mov x1, #\reason
mrs x2, esr_el1
@@ -351,6 +434,8 @@ el1_sync:
lsr x24, x1, #ESR_ELx_EC_SHIFT // exception class
cmp x24, #ESR_ELx_EC_DABT_CUR // data abort in EL1
b.eq el1_da
+ cmp x24, #ESR_ELx_EC_IABT_CUR // instruction abort in EL1
+ b.eq el1_ia
cmp x24, #ESR_ELx_EC_SYS64 // configurable trap
b.eq el1_undef
cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception
@@ -362,6 +447,11 @@ el1_sync:
cmp x24, #ESR_ELx_EC_BREAKPT_CUR // debug exception in EL1
b.ge el1_dbg
b el1_inv
+
+el1_ia:
+ /*
+ * Fall through to the Data abort case
+ */
el1_da:
/*
* Data abort handling
@@ -426,9 +516,17 @@ el1_irq:
irq_handler
#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ ldr w24, [tsk, #TSK_TI_PREEMPT] // get preempt count
+#else
ldr w24, [tsk, #TI_PREEMPT] // get preempt count
+#endif
cbnz w24, 1f // preempt count != 0
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ ldr x0, [tsk, #TSK_TI_FLAGS] // get flags
+#else
ldr x0, [tsk, #TI_FLAGS] // get flags
+#endif
tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
bl el1_preempt
1:
@@ -443,7 +541,11 @@ ENDPROC(el1_irq)
el1_preempt:
mov x24, lr
1: bl preempt_schedule_irq // irq en/disable is done inside
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ ldr x0, [tsk, #TSK_TI_FLAGS] // get new tasks TI_FLAGS
+#else
ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS
+#endif
tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling?
ret x24
#endif
@@ -548,7 +650,7 @@ el0_ia:
enable_dbg_and_irq
ct_user_exit
mov x0, x26
- orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts
+ mov x1, x25
mov x2, sp
bl do_mem_abort
b ret_to_user
@@ -663,8 +765,12 @@ ENTRY(cpu_switch_to)
ldp x29, x9, [x8], #16
ldr lr, [x8]
mov sp, x9
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ msr sp_el0, x1
+#else
and x9, x9, #~(THREAD_SIZE - 1)
msr sp_el0, x9
+#endif
ret
ENDPROC(cpu_switch_to)
@@ -675,7 +781,11 @@ ENDPROC(cpu_switch_to)
ret_fast_syscall:
disable_irq // disable interrupts
str x0, [sp, #S_X0] // returned x0
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for syscall tracing
+#else
ldr x1, [tsk, #TI_FLAGS] // re-check for syscall tracing
+#endif
and x2, x1, #_TIF_SYSCALL_WORK
cbnz x2, ret_fast_syscall_trace
and x2, x1, #_TIF_WORK_MASK
@@ -707,7 +817,11 @@ work_resched:
*/
ret_to_user:
disable_irq // disable interrupts
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ ldr x1, [tsk, #TSK_TI_FLAGS]
+#else
ldr x1, [tsk, #TI_FLAGS]
+#endif
and x2, x1, #_TIF_WORK_MASK
cbnz x2, work_pending
enable_step_tsk x1, x2
@@ -739,7 +853,11 @@ el0_svc_naked: // compat entry point
enable_dbg_and_irq
ct_user_exit 1
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ ldr x16, [tsk, #TSK_TI_FLAGS] // check for syscall hooks
+#else
ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks
+#endif
tst x16, #_TIF_SYSCALL_WORK
b.ne __sys_trace
cmp scno, sc_nr // check upper syscall limit
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 23cf8e9a295f..3e99814b6463 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -319,14 +319,14 @@ __create_page_tables:
* dirty cache lines being evicted.
*/
mov x0, x25
- add x1, x26, #SWAPPER_DIR_SIZE
+ add x1, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
bl __inval_cache_range
/*
* Clear the idmap and swapper page tables.
*/
mov x0, x25
- add x6, x26, #SWAPPER_DIR_SIZE
+ add x6, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
1: stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
@@ -405,7 +405,7 @@ __create_page_tables:
* tables again to remove any speculatively loaded cache lines.
*/
mov x0, x25
- add x1, x26, #SWAPPER_DIR_SIZE
+ add x1, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
dmb sy
bl __inval_cache_range
@@ -419,6 +419,7 @@ ENDPROC(__create_page_tables)
.set initial_sp, init_thread_union + THREAD_START_SP
__primary_switched:
mov x28, lr // preserve LR
+
adr_l x8, vectors // load VBAR_EL1 with virtual
msr vbar_el1, x8 // vector table address
isb
@@ -431,10 +432,18 @@ __primary_switched:
bl __pi_memset
dsb ishst // Make zero page visible to PTW
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ adrp x4, init_thread_union
+ add sp, x4, #THREAD_SIZE
+ adr_l x5, init_task
+ msr sp_el0, x5 // Save thread_info
+#else
adr_l sp, initial_sp, x4
mov x4, sp
and x4, x4, #~(THREAD_SIZE - 1)
msr sp_el0, x4 // Save thread_info
+#endif
+
str_l x21, __fdt_pointer, x5 // Save FDT pointer
ldr_l x4, kimage_vaddr // Save the offset between
@@ -464,7 +473,7 @@ ENDPROC(__primary_switched)
* end early head section, begin head code that is also used for
* hotplug and needs to have the same protections as the text region
*/
- .section ".text","ax"
+ .section ".idmap.text","ax"
ENTRY(kimage_vaddr)
.quad _text - TEXT_OFFSET
@@ -587,17 +596,23 @@ set_cpu_boot_mode_flag:
ENDPROC(set_cpu_boot_mode_flag)
/*
+ * These values are written with the MMU off, but read with the MMU on.
+ * Writers will invalidate the corresponding address, discarding up to a
+ * 'Cache Writeback Granule' (CWG) worth of data. The linker script ensures
+ * sufficient alignment that the CWG doesn't overlap another section.
+ */
+ .pushsection ".mmuoff.data.write", "aw"
+/*
* We need to find out the CPU boot mode long after boot, so we need to
* store it in a writable variable.
*
* This is not in .bss, because we set it sufficiently early that the boot-time
* zeroing of .bss would clobber it.
*/
- .pushsection .data..cacheline_aligned
- .align L1_CACHE_SHIFT
ENTRY(__boot_cpu_mode)
.long BOOT_CPU_MODE_EL2
.long BOOT_CPU_MODE_EL1
+
.popsection
/*
@@ -644,12 +659,19 @@ __secondary_switched:
adr_l x5, vectors
msr vbar_el1, x5
isb
-
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ adr_l x0, secondary_data
+ ldr x1, [x0, #CPU_BOOT_STACK] // get secondary_data.stack
+ mov sp, x1
+ ldr x2, [x0, #CPU_BOOT_TASK]
+ msr sp_el0, x2
+#else
adr_l x0, secondary_data
ldr x0, [x0, #CPU_BOOT_STACK] // get secondary_data.stack
mov sp, x0
and x0, x0, #~(THREAD_SIZE - 1)
msr sp_el0, x0 // save thread_info
+#endif
mov x29, #0
b secondary_start_kernel
ENDPROC(__secondary_switched)
@@ -688,7 +710,6 @@ ENTRY(__early_cpu_boot_status)
* Checks if the selected granule size is supported by the CPU.
* If it isn't, park the CPU
*/
- .section ".idmap.text", "ax"
ENTRY(__enable_mmu)
mrs x18, sctlr_el1 // preserve old SCTLR_EL1 value
mrs x1, ID_AA64MMFR0_EL1
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 35a33d705536..4284ced551fe 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -54,12 +54,6 @@ extern int in_suspend;
/* Do we need to reset el2? */
#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode())
-/*
- * Start/end of the hibernate exit code, this must be copied to a 'safe'
- * location in memory, and executed from there.
- */
-extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
-
/* temporary el2 vectors in the __hibernate_exit_text section. */
extern char hibernate_el2_vectors[];
@@ -242,6 +236,7 @@ out:
return rc;
}
+#define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start))
int swsusp_arch_suspend(void)
{
@@ -257,8 +252,13 @@ int swsusp_arch_suspend(void)
ret = swsusp_save();
} else {
- /* Clean kernel to PoC for secondary core startup */
- __flush_dcache_area(LMADDR(KERNEL_START), KERNEL_END - KERNEL_START);
+ /* Clean kernel core startup/idle code to PoC*/
+ dcache_clean_range(__mmuoff_data_start, __mmuoff_data_end);
+ dcache_clean_range(__idmap_text_start, __idmap_text_end);
+
+ /* Clean kvm setup code to PoC? */
+ if (el2_reset_needed())
+ dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end);
/* make the crash dump kernel image protected again */
crash_post_resume();
@@ -277,6 +277,33 @@ int swsusp_arch_suspend(void)
return ret;
}
+static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
+{
+ pte_t pte = *src_pte;
+
+ if (pte_valid(pte)) {
+ /*
+ * Resume will overwrite areas that may be marked
+ * read only (code, rodata). Clear the RDONLY bit from
+ * the temporary mappings we use during restore.
+ */
+ set_pte(dst_pte, pte_clear_rdonly(pte));
+ } else if (debug_pagealloc_enabled() && !pte_none(pte)) {
+ /*
+ * debug_pagealloc will removed the PTE_VALID bit if
+ * the page isn't in use by the resume kernel. It may have
+ * been in use by the original kernel, in which case we need
+ * to put it back in our copy to do the restore.
+ *
+ * Before marking this entry valid, check the pfn should
+ * be mapped.
+ */
+ BUG_ON(!pfn_valid(pte_pfn(pte)));
+
+ set_pte(dst_pte, pte_mkpresent(pte_clear_rdonly(pte)));
+ }
+}
+
static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start,
unsigned long end)
{
@@ -292,13 +319,7 @@ static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start,
src_pte = pte_offset_kernel(src_pmd, start);
do {
- if (!pte_none(*src_pte))
- /*
- * Resume will overwrite areas that may be marked
- * read only (code, rodata). Clear the RDONLY bit from
- * the temporary mappings we use during restore.
- */
- set_pte(dst_pte, __pte(pte_val(*src_pte) & ~PTE_RDONLY));
+ _copy_pte(dst_pte, src_pte, addr);
} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
return 0;
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 22543cb1a8e1..1c694f3c643c 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -318,9 +318,21 @@ static int get_hbp_len(u8 hbp_len)
case ARM_BREAKPOINT_LEN_2:
len_in_bytes = 2;
break;
+ case ARM_BREAKPOINT_LEN_3:
+ len_in_bytes = 3;
+ break;
case ARM_BREAKPOINT_LEN_4:
len_in_bytes = 4;
break;
+ case ARM_BREAKPOINT_LEN_5:
+ len_in_bytes = 5;
+ break;
+ case ARM_BREAKPOINT_LEN_6:
+ len_in_bytes = 6;
+ break;
+ case ARM_BREAKPOINT_LEN_7:
+ len_in_bytes = 7;
+ break;
case ARM_BREAKPOINT_LEN_8:
len_in_bytes = 8;
break;
@@ -350,7 +362,7 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp)
* to generic breakpoint descriptions.
*/
int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
- int *gen_len, int *gen_type)
+ int *gen_len, int *gen_type, int *offset)
{
/* Type */
switch (ctrl.type) {
@@ -370,17 +382,33 @@ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
return -EINVAL;
}
+ if (!ctrl.len)
+ return -EINVAL;
+ *offset = __ffs(ctrl.len);
+
/* Len */
- switch (ctrl.len) {
+ switch (ctrl.len >> *offset) {
case ARM_BREAKPOINT_LEN_1:
*gen_len = HW_BREAKPOINT_LEN_1;
break;
case ARM_BREAKPOINT_LEN_2:
*gen_len = HW_BREAKPOINT_LEN_2;
break;
+ case ARM_BREAKPOINT_LEN_3:
+ *gen_len = HW_BREAKPOINT_LEN_3;
+ break;
case ARM_BREAKPOINT_LEN_4:
*gen_len = HW_BREAKPOINT_LEN_4;
break;
+ case ARM_BREAKPOINT_LEN_5:
+ *gen_len = HW_BREAKPOINT_LEN_5;
+ break;
+ case ARM_BREAKPOINT_LEN_6:
+ *gen_len = HW_BREAKPOINT_LEN_6;
+ break;
+ case ARM_BREAKPOINT_LEN_7:
+ *gen_len = HW_BREAKPOINT_LEN_7;
+ break;
case ARM_BREAKPOINT_LEN_8:
*gen_len = HW_BREAKPOINT_LEN_8;
break;
@@ -424,9 +452,21 @@ static int arch_build_bp_info(struct perf_event *bp)
case HW_BREAKPOINT_LEN_2:
info->ctrl.len = ARM_BREAKPOINT_LEN_2;
break;
+ case HW_BREAKPOINT_LEN_3:
+ info->ctrl.len = ARM_BREAKPOINT_LEN_3;
+ break;
case HW_BREAKPOINT_LEN_4:
info->ctrl.len = ARM_BREAKPOINT_LEN_4;
break;
+ case HW_BREAKPOINT_LEN_5:
+ info->ctrl.len = ARM_BREAKPOINT_LEN_5;
+ break;
+ case HW_BREAKPOINT_LEN_6:
+ info->ctrl.len = ARM_BREAKPOINT_LEN_6;
+ break;
+ case HW_BREAKPOINT_LEN_7:
+ info->ctrl.len = ARM_BREAKPOINT_LEN_7;
+ break;
case HW_BREAKPOINT_LEN_8:
info->ctrl.len = ARM_BREAKPOINT_LEN_8;
break;
@@ -518,18 +558,17 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
default:
return -EINVAL;
}
-
- info->address &= ~alignment_mask;
- info->ctrl.len <<= offset;
} else {
if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE)
alignment_mask = 0x3;
else
alignment_mask = 0x7;
- if (info->address & alignment_mask)
- return -EINVAL;
+ offset = info->address & alignment_mask;
}
+ info->address &= ~alignment_mask;
+ info->ctrl.len <<= offset;
+
/*
* Disallow per-task kernel breakpoints since these would
* complicate the stepping code.
@@ -662,12 +701,47 @@ unlock:
}
NOKPROBE_SYMBOL(breakpoint_handler);
+/*
+ * Arm64 hardware does not always report a watchpoint hit address that matches
+ * one of the watchpoints set. It can also report an address "near" the
+ * watchpoint if a single instruction access both watched and unwatched
+ * addresses. There is no straight-forward way, short of disassembling the
+ * offending instruction, to map that address back to the watchpoint. This
+ * function computes the distance of the memory access from the watchpoint as a
+ * heuristic for the likelyhood that a given access triggered the watchpoint.
+ *
+ * See Section D2.10.5 "Determining the memory location that caused a Watchpoint
+ * exception" of ARMv8 Architecture Reference Manual for details.
+ *
+ * The function returns the distance of the address from the bytes watched by
+ * the watchpoint. In case of an exact match, it returns 0.
+ */
+static u64 get_distance_from_watchpoint(unsigned long addr, u64 val,
+ struct arch_hw_breakpoint_ctrl *ctrl)
+{
+ u64 wp_low, wp_high;
+ u32 lens, lene;
+
+ lens = __ffs(ctrl->len);
+ lene = __fls(ctrl->len);
+
+ wp_low = val + lens;
+ wp_high = val + lene;
+ if (addr < wp_low)
+ return wp_low - addr;
+ else if (addr > wp_high)
+ return addr - wp_high;
+ else
+ return 0;
+}
+
static int watchpoint_handler(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
- int i, step = 0, *kernel_step, access;
+ int i, step = 0, *kernel_step, access, closest_match = 0;
+ u64 min_dist = -1, dist;
u32 ctrl_reg;
- u64 val, alignment_mask;
+ u64 val;
struct perf_event *wp, **slots;
struct debug_info *debug_info;
struct arch_hw_breakpoint *info;
@@ -676,35 +750,15 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr,
slots = this_cpu_ptr(wp_on_reg);
debug_info = &current->thread.debug;
+ /*
+ * Find all watchpoints that match the reported address. If no exact
+ * match is found. Attribute the hit to the closest watchpoint.
+ */
+ rcu_read_lock();
for (i = 0; i < core_num_wrps; ++i) {
- rcu_read_lock();
-
wp = slots[i];
-
if (wp == NULL)
- goto unlock;
-
- info = counter_arch_bp(wp);
- /* AArch32 watchpoints are either 4 or 8 bytes aligned. */
- if (is_compat_task()) {
- if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
- alignment_mask = 0x7;
- else
- alignment_mask = 0x3;
- } else {
- alignment_mask = 0x7;
- }
-
- /* Check if the watchpoint value matches. */
- val = read_wb_reg(AARCH64_DBG_REG_WVR, i);
- if (val != (untagged_addr(addr) & ~alignment_mask))
- goto unlock;
-
- /* Possible match, check the byte address select to confirm. */
- ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i);
- decode_ctrl_reg(ctrl_reg, &ctrl);
- if (!((1 << (addr & alignment_mask)) & ctrl.len))
- goto unlock;
+ continue;
/*
* Check that the access type matches.
@@ -713,18 +767,41 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr,
access = (esr & AARCH64_ESR_ACCESS_MASK) ? HW_BREAKPOINT_W :
HW_BREAKPOINT_R;
if (!(access & hw_breakpoint_type(wp)))
- goto unlock;
+ continue;
+ /* Check if the watchpoint value and byte select match. */
+ val = read_wb_reg(AARCH64_DBG_REG_WVR, i);
+ ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i);
+ decode_ctrl_reg(ctrl_reg, &ctrl);
+ dist = get_distance_from_watchpoint(addr, val, &ctrl);
+ if (dist < min_dist) {
+ min_dist = dist;
+ closest_match = i;
+ }
+ /* Is this an exact match? */
+ if (dist != 0)
+ continue;
+
+ info = counter_arch_bp(wp);
info->trigger = addr;
perf_bp_event(wp, regs);
/* Do we need to handle the stepping? */
if (!wp->overflow_handler)
step = 1;
+ }
+ if (min_dist > 0 && min_dist != -1) {
+ /* No exact match found. */
+ wp = slots[closest_match];
+ info = counter_arch_bp(wp);
+ info->trigger = addr;
+ perf_bp_event(wp, regs);
-unlock:
- rcu_read_unlock();
+ /* Do we need to handle the stepping? */
+ if (!wp->overflow_handler)
+ step = 1;
}
+ rcu_read_unlock();
if (!step)
return 0;
diff --git a/arch/arm64/kernel/io.c b/arch/arm64/kernel/io.c
index 354be2a872ae..79b17384effa 100644
--- a/arch/arm64/kernel/io.c
+++ b/arch/arm64/kernel/io.c
@@ -25,8 +25,7 @@
*/
void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
{
- while (count && (!IS_ALIGNED((unsigned long)from, 8) ||
- !IS_ALIGNED((unsigned long)to, 8))) {
+ while (count && !IS_ALIGNED((unsigned long)from, 8)) {
*(u8 *)to = __raw_readb(from);
from++;
to++;
@@ -54,23 +53,22 @@ EXPORT_SYMBOL(__memcpy_fromio);
*/
void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
{
- while (count && (!IS_ALIGNED((unsigned long)to, 8) ||
- !IS_ALIGNED((unsigned long)from, 8))) {
- __raw_writeb(*(volatile u8 *)from, to);
+ while (count && !IS_ALIGNED((unsigned long)to, 8)) {
+ __raw_writeb(*(u8 *)from, to);
from++;
to++;
count--;
}
while (count >= 8) {
- __raw_writeq(*(volatile u64 *)from, to);
+ __raw_writeq(*(u64 *)from, to);
from += 8;
to += 8;
count -= 8;
}
while (count) {
- __raw_writeb(*(volatile u8 *)from, to);
+ __raw_writeb(*(u8 *)from, to);
from++;
to++;
count--;
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index 1ce90d8450ae..d05dbe658409 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2014-2016 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014-2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -26,35 +26,21 @@ struct plt_entry {
__le32 br; /* br x16 */
};
-u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela,
+static bool in_init(const struct module *mod, void *loc)
+{
+ return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size;
+}
+
+u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
Elf64_Sym *sym)
{
- struct plt_entry *plt = (struct plt_entry *)mod->arch.plt->sh_addr;
- int i = mod->arch.plt_num_entries;
+ struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
+ &mod->arch.init;
+ struct plt_entry *plt = (struct plt_entry *)pltsec->plt->sh_addr;
+ int i = pltsec->plt_num_entries;
u64 val = sym->st_value + rela->r_addend;
/*
- * We only emit PLT entries against undefined (SHN_UNDEF) symbols,
- * which are listed in the ELF symtab section, but without a type
- * or a size.
- * So, similar to how the module loader uses the Elf64_Sym::st_value
- * field to store the resolved addresses of undefined symbols, let's
- * borrow the Elf64_Sym::st_size field (whose value is never used by
- * the module loader, even for symbols that are defined) to record
- * the address of a symbol's associated PLT entry as we emit it for a
- * zero addend relocation (which is the only kind we have to deal with
- * in practice). This allows us to find duplicates without having to
- * go through the table every time.
- */
- if (rela->r_addend == 0 && sym->st_size != 0) {
- BUG_ON(sym->st_size < (u64)plt || sym->st_size >= (u64)&plt[i]);
- return sym->st_size;
- }
-
- mod->arch.plt_num_entries++;
- BUG_ON(mod->arch.plt_num_entries > mod->arch.plt_max_entries);
-
- /*
* MOVK/MOVN/MOVZ opcode:
* +--------+------------+--------+-----------+-------------+---------+
* | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
@@ -72,8 +58,19 @@ u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela,
cpu_to_le32(0xd61f0200)
};
- if (rela->r_addend == 0)
- sym->st_size = (u64)&plt[i];
+ /*
+ * Check if the entry we just created is a duplicate. Given that the
+ * relocations are sorted, this will be the last entry we allocated.
+ * (if one exists).
+ */
+ if (i > 0 &&
+ plt[i].mov0 == plt[i - 1].mov0 &&
+ plt[i].mov1 == plt[i - 1].mov1 &&
+ plt[i].mov2 == plt[i - 1].mov2)
+ return (u64)&plt[i - 1];
+
+ pltsec->plt_num_entries++;
+ BUG_ON(pltsec->plt_num_entries > pltsec->plt_max_entries);
return (u64)&plt[i];
}
@@ -104,7 +101,8 @@ static bool duplicate_rel(const Elf64_Rela *rela, int num)
return num > 0 && cmp_rela(rela + num, rela + num - 1) == 0;
}
-static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num)
+static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
+ Elf64_Word dstidx)
{
unsigned int ret = 0;
Elf64_Sym *s;
@@ -116,13 +114,17 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num)
case R_AARCH64_CALL26:
/*
* We only have to consider branch targets that resolve
- * to undefined symbols. This is not simply a heuristic,
- * it is a fundamental limitation, since the PLT itself
- * is part of the module, and needs to be within 128 MB
- * as well, so modules can never grow beyond that limit.
+ * to symbols that are defined in a different section.
+ * This is not simply a heuristic, it is a fundamental
+ * limitation, since there is no guaranteed way to emit
+ * PLT entries sufficiently close to the branch if the
+ * section size exceeds the range of a branch
+ * instruction. So ignore relocations against defined
+ * symbols if they live in the same section as the
+ * relocation target.
*/
s = syms + ELF64_R_SYM(rela[i].r_info);
- if (s->st_shndx != SHN_UNDEF)
+ if (s->st_shndx == dstidx)
break;
/*
@@ -149,7 +151,8 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num)
int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
char *secstrings, struct module *mod)
{
- unsigned long plt_max_entries = 0;
+ unsigned long core_plts = 0;
+ unsigned long init_plts = 0;
Elf64_Sym *syms = NULL;
int i;
@@ -158,14 +161,16 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
* entries. Record the symtab address as well.
*/
for (i = 0; i < ehdr->e_shnum; i++) {
- if (strcmp(".plt", secstrings + sechdrs[i].sh_name) == 0)
- mod->arch.plt = sechdrs + i;
+ if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt"))
+ mod->arch.core.plt = sechdrs + i;
+ else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt"))
+ mod->arch.init.plt = sechdrs + i;
else if (sechdrs[i].sh_type == SHT_SYMTAB)
syms = (Elf64_Sym *)sechdrs[i].sh_addr;
}
- if (!mod->arch.plt) {
- pr_err("%s: module PLT section missing\n", mod->name);
+ if (!mod->arch.core.plt || !mod->arch.init.plt) {
+ pr_err("%s: module PLT section(s) missing\n", mod->name);
return -ENOEXEC;
}
if (!syms) {
@@ -188,14 +193,27 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
/* sort by type, symbol index and addend */
sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL);
- plt_max_entries += count_plts(syms, rels, numrels);
+ if (strncmp(secstrings + dstsec->sh_name, ".init", 5) != 0)
+ core_plts += count_plts(syms, rels, numrels,
+ sechdrs[i].sh_info);
+ else
+ init_plts += count_plts(syms, rels, numrels,
+ sechdrs[i].sh_info);
}
- mod->arch.plt->sh_type = SHT_NOBITS;
- mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
- mod->arch.plt->sh_addralign = L1_CACHE_BYTES;
- mod->arch.plt->sh_size = plt_max_entries * sizeof(struct plt_entry);
- mod->arch.plt_num_entries = 0;
- mod->arch.plt_max_entries = plt_max_entries;
+ mod->arch.core.plt->sh_type = SHT_NOBITS;
+ mod->arch.core.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+ mod->arch.core.plt->sh_addralign = L1_CACHE_BYTES;
+ mod->arch.core.plt->sh_size = (core_plts + 1) * sizeof(struct plt_entry);
+ mod->arch.core.plt_num_entries = 0;
+ mod->arch.core.plt_max_entries = core_plts;
+
+ mod->arch.init.plt->sh_type = SHT_NOBITS;
+ mod->arch.init.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+ mod->arch.init.plt->sh_addralign = L1_CACHE_BYTES;
+ mod->arch.init.plt->sh_size = (init_plts + 1) * sizeof(struct plt_entry);
+ mod->arch.init.plt_num_entries = 0;
+ mod->arch.init.plt_max_entries = init_plts;
+
return 0;
}
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 7f316982ce00..c9a2ab446dc6 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -380,7 +380,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
ovf == -ERANGE) {
- val = module_emit_plt_entry(me, &rel[i], sym);
+ val = module_emit_plt_entry(me, loc, &rel[i], sym);
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2,
26, AARCH64_INSN_IMM_26);
}
diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds
index 8949f6c6f729..f7c9781a9d48 100644
--- a/arch/arm64/kernel/module.lds
+++ b/arch/arm64/kernel/module.lds
@@ -1,3 +1,4 @@
SECTIONS {
.plt (NOLOAD) : { BYTE(0) }
+ .init.plt (NOLOAD) : { BYTE(0) }
}
diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
index f7931d900bca..37e47a9d617e 100644
--- a/arch/arm64/kernel/probes/decode-insn.c
+++ b/arch/arm64/kernel/probes/decode-insn.c
@@ -157,10 +157,10 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
mod = __module_address((unsigned long)addr);
if (mod && within_module_init((unsigned long)addr, mod) &&
!within_module_init((unsigned long)scan_end, mod))
- scan_end = (kprobe_opcode_t *)mod->module_init;
+ scan_end = (kprobe_opcode_t *)mod->init_layout.base;
else if (mod && within_module_core((unsigned long)addr, mod) &&
!within_module_core((unsigned long)scan_end, mod))
- scan_end = (kprobe_opcode_t *)mod->module_core;
+ scan_end = (kprobe_opcode_t *)mod->core_layout.base;
preempt_enable();
}
#endif
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 1ee93c7c5a75..17a6449c96a2 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -31,7 +31,7 @@
#include <asm/insn.h>
#include <asm/uaccess.h>
#include <asm/irq.h>
-#include <asm-generic/sections.h>
+#include <asm/sections.h>
#include "decode-insn.h"
@@ -543,8 +543,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
bool arch_within_kprobe_blacklist(unsigned long addr)
{
- extern char __idmap_text_start[], __idmap_text_end[];
-
if ((addr >= (unsigned long)__kprobes_text_start &&
addr < (unsigned long)__kprobes_text_end) ||
(addr >= (unsigned long)__entry_text_start &&
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 80624829db61..e34bcf3f2c35 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -45,10 +45,14 @@
#include <linux/personality.h>
#include <linux/notifier.h>
#include <trace/events/power.h>
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+#include <linux/percpu.h>
+#endif
#include <asm/alternative.h>
#include <asm/compat.h>
#include <asm/cacheflush.h>
+#include <asm/exec.h>
#include <asm/fpsimd.h>
#include <asm/mmu_context.h>
#include <asm/processor.h>
@@ -165,6 +169,70 @@ void machine_restart(char *cmd)
while (1);
}
+/*
+ * dump a block of kernel memory from around the given address
+ */
+static void show_data(unsigned long addr, int nbytes, const char *name)
+{
+ int i, j;
+ int nlines;
+ u32 *p;
+
+ /*
+ * don't attempt to dump non-kernel addresses or
+ * values that are probably just small negative numbers
+ */
+ if (addr < PAGE_OFFSET || addr > -256UL)
+ return;
+
+ printk("\n%s: %#lx:\n", name, addr);
+
+ /*
+ * round address down to a 32 bit boundary
+ * and always dump a multiple of 32 bytes
+ */
+ p = (u32 *)(addr & ~(sizeof(u32) - 1));
+ nbytes += (addr & (sizeof(u32) - 1));
+ nlines = (nbytes + 31) / 32;
+
+
+ for (i = 0; i < nlines; i++) {
+ /*
+ * just display low 16 bits of address to keep
+ * each line of the dump < 80 characters
+ */
+ printk("%04lx ", (unsigned long)p & 0xffff);
+ for (j = 0; j < 8; j++) {
+ u32 data;
+ if (probe_kernel_address(p, data)) {
+ printk(" ********");
+ } else {
+ printk(" %08x", data);
+ }
+ ++p;
+ }
+ printk("\n");
+ }
+}
+
+static void show_extra_register_data(struct pt_regs *regs, int nbytes)
+{
+ mm_segment_t fs;
+ unsigned int i;
+
+ fs = get_fs();
+ set_fs(KERNEL_DS);
+ show_data(regs->pc - nbytes, nbytes * 2, "PC");
+ show_data(regs->regs[30] - nbytes, nbytes * 2, "LR");
+ show_data(regs->sp - nbytes, nbytes * 2, "SP");
+ for (i = 0; i < 30; i++) {
+ char name[4];
+ snprintf(name, sizeof(name), "X%u", i);
+ show_data(regs->regs[i] - nbytes, nbytes * 2, name);
+ }
+ set_fs(fs);
+}
+
void __show_regs(struct pt_regs *regs)
{
int i, top_reg;
@@ -191,6 +259,8 @@ void __show_regs(struct pt_regs *regs)
if (i % 2 == 0)
printk("\n");
}
+ if (!user_mode(regs))
+ show_extra_register_data(regs, 128);
printk("\n");
}
@@ -313,7 +383,7 @@ static void tls_thread_switch(struct task_struct *next)
}
/* Restore the UAO state depending on next's addr_limit */
-static void uao_thread_switch(struct task_struct *next)
+void uao_thread_switch(struct task_struct *next)
{
if (IS_ENABLED(CONFIG_ARM64_UAO)) {
if (task_thread_info(next)->addr_limit == KERNEL_DS)
@@ -323,6 +393,22 @@ static void uao_thread_switch(struct task_struct *next)
}
}
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+/*
+ * We store our current task in sp_el0, which is clobbered by userspace. Keep a
+ * shadow copy so that we can restore this upon entry from userspace.
+ *
+ * This is *only* for exception entry from EL0, and is not valid until we
+ * __switch_to() a user task.
+ */
+DEFINE_PER_CPU(struct task_struct *, __entry_task);
+
+static void entry_task_switch(struct task_struct *next)
+{
+ __this_cpu_write(__entry_task, next);
+}
+#endif
+
/*
* Thread switching.
*/
@@ -335,6 +421,9 @@ struct task_struct *__switch_to(struct task_struct *prev,
tls_thread_switch(next);
hw_breakpoint_thread_switch(next);
contextidr_thread_switch(next);
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ entry_task_switch(next);
+#endif
uao_thread_switch(next);
/*
@@ -352,27 +441,35 @@ struct task_struct *__switch_to(struct task_struct *prev,
unsigned long get_wchan(struct task_struct *p)
{
struct stackframe frame;
- unsigned long stack_page;
+ unsigned long stack_page, ret = 0;
int count = 0;
if (!p || p == current || p->state == TASK_RUNNING)
return 0;
+ stack_page = (unsigned long)try_get_task_stack(p);
+ if (!stack_page)
+ return 0;
+
frame.fp = thread_saved_fp(p);
frame.sp = thread_saved_sp(p);
frame.pc = thread_saved_pc(p);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
frame.graph = p->curr_ret_stack;
#endif
- stack_page = (unsigned long)task_stack_page(p);
do {
if (frame.sp < stack_page ||
frame.sp >= stack_page + THREAD_SIZE ||
unwind_frame(p, &frame))
- return 0;
- if (!in_sched_functions(frame.pc))
- return frame.pc;
+ goto out;
+ if (!in_sched_functions(frame.pc)) {
+ ret = frame.pc;
+ goto out;
+ }
} while (count ++ < 16);
- return 0;
+
+out:
+ put_task_stack(p);
+ return ret;
}
unsigned long arch_align_stack(unsigned long sp)
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index c5ef05959813..6204b7600d1b 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -327,13 +327,13 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type,
struct arch_hw_breakpoint_ctrl ctrl,
struct perf_event_attr *attr)
{
- int err, len, type, disabled = !ctrl.enabled;
+ int err, len, type, offset, disabled = !ctrl.enabled;
attr->disabled = disabled;
if (disabled)
return 0;
- err = arch_bp_generic_fields(ctrl, &len, &type);
+ err = arch_bp_generic_fields(ctrl, &len, &type, &offset);
if (err)
return err;
@@ -352,6 +352,7 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type,
attr->bp_len = len;
attr->bp_type = type;
+ attr->bp_addr += offset;
return 0;
}
@@ -404,7 +405,7 @@ static int ptrace_hbp_get_addr(unsigned int note_type,
if (IS_ERR(bp))
return PTR_ERR(bp);
- *addr = bp ? bp->attr.bp_addr : 0;
+ *addr = bp ? counter_arch_bp(bp)->address : 0;
return 0;
}
diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
index 1718706fde83..12a87f2600f2 100644
--- a/arch/arm64/kernel/return_address.c
+++ b/arch/arm64/kernel/return_address.c
@@ -12,6 +12,7 @@
#include <linux/export.h>
#include <linux/ftrace.h>
+#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
struct return_address_data {
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 566f3b9b1588..f1fae3cbc4ca 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -200,7 +200,7 @@ static void __init request_standard_resources(void)
struct resource *res;
kernel_code.start = virt_to_phys(_text);
- kernel_code.end = virt_to_phys(_etext - 1);
+ kernel_code.end = virt_to_phys(__init_begin - 1);
kernel_data.start = virt_to_phys(_sdata);
kernel_data.end = virt_to_phys(_end - 1);
@@ -351,6 +351,19 @@ void __init setup_arch(char **cmdline_p)
smp_init_cpus();
smp_build_mpidr_hash();
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ /*
+ * Make sure thread_info.ttbr0 always generates translation
+ * faults in case uaccess_enable() is inadvertently called by the init
+ * thread.
+ */
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ init_task.thread_info.ttbr0 = virt_to_phys(empty_zero_page);
+#else
+ init_thread_info.ttbr0 = virt_to_phys(empty_zero_page);
+#endif
+#endif
+
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
conswitchp = &vga_con;
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index c2bf5a58039f..9696c5239a3a 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -97,6 +97,7 @@ ENTRY(__cpu_suspend_enter)
ENDPROC(__cpu_suspend_enter)
.ltorg
+ .pushsection ".idmap.text", "ax"
ENTRY(cpu_resume)
bl el2_setup // if in EL2 drop to EL1 cleanly
/* enable the MMU early - so we can access sleep_save_stash by va */
@@ -124,9 +125,6 @@ ENTRY(_cpu_resume)
/* load sp from context */
ldr x2, [x0, #CPU_CTX_SP]
mov sp, x2
- /* save thread_info */
- and x2, x2, #~(THREAD_SIZE - 1)
- msr sp_el0, x2
/*
* cpu_do_resume expects x0 to contain context address pointer
*/
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index a1d06fc42048..fb3a21e17bff 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -58,6 +58,9 @@
#define CREATE_TRACE_POINTS
#include <trace/events/ipi.h>
+DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
+EXPORT_PER_CPU_SYMBOL(cpu_number);
+
/*
* as from 2.5, kernels no longer have an init_tasks structure
* so we need some other way of telling a new secondary core
@@ -110,6 +113,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
* We need to tell the secondary core where to find its stack and the
* page tables.
*/
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ secondary_data.task = idle;
+#endif
secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
update_cpu_boot_status(CPU_MMU_OFF);
__flush_dcache_area(&secondary_data, sizeof(secondary_data));
@@ -134,6 +140,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
}
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ secondary_data.task = NULL;
+#endif
secondary_data.stack = NULL;
status = READ_ONCE(secondary_data.status);
if (ret && status) {
@@ -177,7 +186,10 @@ static void smp_store_cpu_info(unsigned int cpuid)
asmlinkage void secondary_start_kernel(void)
{
struct mm_struct *mm = &init_mm;
- unsigned int cpu = smp_processor_id();
+ unsigned int cpu;
+
+ cpu = task_cpu(current);
+ set_my_cpu_offset(per_cpu_offset(cpu));
/*
* All kernel threads share the same mm context; grab a
@@ -186,8 +198,6 @@ asmlinkage void secondary_start_kernel(void)
atomic_inc(&mm->mm_count);
current->active_mm = mm;
- set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
-
/*
* TTBR0 is only used for the identity mapping at this stage. Make it
* point to zero page to avoid speculatively fetching new entries.
@@ -676,6 +686,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
if (max_cpus == 0)
break;
+ per_cpu(cpu_number, cpu) = cpu;
+
if (cpu == smp_processor_id())
continue;
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index aef3605a8c47..4b4b0ad6ebde 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -29,7 +29,8 @@
#include <asm/smp_plat.h>
extern void secondary_holding_pen(void);
-volatile unsigned long secondary_holding_pen_release = INVALID_HWID;
+volatile unsigned long __section(".mmuoff.data.read")
+secondary_holding_pen_release = INVALID_HWID;
static phys_addr_t cpu_release_addr[NR_CPUS];
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index a99eff9afc1f..95b915d17afc 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -22,6 +22,7 @@
#include <linux/stacktrace.h>
#include <asm/irq.h>
+#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
/*
@@ -128,7 +129,6 @@ void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
break;
}
}
-EXPORT_SYMBOL(walk_stackframe);
#ifdef CONFIG_STACKTRACE
struct stack_trace_data {
@@ -160,6 +160,9 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
struct stack_trace_data data;
struct stackframe frame;
+ if (!try_get_task_stack(tsk))
+ return;
+
data.trace = trace;
data.skip = trace->skip;
@@ -181,6 +184,8 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
walk_stackframe(tsk, &frame, save_trace, &data);
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
+
+ put_task_stack(tsk);
}
void save_stack_trace(struct stack_trace *trace)
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 5a0b1088c17c..468b939f3471 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -5,6 +5,7 @@
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/debug-monitors.h>
+#include <asm/exec.h>
#include <asm/pgtable.h>
#include <asm/memory.h>
#include <asm/mmu_context.h>
@@ -44,12 +45,6 @@ void notrace __cpu_suspend_exit(void)
cpu_uninstall_idmap();
/*
- * Restore per-cpu offset before any kernel
- * subsystem relying on it has a chance to run.
- */
- set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
-
- /*
* Restore HW breakpoint registers to sane values
* before debug exceptions are possibly reenabled
* through local_dbg_restore.
@@ -95,6 +90,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
*/
asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,
CONFIG_ARM64_PAN));
+ uao_thread_switch(current);
/*
* Restore HW breakpoint registers to sane values
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 694f6deedbab..7758f7ff131b 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -19,10 +19,30 @@
#include <linux/nodemask.h>
#include <linux/of.h>
#include <linux/sched.h>
+#include <linux/sched.h>
+#include <linux/sched_energy.h>
#include <asm/cputype.h>
#include <asm/topology.h>
+static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
+
+unsigned long scale_cpu_capacity(struct sched_domain *sd, int cpu)
+{
+#ifdef CONFIG_CPU_FREQ
+ unsigned long max_freq_scale = cpufreq_scale_max_freq_capacity(cpu);
+
+ return per_cpu(cpu_scale, cpu) * max_freq_scale >> SCHED_CAPACITY_SHIFT;
+#else
+ return per_cpu(cpu_scale, cpu);
+#endif
+}
+
+static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
+{
+ per_cpu(cpu_scale, cpu) = capacity;
+}
+
static int __init get_cpu_for_node(struct device_node *node)
{
struct device_node *cpu_node;
@@ -206,11 +226,72 @@ out:
struct cpu_topology cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);
+/* sd energy functions */
+static inline
+const struct sched_group_energy * const cpu_cluster_energy(int cpu)
+{
+ struct sched_group_energy *sge = sge_array[cpu][SD_LEVEL1];
+
+ if (!sge) {
+ pr_warn("Invalid sched_group_energy for Cluster%d\n", cpu);
+ return NULL;
+ }
+
+ return sge;
+}
+
+static inline
+const struct sched_group_energy * const cpu_core_energy(int cpu)
+{
+ struct sched_group_energy *sge = sge_array[cpu][SD_LEVEL0];
+
+ if (!sge) {
+ pr_warn("Invalid sched_group_energy for CPU%d\n", cpu);
+ return NULL;
+ }
+
+ return sge;
+}
+
const struct cpumask *cpu_coregroup_mask(int cpu)
{
return &cpu_topology[cpu].core_sibling;
}
+static int cpu_cpu_flags(void)
+{
+ return SD_ASYM_CPUCAPACITY;
+}
+
+static inline int cpu_corepower_flags(void)
+{
+ return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN | \
+ SD_SHARE_CAP_STATES;
+}
+
+static struct sched_domain_topology_level arm64_topology[] = {
+#ifdef CONFIG_SCHED_MC
+ { cpu_coregroup_mask, cpu_corepower_flags, cpu_core_energy, SD_INIT_NAME(MC) },
+#endif
+ { cpu_cpu_mask, cpu_cpu_flags, cpu_cluster_energy, SD_INIT_NAME(DIE) },
+ { NULL, },
+};
+
+static void update_cpu_capacity(unsigned int cpu)
+{
+ unsigned long capacity = SCHED_CAPACITY_SCALE;
+
+ if (cpu_core_energy(cpu)) {
+ int max_cap_idx = cpu_core_energy(cpu)->nr_cap_states - 1;
+ capacity = cpu_core_energy(cpu)->cap_states[max_cap_idx].cap;
+ }
+
+ set_capacity_scale(cpu, capacity);
+
+ pr_info("CPU%d: update cpu_capacity %lu\n",
+ cpu, arch_scale_cpu_capacity(NULL, cpu));
+}
+
static void update_siblings_masks(unsigned int cpuid)
{
struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
@@ -272,6 +353,7 @@ void store_cpu_topology(unsigned int cpuid)
topology_populated:
update_siblings_masks(cpuid);
+ update_cpu_capacity(cpuid);
}
static void __init reset_cpu_topology(void)
@@ -302,4 +384,8 @@ void __init init_cpu_topology(void)
*/
if (of_have_populated_dt() && parse_dt_topology())
reset_cpu_topology();
+ else
+ set_sched_topology(arm64_topology);
+
+ init_sched_energy_costs();
}
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index b62ec1e8a843..6543cd7bcb93 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -38,6 +38,7 @@
#include <asm/esr.h>
#include <asm/insn.h>
#include <asm/traps.h>
+#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
#include <asm/exception.h>
#include <asm/system_misc.h>
@@ -154,6 +155,14 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
if (!tsk)
tsk = current;
+ pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
+
+ if (!tsk)
+ tsk = current;
+
+ if (!try_get_task_stack(tsk))
+ return;
+
/*
* Switching between stacks is valid when tracing current and in
* non-preemptible context.
@@ -219,6 +228,8 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
stack + sizeof(struct pt_regs), false);
}
}
+
+ put_task_stack(tsk);
}
void show_stack(struct task_struct *tsk, unsigned long *sp)
@@ -234,10 +245,9 @@ void show_stack(struct task_struct *tsk, unsigned long *sp)
#endif
#define S_SMP " SMP"
-static int __die(const char *str, int err, struct thread_info *thread,
- struct pt_regs *regs)
+static int __die(const char *str, int err, struct pt_regs *regs)
{
- struct task_struct *tsk = thread->task;
+ struct task_struct *tsk = current;
static int die_counter;
int ret;
@@ -252,7 +262,8 @@ static int __die(const char *str, int err, struct thread_info *thread,
print_modules();
__show_regs(regs);
pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n",
- TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1);
+ TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk),
+ end_of_stack(tsk));
if (!user_mode(regs) || in_interrupt()) {
dump_mem(KERN_EMERG, "Stack: ", regs->sp,
@@ -272,7 +283,6 @@ static DEFINE_RAW_SPINLOCK(die_lock);
*/
void die(const char *str, struct pt_regs *regs, int err)
{
- struct thread_info *thread = current_thread_info();
int ret;
oops_enter();
@@ -280,9 +290,9 @@ void die(const char *str, struct pt_regs *regs, int err)
raw_spin_lock_irq(&die_lock);
console_verbose();
bust_spinlocks(1);
- ret = __die(str, err, thread, regs);
+ ret = __die(str, err, regs);
- if (regs && kexec_should_crash(thread->task))
+ if (regs && kexec_should_crash(current))
crash_kexec(regs);
bust_spinlocks(0);
@@ -465,7 +475,7 @@ static const char *esr_class_str[] = {
const char *esr_get_class_string(u32 esr)
{
- return esr_class_str[esr >> ESR_ELx_EC_SHIFT];
+ return esr_class_str[ESR_ELx_EC(esr)];
}
/*
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 97bc68f4c689..3b8acfae7797 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -55,7 +55,7 @@ struct vdso_data *vdso_data = &vdso_data_store.data;
*/
static struct page *vectors_page[1];
-static int alloc_vectors_page(void)
+static int __init alloc_vectors_page(void)
{
extern char __kuser_helper_start[], __kuser_helper_end[];
extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
@@ -88,7 +88,7 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp)
{
struct mm_struct *mm = current->mm;
unsigned long addr = AARCH32_VECTORS_BASE;
- static struct vm_special_mapping spec = {
+ static const struct vm_special_mapping spec = {
.name = "[vectors]",
.pages = vectors_page,
@@ -212,10 +212,16 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec;
if (!use_syscall) {
+ /* tkr_mono.cycle_last == tkr_raw.cycle_last */
vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
+ vdso_data->raw_time_sec = tk->raw_time.tv_sec;
+ vdso_data->raw_time_nsec = tk->raw_time.tv_nsec;
vdso_data->xtime_clock_sec = tk->xtime_sec;
vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
- vdso_data->cs_mult = tk->tkr_mono.mult;
+ /* tkr_raw.xtime_nsec == 0 */
+ vdso_data->cs_mono_mult = tk->tkr_mono.mult;
+ vdso_data->cs_raw_mult = tk->tkr_raw.mult;
+ /* tkr_mono.shift == tkr_raw.shift */
vdso_data->cs_shift = tk->tkr_mono.shift;
}
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index b467fd0a384b..62c84f7cb01b 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -23,7 +23,7 @@ GCOV_PROFILE := n
ccflags-y += -Wl,-shared
obj-y += vdso.o
-extra-y += vdso.lds vdso-offsets.h
+extra-y += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
# Force dependency (incbin is bad)
@@ -42,11 +42,10 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
quiet_cmd_vdsosym = VDSOSYM $@
define cmd_vdsosym
- $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ && \
- cp $@ include/generated/
+ $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
endef
-$(obj)/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
+include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
$(call if_changed,vdsosym)
# Assembly rules for the .S files
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index efa79e8d4196..e00b4671bd7c 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -26,24 +26,109 @@
#define NSEC_PER_SEC_HI16 0x3b9a
vdso_data .req x6
-use_syscall .req w7
-seqcnt .req w8
+seqcnt .req w7
+w_tmp .req w8
+x_tmp .req x8
+
+/*
+ * Conventions for macro arguments:
+ * - An argument is write-only if its name starts with "res".
+ * - All other arguments are read-only, unless otherwise specified.
+ */
.macro seqcnt_acquire
9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT]
tbnz seqcnt, #0, 9999b
dmb ishld
- ldr use_syscall, [vdso_data, #VDSO_USE_SYSCALL]
.endm
- .macro seqcnt_read, cnt
+ .macro seqcnt_check fail
dmb ishld
- ldr \cnt, [vdso_data, #VDSO_TB_SEQ_COUNT]
+ ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT]
+ cmp w_tmp, seqcnt
+ b.ne \fail
.endm
- .macro seqcnt_check, cnt, fail
- cmp \cnt, seqcnt
- b.ne \fail
+ .macro syscall_check fail
+ ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL]
+ cbnz w_tmp, \fail
+ .endm
+
+ .macro get_nsec_per_sec res
+ mov \res, #NSEC_PER_SEC_LO16
+ movk \res, #NSEC_PER_SEC_HI16, lsl #16
+ .endm
+
+ /*
+ * Returns the clock delta, in nanoseconds left-shifted by the clock
+ * shift.
+ */
+ .macro get_clock_shifted_nsec res, cycle_last, mult
+ /* Read the virtual counter. */
+ isb
+ mrs x_tmp, cntvct_el0
+ /* Calculate cycle delta and convert to ns. */
+ sub \res, x_tmp, \cycle_last
+ /* We can only guarantee 56 bits of precision. */
+ movn x_tmp, #0xff00, lsl #48
+ and \res, x_tmp, \res
+ mul \res, \res, \mult
+ .endm
+
+ /*
+ * Returns in res_{sec,nsec} the REALTIME timespec, based on the
+ * "wall time" (xtime) and the clock_mono delta.
+ */
+ .macro get_ts_realtime res_sec, res_nsec, \
+ clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec
+ add \res_nsec, \clock_nsec, \xtime_nsec
+ udiv x_tmp, \res_nsec, \nsec_to_sec
+ add \res_sec, \xtime_sec, x_tmp
+ msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec
+ .endm
+
+ /*
+ * Returns in res_{sec,nsec} the timespec based on the clock_raw delta,
+ * used for CLOCK_MONOTONIC_RAW.
+ */
+ .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec
+ udiv \res_sec, \clock_nsec, \nsec_to_sec
+ msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec
+ .endm
+
+ /* sec and nsec are modified in place. */
+ .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec
+ /* Add timespec. */
+ add \sec, \sec, \ts_sec
+ add \nsec, \nsec, \ts_nsec
+
+ /* Normalise the new timespec. */
+ cmp \nsec, \nsec_to_sec
+ b.lt 9999f
+ sub \nsec, \nsec, \nsec_to_sec
+ add \sec, \sec, #1
+9999:
+ cmp \nsec, #0
+ b.ge 9998f
+ add \nsec, \nsec, \nsec_to_sec
+ sub \sec, \sec, #1
+9998:
+ .endm
+
+ .macro clock_gettime_return, shift=0
+ .if \shift == 1
+ lsr x11, x11, x12
+ .endif
+ stp x10, x11, [x1, #TSPEC_TV_SEC]
+ mov x0, xzr
+ ret
+ .endm
+
+ .macro jump_slot jumptable, index, label
+ .if (. - \jumptable) != 4 * (\index)
+ .error "Jump slot index mismatch"
+ .endif
+ b \label
.endm
.text
@@ -51,18 +136,25 @@ seqcnt .req w8
/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */
ENTRY(__kernel_gettimeofday)
.cfi_startproc
- mov x2, x30
- .cfi_register x30, x2
-
- /* Acquire the sequence counter and get the timespec. */
adr vdso_data, _vdso_data
-1: seqcnt_acquire
- cbnz use_syscall, 4f
-
/* If tv is NULL, skip to the timezone code. */
cbz x0, 2f
- bl __do_get_tspec
- seqcnt_check w9, 1b
+
+ /* Compute the time of day. */
+1: seqcnt_acquire
+ syscall_check fail=4f
+ ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
+ /* w11 = cs_mono_mult, w12 = cs_shift */
+ ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
+ ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
+ seqcnt_check fail=1b
+
+ get_nsec_per_sec res=x9
+ lsl x9, x9, x12
+
+ get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
+ get_ts_realtime res_sec=x10, res_nsec=x11, \
+ clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
/* Convert ns to us. */
mov x13, #1000
@@ -76,95 +168,126 @@ ENTRY(__kernel_gettimeofday)
stp w4, w5, [x1, #TZ_MINWEST]
3:
mov x0, xzr
- ret x2
+ ret
4:
/* Syscall fallback. */
mov x8, #__NR_gettimeofday
svc #0
- ret x2
+ ret
.cfi_endproc
ENDPROC(__kernel_gettimeofday)
+#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE
+
/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */
ENTRY(__kernel_clock_gettime)
.cfi_startproc
- cmp w0, #CLOCK_REALTIME
- ccmp w0, #CLOCK_MONOTONIC, #0x4, ne
- b.ne 2f
+ cmp w0, #JUMPSLOT_MAX
+ b.hi syscall
+ adr vdso_data, _vdso_data
+ adr x_tmp, jumptable
+ add x_tmp, x_tmp, w0, uxtw #2
+ br x_tmp
+
+ ALIGN
+jumptable:
+ jump_slot jumptable, CLOCK_REALTIME, realtime
+ jump_slot jumptable, CLOCK_MONOTONIC, monotonic
+ b syscall
+ b syscall
+ jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw
+ jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse
+ jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse
+
+ .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1)
+ .error "Wrong jumptable size"
+ .endif
+
+ ALIGN
+realtime:
+ seqcnt_acquire
+ syscall_check fail=syscall
+ ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
+ /* w11 = cs_mono_mult, w12 = cs_shift */
+ ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
+ ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
+ seqcnt_check fail=realtime
- mov x2, x30
- .cfi_register x30, x2
+ /* All computations are done with left-shifted nsecs. */
+ get_nsec_per_sec res=x9
+ lsl x9, x9, x12
- /* Get kernel timespec. */
- adr vdso_data, _vdso_data
-1: seqcnt_acquire
- cbnz use_syscall, 7f
+ get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
+ get_ts_realtime res_sec=x10, res_nsec=x11, \
+ clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
+ clock_gettime_return, shift=1
- bl __do_get_tspec
- seqcnt_check w9, 1b
+ ALIGN
+monotonic:
+ seqcnt_acquire
+ syscall_check fail=syscall
+ ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
+ /* w11 = cs_mono_mult, w12 = cs_shift */
+ ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
+ ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
+ ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC]
+ seqcnt_check fail=monotonic
- mov x30, x2
+ /* All computations are done with left-shifted nsecs. */
+ lsl x4, x4, x12
+ get_nsec_per_sec res=x9
+ lsl x9, x9, x12
- cmp w0, #CLOCK_MONOTONIC
- b.ne 6f
+ get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
+ get_ts_realtime res_sec=x10, res_nsec=x11, \
+ clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
- /* Get wtm timespec. */
- ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC]
+ add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9
+ clock_gettime_return, shift=1
- /* Check the sequence counter. */
- seqcnt_read w9
- seqcnt_check w9, 1b
- b 4f
-2:
- cmp w0, #CLOCK_REALTIME_COARSE
- ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne
- b.ne 8f
+ ALIGN
+monotonic_raw:
+ seqcnt_acquire
+ syscall_check fail=syscall
+ ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
+ /* w11 = cs_raw_mult, w12 = cs_shift */
+ ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT]
+ ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC]
+ seqcnt_check fail=monotonic_raw
- /* xtime_coarse_nsec is already right-shifted */
- mov x12, #0
+ /* All computations are done with left-shifted nsecs. */
+ lsl x14, x14, x12
+ get_nsec_per_sec res=x9
+ lsl x9, x9, x12
- /* Get coarse timespec. */
- adr vdso_data, _vdso_data
-3: seqcnt_acquire
+ get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
+ get_ts_clock_raw res_sec=x10, res_nsec=x11, \
+ clock_nsec=x15, nsec_to_sec=x9
+
+ add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
+ clock_gettime_return, shift=1
+
+ ALIGN
+realtime_coarse:
+ seqcnt_acquire
ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
+ seqcnt_check fail=realtime_coarse
+ clock_gettime_return
- /* Get wtm timespec. */
+ ALIGN
+monotonic_coarse:
+ seqcnt_acquire
+ ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC]
+ seqcnt_check fail=monotonic_coarse
- /* Check the sequence counter. */
- seqcnt_read w9
- seqcnt_check w9, 3b
+ /* Computations are done in (non-shifted) nsecs. */
+ get_nsec_per_sec res=x9
+ add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
+ clock_gettime_return
- cmp w0, #CLOCK_MONOTONIC_COARSE
- b.ne 6f
-4:
- /* Add on wtm timespec. */
- add x10, x10, x13
- lsl x14, x14, x12
- add x11, x11, x14
-
- /* Normalise the new timespec. */
- mov x15, #NSEC_PER_SEC_LO16
- movk x15, #NSEC_PER_SEC_HI16, lsl #16
- lsl x15, x15, x12
- cmp x11, x15
- b.lt 5f
- sub x11, x11, x15
- add x10, x10, #1
-5:
- cmp x11, #0
- b.ge 6f
- add x11, x11, x15
- sub x10, x10, #1
-
-6: /* Store to the user timespec. */
- lsr x11, x11, x12
- stp x10, x11, [x1, #TSPEC_TV_SEC]
- mov x0, xzr
- ret
-7:
- mov x30, x2
-8: /* Syscall fallback. */
+ ALIGN
+syscall: /* Syscall fallback. */
mov x8, #__NR_clock_gettime
svc #0
ret
@@ -176,6 +299,7 @@ ENTRY(__kernel_clock_getres)
.cfi_startproc
cmp w0, #CLOCK_REALTIME
ccmp w0, #CLOCK_MONOTONIC, #0x4, ne
+ ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne
b.ne 1f
ldr x2, 5f
@@ -203,46 +327,3 @@ ENTRY(__kernel_clock_getres)
.quad CLOCK_COARSE_RES
.cfi_endproc
ENDPROC(__kernel_clock_getres)
-
-/*
- * Read the current time from the architected counter.
- * Expects vdso_data to be initialised.
- * Clobbers the temporary registers (x9 - x15).
- * Returns:
- * - w9 = vDSO sequence counter
- * - (x10, x11) = (ts->tv_sec, shifted ts->tv_nsec)
- * - w12 = cs_shift
- */
-ENTRY(__do_get_tspec)
- .cfi_startproc
-
- /* Read from the vDSO data page. */
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
- ldp w11, w12, [vdso_data, #VDSO_CS_MULT]
- seqcnt_read w9
-
- /* Read the virtual counter. */
- isb
- mrs x15, cntvct_el0
-
- /* Calculate cycle delta and convert to ns. */
- sub x10, x15, x10
- /* We can only guarantee 56 bits of precision. */
- movn x15, #0xff00, lsl #48
- and x10, x15, x10
- mul x10, x10, x11
-
- /* Use the kernel time to calculate the new timespec. */
- mov x11, #NSEC_PER_SEC_LO16
- movk x11, #NSEC_PER_SEC_HI16, lsl #16
- lsl x11, x11, x12
- add x15, x10, x14
- udiv x14, x15, x11
- add x10, x13, x14
- mul x13, x14, x11
- sub x11, x15, x13
-
- ret
- .cfi_endproc
-ENDPROC(__do_get_tspec)
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 623532f44323..f2d34d4c0a6b 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -133,12 +133,13 @@ SECTIONS
}
. = ALIGN(SEGMENT_ALIGN);
- RO_DATA(PAGE_SIZE) /* everything from this point to */
- EXCEPTION_TABLE(8) /* _etext will be marked RO NX */
+ _etext = .; /* End of text section */
+
+ RO_DATA(PAGE_SIZE) /* everything from this point to */
+ EXCEPTION_TABLE(8) /* __init_begin will be marked RO NX */
NOTES
. = ALIGN(SEGMENT_ALIGN);
- _etext = .; /* End of text and rodata section */
__init_begin = .;
INIT_TEXT_SECTION(8)
@@ -182,6 +183,25 @@ SECTIONS
_data = .;
_sdata = .;
RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+
+ /*
+ * Data written with the MMU off but read with the MMU on requires
+ * cache lines to be invalidated, discarding up to a Cache Writeback
+ * Granule (CWG) of data from the cache. Keep the section that
+ * requires this type of maintenance to be in its own Cache Writeback
+ * Granule (CWG) area so the cache maintenance operations don't
+ * interfere with adjacent data.
+ */
+ .mmuoff.data.write : ALIGN(SZ_2K) {
+ __mmuoff_data_start = .;
+ *(.mmuoff.data.write)
+ }
+ . = ALIGN(SZ_2K);
+ .mmuoff.data.read : {
+ *(.mmuoff.data.read)
+ __mmuoff_data_end = .;
+ }
+
PECOFF_EDATA_PADDING
_edata = .;
@@ -193,6 +213,11 @@ SECTIONS
swapper_pg_dir = .;
. += SWAPPER_DIR_SIZE;
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ reserved_ttbr0 = .;
+ . += RESERVED_TTBR0_SIZE;
+#endif
+
_end = .;
STABS_DEBUG