aboutsummaryrefslogtreecommitdiff
path: root/arch/arm64/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r--arch/arm64/kernel/efi-entry.S3
-rw-r--r--arch/arm64/kernel/efi.c400
-rw-r--r--arch/arm64/kernel/entry.S6
-rw-r--r--arch/arm64/kernel/head.S434
-rw-r--r--arch/arm64/kernel/insn.c47
-rw-r--r--arch/arm64/kernel/irq.c2
-rw-r--r--arch/arm64/kernel/jump_label.c23
-rw-r--r--arch/arm64/kernel/setup.c3
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S34
9 files changed, 466 insertions, 486 deletions
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index d18a44940968..8ce9b0577442 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -61,7 +61,8 @@ ENTRY(efi_stub_entry)
*/
mov x20, x0 // DTB address
ldr x0, [sp, #16] // relocated _text address
- mov x21, x0
+ ldr x21, =stext_offset
+ add x21, x0, x21
/*
* Calculate size of the kernel Image (same for original and copy).
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 1d85a7c5a850..a98415b5979c 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -11,24 +11,31 @@
*
*/
+#include <linux/atomic.h>
+#include <linux/dmi.h>
#include <linux/efi.h>
#include <linux/export.h>
#include <linux/memblock.h>
+#include <linux/mm_types.h>
#include <linux/bootmem.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
+#include <linux/preempt.h>
+#include <linux/rbtree.h>
+#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/spinlock.h>
#include <asm/cacheflush.h>
#include <asm/efi.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
struct efi_memory_map memmap;
-static efi_runtime_services_t *runtime;
-
static u64 efi_system_table;
static int uefi_debug __initdata;
@@ -47,30 +54,33 @@ static int __init is_normal_ram(efi_memory_desc_t *md)
return 0;
}
-static void __init efi_setup_idmap(void)
+/*
+ * Translate a EFI virtual address into a physical address: this is necessary,
+ * as some data members of the EFI system table are virtually remapped after
+ * SetVirtualAddressMap() has been called.
+ */
+static phys_addr_t efi_to_phys(unsigned long addr)
{
- struct memblock_region *r;
efi_memory_desc_t *md;
- u64 paddr, npages, size;
-
- for_each_memblock(memory, r)
- create_id_mapping(r->base, r->size, 0);
- /* map runtime io spaces */
for_each_efi_memory_desc(&memmap, md) {
- if (!(md->attribute & EFI_MEMORY_RUNTIME) || is_normal_ram(md))
+ if (!(md->attribute & EFI_MEMORY_RUNTIME))
continue;
- paddr = md->phys_addr;
- npages = md->num_pages;
- memrange_efi_to_native(&paddr, &npages);
- size = npages << PAGE_SHIFT;
- create_id_mapping(paddr, size, 1);
+ if (md->virt_addr == 0)
+ /* no virtual mapping has been installed by the stub */
+ break;
+ if (md->virt_addr <= addr &&
+ (addr - md->virt_addr) < (md->num_pages << EFI_PAGE_SHIFT))
+ return md->phys_addr + addr - md->virt_addr;
}
+ return addr;
}
static int __init uefi_init(void)
{
efi_char16_t *c16;
+ void *config_tables;
+ u64 table_size;
char vendor[100] = "unknown";
int i, retval;
@@ -98,7 +108,7 @@ static int __init uefi_init(void)
efi.systab->hdr.revision & 0xffff);
/* Show what we know for posterity */
- c16 = early_memremap(efi.systab->fw_vendor,
+ c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor),
sizeof(vendor));
if (c16) {
for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i)
@@ -111,10 +121,14 @@ static int __init uefi_init(void)
efi.systab->hdr.revision >> 16,
efi.systab->hdr.revision & 0xffff, vendor);
- retval = efi_config_init(NULL);
- if (retval == 0)
- set_bit(EFI_CONFIG_TABLES, &efi.flags);
+ table_size = sizeof(efi_config_table_64_t) * efi.systab->nr_tables;
+ config_tables = early_memremap(efi_to_phys(efi.systab->tables),
+ table_size);
+
+ retval = efi_config_parse_tables(config_tables, efi.systab->nr_tables,
+ sizeof(efi_config_table_64_t), NULL);
+ early_memunmap(config_tables, table_size);
out:
early_memunmap(efi.systab, sizeof(efi_system_table_t));
return retval;
@@ -125,17 +139,17 @@ out:
*/
static __init int is_reserve_region(efi_memory_desc_t *md)
{
- if (!is_normal_ram(md))
+ switch (md->type) {
+ case EFI_LOADER_CODE:
+ case EFI_LOADER_DATA:
+ case EFI_BOOT_SERVICES_CODE:
+ case EFI_BOOT_SERVICES_DATA:
+ case EFI_CONVENTIONAL_MEMORY:
return 0;
-
- if (md->attribute & EFI_MEMORY_RUNTIME)
- return 1;
-
- if (md->type == EFI_ACPI_RECLAIM_MEMORY ||
- md->type == EFI_RESERVED_TYPE)
- return 1;
-
- return 0;
+ default:
+ break;
+ }
+ return is_normal_ram(md);
}
static __init void reserve_regions(void)
@@ -164,9 +178,7 @@ static __init void reserve_regions(void)
if (is_normal_ram(md))
early_init_dt_add_memory_arch(paddr, size);
- if (is_reserve_region(md) ||
- md->type == EFI_BOOT_SERVICES_CODE ||
- md->type == EFI_BOOT_SERVICES_DATA) {
+ if (is_reserve_region(md)) {
memblock_reserve(paddr, size);
if (uefi_debug)
pr_cont("*");
@@ -179,123 +191,6 @@ static __init void reserve_regions(void)
set_bit(EFI_MEMMAP, &efi.flags);
}
-
-static u64 __init free_one_region(u64 start, u64 end)
-{
- u64 size = end - start;
-
- if (uefi_debug)
- pr_info(" EFI freeing: 0x%012llx-0x%012llx\n", start, end - 1);
-
- free_bootmem_late(start, size);
- return size;
-}
-
-static u64 __init free_region(u64 start, u64 end)
-{
- u64 map_start, map_end, total = 0;
-
- if (end <= start)
- return total;
-
- map_start = (u64)memmap.phys_map;
- map_end = PAGE_ALIGN(map_start + (memmap.map_end - memmap.map));
- map_start &= PAGE_MASK;
-
- if (start < map_end && end > map_start) {
- /* region overlaps UEFI memmap */
- if (start < map_start)
- total += free_one_region(start, map_start);
-
- if (map_end < end)
- total += free_one_region(map_end, end);
- } else
- total += free_one_region(start, end);
-
- return total;
-}
-
-static void __init free_boot_services(void)
-{
- u64 total_freed = 0;
- u64 keep_end, free_start, free_end;
- efi_memory_desc_t *md;
-
- /*
- * If kernel uses larger pages than UEFI, we have to be careful
- * not to inadvertantly free memory we want to keep if there is
- * overlap at the kernel page size alignment. We do not want to
- * free is_reserve_region() memory nor the UEFI memmap itself.
- *
- * The memory map is sorted, so we keep track of the end of
- * any previous region we want to keep, remember any region
- * we want to free and defer freeing it until we encounter
- * the next region we want to keep. This way, before freeing
- * it, we can clip it as needed to avoid freeing memory we
- * want to keep for UEFI.
- */
-
- keep_end = 0;
- free_start = 0;
-
- for_each_efi_memory_desc(&memmap, md) {
- u64 paddr, npages, size;
-
- if (is_reserve_region(md)) {
- /*
- * We don't want to free any memory from this region.
- */
- if (free_start) {
- /* adjust free_end then free region */
- if (free_end > md->phys_addr)
- free_end -= PAGE_SIZE;
- total_freed += free_region(free_start, free_end);
- free_start = 0;
- }
- keep_end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
- continue;
- }
-
- if (md->type != EFI_BOOT_SERVICES_CODE &&
- md->type != EFI_BOOT_SERVICES_DATA) {
- /* no need to free this region */
- continue;
- }
-
- /*
- * We want to free memory from this region.
- */
- paddr = md->phys_addr;
- npages = md->num_pages;
- memrange_efi_to_native(&paddr, &npages);
- size = npages << PAGE_SHIFT;
-
- if (free_start) {
- if (paddr <= free_end)
- free_end = paddr + size;
- else {
- total_freed += free_region(free_start, free_end);
- free_start = paddr;
- free_end = paddr + size;
- }
- } else {
- free_start = paddr;
- free_end = paddr + size;
- }
- if (free_start < keep_end) {
- free_start += PAGE_SIZE;
- if (free_start >= free_end)
- free_start = 0;
- }
- }
- if (free_start)
- total_freed += free_region(free_start, free_end);
-
- if (total_freed)
- pr_info("Freed 0x%llx bytes of EFI boot services memory",
- total_freed);
-}
-
void __init efi_init(void)
{
struct efi_fdt_params params;
@@ -320,61 +215,14 @@ void __init efi_init(void)
reserve_regions();
}
-void __init efi_idmap_init(void)
-{
- if (!efi_enabled(EFI_BOOT))
- return;
-
- /* boot time idmap_pg_dir is incomplete, so fill in missing parts */
- efi_setup_idmap();
- early_memunmap(memmap.map, memmap.map_end - memmap.map);
-}
-
-static int __init remap_region(efi_memory_desc_t *md, void **new)
-{
- u64 paddr, vaddr, npages, size;
-
- paddr = md->phys_addr;
- npages = md->num_pages;
- memrange_efi_to_native(&paddr, &npages);
- size = npages << PAGE_SHIFT;
-
- if (is_normal_ram(md))
- vaddr = (__force u64)ioremap_cache(paddr, size);
- else
- vaddr = (__force u64)ioremap(paddr, size);
-
- if (!vaddr) {
- pr_err("Unable to remap 0x%llx pages @ %p\n",
- npages, (void *)paddr);
- return 0;
- }
-
- /* adjust for any rounding when EFI and system pagesize differs */
- md->virt_addr = vaddr + (md->phys_addr - paddr);
-
- if (uefi_debug)
- pr_info(" EFI remap 0x%012llx => %p\n",
- md->phys_addr, (void *)md->virt_addr);
-
- memcpy(*new, md, memmap.desc_size);
- *new += memmap.desc_size;
-
- return 1;
-}
-
/*
- * Switch UEFI from an identity map to a kernel virtual map
+ * Enable the UEFI Runtime Services if all prerequisites are in place, i.e.,
+ * non-early mapping of the UEFI system table and virtual mappings for all
+ * EFI_MEMORY_RUNTIME regions.
*/
-static int __init arm64_enter_virtual_mode(void)
+static int __init arm64_enable_runtime_services(void)
{
- efi_memory_desc_t *md;
- phys_addr_t virtmap_phys;
- void *virtmap, *virt_md;
- efi_status_t status;
u64 mapsize;
- int count = 0;
- unsigned long flags;
if (!efi_enabled(EFI_BOOT)) {
pr_info("EFI services will not be available.\n");
@@ -396,78 +244,116 @@ static int __init arm64_enter_virtual_mode(void)
efi.memmap = &memmap;
- /* Map the runtime regions */
- virtmap = kmalloc(mapsize, GFP_KERNEL);
- if (!virtmap) {
- pr_err("Failed to allocate EFI virtual memmap\n");
+ efi.systab = (__force void *)ioremap_cache(efi_system_table,
+ sizeof(efi_system_table_t));
+ if (!efi.systab) {
+ pr_err("Failed to remap EFI System Table\n");
return -1;
}
- virtmap_phys = virt_to_phys(virtmap);
- virt_md = virtmap;
+ set_bit(EFI_SYSTEM_TABLES, &efi.flags);
- for_each_efi_memory_desc(&memmap, md) {
- if (!(md->attribute & EFI_MEMORY_RUNTIME))
- continue;
- if (!remap_region(md, &virt_md))
- goto err_unmap;
- ++count;
+ if (!efi_enabled(EFI_VIRTMAP)) {
+ pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n");
+ return -1;
}
- efi.systab = (__force void *)efi_lookup_mapped_addr(efi_system_table);
- if (!efi.systab) {
- /*
- * If we have no virtual mapping for the System Table at this
- * point, the memory map doesn't cover the physical offset where
- * it resides. This means the System Table will be inaccessible
- * to Runtime Services themselves once the virtual mapping is
- * installed.
- */
- pr_err("Failed to remap EFI System Table -- buggy firmware?\n");
- goto err_unmap;
- }
- set_bit(EFI_SYSTEM_TABLES, &efi.flags);
+ /* Set up runtime services function pointers */
+ efi_native_runtime_setup();
+ set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
- local_irq_save(flags);
- cpu_switch_mm(idmap_pg_dir, &init_mm);
+ efi.runtime_version = efi.systab->hdr.revision;
- /* Call SetVirtualAddressMap with the physical address of the map */
- runtime = efi.systab->runtime;
- efi.set_virtual_address_map = runtime->set_virtual_address_map;
+ return 0;
+}
+early_initcall(arm64_enable_runtime_services);
- status = efi.set_virtual_address_map(count * memmap.desc_size,
- memmap.desc_size,
- memmap.desc_version,
- (efi_memory_desc_t *)virtmap_phys);
- cpu_set_reserved_ttbr0();
+static int __init arm64_dmi_init(void)
+{
+ /*
+ * On arm64, DMI depends on UEFI, and dmi_scan_machine() needs to
+ * be called early because dmi_id_init(), which is an arch_initcall
+ * itself, depends on dmi_scan_machine() having been called already.
+ */
+ dmi_scan_machine();
+ if (dmi_available)
+ dmi_set_dump_stack_arch_desc();
+ return 0;
+}
+core_initcall(arm64_dmi_init);
+
+static pgd_t efi_pgd[PTRS_PER_PGD] __page_aligned_bss;
+
+static struct mm_struct efi_mm = {
+ .mm_rb = RB_ROOT,
+ .pgd = efi_pgd,
+ .mm_users = ATOMIC_INIT(2),
+ .mm_count = ATOMIC_INIT(1),
+ .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem),
+ .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
+ .mmlist = LIST_HEAD_INIT(efi_mm.mmlist),
+ INIT_MM_CONTEXT(efi_mm)
+};
+
+static void efi_set_pgd(struct mm_struct *mm)
+{
+ cpu_switch_mm(mm->pgd, mm);
flush_tlb_all();
- local_irq_restore(flags);
+ if (icache_is_aivivt())
+ __flush_icache_all();
+}
- kfree(virtmap);
+void efi_virtmap_load(void)
+{
+ preempt_disable();
+ efi_set_pgd(&efi_mm);
+}
- free_boot_services();
+void efi_virtmap_unload(void)
+{
+ efi_set_pgd(current->active_mm);
+ preempt_enable();
+}
- if (status != EFI_SUCCESS) {
- pr_err("Failed to set EFI virtual address map! [%lx]\n",
- status);
- return -1;
- }
+void __init efi_virtmap_init(void)
+{
+ efi_memory_desc_t *md;
- /* Set up runtime services function pointers */
- runtime = efi.systab->runtime;
- efi_native_runtime_setup();
- set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+ if (!efi_enabled(EFI_BOOT))
+ return;
- efi.runtime_version = efi.systab->hdr.revision;
+ for_each_efi_memory_desc(&memmap, md) {
+ u64 paddr, npages, size;
+ pgprot_t prot;
- return 0;
+ if (!(md->attribute & EFI_MEMORY_RUNTIME))
+ continue;
+ if (WARN(md->virt_addr == 0,
+ "UEFI virtual mapping incomplete or missing -- no entry found for 0x%llx\n",
+ md->phys_addr))
+ return;
-err_unmap:
- /* unmap all mappings that succeeded: there are 'count' of those */
- for (virt_md = virtmap; count--; virt_md += memmap.desc_size) {
- md = virt_md;
- iounmap((__force void __iomem *)md->virt_addr);
+ paddr = md->phys_addr;
+ npages = md->num_pages;
+ memrange_efi_to_native(&paddr, &npages);
+ size = npages << PAGE_SHIFT;
+
+ pr_info(" EFI remap 0x%016llx => %p\n",
+ md->phys_addr, (void *)md->virt_addr);
+
+ /*
+ * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
+ * executable, everything else can be mapped with the XN bits
+ * set.
+ */
+ if (!is_normal_ram(md))
+ prot = __pgprot(PROT_DEVICE_nGnRE);
+ else if (md->type == EFI_RUNTIME_SERVICES_CODE)
+ prot = PAGE_KERNEL_EXEC;
+ else
+ prot = PAGE_KERNEL;
+
+ create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, prot);
}
- kfree(virtmap);
- return -1;
+ set_bit(EFI_VIRTMAP, &efi.flags);
+ early_memunmap(memmap.map, memmap.map_end - memmap.map);
}
-early_initcall(arm64_enter_virtual_mode);
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index c19999ce2fb1..a4f58c8760cf 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -188,7 +188,8 @@ tsk .req x28 // current thread_info
* Interrupt handling.
*/
.macro irq_handler
- ldr x1, handle_arch_irq
+ adrp x1, handle_arch_irq
+ ldr x1, [x1, #:lo12:handle_arch_irq]
mov x0, sp
blr x1
.endm
@@ -729,6 +730,3 @@ ENTRY(sys_rt_sigreturn_wrapper)
mov x0, sp
b sys_rt_sigreturn
ENDPROC(sys_rt_sigreturn_wrapper)
-
-ENTRY(handle_arch_irq)
- .quad 0
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 0a6e4f924df8..8ce88e08c030 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -132,6 +132,8 @@ efi_head:
#endif
#ifdef CONFIG_EFI
+ .globl stext_offset
+ .set stext_offset, stext - efi_head
.align 3
pe_header:
.ascii "PE"
@@ -155,12 +157,12 @@ optional_header:
.long 0 // SizeOfInitializedData
.long 0 // SizeOfUninitializedData
.long efi_stub_entry - efi_head // AddressOfEntryPoint
- .long stext - efi_head // BaseOfCode
+ .long stext_offset // BaseOfCode
extra_header_fields:
.quad 0 // ImageBase
- .long 0x20 // SectionAlignment
- .long 0x8 // FileAlignment
+ .long 0x1000 // SectionAlignment
+ .long PECOFF_FILE_ALIGNMENT // FileAlignment
.short 0 // MajorOperatingSystemVersion
.short 0 // MinorOperatingSystemVersion
.short 0 // MajorImageVersion
@@ -172,7 +174,7 @@ extra_header_fields:
.long _end - efi_head // SizeOfImage
// Everything before the kernel image is considered part of the header
- .long stext - efi_head // SizeOfHeaders
+ .long stext_offset // SizeOfHeaders
.long 0 // CheckSum
.short 0xa // Subsystem (EFI application)
.short 0 // DllCharacteristics
@@ -217,16 +219,24 @@ section_table:
.byte 0
.byte 0 // end of 0 padding of section name
.long _end - stext // VirtualSize
- .long stext - efi_head // VirtualAddress
+ .long stext_offset // VirtualAddress
.long _edata - stext // SizeOfRawData
- .long stext - efi_head // PointerToRawData
+ .long stext_offset // PointerToRawData
.long 0 // PointerToRelocations (0 for executables)
.long 0 // PointerToLineNumbers (0 for executables)
.short 0 // NumberOfRelocations (0 for executables)
.short 0 // NumberOfLineNumbers (0 for executables)
.long 0xe0500020 // Characteristics (section flags)
- .align 5
+
+ /*
+ * EFI will load stext onwards at the 4k section alignment
+ * described in the PE/COFF header. To ensure that instruction
+ * sequences using an adrp and a :lo12: immediate will function
+ * correctly at this alignment, we must ensure that stext is
+ * placed at a 4k boundary in the Image to begin with.
+ */
+ .align 12
#endif
ENTRY(stext)
@@ -238,7 +248,13 @@ ENTRY(stext)
mov x0, x22
bl lookup_processor_type
mov x23, x0 // x23=current cpu_table
- cbz x23, __error_p // invalid processor (x23=0)?
+ /*
+ * __error_p may end up out of range for cbz if text areas are
+ * aligned up to section sizes.
+ */
+ cbnz x23, 1f // invalid processor (x23=0)?
+ b __error_p
+1:
bl __vet_fdt
bl __create_page_tables // x25=TTBR0, x26=TTBR1
/*
@@ -250,13 +266,214 @@ ENTRY(stext)
*/
ldr x27, __switch_data // address to jump to after
// MMU has been enabled
- adr lr, __enable_mmu // return (PIC) address
+ adrp lr, __enable_mmu // return (PIC) address
+ add lr, lr, #:lo12:__enable_mmu
ldr x12, [x23, #CPU_INFO_SETUP]
add x12, x12, x28 // __virt_to_phys
br x12 // initialise processor
ENDPROC(stext)
/*
+ * Determine validity of the x21 FDT pointer.
+ * The dtb must be 8-byte aligned and live in the first 512M of memory.
+ */
+__vet_fdt:
+ tst x21, #0x7
+ b.ne 1f
+ cmp x21, x24
+ b.lt 1f
+ mov x0, #(1 << 29)
+ add x0, x0, x24
+ cmp x21, x0
+ b.ge 1f
+ ret
+1:
+ mov x21, #0
+ ret
+ENDPROC(__vet_fdt)
+/*
+ * Macro to create a table entry to the next page.
+ *
+ * tbl: page table address
+ * virt: virtual address
+ * shift: #imm page table shift
+ * ptrs: #imm pointers per table page
+ *
+ * Preserves: virt
+ * Corrupts: tmp1, tmp2
+ * Returns: tbl -> next level table page address
+ */
+ .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
+ lsr \tmp1, \virt, #\shift
+ and \tmp1, \tmp1, #\ptrs - 1 // table index
+ add \tmp2, \tbl, #PAGE_SIZE
+ orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type
+ str \tmp2, [\tbl, \tmp1, lsl #3]
+ add \tbl, \tbl, #PAGE_SIZE // next level table page
+ .endm
+
+/*
+ * Macro to populate the PGD (and possibily PUD) for the corresponding
+ * block entry in the next level (tbl) for the given virtual address.
+ *
+ * Preserves: tbl, next, virt
+ * Corrupts: tmp1, tmp2
+ */
+ .macro create_pgd_entry, tbl, virt, tmp1, tmp2
+ create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
+#if SWAPPER_PGTABLE_LEVELS == 3
+ create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
+#endif
+ .endm
+
+/*
+ * Macro to populate block entries in the page table for the start..end
+ * virtual range (inclusive).
+ *
+ * Preserves: tbl, flags
+ * Corrupts: phys, start, end, pstate
+ */
+ .macro create_block_map, tbl, flags, phys, start, end
+ lsr \phys, \phys, #BLOCK_SHIFT
+ lsr \start, \start, #BLOCK_SHIFT
+ and \start, \start, #PTRS_PER_PTE - 1 // table index
+ orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry
+ lsr \end, \end, #BLOCK_SHIFT
+ and \end, \end, #PTRS_PER_PTE - 1 // table end index
+9999: str \phys, [\tbl, \start, lsl #3] // store the entry
+ add \start, \start, #1 // next entry
+ add \phys, \phys, #BLOCK_SIZE // next block
+ cmp \start, \end
+ b.ls 9999b
+ .endm
+
+/*
+ * Setup the initial page tables. We only setup the barest amount which is
+ * required to get the kernel running. The following sections are required:
+ * - identity mapping to enable the MMU (low address, TTBR0)
+ * - first few MB of the kernel linear mapping to jump to once the MMU has
+ * been enabled, including the FDT blob (TTBR1)
+ * - pgd entry for fixed mappings (TTBR1)
+ */
+__create_page_tables:
+ pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses
+ mov x27, lr
+
+ /*
+ * Invalidate the idmap and swapper page tables to avoid potential
+ * dirty cache lines being evicted.
+ */
+ mov x0, x25
+ add x1, x26, #SWAPPER_DIR_SIZE
+ bl __inval_cache_range
+
+ /*
+ * Clear the idmap and swapper page tables.
+ */
+ mov x0, x25
+ add x6, x26, #SWAPPER_DIR_SIZE
+1: stp xzr, xzr, [x0], #16
+ stp xzr, xzr, [x0], #16
+ stp xzr, xzr, [x0], #16
+ stp xzr, xzr, [x0], #16
+ cmp x0, x6
+ b.lo 1b
+
+ ldr x7, =MM_MMUFLAGS
+
+ /*
+ * Create the identity mapping.
+ */
+ mov x0, x25 // idmap_pg_dir
+ ldr x3, =KERNEL_START
+ add x3, x3, x28 // __pa(KERNEL_START)
+ create_pgd_entry x0, x3, x5, x6
+ ldr x6, =KERNEL_END
+ mov x5, x3 // __pa(KERNEL_START)
+ add x6, x6, x28 // __pa(KERNEL_END)
+ create_block_map x0, x7, x3, x5, x6
+
+ /*
+ * Map the kernel image (starting with PHYS_OFFSET).
+ */
+ mov x0, x26 // swapper_pg_dir
+ mov x5, #PAGE_OFFSET
+ create_pgd_entry x0, x5, x3, x6
+ ldr x6, =KERNEL_END
+ mov x3, x24 // phys offset
+ create_block_map x0, x7, x3, x5, x6
+
+ /*
+ * Map the FDT blob (maximum 2MB; must be within 512MB of
+ * PHYS_OFFSET).
+ */
+ mov x3, x21 // FDT phys address
+ and x3, x3, #~((1 << 21) - 1) // 2MB aligned
+ mov x6, #PAGE_OFFSET
+ sub x5, x3, x24 // subtract PHYS_OFFSET
+ tst x5, #~((1 << 29) - 1) // within 512MB?
+ csel x21, xzr, x21, ne // zero the FDT pointer
+ b.ne 1f
+ add x5, x5, x6 // __va(FDT blob)
+ add x6, x5, #1 << 21 // 2MB for the FDT blob
+ sub x6, x6, #1 // inclusive range
+ create_block_map x0, x7, x3, x5, x6
+1:
+ /*
+ * Since the page tables have been populated with non-cacheable
+ * accesses (MMU disabled), invalidate the idmap and swapper page
+ * tables again to remove any speculatively loaded cache lines.
+ */
+ mov x0, x25
+ add x1, x26, #SWAPPER_DIR_SIZE
+ bl __inval_cache_range
+
+ mov lr, x27
+ ret
+ENDPROC(__create_page_tables)
+ .ltorg
+
+ .align 3
+ .type __switch_data, %object
+__switch_data:
+ .quad __mmap_switched
+ .quad __bss_start // x6
+ .quad __bss_stop // x7
+ .quad processor_id // x4
+ .quad __fdt_pointer // x5
+ .quad memstart_addr // x6
+ .quad init_thread_union + THREAD_START_SP // sp
+
+/*
+ * The following fragment of code is executed with the MMU on in MMU mode, and
+ * uses absolute addresses; this is not position independent.
+ */
+__mmap_switched:
+ adr x3, __switch_data + 8
+
+ ldp x6, x7, [x3], #16
+1: cmp x6, x7
+ b.hs 2f
+ str xzr, [x6], #8 // Clear BSS
+ b 1b
+2:
+ ldp x4, x5, [x3], #16
+ ldr x6, [x3], #8
+ ldr x16, [x3]
+ mov sp, x16
+ str x22, [x4] // Save processor ID
+ str x21, [x5] // Save FDT pointer
+ str x24, [x6] // Save PHYS_OFFSET
+ mov x29, #0
+ b start_kernel
+ENDPROC(__mmap_switched)
+
+/*
+ * end early head section, begin head code that is also used for
+ * hotplug and needs to have the same protections as the text region
+ */
+ .section ".text","ax"
+/*
* If we're fortunate enough to boot at EL2, ensure that the world is
* sane before dropping to EL1.
*
@@ -331,7 +548,8 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems
msr vttbr_el2, xzr
/* Hypervisor stub */
- adr x0, __hyp_stub_vectors
+ adrp x0, __hyp_stub_vectors
+ add x0, x0, #:lo12:__hyp_stub_vectors
msr vbar_el2, x0
/* spsr */
@@ -492,183 +710,6 @@ ENDPROC(__calc_phys_offset)
.quad PAGE_OFFSET
/*
- * Macro to create a table entry to the next page.
- *
- * tbl: page table address
- * virt: virtual address
- * shift: #imm page table shift
- * ptrs: #imm pointers per table page
- *
- * Preserves: virt
- * Corrupts: tmp1, tmp2
- * Returns: tbl -> next level table page address
- */
- .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
- lsr \tmp1, \virt, #\shift
- and \tmp1, \tmp1, #\ptrs - 1 // table index
- add \tmp2, \tbl, #PAGE_SIZE
- orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type
- str \tmp2, [\tbl, \tmp1, lsl #3]
- add \tbl, \tbl, #PAGE_SIZE // next level table page
- .endm
-
-/*
- * Macro to populate the PGD (and possibily PUD) for the corresponding
- * block entry in the next level (tbl) for the given virtual address.
- *
- * Preserves: tbl, next, virt
- * Corrupts: tmp1, tmp2
- */
- .macro create_pgd_entry, tbl, virt, tmp1, tmp2
- create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
-#if SWAPPER_PGTABLE_LEVELS == 3
- create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
-#endif
- .endm
-
-/*
- * Macro to populate block entries in the page table for the start..end
- * virtual range (inclusive).
- *
- * Preserves: tbl, flags
- * Corrupts: phys, start, end, pstate
- */
- .macro create_block_map, tbl, flags, phys, start, end
- lsr \phys, \phys, #BLOCK_SHIFT
- lsr \start, \start, #BLOCK_SHIFT
- and \start, \start, #PTRS_PER_PTE - 1 // table index
- orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry
- lsr \end, \end, #BLOCK_SHIFT
- and \end, \end, #PTRS_PER_PTE - 1 // table end index
-9999: str \phys, [\tbl, \start, lsl #3] // store the entry
- add \start, \start, #1 // next entry
- add \phys, \phys, #BLOCK_SIZE // next block
- cmp \start, \end
- b.ls 9999b
- .endm
-
-/*
- * Setup the initial page tables. We only setup the barest amount which is
- * required to get the kernel running. The following sections are required:
- * - identity mapping to enable the MMU (low address, TTBR0)
- * - first few MB of the kernel linear mapping to jump to once the MMU has
- * been enabled, including the FDT blob (TTBR1)
- * - pgd entry for fixed mappings (TTBR1)
- */
-__create_page_tables:
- pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses
- mov x27, lr
-
- /*
- * Invalidate the idmap and swapper page tables to avoid potential
- * dirty cache lines being evicted.
- */
- mov x0, x25
- add x1, x26, #SWAPPER_DIR_SIZE
- bl __inval_cache_range
-
- /*
- * Clear the idmap and swapper page tables.
- */
- mov x0, x25
- add x6, x26, #SWAPPER_DIR_SIZE
-1: stp xzr, xzr, [x0], #16
- stp xzr, xzr, [x0], #16
- stp xzr, xzr, [x0], #16
- stp xzr, xzr, [x0], #16
- cmp x0, x6
- b.lo 1b
-
- ldr x7, =MM_MMUFLAGS
-
- /*
- * Create the identity mapping.
- */
- mov x0, x25 // idmap_pg_dir
- ldr x3, =KERNEL_START
- add x3, x3, x28 // __pa(KERNEL_START)
- create_pgd_entry x0, x3, x5, x6
- ldr x6, =KERNEL_END
- mov x5, x3 // __pa(KERNEL_START)
- add x6, x6, x28 // __pa(KERNEL_END)
- create_block_map x0, x7, x3, x5, x6
-
- /*
- * Map the kernel image (starting with PHYS_OFFSET).
- */
- mov x0, x26 // swapper_pg_dir
- mov x5, #PAGE_OFFSET
- create_pgd_entry x0, x5, x3, x6
- ldr x6, =KERNEL_END
- mov x3, x24 // phys offset
- create_block_map x0, x7, x3, x5, x6
-
- /*
- * Map the FDT blob (maximum 2MB; must be within 512MB of
- * PHYS_OFFSET).
- */
- mov x3, x21 // FDT phys address
- and x3, x3, #~((1 << 21) - 1) // 2MB aligned
- mov x6, #PAGE_OFFSET
- sub x5, x3, x24 // subtract PHYS_OFFSET
- tst x5, #~((1 << 29) - 1) // within 512MB?
- csel x21, xzr, x21, ne // zero the FDT pointer
- b.ne 1f
- add x5, x5, x6 // __va(FDT blob)
- add x6, x5, #1 << 21 // 2MB for the FDT blob
- sub x6, x6, #1 // inclusive range
- create_block_map x0, x7, x3, x5, x6
-1:
- /*
- * Since the page tables have been populated with non-cacheable
- * accesses (MMU disabled), invalidate the idmap and swapper page
- * tables again to remove any speculatively loaded cache lines.
- */
- mov x0, x25
- add x1, x26, #SWAPPER_DIR_SIZE
- bl __inval_cache_range
-
- mov lr, x27
- ret
-ENDPROC(__create_page_tables)
- .ltorg
-
- .align 3
- .type __switch_data, %object
-__switch_data:
- .quad __mmap_switched
- .quad __bss_start // x6
- .quad __bss_stop // x7
- .quad processor_id // x4
- .quad __fdt_pointer // x5
- .quad memstart_addr // x6
- .quad init_thread_union + THREAD_START_SP // sp
-
-/*
- * The following fragment of code is executed with the MMU on in MMU mode, and
- * uses absolute addresses; this is not position independent.
- */
-__mmap_switched:
- adr x3, __switch_data + 8
-
- ldp x6, x7, [x3], #16
-1: cmp x6, x7
- b.hs 2f
- str xzr, [x6], #8 // Clear BSS
- b 1b
-2:
- ldp x4, x5, [x3], #16
- ldr x6, [x3], #8
- ldr x16, [x3]
- mov sp, x16
- str x22, [x4] // Save processor ID
- str x21, [x5] // Save FDT pointer
- str x24, [x6] // Save PHYS_OFFSET
- mov x29, #0
- b start_kernel
-ENDPROC(__mmap_switched)
-
-/*
* Exception handling. Something went wrong and we can't proceed. We ought to
* tell the user, but since we don't have any guarantee that we're even
* running on the right architecture, we do virtually nothing.
@@ -715,22 +756,3 @@ __lookup_processor_type_data:
.quad .
.quad cpu_table
.size __lookup_processor_type_data, . - __lookup_processor_type_data
-
-/*
- * Determine validity of the x21 FDT pointer.
- * The dtb must be 8-byte aligned and live in the first 512M of memory.
- */
-__vet_fdt:
- tst x21, #0x7
- b.ne 1f
- cmp x21, x24
- b.lt 1f
- mov x0, #(1 << 29)
- add x0, x0, x24
- cmp x21, x0
- b.ge 1f
- ret
-1:
- mov x21, #0
- ret
-ENDPROC(__vet_fdt)
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 7e9327a0986d..27d4864577e5 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -17,14 +17,19 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/bitops.h>
+#include <linux/bug.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
+#include <linux/mm.h>
#include <linux/smp.h>
+#include <linux/spinlock.h>
#include <linux/stop_machine.h>
+#include <linux/types.h>
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/debug-monitors.h>
+#include <asm/fixmap.h>
#include <asm/insn.h>
#define AARCH64_INSN_SF_BIT BIT(31)
@@ -72,6 +77,29 @@ bool __kprobes aarch64_insn_is_nop(u32 insn)
}
}
+static DEFINE_SPINLOCK(patch_lock);
+
+static void __kprobes *patch_map(void *addr, int fixmap)
+{
+ unsigned long uintaddr = (uintptr_t) addr;
+ bool module = !core_kernel_text(uintaddr);
+ struct page *page;
+
+ if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX))
+ page = vmalloc_to_page(addr);
+ else
+ page = virt_to_page(addr);
+
+ BUG_ON(!page);
+ set_fixmap(fixmap, page_to_phys(page));
+
+ return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK));
+}
+
+static void __kprobes patch_unmap(int fixmap)
+{
+ clear_fixmap(fixmap);
+}
/*
* In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always
* little-endian.
@@ -88,10 +116,27 @@ int __kprobes aarch64_insn_read(void *addr, u32 *insnp)
return ret;
}
+static int __kprobes __aarch64_insn_write(void *addr, u32 insn)
+{
+ void *waddr = addr;
+ unsigned long flags = 0;
+ int ret;
+
+ spin_lock_irqsave(&patch_lock, flags);
+ waddr = patch_map(addr, FIX_TEXT_POKE0);
+
+ ret = probe_kernel_write(waddr, &insn, AARCH64_INSN_SIZE);
+
+ patch_unmap(FIX_TEXT_POKE0);
+ spin_unlock_irqrestore(&patch_lock, flags);
+
+ return ret;
+}
+
int __kprobes aarch64_insn_write(void *addr, u32 insn)
{
insn = cpu_to_le32(insn);
- return probe_kernel_write(addr, &insn, AARCH64_INSN_SIZE);
+ return __aarch64_insn_write(addr, insn);
}
static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn)
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 071a6ec13bd8..240b75c0e94f 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -40,6 +40,8 @@ int arch_show_interrupts(struct seq_file *p, int prec)
return 0;
}
+void (*handle_arch_irq)(struct pt_regs *) = NULL;
+
void __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
{
if (handle_arch_irq)
diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c
index 263a166291fb..4f1fec7a46db 100644
--- a/arch/arm64/kernel/jump_label.c
+++ b/arch/arm64/kernel/jump_label.c
@@ -22,9 +22,8 @@
#ifdef HAVE_JUMP_LABEL
-static void __arch_jump_label_transform(struct jump_entry *entry,
- enum jump_label_type type,
- bool is_static)
+void arch_jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type)
{
void *addr = (void *)entry->code;
u32 insn;
@@ -37,22 +36,18 @@ static void __arch_jump_label_transform(struct jump_entry *entry,
insn = aarch64_insn_gen_nop();
}
- if (is_static)
- aarch64_insn_patch_text_nosync(addr, insn);
- else
- aarch64_insn_patch_text(&addr, &insn, 1);
-}
-
-void arch_jump_label_transform(struct jump_entry *entry,
- enum jump_label_type type)
-{
- __arch_jump_label_transform(entry, type, false);
+ aarch64_insn_patch_text(&addr, &insn, 1);
}
void arch_jump_label_transform_static(struct jump_entry *entry,
enum jump_label_type type)
{
- __arch_jump_label_transform(entry, type, true);
+ /*
+ * We use the architected A64 NOP in arch_static_branch, so there's no
+ * need to patch an identical A64 NOP over the top of it here. The core
+ * will call arch_jump_label_transform from a module notifier if the
+ * NOP needs to be replaced by a branch.
+ */
}
#endif /* HAVE_JUMP_LABEL */
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index ae8be923f2cd..789ce6cf7ff6 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -380,6 +380,7 @@ void __init setup_arch(char **cmdline_p)
*cmdline_p = boot_command_line;
+ early_fixmap_init();
early_ioremap_init();
parse_early_param();
@@ -396,7 +397,7 @@ void __init setup_arch(char **cmdline_p)
paging_init();
request_standard_resources();
- efi_idmap_init();
+ efi_virtmap_init();
early_ioremap_reset();
unflatten_device_tree();
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 2f600294e8ca..5f11ebe652a4 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -8,6 +8,7 @@
#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/page.h>
+#include <asm/pgtable.h>
#include "image.h"
@@ -32,6 +33,30 @@ jiffies = jiffies_64;
*(.hyp.text) \
VMLINUX_SYMBOL(__hyp_text_end) = .;
+/*
+ * The size of the PE/COFF section that covers the kernel image, which
+ * runs from stext to _edata, must be a round multiple of the PE/COFF
+ * FileAlignment, which we set to its minimum value of 0x200. 'stext'
+ * itself is 4 KB aligned, so padding out _edata to a 0x200 aligned
+ * boundary should be sufficient.
+ */
+PECOFF_FILE_ALIGNMENT = 0x200;
+
+#ifdef CONFIG_EFI
+#define PECOFF_EDATA_PADDING \
+ .pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); }
+#else
+#define PECOFF_EDATA_PADDING
+#endif
+
+#ifdef CONFIG_DEBUG_ALIGN_RODATA
+#define ALIGN_DEBUG_RO . = ALIGN(1<<SECTION_SHIFT);
+#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO
+#else
+#define ALIGN_DEBUG_RO
+#define ALIGN_DEBUG_RO_MIN(min) . = ALIGN(min);
+#endif
+
SECTIONS
{
/*
@@ -54,6 +79,7 @@ SECTIONS
_text = .;
HEAD_TEXT
}
+ ALIGN_DEBUG_RO
.text : { /* Real text segment */
_stext = .; /* Text and read-only data */
__exception_text_start = .;
@@ -70,19 +96,22 @@ SECTIONS
*(.got) /* Global offset table */
}
+ ALIGN_DEBUG_RO
RO_DATA(PAGE_SIZE)
EXCEPTION_TABLE(8)
NOTES
+ ALIGN_DEBUG_RO
_etext = .; /* End of text and rodata section */
- . = ALIGN(PAGE_SIZE);
+ ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
__init_begin = .;
INIT_TEXT_SECTION(8)
.exit.text : {
ARM_EXIT_KEEP(EXIT_TEXT)
}
- . = ALIGN(16);
+
+ ALIGN_DEBUG_RO_MIN(16)
.init.data : {
INIT_DATA
INIT_SETUP(16)
@@ -114,6 +143,7 @@ SECTIONS
_data = .;
_sdata = .;
RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE)
+ PECOFF_EDATA_PADDING
_edata = .;
BSS_SECTION(0, 0, 0)