diff options
Diffstat (limited to 'arch/arm64/kernel/head.S')
-rw-r--r-- | arch/arm64/kernel/head.S | 357 |
1 files changed, 140 insertions, 217 deletions
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 77bfa3470ca0..f3c9d89c2c1e 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -29,6 +29,7 @@ #include <asm/asm-offsets.h> #include <asm/cache.h> #include <asm/cputype.h> +#include <asm/kernel-pgtable.h> #include <asm/memory.h> #include <asm/thread_info.h> #include <asm/pgtable-hwdef.h> @@ -36,7 +37,7 @@ #include <asm/page.h> #include <asm/virt.h> -#define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET) +#define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET) #if (TEXT_OFFSET & 0xfff) != 0 #error TEXT_OFFSET must be at least 4KB aligned @@ -46,44 +47,10 @@ #error TEXT_OFFSET must be less than 2MB #endif - .macro pgtbl, ttb0, ttb1, virt_to_phys - ldr \ttb1, =swapper_pg_dir - ldr \ttb0, =idmap_pg_dir - add \ttb1, \ttb1, \virt_to_phys - add \ttb0, \ttb0, \virt_to_phys - .endm - -#ifdef CONFIG_ARM64_64K_PAGES -#define BLOCK_SHIFT PAGE_SHIFT -#define BLOCK_SIZE PAGE_SIZE -#define TABLE_SHIFT PMD_SHIFT -#else -#define BLOCK_SHIFT SECTION_SHIFT -#define BLOCK_SIZE SECTION_SIZE -#define TABLE_SHIFT PUD_SHIFT -#endif - -#define KERNEL_START KERNEL_RAM_VADDR +#define KERNEL_START _text #define KERNEL_END _end /* - * Initial memory map attributes. - */ -#ifndef CONFIG_SMP -#define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF -#define PMD_FLAGS PMD_TYPE_SECT | PMD_SECT_AF -#else -#define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF | PTE_SHARED -#define PMD_FLAGS PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S -#endif - -#ifdef CONFIG_ARM64_64K_PAGES -#define MM_MMUFLAGS PTE_ATTRINDX(MT_NORMAL) | PTE_FLAGS -#else -#define MM_MMUFLAGS PMD_ATTRINDX(MT_NORMAL) | PMD_FLAGS -#endif - -/* * Kernel startup entry point. * --------------------------- * @@ -132,6 +99,8 @@ efi_head: #endif #ifdef CONFIG_EFI + .globl stext_offset + .set stext_offset, stext - efi_head .align 3 pe_header: .ascii "PE" @@ -155,12 +124,12 @@ optional_header: .long 0 // SizeOfInitializedData .long 0 // SizeOfUninitializedData .long efi_stub_entry - efi_head // AddressOfEntryPoint - .long stext - efi_head // BaseOfCode + .long stext_offset // BaseOfCode extra_header_fields: .quad 0 // ImageBase - .long 0x20 // SectionAlignment - .long 0x8 // FileAlignment + .long 0x1000 // SectionAlignment + .long PECOFF_FILE_ALIGNMENT // FileAlignment .short 0 // MajorOperatingSystemVersion .short 0 // MinorOperatingSystemVersion .short 0 // MajorImageVersion @@ -172,7 +141,7 @@ extra_header_fields: .long _end - efi_head // SizeOfImage // Everything before the kernel image is considered part of the header - .long stext - efi_head // SizeOfHeaders + .long stext_offset // SizeOfHeaders .long 0 // CheckSum .short 0xa // Subsystem (EFI application) .short 0 // DllCharacteristics @@ -217,53 +186,64 @@ section_table: .byte 0 .byte 0 // end of 0 padding of section name .long _end - stext // VirtualSize - .long stext - efi_head // VirtualAddress + .long stext_offset // VirtualAddress .long _edata - stext // SizeOfRawData - .long stext - efi_head // PointerToRawData + .long stext_offset // PointerToRawData .long 0 // PointerToRelocations (0 for executables) .long 0 // PointerToLineNumbers (0 for executables) .short 0 // NumberOfRelocations (0 for executables) .short 0 // NumberOfLineNumbers (0 for executables) .long 0xe0500020 // Characteristics (section flags) - .align 5 + + /* + * EFI will load stext onwards at the 4k section alignment + * described in the PE/COFF header. To ensure that instruction + * sequences using an adrp and a :lo12: immediate will function + * correctly at this alignment, we must ensure that stext is + * placed at a 4k boundary in the Image to begin with. + */ + .align 12 #endif ENTRY(stext) - mov x21, x0 // x21=FDT + bl preserve_boot_args bl el2_setup // Drop to EL1, w20=cpu_boot_mode - bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET + adrp x24, __PHYS_OFFSET bl set_cpu_boot_mode_flag - mrs x22, midr_el1 // x22=cpuid - mov x0, x22 - bl lookup_processor_type - mov x23, x0 // x23=current cpu_table - /* - * __error_p may end up out of range for cbz if text areas are - * aligned up to section sizes. - */ - cbnz x23, 1f // invalid processor (x23=0)? - b __error_p -1: + bl __vet_fdt bl __create_page_tables // x25=TTBR0, x26=TTBR1 /* - * The following calls CPU specific code in a position independent - * manner. See arch/arm64/mm/proc.S for details. x23 = base of - * cpu_info structure selected by lookup_processor_type above. + * The following calls CPU setup code, see arch/arm64/mm/proc.S for + * details. * On return, the CPU will be ready for the MMU to be turned on and * the TCR will have been set. */ - ldr x27, __switch_data // address to jump to after + ldr x27, =__mmap_switched // address to jump to after // MMU has been enabled - adrp lr, __enable_mmu // return (PIC) address - add lr, lr, #:lo12:__enable_mmu - ldr x12, [x23, #CPU_INFO_SETUP] - add x12, x12, x28 // __virt_to_phys - br x12 // initialise processor + adr_l lr, __enable_mmu // return (PIC) address + b __cpu_setup // initialise processor ENDPROC(stext) /* + * Preserve the arguments passed by the bootloader in x0 .. x3 + */ +preserve_boot_args: + mov x21, x0 // x21=FDT + + adr_l x0, boot_args // record the contents of + stp x21, x1, [x0] // x0 .. x3 at kernel entry + stp x2, x3, [x0, #16] + + dmb sy // needed before dc ivac with + // MMU off + + add x1, x0, #0x20 // 4 x 8 bytes + b __inval_cache_range // tail call +ENDPROC(preserve_boot_args) + +/* * Determine validity of the x21 FDT pointer. * The dtb must be 8-byte aligned and live in the first 512M of memory. */ @@ -312,7 +292,7 @@ ENDPROC(__vet_fdt) .macro create_pgd_entry, tbl, virt, tmp1, tmp2 create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 #if SWAPPER_PGTABLE_LEVELS == 3 - create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 + create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 #endif .endm @@ -324,15 +304,15 @@ ENDPROC(__vet_fdt) * Corrupts: phys, start, end, pstate */ .macro create_block_map, tbl, flags, phys, start, end - lsr \phys, \phys, #BLOCK_SHIFT - lsr \start, \start, #BLOCK_SHIFT + lsr \phys, \phys, #SWAPPER_BLOCK_SHIFT + lsr \start, \start, #SWAPPER_BLOCK_SHIFT and \start, \start, #PTRS_PER_PTE - 1 // table index - orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry - lsr \end, \end, #BLOCK_SHIFT + orr \phys, \flags, \phys, lsl #SWAPPER_BLOCK_SHIFT // table entry + lsr \end, \end, #SWAPPER_BLOCK_SHIFT and \end, \end, #PTRS_PER_PTE - 1 // table end index 9999: str \phys, [\tbl, \start, lsl #3] // store the entry add \start, \start, #1 // next entry - add \phys, \phys, #BLOCK_SIZE // next block + add \phys, \phys, #SWAPPER_BLOCK_SIZE // next block cmp \start, \end b.ls 9999b .endm @@ -346,7 +326,8 @@ ENDPROC(__vet_fdt) * - pgd entry for fixed mappings (TTBR1) */ __create_page_tables: - pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses + adrp x25, idmap_pg_dir + adrp x26, swapper_pg_dir mov x27, lr /* @@ -354,14 +335,14 @@ __create_page_tables: * dirty cache lines being evicted. */ mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE + add x1, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE bl __inval_cache_range /* * Clear the idmap and swapper page tables. */ mov x0, x25 - add x6, x26, #SWAPPER_DIR_SIZE + add x6, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE 1: stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 @@ -369,18 +350,56 @@ __create_page_tables: cmp x0, x6 b.lo 1b - ldr x7, =MM_MMUFLAGS + ldr x7, =SWAPPER_MM_MMUFLAGS /* * Create the identity mapping. */ mov x0, x25 // idmap_pg_dir - ldr x3, =KERNEL_START - add x3, x3, x28 // __pa(KERNEL_START) + adrp x3, KERNEL_START // __pa(KERNEL_START) + +#ifndef CONFIG_ARM64_VA_BITS_48 +#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) +#define EXTRA_PTRS (1 << (48 - EXTRA_SHIFT)) + + /* + * If VA_BITS < 48, it may be too small to allow for an ID mapping to be + * created that covers system RAM if that is located sufficiently high + * in the physical address space. So for the ID map, use an extended + * virtual range in that case, by configuring an additional translation + * level. + * First, we have to verify our assumption that the current value of + * VA_BITS was chosen such that all translation levels are fully + * utilised, and that lowering T0SZ will always result in an additional + * translation level to be configured. + */ +#if VA_BITS != EXTRA_SHIFT +#error "Mismatch between VA_BITS and page size/number of translation levels" +#endif + + /* + * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the + * entire kernel image can be ID mapped. As T0SZ == (64 - #bits used), + * this number conveniently equals the number of leading zeroes in + * the physical address of KERNEL_END. + */ + adrp x5, KERNEL_END + clz x5, x5 + cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough? + b.ge 1f // .. then skip additional level + + adr_l x6, idmap_t0sz + str x5, [x6] + dmb sy + dc ivac, x6 // Invalidate potentially stale cache line + + create_table_entry x0, x3, EXTRA_SHIFT, EXTRA_PTRS, x5, x6 +1: +#endif + create_pgd_entry x0, x3, x5, x6 - ldr x6, =KERNEL_END mov x5, x3 // __pa(KERNEL_START) - add x6, x6, x28 // __pa(KERNEL_END) + adr_l x6, KERNEL_END // __pa(KERNEL_END) create_block_map x0, x7, x3, x5, x6 /* @@ -389,7 +408,7 @@ __create_page_tables: mov x0, x26 // swapper_pg_dir mov x5, #PAGE_OFFSET create_pgd_entry x0, x5, x3, x6 - ldr x6, =KERNEL_END + ldr x6, =KERNEL_END // __va(KERNEL_END) mov x3, x24 // phys offset create_block_map x0, x7, x3, x5, x6 @@ -415,7 +434,8 @@ __create_page_tables: * tables again to remove any speculatively loaded cache lines. */ mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE + add x1, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE + dmb sy bl __inval_cache_range mov lr, x27 @@ -423,37 +443,25 @@ __create_page_tables: ENDPROC(__create_page_tables) .ltorg - .align 3 - .type __switch_data, %object -__switch_data: - .quad __mmap_switched - .quad __bss_start // x6 - .quad __bss_stop // x7 - .quad processor_id // x4 - .quad __fdt_pointer // x5 - .quad memstart_addr // x6 - .quad init_thread_union + THREAD_START_SP // sp - /* - * The following fragment of code is executed with the MMU on in MMU mode, and - * uses absolute addresses; this is not position independent. + * The following fragment of code is executed with the MMU enabled. */ + .set initial_sp, init_thread_union + THREAD_START_SP __mmap_switched: - adr x3, __switch_data + 8 - - ldp x6, x7, [x3], #16 -1: cmp x6, x7 - b.hs 2f - str xzr, [x6], #8 // Clear BSS - b 1b -2: - ldp x4, x5, [x3], #16 - ldr x6, [x3], #8 - ldr x16, [x3] - mov sp, x16 - str x22, [x4] // Save processor ID - str x21, [x5] // Save FDT pointer - str x24, [x6] // Save PHYS_OFFSET + // Clear BSS + adr_l x0, __bss_start + mov x1, xzr + adr_l x2, __bss_stop + sub x2, x2, x0 + bl __pi_memset + dsb ishst // Make zero page visible to PTW + + adr_l sp, initial_sp, x4 + mov x4, sp + and x4, x4, #~(THREAD_SIZE - 1) + msr sp_el0, x4 // Save thread_info + str_l x21, __fdt_pointer, x5 // Save FDT pointer + str_l x24, memstart_addr, x6 // Save PHYS_OFFSET mov x29, #0 #ifdef CONFIG_KASAN bl kasan_early_init @@ -546,7 +554,8 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr vttbr_el2, xzr /* Hypervisor stub */ - adr x0, __hyp_stub_vectors + adrp x0, __hyp_stub_vectors + add x0, x0, #:lo12:__hyp_stub_vectors msr vbar_el2, x0 /* spsr */ @@ -563,8 +572,7 @@ ENDPROC(el2_setup) * in x20. See arch/arm64/include/asm/virt.h for more info. */ ENTRY(set_cpu_boot_mode_flag) - ldr x1, =__boot_cpu_mode // Compute __boot_cpu_mode - add x1, x1, x28 + adr_l x1, __boot_cpu_mode cmp w20, #BOOT_CPU_MODE_EL2 b.ne 1f add x1, x1, #4 @@ -588,7 +596,6 @@ ENTRY(__boot_cpu_mode) .long 0 .popsection -#ifdef CONFIG_SMP .align 3 1: .quad . .quad secondary_holding_pen_release @@ -599,15 +606,11 @@ ENTRY(__boot_cpu_mode) */ ENTRY(secondary_holding_pen) bl el2_setup // Drop to EL1, w20=cpu_boot_mode - bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET bl set_cpu_boot_mode_flag mrs x0, mpidr_el1 ldr x1, =MPIDR_HWID_BITMASK and x0, x0, x1 - adr x1, 1b - ldp x2, x3, [x1] - sub x1, x1, x2 - add x3, x3, x1 + adr_l x3, secondary_holding_pen_release pen: ldr x4, [x3] cmp x4, x0 b.eq secondary_startup @@ -621,7 +624,6 @@ ENDPROC(secondary_holding_pen) */ ENTRY(secondary_entry) bl el2_setup // Drop to EL1 - bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET bl set_cpu_boot_mode_flag b secondary_startup ENDPROC(secondary_entry) @@ -630,16 +632,9 @@ ENTRY(secondary_startup) /* * Common entry point for secondary CPUs. */ - mrs x22, midr_el1 // x22=cpuid - mov x0, x22 - bl lookup_processor_type - mov x23, x0 // x23=current cpu_table - cbz x23, __error_p // invalid processor (x23=0)? - - pgtbl x25, x26, x28 // x25=TTBR0, x26=TTBR1 - ldr x12, [x23, #CPU_INFO_SETUP] - add x12, x12, x28 // __virt_to_phys - blr x12 // initialise processor + adrp x25, idmap_pg_dir + adrp x26, swapper_pg_dir + bl __cpu_setup // initialise processor ldr x21, =secondary_data ldr x27, =__secondary_switched // address to jump to after enabling the MMU @@ -649,17 +644,19 @@ ENDPROC(secondary_startup) ENTRY(__secondary_switched) ldr x0, [x21] // get secondary_data.stack mov sp, x0 + and x0, x0, #~(THREAD_SIZE - 1) + msr sp_el0, x0 // save thread_info mov x29, #0 b secondary_start_kernel ENDPROC(__secondary_switched) -#endif /* CONFIG_SMP */ /* - * Setup common bits before finally enabling the MMU. Essentially this is just - * loading the page table pointer and vector base registers. + * Enable the MMU. * - * On entry to this code, x0 must contain the SCTLR_EL1 value for turning on - * the MMU. + * x0 = SCTLR_EL1 value for turning on the MMU. + * x27 = *virtual* address to jump to upon completion + * + * other registers depend on the function called upon completion */ __enable_mmu: ldr x5, =vectors @@ -667,89 +664,15 @@ __enable_mmu: msr ttbr0_el1, x25 // load TTBR0 msr ttbr1_el1, x26 // load TTBR1 isb - b __turn_mmu_on -ENDPROC(__enable_mmu) - -/* - * Enable the MMU. This completely changes the structure of the visible memory - * space. You will not be able to trace execution through this. - * - * x0 = system control register - * x27 = *virtual* address to jump to upon completion - * - * other registers depend on the function called upon completion - * - * We align the entire function to the smallest power of two larger than it to - * ensure it fits within a single block map entry. Otherwise were PHYS_OFFSET - * close to the end of a 512MB or 1GB block we might require an additional - * table to map the entire function. - */ - .align 4 -__turn_mmu_on: msr sctlr_el1, x0 isb + /* + * Invalidate the local I-cache so that any instructions fetched + * speculatively from the PoC are discarded, since they may have + * been dynamically patched at the PoU. + */ + ic iallu + dsb nsh + isb br x27 -ENDPROC(__turn_mmu_on) - -/* - * Calculate the start of physical memory. - */ -__calc_phys_offset: - adr x0, 1f - ldp x1, x2, [x0] - sub x28, x0, x1 // x28 = PHYS_OFFSET - PAGE_OFFSET - add x24, x2, x28 // x24 = PHYS_OFFSET - ret -ENDPROC(__calc_phys_offset) - - .align 3 -1: .quad . - .quad PAGE_OFFSET - -/* - * Exception handling. Something went wrong and we can't proceed. We ought to - * tell the user, but since we don't have any guarantee that we're even - * running on the right architecture, we do virtually nothing. - */ -__error_p: -ENDPROC(__error_p) - -__error: -1: nop - b 1b -ENDPROC(__error) - -/* - * This function gets the processor ID in w0 and searches the cpu_table[] for - * a match. It returns a pointer to the struct cpu_info it found. The - * cpu_table[] must end with an empty (all zeros) structure. - * - * This routine can be called via C code and it needs to work with the MMU - * both disabled and enabled (the offset is calculated automatically). - */ -ENTRY(lookup_processor_type) - adr x1, __lookup_processor_type_data - ldp x2, x3, [x1] - sub x1, x1, x2 // get offset between VA and PA - add x3, x3, x1 // convert VA to PA -1: - ldp w5, w6, [x3] // load cpu_id_val and cpu_id_mask - cbz w5, 2f // end of list? - and w6, w6, w0 - cmp w5, w6 - b.eq 3f - add x3, x3, #CPU_INFO_SZ - b 1b -2: - mov x3, #0 // unknown processor -3: - mov x0, x3 - ret -ENDPROC(lookup_processor_type) - - .align 3 - .type __lookup_processor_type_data, %object -__lookup_processor_type_data: - .quad . - .quad cpu_table - .size __lookup_processor_type_data, . - __lookup_processor_type_data +ENDPROC(__enable_mmu) |