diff options
Diffstat (limited to 'arch/arm64/kernel/head.S')
-rw-r--r-- | arch/arm64/kernel/head.S | 308 |
1 files changed, 237 insertions, 71 deletions
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 53dcae49e729..6254f4a28eed 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -22,10 +22,12 @@ #include <linux/linkage.h> #include <linux/init.h> +#include <linux/irqchip/arm-gic-v3.h> #include <asm/assembler.h> #include <asm/ptrace.h> #include <asm/asm-offsets.h> +#include <asm/cache.h> #include <asm/cputype.h> #include <asm/memory.h> #include <asm/thread_info.h> @@ -34,29 +36,21 @@ #include <asm/page.h> #include <asm/virt.h> -/* - * swapper_pg_dir is the virtual address of the initial page table. We place - * the page tables 3 * PAGE_SIZE below KERNEL_RAM_VADDR. The idmap_pg_dir has - * 2 pages and is placed below swapper_pg_dir. - */ #define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET) -#if (KERNEL_RAM_VADDR & 0xfffff) != 0x80000 -#error KERNEL_RAM_VADDR must start at 0xXXX80000 +#if (TEXT_OFFSET & 0xfff) != 0 +#error TEXT_OFFSET must be at least 4KB aligned +#elif (PAGE_OFFSET & 0x1fffff) != 0 +#error PAGE_OFFSET must be at least 2MB aligned +#elif TEXT_OFFSET > 0x1fffff +#error TEXT_OFFSET must be less than 2MB #endif -#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) -#define IDMAP_DIR_SIZE (2 * PAGE_SIZE) - - .globl swapper_pg_dir - .equ swapper_pg_dir, KERNEL_RAM_VADDR - SWAPPER_DIR_SIZE - - .globl idmap_pg_dir - .equ idmap_pg_dir, swapper_pg_dir - IDMAP_DIR_SIZE - - .macro pgtbl, ttb0, ttb1, phys - add \ttb1, \phys, #TEXT_OFFSET - SWAPPER_DIR_SIZE - sub \ttb0, \ttb1, #IDMAP_DIR_SIZE + .macro pgtbl, ttb0, ttb1, virt_to_phys + ldr \ttb1, =swapper_pg_dir + ldr \ttb0, =idmap_pg_dir + add \ttb1, \ttb1, \virt_to_phys + add \ttb0, \ttb0, \virt_to_phys .endm #ifdef CONFIG_ARM64_64K_PAGES @@ -107,16 +101,137 @@ /* * DO NOT MODIFY. Image header expected by Linux boot-loaders. */ +#ifdef CONFIG_EFI +efi_head: + /* + * This add instruction has no meaningful effect except that + * its opcode forms the magic "MZ" signature required by UEFI. + */ + add x13, x18, #0x16 + b stext +#else b stext // branch to kernel start, magic .long 0 // reserved +#endif .quad TEXT_OFFSET // Image load offset from start of RAM .quad 0 // reserved .quad 0 // reserved + .quad 0 // reserved + .quad 0 // reserved + .quad 0 // reserved + .byte 0x41 // Magic number, "ARM\x64" + .byte 0x52 + .byte 0x4d + .byte 0x64 +#ifdef CONFIG_EFI + .long pe_header - efi_head // Offset to the PE header. +#else + .word 0 // reserved +#endif + +#ifdef CONFIG_EFI + .align 3 +pe_header: + .ascii "PE" + .short 0 +coff_header: + .short 0xaa64 // AArch64 + .short 2 // nr_sections + .long 0 // TimeDateStamp + .long 0 // PointerToSymbolTable + .long 1 // NumberOfSymbols + .short section_table - optional_header // SizeOfOptionalHeader + .short 0x206 // Characteristics. + // IMAGE_FILE_DEBUG_STRIPPED | + // IMAGE_FILE_EXECUTABLE_IMAGE | + // IMAGE_FILE_LINE_NUMS_STRIPPED +optional_header: + .short 0x20b // PE32+ format + .byte 0x02 // MajorLinkerVersion + .byte 0x14 // MinorLinkerVersion + .long _edata - stext // SizeOfCode + .long 0 // SizeOfInitializedData + .long 0 // SizeOfUninitializedData + .long efi_stub_entry - efi_head // AddressOfEntryPoint + .long stext - efi_head // BaseOfCode + +extra_header_fields: + .quad 0 // ImageBase + .long 0x20 // SectionAlignment + .long 0x8 // FileAlignment + .short 0 // MajorOperatingSystemVersion + .short 0 // MinorOperatingSystemVersion + .short 0 // MajorImageVersion + .short 0 // MinorImageVersion + .short 0 // MajorSubsystemVersion + .short 0 // MinorSubsystemVersion + .long 0 // Win32VersionValue + + .long _edata - efi_head // SizeOfImage + + // Everything before the kernel image is considered part of the header + .long stext - efi_head // SizeOfHeaders + .long 0 // CheckSum + .short 0xa // Subsystem (EFI application) + .short 0 // DllCharacteristics + .quad 0 // SizeOfStackReserve + .quad 0 // SizeOfStackCommit + .quad 0 // SizeOfHeapReserve + .quad 0 // SizeOfHeapCommit + .long 0 // LoaderFlags + .long 0x6 // NumberOfRvaAndSizes + + .quad 0 // ExportTable + .quad 0 // ImportTable + .quad 0 // ResourceTable + .quad 0 // ExceptionTable + .quad 0 // CertificationTable + .quad 0 // BaseRelocationTable + + // Section table +section_table: + + /* + * The EFI application loader requires a relocation section + * because EFI applications must be relocatable. This is a + * dummy section as far as we are concerned. + */ + .ascii ".reloc" + .byte 0 + .byte 0 // end of 0 padding of section name + .long 0 + .long 0 + .long 0 // SizeOfRawData + .long 0 // PointerToRawData + .long 0 // PointerToRelocations + .long 0 // PointerToLineNumbers + .short 0 // NumberOfRelocations + .short 0 // NumberOfLineNumbers + .long 0x42100040 // Characteristics (section flags) + + + .ascii ".text" + .byte 0 + .byte 0 + .byte 0 // end of 0 padding of section name + .long _edata - stext // VirtualSize + .long stext - efi_head // VirtualAddress + .long _edata - stext // SizeOfRawData + .long stext - efi_head // PointerToRawData + + .long 0 // PointerToRelocations (0 for executables) + .long 0 // PointerToLineNumbers (0 for executables) + .short 0 // NumberOfRelocations (0 for executables) + .short 0 // NumberOfLineNumbers (0 for executables) + .long 0xe0500020 // Characteristics (section flags) + .align 5 +#endif ENTRY(stext) mov x21, x0 // x21=FDT + bl el2_setup // Drop to EL1, w20=cpu_boot_mode bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET - bl el2_setup // Drop to EL1 + bl set_cpu_boot_mode_flag mrs x22, midr_el1 // x22=cpuid mov x0, x22 bl lookup_processor_type @@ -142,21 +257,30 @@ ENDPROC(stext) /* * If we're fortunate enough to boot at EL2, ensure that the world is * sane before dropping to EL1. + * + * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x20 if + * booted in EL1 or EL2 respectively. */ ENTRY(el2_setup) mrs x0, CurrentEL cmp x0, #PSR_MODE_EL2t ccmp x0, #PSR_MODE_EL2h, #0x4, ne - ldr x0, =__boot_cpu_mode // Compute __boot_cpu_mode - add x0, x0, x28 - b.eq 1f - str wzr, [x0] // Remember we don't have EL2... + b.ne 1f + mrs x0, sctlr_el2 +CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2 +CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 + msr sctlr_el2, x0 + b 2f +1: mrs x0, sctlr_el1 +CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1 +CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 + msr sctlr_el1, x0 + mov w20, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 + isb ret /* Hyp configuration. */ -1: ldr w1, =BOOT_CPU_MODE_EL2 - str w1, [x0, #4] // This CPU has EL2 - mov x0, #(1 << 31) // 64-bit EL1 +2: mov x0, #(1 << 31) // 64-bit EL1 msr hcr_el2, x0 /* Generic timers. */ @@ -165,6 +289,23 @@ ENTRY(el2_setup) msr cnthctl_el2, x0 msr cntvoff_el2, xzr // Clear virtual offset +#ifdef CONFIG_ARM_GIC_V3 + /* GICv3 system register access */ + mrs x0, id_aa64pfr0_el1 + ubfx x0, x0, #24, #4 + cmp x0, #1 + b.ne 3f + + mrs_s x0, ICC_SRE_EL2 + orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 + orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 + msr_s ICC_SRE_EL2, x0 + isb // Make sure SRE is now set + msr_s ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults + +3: +#endif + /* Populate ID registers. */ mrs x0, midr_el1 mrs x1, mpidr_el1 @@ -173,7 +314,8 @@ ENTRY(el2_setup) /* sctlr_el1 */ mov x0, #0x0800 // Set/clear RES{1,0} bits - movk x0, #0x30d0, lsl #16 +CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems +CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr sctlr_el1, x0 /* Coprocessor traps. */ @@ -196,28 +338,41 @@ ENTRY(el2_setup) PSR_MODE_EL1h) msr spsr_el2, x0 msr elr_el2, lr + mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 eret ENDPROC(el2_setup) /* + * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed + * in x20. See arch/arm64/include/asm/virt.h for more info. + */ +ENTRY(set_cpu_boot_mode_flag) + ldr x1, =__boot_cpu_mode // Compute __boot_cpu_mode + add x1, x1, x28 + cmp w20, #BOOT_CPU_MODE_EL2 + b.ne 1f + add x1, x1, #4 +1: str w20, [x1] // This CPU has booted in EL1 + dmb sy + dc ivac, x1 // Invalidate potentially stale cache line + ret +ENDPROC(set_cpu_boot_mode_flag) + +/* * We need to find out the CPU boot mode long after boot, so we need to * store it in a writable variable. * * This is not in .bss, because we set it sufficiently early that the boot-time * zeroing of .bss would clobber it. */ - .pushsection .data + .pushsection .data..cacheline_aligned ENTRY(__boot_cpu_mode) + .align L1_CACHE_SHIFT .long BOOT_CPU_MODE_EL2 .long 0 .popsection - .align 3 -2: .quad . - .quad PAGE_OFFSET - #ifdef CONFIG_SMP - .pushsection .smp.pen.text, "ax" .align 3 1: .quad . .quad secondary_holding_pen_release @@ -227,8 +382,9 @@ ENTRY(__boot_cpu_mode) * cores are held until we're ready for them to initialise. */ ENTRY(secondary_holding_pen) - bl __calc_phys_offset // x24=phys offset - bl el2_setup // Drop to EL1 + bl el2_setup // Drop to EL1, w20=cpu_boot_mode + bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET + bl set_cpu_boot_mode_flag mrs x0, mpidr_el1 ldr x1, =MPIDR_HWID_BITMASK and x0, x0, x1 @@ -242,7 +398,16 @@ pen: ldr x4, [x3] wfe b pen ENDPROC(secondary_holding_pen) - .popsection + + /* + * Secondary entry point that jumps straight into the kernel. Only to + * be used where CPUs are brought online dynamically by the kernel. + */ +ENTRY(secondary_entry) + bl __calc_phys_offset // x2=phys offset + bl el2_setup // Drop to EL1 + b secondary_startup +ENDPROC(secondary_entry) ENTRY(secondary_startup) /* @@ -254,7 +419,7 @@ ENTRY(secondary_startup) mov x23, x0 // x23=current cpu_table cbz x23, __error_p // invalid processor (x23=0)? - pgtbl x25, x26, x24 // x25=TTBR0, x26=TTBR1 + pgtbl x25, x26, x28 // x25=TTBR0, x26=TTBR1 ldr x12, [x23, #CPU_INFO_SETUP] add x12, x12, x28 // __virt_to_phys blr x12 // initialise processor @@ -296,8 +461,13 @@ ENDPROC(__enable_mmu) * x27 = *virtual* address to jump to upon completion * * other registers depend on the function called upon completion + * + * We align the entire function to the smallest power of two larger than it to + * ensure it fits within a single block map entry. Otherwise were PHYS_OFFSET + * close to the end of a 512MB or 1GB block we might require an additional + * table to map the entire function. */ - .align 6 + .align 4 __turn_mmu_on: msr sctlr_el1, x0 isb @@ -340,26 +510,18 @@ ENDPROC(__calc_phys_offset) * Preserves: tbl, flags * Corrupts: phys, start, end, pstate */ - .macro create_block_map, tbl, flags, phys, start, end, idmap=0 + .macro create_block_map, tbl, flags, phys, start, end lsr \phys, \phys, #BLOCK_SHIFT - .if \idmap - and \start, \phys, #PTRS_PER_PTE - 1 // table index - .else lsr \start, \start, #BLOCK_SHIFT and \start, \start, #PTRS_PER_PTE - 1 // table index - .endif orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry - .ifnc \start,\end lsr \end, \end, #BLOCK_SHIFT and \end, \end, #PTRS_PER_PTE - 1 // table end index - .endif 9999: str \phys, [\tbl, \start, lsl #3] // store the entry - .ifnc \start,\end add \start, \start, #1 // next entry add \phys, \phys, #BLOCK_SIZE // next block cmp \start, \end b.ls 9999b - .endif .endm /* @@ -368,10 +530,19 @@ ENDPROC(__calc_phys_offset) * - identity mapping to enable the MMU (low address, TTBR0) * - first few MB of the kernel linear mapping to jump to once the MMU has * been enabled, including the FDT blob (TTBR1) - * - UART mapping if CONFIG_EARLY_PRINTK is enabled (TTBR1) + * - pgd entry for fixed mappings (TTBR1) */ __create_page_tables: - pgtbl x25, x26, x24 // idmap_pg_dir and swapper_pg_dir addresses + pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses + mov x27, lr + + /* + * Invalidate the idmap and swapper page tables to avoid potential + * dirty cache lines being evicted. + */ + mov x0, x25 + add x1, x26, #SWAPPER_DIR_SIZE + bl __inval_cache_range /* * Clear the idmap and swapper page tables. @@ -391,9 +562,13 @@ __create_page_tables: * Create the identity mapping. */ add x0, x25, #PAGE_SIZE // section table address - adr x3, __turn_mmu_on // virtual/physical address + ldr x3, =KERNEL_START + add x3, x3, x28 // __pa(KERNEL_START) create_pgd_entry x25, x0, x3, x5, x6 - create_block_map x0, x7, x3, x5, x5, idmap=1 + ldr x6, =KERNEL_END + mov x5, x3 // __pa(KERNEL_START) + add x6, x6, x28 // __pa(KERNEL_END) + create_block_map x0, x7, x3, x5, x6 /* * Map the kernel image (starting with PHYS_OFFSET). @@ -401,7 +576,7 @@ __create_page_tables: add x0, x26, #PAGE_SIZE // section table address mov x5, #PAGE_OFFSET create_pgd_entry x26, x0, x5, x3, x6 - ldr x6, =KERNEL_END - 1 + ldr x6, =KERNEL_END mov x3, x24 // phys offset create_block_map x0, x7, x3, x5, x6 @@ -421,15 +596,16 @@ __create_page_tables: sub x6, x6, #1 // inclusive range create_block_map x0, x7, x3, x5, x6 1: -#ifdef CONFIG_EARLY_PRINTK /* - * Create the pgd entry for the UART mapping. The full mapping is done - * later based earlyprintk kernel parameter. + * Since the page tables have been populated with non-cacheable + * accesses (MMU disabled), invalidate the idmap and swapper page + * tables again to remove any speculatively loaded cache lines. */ - ldr x5, =EARLYCON_IOBASE // UART virtual address - add x0, x26, #2 * PAGE_SIZE // section table address - create_pgd_entry x26, x0, x5, x6, x7 -#endif + mov x0, x25 + add x1, x26, #SWAPPER_DIR_SIZE + bl __inval_cache_range + + mov lr, x27 ret ENDPROC(__create_page_tables) .ltorg @@ -438,10 +614,8 @@ ENDPROC(__create_page_tables) .type __switch_data, %object __switch_data: .quad __mmap_switched - .quad __data_loc // x4 - .quad _data // x5 .quad __bss_start // x6 - .quad _end // x7 + .quad __bss_stop // x7 .quad processor_id // x4 .quad __fdt_pointer // x5 .quad memstart_addr // x6 @@ -454,15 +628,7 @@ __switch_data: __mmap_switched: adr x3, __switch_data + 8 - ldp x4, x5, [x3], #16 ldp x6, x7, [x3], #16 - cmp x4, x5 // Copy data segment if needed -1: ccmp x5, x6, #4, ne - b.eq 2f - ldr x16, [x4], #8 - str x16, [x5], #8 - b 1b -2: 1: cmp x6, x7 b.hs 2f str xzr, [x6], #8 // Clear BSS |