diff options
Diffstat (limited to 'arch')
83 files changed, 7092 insertions, 897 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 89c4b5ccc68d..3a350d35901c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -296,6 +296,33 @@ config MMU Select if you want MMU-based virtualised addressing space support by paged memory management. If unsure, say 'Y'. +config ARCH_MMAP_RND_BITS_MIN + depends on MMU + int + default 8 + +config ARCH_MMAP_RND_BITS_MAX + depends on MMU + int + default 14 if MMU && PAGE_OFFSET=0x40000000 + default 15 if MMU && PAGE_OFFSET=0x80000000 + default 16 if MMU + default 8 + +config ARCH_MMAP_RND_BITS + depends on MMU + int "Number of bits to use for ASLR of mmap base address" if EXPERT + range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX + default ARCH_MMAP_RND_BITS_MIN + help + This value can be used to select the number of bits to use to + determine the random offset to the base address of vma regions + resulting from mmap allocations. This value will be bounded + by the architecture's minimum and maximum supported values. + + This value can be changed after boot using the + /proc/sys/kernel/mmap_rnd_bits tunable + # # The "ARM system type" choice list is ordered alphabetically by option # text. Please add new entries in the option alphabetic order. @@ -1790,6 +1817,15 @@ config XEN help Say Y if you want to run Linux in a Virtual Machine on Xen on ARM. +config ARM_FLUSH_CONSOLE_ON_RESTART + bool "Force flush the console on restart" + help + If the console is locked while the system is rebooted, the messages + in the temporary logbuffer would not have propogated to all the + console drivers. This option forces the console lock to be + released if it failed to be acquired, which will cause all the + pending messages to be flushed. + endmenu menu "Boot options" @@ -1820,6 +1856,21 @@ config DEPRECATED_PARAM_STRUCT This was deprecated in 2001 and announced to live on for 5 years. Some old boot loaders still use this way. +config BUILD_ARM_APPENDED_DTB_IMAGE + bool "Build a concatenated zImage/dtb by default" + depends on OF + help + Enabling this option will cause a concatenated zImage and list of + DTBs to be built by default (instead of a standalone zImage.) + The image will built in arch/arm/boot/zImage-dtb + +config BUILD_ARM_APPENDED_DTB_IMAGE_NAMES + string "Default dtb names" + depends on BUILD_ARM_APPENDED_DTB_IMAGE + help + Space separated list of names of dtbs to append when + building a concatenated zImage-dtb. + # Compressed boot loader in ROM. Yes, we really want to ask about # TEXT and BSS so we preserve their values in the config files. config ZBOOT_ROM_TEXT diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 21c031fe76d8..02a0527fd10d 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -1297,6 +1297,14 @@ config EARLY_PRINTK kernel low-level debugging functions. Add earlyprintk to your kernel parameters to enable this console. +config EARLY_PRINTK_DIRECT + bool "Early printk direct" + depends on DEBUG_LL + help + Say Y here if you want to have an early console using the + kernel low-level debugging functions and EARLY_PRINTK is + not early enough. + config OC_ETM bool "On-chip ETM and ETB" depends on ARM_AMBA && !CORESIGHT diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 93a30a285ad2..6898ac11993e 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -286,6 +286,8 @@ libs-y := arch/arm/lib/ $(libs-y) # Default target when executing plain make ifeq ($(CONFIG_XIP_KERNEL),y) KBUILD_IMAGE := xipImage +else ifeq ($(CONFIG_BUILD_ARM_APPENDED_DTB_IMAGE),y) +KBUILD_IMAGE := zImage-dtb else KBUILD_IMAGE := zImage endif @@ -327,6 +329,9 @@ dtbs: prepare scripts dtbs_install: $(Q)$(MAKE) $(dtbinst)=$(boot)/dts MACHINE=$(MACHINE) +zImage-dtb: vmlinux scripts dtbs + $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@ + # We use MRPROPER_FILES and CLEAN_FILES now archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/arm/boot/.gitignore b/arch/arm/boot/.gitignore index 3c79f85975aa..ad7a0253ea96 100644 --- a/arch/arm/boot/.gitignore +++ b/arch/arm/boot/.gitignore @@ -4,3 +4,4 @@ xipImage bootpImage uImage *.dtb +zImage-dtb
\ No newline at end of file diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile index ec2f8065f955..d09b00ad5fb4 100644 --- a/arch/arm/boot/Makefile +++ b/arch/arm/boot/Makefile @@ -14,6 +14,7 @@ ifneq ($(MACHINE),) include $(srctree)/$(MACHINE)/Makefile.boot endif +include $(srctree)/arch/arm/boot/dts/Makefile # Note: the following conditions must always be true: # ZRELADDR == virt_to_phys(PAGE_OFFSET + TEXT_OFFSET) @@ -27,6 +28,14 @@ export ZRELADDR INITRD_PHYS PARAMS_PHYS targets := Image zImage xipImage bootpImage uImage +DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM_APPENDED_DTB_IMAGE_NAMES)) +ifneq ($(DTB_NAMES),) +DTB_LIST := $(addsuffix .dtb,$(DTB_NAMES)) +else +DTB_LIST := $(dtb-y) +endif +DTB_OBJS := $(addprefix $(obj)/dts/,$(DTB_LIST)) + ifeq ($(CONFIG_XIP_KERNEL),y) $(obj)/xipImage: vmlinux FORCE @@ -55,6 +64,10 @@ $(obj)/zImage: $(obj)/compressed/vmlinux FORCE $(call if_changed,objcopy) @$(kecho) ' Kernel: $@ is ready' +$(obj)/zImage-dtb: $(obj)/zImage $(DTB_OBJS) FORCE + $(call if_changed,cat) + @echo ' Kernel: $@ is ready' + endif ifneq ($(LOADADDR),) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 68be9017593d..3f25fac7a84c 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -730,6 +730,8 @@ __armv7_mmu_cache_on: bic r6, r6, #1 << 31 @ 32-bit translation system bic r6, r6, #3 << 0 @ use only ttbr0 mcrne p15, 0, r3, c2, c0, 0 @ load page table pointer + mcrne p15, 0, r0, c8, c7, 0 @ flush I,D TLBs + mcr p15, 0, r0, c7, c5, 4 @ ISB mcrne p15, 0, r1, c3, c0, 0 @ load domain access control mcrne p15, 0, r6, c2, c0, 2 @ load ttb control #endif diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 6e784fac5798..7fdb71cbca5c 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -517,7 +517,14 @@ dtb-$(CONFIG_MACH_DOVE) += dove-cm-a510.dtb \ dove-dove-db.dtb dtb-$(CONFIG_ARCH_MEDIATEK) += mt6589-aquaris5.dtb +DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM_APPENDED_DTB_IMAGE_NAMES)) +ifneq ($(DTB_NAMES),) +DTB_LIST := $(addsuffix .dtb,$(DTB_NAMES)) +else +DTB_LIST := $(dtb-y) endif -always := $(dtb-y) +endif + +always := $(DTB_LIST) clean-files := *.dtb diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig index c3a4e9ceba34..f4a38a76db64 100644 --- a/arch/arm/common/Kconfig +++ b/arch/arm/common/Kconfig @@ -20,3 +20,7 @@ config SHARP_SCOOP config TI_PRIV_EDMA bool + +config FIQ_GLUE + bool + select FIQ diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 70b1eff477b3..5748afda0681 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -4,6 +4,7 @@ obj-y += firmware.o +obj-$(CONFIG_FIQ_GLUE) += fiq_glue.o fiq_glue_setup.o obj-$(CONFIG_ICST) += icst.o obj-$(CONFIG_SA1111) += sa1111.o obj-$(CONFIG_DMABOUNCE) += dmabounce.o diff --git a/arch/arm/common/fiq_glue.S b/arch/arm/common/fiq_glue.S new file mode 100644 index 000000000000..24b42cec4813 --- /dev/null +++ b/arch/arm/common/fiq_glue.S @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2008 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + + .global fiq_glue_end + + /* fiq stack: r0-r15,cpsr,spsr of interrupted mode */ + +ENTRY(fiq_glue) + /* store pc, cpsr from previous mode, reserve space for spsr */ + mrs r12, spsr + sub lr, lr, #4 + subs r10, #1 + bne nested_fiq + + str r12, [sp, #-8]! + str lr, [sp, #-4]! + + /* store r8-r14 from previous mode */ + sub sp, sp, #(7 * 4) + stmia sp, {r8-r14}^ + nop + + /* store r0-r7 from previous mode */ + stmfd sp!, {r0-r7} + + /* setup func(data,regs) arguments */ + mov r0, r9 + mov r1, sp + mov r3, r8 + + mov r7, sp + + /* Get sp and lr from non-user modes */ + and r4, r12, #MODE_MASK + cmp r4, #USR_MODE + beq fiq_from_usr_mode + + mov r7, sp + orr r4, r4, #(PSR_I_BIT | PSR_F_BIT) + msr cpsr_c, r4 + str sp, [r7, #(4 * 13)] + str lr, [r7, #(4 * 14)] + mrs r5, spsr + str r5, [r7, #(4 * 17)] + + cmp r4, #(SVC_MODE | PSR_I_BIT | PSR_F_BIT) + /* use fiq stack if we reenter this mode */ + subne sp, r7, #(4 * 3) + +fiq_from_usr_mode: + msr cpsr_c, #(SVC_MODE | PSR_I_BIT | PSR_F_BIT) + mov r2, sp + sub sp, r7, #12 + stmfd sp!, {r2, ip, lr} + /* call func(data,regs) */ + blx r3 + ldmfd sp, {r2, ip, lr} + mov sp, r2 + + /* restore/discard saved state */ + cmp r4, #USR_MODE + beq fiq_from_usr_mode_exit + + msr cpsr_c, r4 + ldr sp, [r7, #(4 * 13)] + ldr lr, [r7, #(4 * 14)] + msr spsr_cxsf, r5 + +fiq_from_usr_mode_exit: + msr cpsr_c, #(FIQ_MODE | PSR_I_BIT | PSR_F_BIT) + + ldmfd sp!, {r0-r7} + ldr lr, [sp, #(4 * 7)] + ldr r12, [sp, #(4 * 8)] + add sp, sp, #(10 * 4) +exit_fiq: + msr spsr_cxsf, r12 + add r10, #1 + cmp r11, #0 + moveqs pc, lr + bx r11 /* jump to custom fiq return function */ + +nested_fiq: + orr r12, r12, #(PSR_F_BIT) + b exit_fiq + +fiq_glue_end: + +ENTRY(fiq_glue_setup) /* func, data, sp, smc call number */ + stmfd sp!, {r4} + mrs r4, cpsr + msr cpsr_c, #(FIQ_MODE | PSR_I_BIT | PSR_F_BIT) + movs r8, r0 + mov r9, r1 + mov sp, r2 + mov r11, r3 + moveq r10, #0 + movne r10, #1 + msr cpsr_c, r4 + ldmfd sp!, {r4} + bx lr + diff --git a/arch/arm/common/fiq_glue_setup.c b/arch/arm/common/fiq_glue_setup.c new file mode 100644 index 000000000000..8cb1b611c6d5 --- /dev/null +++ b/arch/arm/common/fiq_glue_setup.c @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2010 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/kernel.h> +#include <linux/percpu.h> +#include <linux/slab.h> +#include <asm/fiq.h> +#include <asm/fiq_glue.h> + +extern unsigned char fiq_glue, fiq_glue_end; +extern void fiq_glue_setup(void *func, void *data, void *sp, + fiq_return_handler_t fiq_return_handler); + +static struct fiq_handler fiq_debbuger_fiq_handler = { + .name = "fiq_glue", +}; +DEFINE_PER_CPU(void *, fiq_stack); +static struct fiq_glue_handler *current_handler; +static fiq_return_handler_t fiq_return_handler; +static DEFINE_MUTEX(fiq_glue_lock); + +static void fiq_glue_setup_helper(void *info) +{ + struct fiq_glue_handler *handler = info; + fiq_glue_setup(handler->fiq, handler, + __get_cpu_var(fiq_stack) + THREAD_START_SP, + fiq_return_handler); +} + +int fiq_glue_register_handler(struct fiq_glue_handler *handler) +{ + int ret; + int cpu; + + if (!handler || !handler->fiq) + return -EINVAL; + + mutex_lock(&fiq_glue_lock); + if (fiq_stack) { + ret = -EBUSY; + goto err_busy; + } + + for_each_possible_cpu(cpu) { + void *stack; + stack = (void *)__get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); + if (WARN_ON(!stack)) { + ret = -ENOMEM; + goto err_alloc_fiq_stack; + } + per_cpu(fiq_stack, cpu) = stack; + } + + ret = claim_fiq(&fiq_debbuger_fiq_handler); + if (WARN_ON(ret)) + goto err_claim_fiq; + + current_handler = handler; + on_each_cpu(fiq_glue_setup_helper, handler, true); + set_fiq_handler(&fiq_glue, &fiq_glue_end - &fiq_glue); + + mutex_unlock(&fiq_glue_lock); + return 0; + +err_claim_fiq: +err_alloc_fiq_stack: + for_each_possible_cpu(cpu) { + __free_pages(per_cpu(fiq_stack, cpu), THREAD_SIZE_ORDER); + per_cpu(fiq_stack, cpu) = NULL; + } +err_busy: + mutex_unlock(&fiq_glue_lock); + return ret; +} + +static void fiq_glue_update_return_handler(void (*fiq_return)(void)) +{ + fiq_return_handler = fiq_return; + if (current_handler) + on_each_cpu(fiq_glue_setup_helper, current_handler, true); +} + +int fiq_glue_set_return_handler(void (*fiq_return)(void)) +{ + int ret; + + mutex_lock(&fiq_glue_lock); + if (fiq_return_handler) { + ret = -EBUSY; + goto err_busy; + } + fiq_glue_update_return_handler(fiq_return); + ret = 0; +err_busy: + mutex_unlock(&fiq_glue_lock); + + return ret; +} +EXPORT_SYMBOL(fiq_glue_set_return_handler); + +int fiq_glue_clear_return_handler(void (*fiq_return)(void)) +{ + int ret; + + mutex_lock(&fiq_glue_lock); + if (WARN_ON(fiq_return_handler != fiq_return)) { + ret = -EINVAL; + goto err_inval; + } + fiq_glue_update_return_handler(NULL); + ret = 0; +err_inval: + mutex_unlock(&fiq_glue_lock); + + return ret; +} +EXPORT_SYMBOL(fiq_glue_clear_return_handler); + +/** + * fiq_glue_resume - Restore fiqs after suspend or low power idle states + * + * This must be called before calling local_fiq_enable after returning from a + * power state where the fiq mode registers were lost. If a driver provided + * a resume hook when it registered the handler it will be called. + */ + +void fiq_glue_resume(void) +{ + if (!current_handler) + return; + fiq_glue_setup(current_handler->fiq, current_handler, + __get_cpu_var(fiq_stack) + THREAD_START_SP, + fiq_return_handler); + if (current_handler->resume) + current_handler->resume(current_handler); +} + diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index b48fa341648d..2cee53b27238 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -6,12 +6,15 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o +obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o aes-arm-y := aes-armv4.o aes_glue.o aes-arm-bs-y := aesbs-core.o aesbs-glue.o sha1-arm-y := sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o +sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o +sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y) sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o quiet_cmd_perl = PERL $@ @@ -20,4 +23,7 @@ quiet_cmd_perl = PERL $@ $(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl $(call cmd,perl) -.PRECIOUS: $(obj)/aesbs-core.S +$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl + $(call cmd,perl) + +.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl new file mode 100644 index 000000000000..fac0533ea633 --- /dev/null +++ b/arch/arm/crypto/sha256-armv4.pl @@ -0,0 +1,716 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# +# Permission to use under GPL terms is granted. +# ==================================================================== + +# SHA256 block procedure for ARMv4. May 2007. + +# Performance is ~2x better than gcc 3.4 generated code and in "abso- +# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per +# byte [on single-issue Xscale PXA250 core]. + +# July 2010. +# +# Rescheduling for dual-issue pipeline resulted in 22% improvement on +# Cortex A8 core and ~20 cycles per processed byte. + +# February 2011. +# +# Profiler-assisted and platform-specific optimization resulted in 16% +# improvement on Cortex A8 core and ~15.4 cycles per processed byte. + +# September 2013. +# +# Add NEON implementation. On Cortex A8 it was measured to process one +# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon +# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only +# code (meaning that latter performs sub-optimally, nothing was done +# about it). + +# May 2014. +# +# Add ARMv8 code path performing at 2.0 cpb on Apple A7. + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +$ctx="r0"; $t0="r0"; +$inp="r1"; $t4="r1"; +$len="r2"; $t1="r2"; +$T1="r3"; $t3="r3"; +$A="r4"; +$B="r5"; +$C="r6"; +$D="r7"; +$E="r8"; +$F="r9"; +$G="r10"; +$H="r11"; +@V=($A,$B,$C,$D,$E,$F,$G,$H); +$t2="r12"; +$Ktbl="r14"; + +@Sigma0=( 2,13,22); +@Sigma1=( 6,11,25); +@sigma0=( 7,18, 3); +@sigma1=(17,19,10); + +sub BODY_00_15 { +my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; + +$code.=<<___ if ($i<16); +#if __ARM_ARCH__>=7 + @ ldr $t1,[$inp],#4 @ $i +# if $i==15 + str $inp,[sp,#17*4] @ make room for $t4 +# endif + eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` + add $a,$a,$t2 @ h+=Maj(a,b,c) from the past + eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) +# ifndef __ARMEB__ + rev $t1,$t1 +# endif +#else + @ ldrb $t1,[$inp,#3] @ $i + add $a,$a,$t2 @ h+=Maj(a,b,c) from the past + ldrb $t2,[$inp,#2] + ldrb $t0,[$inp,#1] + orr $t1,$t1,$t2,lsl#8 + ldrb $t2,[$inp],#4 + orr $t1,$t1,$t0,lsl#16 +# if $i==15 + str $inp,[sp,#17*4] @ make room for $t4 +# endif + eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` + orr $t1,$t1,$t2,lsl#24 + eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) +#endif +___ +$code.=<<___; + ldr $t2,[$Ktbl],#4 @ *K256++ + add $h,$h,$t1 @ h+=X[i] + str $t1,[sp,#`$i%16`*4] + eor $t1,$f,$g + add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e) + and $t1,$t1,$e + add $h,$h,$t2 @ h+=K256[i] + eor $t1,$t1,$g @ Ch(e,f,g) + eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]` + add $h,$h,$t1 @ h+=Ch(e,f,g) +#if $i==31 + and $t2,$t2,#0xff + cmp $t2,#0xf2 @ done? +#endif +#if $i<15 +# if __ARM_ARCH__>=7 + ldr $t1,[$inp],#4 @ prefetch +# else + ldrb $t1,[$inp,#3] +# endif + eor $t2,$a,$b @ a^b, b^c in next round +#else + ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx + eor $t2,$a,$b @ a^b, b^c in next round + ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx +#endif + eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a) + and $t3,$t3,$t2 @ (b^c)&=(a^b) + add $d,$d,$h @ d+=h + eor $t3,$t3,$b @ Maj(a,b,c) + add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a) + @ add $h,$h,$t3 @ h+=Maj(a,b,c) +___ + ($t2,$t3)=($t3,$t2); +} + +sub BODY_16_XX { +my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; + +$code.=<<___; + @ ldr $t1,[sp,#`($i+1)%16`*4] @ $i + @ ldr $t4,[sp,#`($i+14)%16`*4] + mov $t0,$t1,ror#$sigma0[0] + add $a,$a,$t2 @ h+=Maj(a,b,c) from the past + mov $t2,$t4,ror#$sigma1[0] + eor $t0,$t0,$t1,ror#$sigma0[1] + eor $t2,$t2,$t4,ror#$sigma1[1] + eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1]) + ldr $t1,[sp,#`($i+0)%16`*4] + eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14]) + ldr $t4,[sp,#`($i+9)%16`*4] + + add $t2,$t2,$t0 + eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15 + add $t1,$t1,$t2 + eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e) + add $t1,$t1,$t4 @ X[i] +___ + &BODY_00_15(@_); +} + +$code=<<___; +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 +#endif + +.text +#if __ARM_ARCH__<7 +.code 32 +#else +.syntax unified +# ifdef __thumb2__ +# define adrl adr +.thumb +# else +.code 32 +# endif +#endif + +.type K256,%object +.align 5 +K256: +.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.size K256,.-K256 +.word 0 @ terminator +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha256_block_data_order +#endif +.align 5 + +.global sha256_block_data_order +.type sha256_block_data_order,%function +sha256_block_data_order: +#if __ARM_ARCH__<7 + sub r3,pc,#8 @ sha256_block_data_order +#else + adr r3,sha256_block_data_order +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#ARMV8_SHA256 + bne .LARMv8 + tst r12,#ARMV7_NEON + bne .LNEON +#endif + add $len,$inp,$len,lsl#6 @ len to point at the end of inp + stmdb sp!,{$ctx,$inp,$len,r4-r11,lr} + ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H} + sub $Ktbl,r3,#256+32 @ K256 + sub sp,sp,#16*4 @ alloca(X[16]) +.Loop: +# if __ARM_ARCH__>=7 + ldr $t1,[$inp],#4 +# else + ldrb $t1,[$inp,#3] +# endif + eor $t3,$B,$C @ magic + eor $t2,$t2,$t2 +___ +for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); } +$code.=".Lrounds_16_xx:\n"; +for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; +#if __ARM_ARCH__>=7 + ite eq @ Thumb2 thing, sanity check in ARM +#endif + ldreq $t3,[sp,#16*4] @ pull ctx + bne .Lrounds_16_xx + + add $A,$A,$t2 @ h+=Maj(a,b,c) from the past + ldr $t0,[$t3,#0] + ldr $t1,[$t3,#4] + ldr $t2,[$t3,#8] + add $A,$A,$t0 + ldr $t0,[$t3,#12] + add $B,$B,$t1 + ldr $t1,[$t3,#16] + add $C,$C,$t2 + ldr $t2,[$t3,#20] + add $D,$D,$t0 + ldr $t0,[$t3,#24] + add $E,$E,$t1 + ldr $t1,[$t3,#28] + add $F,$F,$t2 + ldr $inp,[sp,#17*4] @ pull inp + ldr $t2,[sp,#18*4] @ pull inp+len + add $G,$G,$t0 + add $H,$H,$t1 + stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H} + cmp $inp,$t2 + sub $Ktbl,$Ktbl,#256 @ rewind Ktbl + bne .Loop + + add sp,sp,#`16+3`*4 @ destroy frame +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + bx lr @ interoperable with Thumb ISA:-) +#endif +.size sha256_block_data_order,.-sha256_block_data_order +___ +###################################################################### +# NEON stuff +# +{{{ +my @X=map("q$_",(0..3)); +my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25"); +my $Xfer=$t4; +my $j=0; + +sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } +sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } + +sub AUTOLOAD() # thunk [simplified] x86-style perlasm +{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./; + my $arg = pop; + $arg = "#$arg" if ($arg*1 eq $arg); + $code .= "\t$opcode\t".join(',',@_,$arg)."\n"; +} + +sub Xupdate() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); + my ($a,$b,$c,$d,$e,$f,$g,$h); + + &vext_8 ($T0,@X[0],@X[1],4); # X[1..4] + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vext_8 ($T1,@X[2],@X[3],4); # X[9..12] + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T2,$T0,$sigma0[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12] + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T1,$T0,$sigma0[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T2,$T0,32-$sigma0[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T3,$T0,$sigma0[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T1,$T1,$T2); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T3,$T0,32-$sigma0[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T1,$T1,$T3); # sigma0(X[1..4]) + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4]) + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); # sigma1(X[14..15]) + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15]) + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); + eval(shift(@insns)); + eval(shift(@insns)); + &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &vld1_32 ("{$T0}","[$Ktbl,:128]!"); + eval(shift(@insns)); + eval(shift(@insns)); + &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &veor ($T5,$T5,$T4); # sigma1(X[16..17]) + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17]) + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 ($T0,$T0,@X[0]); + while($#insns>=2) { eval(shift(@insns)); } + &vst1_32 ("{$T0}","[$Xfer,:128]!"); + eval(shift(@insns)); + eval(shift(@insns)); + + push(@X,shift(@X)); # "rotate" X[] +} + +sub Xpreload() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); + my ($a,$b,$c,$d,$e,$f,$g,$h); + + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vld1_32 ("{$T0}","[$Ktbl,:128]!"); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vrev32_8 (@X[0],@X[0]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &vadd_i32 ($T0,$T0,@X[0]); + foreach (@insns) { eval; } # remaining instructions + &vst1_32 ("{$T0}","[$Xfer,:128]!"); + + push(@X,shift(@X)); # "rotate" X[] +} + +sub body_00_15 () { + ( + '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'. + '&add ($h,$h,$t1)', # h+=X[i]+K[i] + '&eor ($t1,$f,$g)', + '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))', + '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past + '&and ($t1,$t1,$e)', + '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e) + '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))', + '&eor ($t1,$t1,$g)', # Ch(e,f,g) + '&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e) + '&eor ($t2,$a,$b)', # a^b, b^c in next round + '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a) + '&add ($h,$h,$t1)', # h+=Ch(e,f,g) + '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'. + '&ldr ($t1,"[$Ktbl]") if ($j==15);'. + '&ldr ($t1,"[sp,#64]") if ($j==31)', + '&and ($t3,$t3,$t2)', # (b^c)&=(a^b) + '&add ($d,$d,$h)', # d+=h + '&add ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a) + '&eor ($t3,$t3,$b)', # Maj(a,b,c) + '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);' + ) +} + +$code.=<<___; +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.global sha256_block_data_order_neon +.type sha256_block_data_order_neon,%function +.align 4 +sha256_block_data_order_neon: +.LNEON: + stmdb sp!,{r4-r12,lr} + + sub $H,sp,#16*4+16 + adrl $Ktbl,K256 + bic $H,$H,#15 @ align for 128-bit stores + mov $t2,sp + mov sp,$H @ alloca + add $len,$inp,$len,lsl#6 @ len to point at the end of inp + + vld1.8 {@X[0]},[$inp]! + vld1.8 {@X[1]},[$inp]! + vld1.8 {@X[2]},[$inp]! + vld1.8 {@X[3]},[$inp]! + vld1.32 {$T0},[$Ktbl,:128]! + vld1.32 {$T1},[$Ktbl,:128]! + vld1.32 {$T2},[$Ktbl,:128]! + vld1.32 {$T3},[$Ktbl,:128]! + vrev32.8 @X[0],@X[0] @ yes, even on + str $ctx,[sp,#64] + vrev32.8 @X[1],@X[1] @ big-endian + str $inp,[sp,#68] + mov $Xfer,sp + vrev32.8 @X[2],@X[2] + str $len,[sp,#72] + vrev32.8 @X[3],@X[3] + str $t2,[sp,#76] @ save original sp + vadd.i32 $T0,$T0,@X[0] + vadd.i32 $T1,$T1,@X[1] + vst1.32 {$T0},[$Xfer,:128]! + vadd.i32 $T2,$T2,@X[2] + vst1.32 {$T1},[$Xfer,:128]! + vadd.i32 $T3,$T3,@X[3] + vst1.32 {$T2},[$Xfer,:128]! + vst1.32 {$T3},[$Xfer,:128]! + + ldmia $ctx,{$A-$H} + sub $Xfer,$Xfer,#64 + ldr $t1,[sp,#0] + eor $t2,$t2,$t2 + eor $t3,$B,$C + b .L_00_48 + +.align 4 +.L_00_48: +___ + &Xupdate(\&body_00_15); + &Xupdate(\&body_00_15); + &Xupdate(\&body_00_15); + &Xupdate(\&body_00_15); +$code.=<<___; + teq $t1,#0 @ check for K256 terminator + ldr $t1,[sp,#0] + sub $Xfer,$Xfer,#64 + bne .L_00_48 + + ldr $inp,[sp,#68] + ldr $t0,[sp,#72] + sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl + teq $inp,$t0 + it eq + subeq $inp,$inp,#64 @ avoid SEGV + vld1.8 {@X[0]},[$inp]! @ load next input block + vld1.8 {@X[1]},[$inp]! + vld1.8 {@X[2]},[$inp]! + vld1.8 {@X[3]},[$inp]! + it ne + strne $inp,[sp,#68] + mov $Xfer,sp +___ + &Xpreload(\&body_00_15); + &Xpreload(\&body_00_15); + &Xpreload(\&body_00_15); + &Xpreload(\&body_00_15); +$code.=<<___; + ldr $t0,[$t1,#0] + add $A,$A,$t2 @ h+=Maj(a,b,c) from the past + ldr $t2,[$t1,#4] + ldr $t3,[$t1,#8] + ldr $t4,[$t1,#12] + add $A,$A,$t0 @ accumulate + ldr $t0,[$t1,#16] + add $B,$B,$t2 + ldr $t2,[$t1,#20] + add $C,$C,$t3 + ldr $t3,[$t1,#24] + add $D,$D,$t4 + ldr $t4,[$t1,#28] + add $E,$E,$t0 + str $A,[$t1],#4 + add $F,$F,$t2 + str $B,[$t1],#4 + add $G,$G,$t3 + str $C,[$t1],#4 + add $H,$H,$t4 + str $D,[$t1],#4 + stmia $t1,{$E-$H} + + ittte ne + movne $Xfer,sp + ldrne $t1,[sp,#0] + eorne $t2,$t2,$t2 + ldreq sp,[sp,#76] @ restore original sp + itt ne + eorne $t3,$B,$C + bne .L_00_48 + + ldmia sp!,{r4-r12,pc} +.size sha256_block_data_order_neon,.-sha256_block_data_order_neon +#endif +___ +}}} +###################################################################### +# ARMv8 stuff +# +{{{ +my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2)); +my @MSG=map("q$_",(8..11)); +my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15)); +my $Ktbl="r3"; + +$code.=<<___; +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + +# ifdef __thumb2__ +# define INST(a,b,c,d) .byte c,d|0xc,a,b +# else +# define INST(a,b,c,d) .byte a,b,c,d +# endif + +.type sha256_block_data_order_armv8,%function +.align 5 +sha256_block_data_order_armv8: +.LARMv8: + vld1.32 {$ABCD,$EFGH},[$ctx] +# ifdef __thumb2__ + adr $Ktbl,.LARMv8 + sub $Ktbl,$Ktbl,#.LARMv8-K256 +# else + adrl $Ktbl,K256 +# endif + add $len,$inp,$len,lsl#6 @ len to point at the end of inp + +.Loop_v8: + vld1.8 {@MSG[0]-@MSG[1]},[$inp]! + vld1.8 {@MSG[2]-@MSG[3]},[$inp]! + vld1.32 {$W0},[$Ktbl]! + vrev32.8 @MSG[0],@MSG[0] + vrev32.8 @MSG[1],@MSG[1] + vrev32.8 @MSG[2],@MSG[2] + vrev32.8 @MSG[3],@MSG[3] + vmov $ABCD_SAVE,$ABCD @ offload + vmov $EFGH_SAVE,$EFGH + teq $inp,$len +___ +for($i=0;$i<12;$i++) { +$code.=<<___; + vld1.32 {$W1},[$Ktbl]! + vadd.i32 $W0,$W0,@MSG[0] + sha256su0 @MSG[0],@MSG[1] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + sha256su1 @MSG[0],@MSG[2],@MSG[3] +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); +} +$code.=<<___; + vld1.32 {$W1},[$Ktbl]! + vadd.i32 $W0,$W0,@MSG[0] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + + vld1.32 {$W0},[$Ktbl]! + vadd.i32 $W1,$W1,@MSG[1] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W1 + sha256h2 $EFGH,$abcd,$W1 + + vld1.32 {$W1},[$Ktbl] + vadd.i32 $W0,$W0,@MSG[2] + sub $Ktbl,$Ktbl,#256-16 @ rewind + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + + vadd.i32 $W1,$W1,@MSG[3] + vmov $abcd,$ABCD + sha256h $ABCD,$EFGH,$W1 + sha256h2 $EFGH,$abcd,$W1 + + vadd.i32 $ABCD,$ABCD,$ABCD_SAVE + vadd.i32 $EFGH,$EFGH,$EFGH_SAVE + it ne + bne .Loop_v8 + + vst1.32 {$ABCD,$EFGH},[$ctx] + + ret @ bx lr +.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 +#endif +___ +}}} +$code.=<<___; +.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>" +.align 2 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.comm OPENSSL_armcap_P,4,4 +#endif +___ + +open SELF,$0; +while(<SELF>) { + next if (/^#!/); + last if (!s/^#/@/ and !/^$/); + print; +} +close SELF; + +{ my %opcode = ( + "sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40, + "sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 ); + + sub unsha256 { + my ($mnemonic,$arg)=@_; + + if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) { + my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19) + |(($2&7)<<17)|(($2&8)<<4) + |(($3&7)<<1) |(($3&8)<<2); + # since ARMv7 instructions are always encoded little-endian. + # correct solution is to use .inst directive, but older + # assemblers don't implement it:-( + sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s", + $word&0xff,($word>>8)&0xff, + ($word>>16)&0xff,($word>>24)&0xff, + $mnemonic,$arg; + } + } +} + +foreach (split($/,$code)) { + + s/\`([^\`]*)\`/eval $1/geo; + + s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo; + + s/\bret\b/bx lr/go or + s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4 + + print $_,"\n"; +} + +close STDOUT; # enforce flush diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped new file mode 100644 index 000000000000..555a1a8eec90 --- /dev/null +++ b/arch/arm/crypto/sha256-core.S_shipped @@ -0,0 +1,2808 @@ + +@ ==================================================================== +@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ +@ Permission to use under GPL terms is granted. +@ ==================================================================== + +@ SHA256 block procedure for ARMv4. May 2007. + +@ Performance is ~2x better than gcc 3.4 generated code and in "abso- +@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per +@ byte [on single-issue Xscale PXA250 core]. + +@ July 2010. +@ +@ Rescheduling for dual-issue pipeline resulted in 22% improvement on +@ Cortex A8 core and ~20 cycles per processed byte. + +@ February 2011. +@ +@ Profiler-assisted and platform-specific optimization resulted in 16% +@ improvement on Cortex A8 core and ~15.4 cycles per processed byte. + +@ September 2013. +@ +@ Add NEON implementation. On Cortex A8 it was measured to process one +@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon +@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only +@ code (meaning that latter performs sub-optimally, nothing was done +@ about it). + +@ May 2014. +@ +@ Add ARMv8 code path performing at 2.0 cpb on Apple A7. + +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 +#endif + +.text +#if __ARM_ARCH__<7 +.code 32 +#else +.syntax unified +# ifdef __thumb2__ +# define adrl adr +.thumb +# else +.code 32 +# endif +#endif + +.type K256,%object +.align 5 +K256: +.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.size K256,.-K256 +.word 0 @ terminator +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha256_block_data_order +#endif +.align 5 + +.global sha256_block_data_order +.type sha256_block_data_order,%function +sha256_block_data_order: +#if __ARM_ARCH__<7 + sub r3,pc,#8 @ sha256_block_data_order +#else + adr r3,sha256_block_data_order +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#ARMV8_SHA256 + bne .LARMv8 + tst r12,#ARMV7_NEON + bne .LNEON +#endif + add r2,r1,r2,lsl#6 @ len to point at the end of inp + stmdb sp!,{r0,r1,r2,r4-r11,lr} + ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} + sub r14,r3,#256+32 @ K256 + sub sp,sp,#16*4 @ alloca(X[16]) +.Loop: +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ magic + eor r12,r12,r12 +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 0 +# if 0==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r8,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 0 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 0==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r8,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#0*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 0==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 0<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#2*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#15*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 1 +# if 1==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r7,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 1 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 1==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r7,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#1*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 1==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 1<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#3*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#0*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 2 +# if 2==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r6,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 2 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 2==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r6,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#2*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 2==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 2<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#4*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#1*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 3 +# if 3==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r5,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 3 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 3==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r5,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#3*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 3==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 3<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#5*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#2*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 4 +# if 4==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r4,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 4 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 4==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r4,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#4*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 4==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 4<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#6*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#3*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 5 +# if 5==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r11,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 5==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r11,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#5*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 5==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 5<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#7*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#4*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 6 +# if 6==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r10,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 6 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 6==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r10,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#6*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 6==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 6<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#8*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#5*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 7 +# if 7==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r9,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 7==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r9,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#7*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 7==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 7<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#9*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#6*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 8 +# if 8==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r8,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 8 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 8==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r8,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#8*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 8==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 8<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#10*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#7*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 9 +# if 9==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r7,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 9 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 9==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r7,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#9*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 9==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 9<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#11*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#8*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 10 +# if 10==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r6,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 10 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 10==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r6,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#10*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 10==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 10<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#12*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#9*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 11 +# if 11==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r5,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 11 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 11==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r5,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#11*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 11==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 11<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#13*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#10*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 12 +# if 12==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r4,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 12 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 12==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r4,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#12*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 12==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 12<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#14*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#11*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 13 +# if 13==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r11,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 13 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 13==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r11,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#13*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 13==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 13<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#15*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#12*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 14 +# if 14==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r10,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 14 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 14==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r10,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#14*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 14==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 14<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#0*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#13*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 15 +# if 15==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r9,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 15 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 15==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r9,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#15*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 15==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 15<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#1*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#14*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +.Lrounds_16_xx: + @ ldr r2,[sp,#1*4] @ 16 + @ ldr r1,[sp,#14*4] + mov r0,r2,ror#7 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#0*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#9*4] + + add r12,r12,r0 + eor r0,r8,r8,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r8,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#0*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 16==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 16<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#2*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#15*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#2*4] @ 17 + @ ldr r1,[sp,#15*4] + mov r0,r2,ror#7 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#1*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#10*4] + + add r3,r3,r0 + eor r0,r7,r7,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r7,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#1*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 17==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 17<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#3*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#0*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#3*4] @ 18 + @ ldr r1,[sp,#0*4] + mov r0,r2,ror#7 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#2*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#11*4] + + add r12,r12,r0 + eor r0,r6,r6,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r6,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#2*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 18==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 18<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#4*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#1*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#4*4] @ 19 + @ ldr r1,[sp,#1*4] + mov r0,r2,ror#7 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#3*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#12*4] + + add r3,r3,r0 + eor r0,r5,r5,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r5,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#3*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 19==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 19<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#5*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#2*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#5*4] @ 20 + @ ldr r1,[sp,#2*4] + mov r0,r2,ror#7 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#4*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#13*4] + + add r12,r12,r0 + eor r0,r4,r4,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r4,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#4*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 20==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 20<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#6*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#3*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#6*4] @ 21 + @ ldr r1,[sp,#3*4] + mov r0,r2,ror#7 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#5*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#14*4] + + add r3,r3,r0 + eor r0,r11,r11,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r11,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#5*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 21==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 21<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#7*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#4*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#7*4] @ 22 + @ ldr r1,[sp,#4*4] + mov r0,r2,ror#7 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#6*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#15*4] + + add r12,r12,r0 + eor r0,r10,r10,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r10,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#6*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 22==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 22<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#8*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#5*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#8*4] @ 23 + @ ldr r1,[sp,#5*4] + mov r0,r2,ror#7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#7*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#0*4] + + add r3,r3,r0 + eor r0,r9,r9,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r9,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#7*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 23==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 23<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#9*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#6*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#9*4] @ 24 + @ ldr r1,[sp,#6*4] + mov r0,r2,ror#7 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#8*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#1*4] + + add r12,r12,r0 + eor r0,r8,r8,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r8,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#8*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 24==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 24<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#10*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#7*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#10*4] @ 25 + @ ldr r1,[sp,#7*4] + mov r0,r2,ror#7 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#9*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#2*4] + + add r3,r3,r0 + eor r0,r7,r7,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r7,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#9*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 25==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 25<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#11*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#8*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#11*4] @ 26 + @ ldr r1,[sp,#8*4] + mov r0,r2,ror#7 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#10*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#3*4] + + add r12,r12,r0 + eor r0,r6,r6,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r6,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#10*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 26==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 26<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#12*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#9*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#12*4] @ 27 + @ ldr r1,[sp,#9*4] + mov r0,r2,ror#7 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#11*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#4*4] + + add r3,r3,r0 + eor r0,r5,r5,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r5,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#11*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 27==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 27<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#13*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#10*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#13*4] @ 28 + @ ldr r1,[sp,#10*4] + mov r0,r2,ror#7 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#12*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#5*4] + + add r12,r12,r0 + eor r0,r4,r4,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r4,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#12*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 28==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 28<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#14*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#11*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#14*4] @ 29 + @ ldr r1,[sp,#11*4] + mov r0,r2,ror#7 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#13*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#6*4] + + add r3,r3,r0 + eor r0,r11,r11,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r11,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#13*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 29==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 29<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#15*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#12*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#15*4] @ 30 + @ ldr r1,[sp,#12*4] + mov r0,r2,ror#7 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#14*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#7*4] + + add r12,r12,r0 + eor r0,r10,r10,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r10,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#14*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 30==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 30<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#0*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#13*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#0*4] @ 31 + @ ldr r1,[sp,#13*4] + mov r0,r2,ror#7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#15*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#8*4] + + add r3,r3,r0 + eor r0,r9,r9,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r9,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#15*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 31==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 31<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#1*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#14*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + ite eq @ Thumb2 thing, sanity check in ARM +#endif + ldreq r3,[sp,#16*4] @ pull ctx + bne .Lrounds_16_xx + + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldr r0,[r3,#0] + ldr r2,[r3,#4] + ldr r12,[r3,#8] + add r4,r4,r0 + ldr r0,[r3,#12] + add r5,r5,r2 + ldr r2,[r3,#16] + add r6,r6,r12 + ldr r12,[r3,#20] + add r7,r7,r0 + ldr r0,[r3,#24] + add r8,r8,r2 + ldr r2,[r3,#28] + add r9,r9,r12 + ldr r1,[sp,#17*4] @ pull inp + ldr r12,[sp,#18*4] @ pull inp+len + add r10,r10,r0 + add r11,r11,r2 + stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11} + cmp r1,r12 + sub r14,r14,#256 @ rewind Ktbl + bne .Loop + + add sp,sp,#19*4 @ destroy frame +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size sha256_block_data_order,.-sha256_block_data_order +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.global sha256_block_data_order_neon +.type sha256_block_data_order_neon,%function +.align 4 +sha256_block_data_order_neon: +.LNEON: + stmdb sp!,{r4-r12,lr} + + sub r11,sp,#16*4+16 + adrl r14,K256 + bic r11,r11,#15 @ align for 128-bit stores + mov r12,sp + mov sp,r11 @ alloca + add r2,r1,r2,lsl#6 @ len to point at the end of inp + + vld1.8 {q0},[r1]! + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + vld1.32 {q8},[r14,:128]! + vld1.32 {q9},[r14,:128]! + vld1.32 {q10},[r14,:128]! + vld1.32 {q11},[r14,:128]! + vrev32.8 q0,q0 @ yes, even on + str r0,[sp,#64] + vrev32.8 q1,q1 @ big-endian + str r1,[sp,#68] + mov r1,sp + vrev32.8 q2,q2 + str r2,[sp,#72] + vrev32.8 q3,q3 + str r12,[sp,#76] @ save original sp + vadd.i32 q8,q8,q0 + vadd.i32 q9,q9,q1 + vst1.32 {q8},[r1,:128]! + vadd.i32 q10,q10,q2 + vst1.32 {q9},[r1,:128]! + vadd.i32 q11,q11,q3 + vst1.32 {q10},[r1,:128]! + vst1.32 {q11},[r1,:128]! + + ldmia r0,{r4-r11} + sub r1,r1,#64 + ldr r2,[sp,#0] + eor r12,r12,r12 + eor r3,r5,r6 + b .L_00_48 + +.align 4 +.L_00_48: + vext.8 q8,q0,q1,#4 + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + vext.8 q9,q2,q3,#4 + add r4,r4,r12 + and r2,r2,r8 + eor r12,r0,r8,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vadd.i32 q0,q0,q9 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + vshr.u32 q9,q8,#3 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#4] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + veor q9,q9,q10 + add r10,r10,r2 + vsli.32 q11,q8,#14 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + vshr.u32 d24,d7,#17 + add r11,r11,r3 + and r2,r2,r7 + veor q9,q9,q11 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + vsli.32 d24,d7,#15 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + vshr.u32 d25,d7,#10 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + vadd.i32 q0,q0,q9 + add r10,r10,r2 + ldr r2,[sp,#8] + veor d25,d25,d24 + and r12,r12,r3 + add r6,r6,r10 + vshr.u32 d24,d7,#19 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + vsli.32 d24,d7,#13 + add r9,r9,r2 + eor r2,r7,r8 + veor d25,d25,d24 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + vadd.i32 d0,d0,d25 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + vshr.u32 d24,d0,#17 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + vsli.32 d24,d0,#15 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + vshr.u32 d25,d0,#10 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + veor d25,d25,d24 + ldr r2,[sp,#12] + and r3,r3,r12 + vshr.u32 d24,d0,#19 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + vld1.32 {q8},[r14,:128]! + add r8,r8,r2 + vsli.32 d24,d0,#13 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + veor d25,d25,d24 + add r9,r9,r3 + and r2,r2,r5 + vadd.i32 d1,d1,d25 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + vadd.i32 q8,q8,q0 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#16] + and r12,r12,r3 + add r4,r4,r8 + vst1.32 {q8},[r1,:128]! + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vext.8 q8,q1,q2,#4 + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + vext.8 q9,q3,q0,#4 + add r8,r8,r12 + and r2,r2,r4 + eor r12,r0,r4,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vadd.i32 q1,q1,q9 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + vshr.u32 q9,q8,#3 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#20] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + veor q9,q9,q10 + add r6,r6,r2 + vsli.32 q11,q8,#14 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + vshr.u32 d24,d1,#17 + add r7,r7,r3 + and r2,r2,r11 + veor q9,q9,q11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + vsli.32 d24,d1,#15 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + vshr.u32 d25,d1,#10 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + vadd.i32 q1,q1,q9 + add r6,r6,r2 + ldr r2,[sp,#24] + veor d25,d25,d24 + and r12,r12,r3 + add r10,r10,r6 + vshr.u32 d24,d1,#19 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + vsli.32 d24,d1,#13 + add r5,r5,r2 + eor r2,r11,r4 + veor d25,d25,d24 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + vadd.i32 d2,d2,d25 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + vshr.u32 d24,d2,#17 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + vsli.32 d24,d2,#15 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + vshr.u32 d25,d2,#10 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + veor d25,d25,d24 + ldr r2,[sp,#28] + and r3,r3,r12 + vshr.u32 d24,d2,#19 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + vld1.32 {q8},[r14,:128]! + add r4,r4,r2 + vsli.32 d24,d2,#13 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + veor d25,d25,d24 + add r5,r5,r3 + and r2,r2,r9 + vadd.i32 d3,d3,d25 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + vadd.i32 q8,q8,q1 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#32] + and r12,r12,r3 + add r8,r8,r4 + vst1.32 {q8},[r1,:128]! + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vext.8 q8,q2,q3,#4 + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + vext.8 q9,q0,q1,#4 + add r4,r4,r12 + and r2,r2,r8 + eor r12,r0,r8,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vadd.i32 q2,q2,q9 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + vshr.u32 q9,q8,#3 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#36] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + veor q9,q9,q10 + add r10,r10,r2 + vsli.32 q11,q8,#14 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + vshr.u32 d24,d3,#17 + add r11,r11,r3 + and r2,r2,r7 + veor q9,q9,q11 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + vsli.32 d24,d3,#15 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + vshr.u32 d25,d3,#10 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + vadd.i32 q2,q2,q9 + add r10,r10,r2 + ldr r2,[sp,#40] + veor d25,d25,d24 + and r12,r12,r3 + add r6,r6,r10 + vshr.u32 d24,d3,#19 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + vsli.32 d24,d3,#13 + add r9,r9,r2 + eor r2,r7,r8 + veor d25,d25,d24 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + vadd.i32 d4,d4,d25 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + vshr.u32 d24,d4,#17 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + vsli.32 d24,d4,#15 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + vshr.u32 d25,d4,#10 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + veor d25,d25,d24 + ldr r2,[sp,#44] + and r3,r3,r12 + vshr.u32 d24,d4,#19 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + vld1.32 {q8},[r14,:128]! + add r8,r8,r2 + vsli.32 d24,d4,#13 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + veor d25,d25,d24 + add r9,r9,r3 + and r2,r2,r5 + vadd.i32 d5,d5,d25 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + vadd.i32 q8,q8,q2 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#48] + and r12,r12,r3 + add r4,r4,r8 + vst1.32 {q8},[r1,:128]! + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vext.8 q8,q3,q0,#4 + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + vext.8 q9,q1,q2,#4 + add r8,r8,r12 + and r2,r2,r4 + eor r12,r0,r4,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vadd.i32 q3,q3,q9 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + vshr.u32 q9,q8,#3 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#52] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + veor q9,q9,q10 + add r6,r6,r2 + vsli.32 q11,q8,#14 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + vshr.u32 d24,d5,#17 + add r7,r7,r3 + and r2,r2,r11 + veor q9,q9,q11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + vsli.32 d24,d5,#15 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + vshr.u32 d25,d5,#10 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + vadd.i32 q3,q3,q9 + add r6,r6,r2 + ldr r2,[sp,#56] + veor d25,d25,d24 + and r12,r12,r3 + add r10,r10,r6 + vshr.u32 d24,d5,#19 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + vsli.32 d24,d5,#13 + add r5,r5,r2 + eor r2,r11,r4 + veor d25,d25,d24 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + vadd.i32 d6,d6,d25 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + vshr.u32 d24,d6,#17 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + vsli.32 d24,d6,#15 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + vshr.u32 d25,d6,#10 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + veor d25,d25,d24 + ldr r2,[sp,#60] + and r3,r3,r12 + vshr.u32 d24,d6,#19 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + vld1.32 {q8},[r14,:128]! + add r4,r4,r2 + vsli.32 d24,d6,#13 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + veor d25,d25,d24 + add r5,r5,r3 + and r2,r2,r9 + vadd.i32 d7,d7,d25 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + vadd.i32 q8,q8,q3 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[r14] + and r12,r12,r3 + add r8,r8,r4 + vst1.32 {q8},[r1,:128]! + add r4,r4,r0,ror#2 + eor r12,r12,r6 + teq r2,#0 @ check for K256 terminator + ldr r2,[sp,#0] + sub r1,r1,#64 + bne .L_00_48 + + ldr r1,[sp,#68] + ldr r0,[sp,#72] + sub r14,r14,#256 @ rewind r14 + teq r1,r0 + it eq + subeq r1,r1,#64 @ avoid SEGV + vld1.8 {q0},[r1]! @ load next input block + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + it ne + strne r1,[sp,#68] + mov r1,sp + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + add r4,r4,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r8 + eor r12,r0,r8,ror#19 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vrev32.8 q0,q0 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vadd.i32 q8,q8,q0 + ldr r2,[sp,#4] + and r3,r3,r12 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + add r10,r10,r2 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + add r11,r11,r3 + and r2,r2,r7 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + add r10,r10,r2 + ldr r2,[sp,#8] + and r12,r12,r3 + add r6,r6,r10 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + add r9,r9,r2 + eor r2,r7,r8 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + ldr r2,[sp,#12] + and r3,r3,r12 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + add r8,r8,r2 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + add r9,r9,r3 + and r2,r2,r5 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#16] + and r12,r12,r3 + add r4,r4,r8 + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vst1.32 {q8},[r1,:128]! + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + add r8,r8,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r4 + eor r12,r0,r4,ror#19 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vrev32.8 q1,q1 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vadd.i32 q8,q8,q1 + ldr r2,[sp,#20] + and r3,r3,r12 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + add r6,r6,r2 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + add r7,r7,r3 + and r2,r2,r11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + add r6,r6,r2 + ldr r2,[sp,#24] + and r12,r12,r3 + add r10,r10,r6 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + add r5,r5,r2 + eor r2,r11,r4 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + ldr r2,[sp,#28] + and r3,r3,r12 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + add r4,r4,r2 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + add r5,r5,r3 + and r2,r2,r9 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#32] + and r12,r12,r3 + add r8,r8,r4 + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vst1.32 {q8},[r1,:128]! + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + add r4,r4,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r8 + eor r12,r0,r8,ror#19 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vrev32.8 q2,q2 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vadd.i32 q8,q8,q2 + ldr r2,[sp,#36] + and r3,r3,r12 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + add r10,r10,r2 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + add r11,r11,r3 + and r2,r2,r7 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + add r10,r10,r2 + ldr r2,[sp,#40] + and r12,r12,r3 + add r6,r6,r10 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + add r9,r9,r2 + eor r2,r7,r8 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + ldr r2,[sp,#44] + and r3,r3,r12 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + add r8,r8,r2 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + add r9,r9,r3 + and r2,r2,r5 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#48] + and r12,r12,r3 + add r4,r4,r8 + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vst1.32 {q8},[r1,:128]! + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + add r8,r8,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r4 + eor r12,r0,r4,ror#19 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vrev32.8 q3,q3 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vadd.i32 q8,q8,q3 + ldr r2,[sp,#52] + and r3,r3,r12 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + add r6,r6,r2 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + add r7,r7,r3 + and r2,r2,r11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + add r6,r6,r2 + ldr r2,[sp,#56] + and r12,r12,r3 + add r10,r10,r6 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + add r5,r5,r2 + eor r2,r11,r4 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + ldr r2,[sp,#60] + and r3,r3,r12 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + add r4,r4,r2 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + add r5,r5,r3 + and r2,r2,r9 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#64] + and r12,r12,r3 + add r8,r8,r4 + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vst1.32 {q8},[r1,:128]! + ldr r0,[r2,#0] + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldr r12,[r2,#4] + ldr r3,[r2,#8] + ldr r1,[r2,#12] + add r4,r4,r0 @ accumulate + ldr r0,[r2,#16] + add r5,r5,r12 + ldr r12,[r2,#20] + add r6,r6,r3 + ldr r3,[r2,#24] + add r7,r7,r1 + ldr r1,[r2,#28] + add r8,r8,r0 + str r4,[r2],#4 + add r9,r9,r12 + str r5,[r2],#4 + add r10,r10,r3 + str r6,[r2],#4 + add r11,r11,r1 + str r7,[r2],#4 + stmia r2,{r8-r11} + + ittte ne + movne r1,sp + ldrne r2,[sp,#0] + eorne r12,r12,r12 + ldreq sp,[sp,#76] @ restore original sp + itt ne + eorne r3,r5,r6 + bne .L_00_48 + + ldmia sp!,{r4-r12,pc} +.size sha256_block_data_order_neon,.-sha256_block_data_order_neon +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + +# ifdef __thumb2__ +# define INST(a,b,c,d) .byte c,d|0xc,a,b +# else +# define INST(a,b,c,d) .byte a,b,c,d +# endif + +.type sha256_block_data_order_armv8,%function +.align 5 +sha256_block_data_order_armv8: +.LARMv8: + vld1.32 {q0,q1},[r0] +# ifdef __thumb2__ + adr r3,.LARMv8 + sub r3,r3,#.LARMv8-K256 +# else + adrl r3,K256 +# endif + add r2,r1,r2,lsl#6 @ len to point at the end of inp + +.Loop_v8: + vld1.8 {q8-q9},[r1]! + vld1.8 {q10-q11},[r1]! + vld1.32 {q12},[r3]! + vrev32.8 q8,q8 + vrev32.8 q9,q9 + vrev32.8 q10,q10 + vrev32.8 q11,q11 + vmov q14,q0 @ offload + vmov q15,q1 + teq r1,r2 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + + vld1.32 {q13},[r3] + vadd.i32 q12,q12,q10 + sub r3,r3,#256-16 @ rewind + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + + vadd.i32 q13,q13,q11 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + + vadd.i32 q0,q0,q14 + vadd.i32 q1,q1,q15 + it ne + bne .Loop_v8 + + vst1.32 {q0,q1},[r0] + + bx lr @ bx lr +.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 +#endif +.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>" +.align 2 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.comm OPENSSL_armcap_P,4,4 +#endif diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c new file mode 100644 index 000000000000..ff90ced528a1 --- /dev/null +++ b/arch/arm/crypto/sha256_glue.c @@ -0,0 +1,246 @@ +/* + * Glue code for the SHA256 Secure Hash Algorithm assembly implementation + * using optimized ARM assembler and NEON instructions. + * + * Copyright © 2015 Google Inc. + * + * This file is based on sha256_ssse3_glue.c: + * Copyright (C) 2013 Intel Corporation + * Author: Tim Chen <tim.c.chen@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <crypto/internal/hash.h> +#include <linux/crypto.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <linux/string.h> +#include <crypto/sha.h> +#include <asm/byteorder.h> +#include <asm/simd.h> +#include <asm/neon.h> +#include "sha256_glue.h" + +asmlinkage void sha256_block_data_order(u32 *digest, const void *data, + unsigned int num_blks); + + +int sha256_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + sctx->state[0] = SHA256_H0; + sctx->state[1] = SHA256_H1; + sctx->state[2] = SHA256_H2; + sctx->state[3] = SHA256_H3; + sctx->state[4] = SHA256_H4; + sctx->state[5] = SHA256_H5; + sctx->state[6] = SHA256_H6; + sctx->state[7] = SHA256_H7; + sctx->count = 0; + + return 0; +} + +int sha224_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + sctx->state[0] = SHA224_H0; + sctx->state[1] = SHA224_H1; + sctx->state[2] = SHA224_H2; + sctx->state[3] = SHA224_H3; + sctx->state[4] = SHA224_H4; + sctx->state[5] = SHA224_H5; + sctx->state[6] = SHA224_H6; + sctx->state[7] = SHA224_H7; + sctx->count = 0; + + return 0; +} + +int __sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len, + unsigned int partial) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int done = 0; + + sctx->count += len; + + if (partial) { + done = SHA256_BLOCK_SIZE - partial; + memcpy(sctx->buf + partial, data, done); + sha256_block_data_order(sctx->state, sctx->buf, 1); + } + + if (len - done >= SHA256_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE; + + sha256_block_data_order(sctx->state, data + done, rounds); + done += rounds * SHA256_BLOCK_SIZE; + } + + memcpy(sctx->buf, data + done, len - done); + + return 0; +} + +int sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; + + /* Handle the fast case right here */ + if (partial + len < SHA256_BLOCK_SIZE) { + sctx->count += len; + memcpy(sctx->buf + partial, data, len); + + return 0; + } + + return __sha256_update(desc, data, len, partial); +} + +/* Add padding and return the message digest. */ +static int sha256_final(struct shash_desc *desc, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + __be32 *dst = (__be32 *)out; + __be64 bits; + static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; + + /* save number of bits */ + bits = cpu_to_be64(sctx->count << 3); + + /* Pad out to 56 mod 64 and append length */ + index = sctx->count % SHA256_BLOCK_SIZE; + padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index); + + /* We need to fill a whole block for __sha256_update */ + if (padlen <= 56) { + sctx->count += padlen; + memcpy(sctx->buf + index, padding, padlen); + } else { + __sha256_update(desc, padding, padlen, index); + } + __sha256_update(desc, (const u8 *)&bits, sizeof(bits), 56); + + /* Store state in digest */ + for (i = 0; i < 8; i++) + dst[i] = cpu_to_be32(sctx->state[i]); + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} + +static int sha224_final(struct shash_desc *desc, u8 *out) +{ + u8 D[SHA256_DIGEST_SIZE]; + + sha256_final(desc, D); + + memcpy(out, D, SHA224_DIGEST_SIZE); + memzero_explicit(D, SHA256_DIGEST_SIZE); + + return 0; +} + +int sha256_export(struct shash_desc *desc, void *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + + return 0; +} + +int sha256_import(struct shash_desc *desc, const void *in) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + + return 0; +} + +static struct shash_alg algs[] = { { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_init, + .update = sha256_update, + .final = sha256_final, + .export = sha256_export, + .import = sha256_import, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-asm", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_init, + .update = sha256_update, + .final = sha224_final, + .export = sha256_export, + .import = sha256_import, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-asm", + .cra_priority = 150, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static int __init sha256_mod_init(void) +{ + int res = crypto_register_shashes(algs, ARRAY_SIZE(algs)); + + if (res < 0) + return res; + + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) { + res = crypto_register_shashes(sha256_neon_algs, + ARRAY_SIZE(sha256_neon_algs)); + + if (res < 0) + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); + } + + return res; +} + +static void __exit sha256_mod_fini(void) +{ + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); + + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) + crypto_unregister_shashes(sha256_neon_algs, + ARRAY_SIZE(sha256_neon_algs)); +} + +module_init(sha256_mod_init); +module_exit(sha256_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm (ARM), including NEON"); + +MODULE_ALIAS("sha256"); diff --git a/arch/arm/crypto/sha256_glue.h b/arch/arm/crypto/sha256_glue.h new file mode 100644 index 000000000000..0312f4ffe8cc --- /dev/null +++ b/arch/arm/crypto/sha256_glue.h @@ -0,0 +1,23 @@ +#ifndef _CRYPTO_SHA256_GLUE_H +#define _CRYPTO_SHA256_GLUE_H + +#include <linux/crypto.h> +#include <crypto/sha.h> + +extern struct shash_alg sha256_neon_algs[2]; + +extern int sha256_init(struct shash_desc *desc); + +extern int sha224_init(struct shash_desc *desc); + +extern int __sha256_update(struct shash_desc *desc, const u8 *data, + unsigned int len, unsigned int partial); + +extern int sha256_update(struct shash_desc *desc, const u8 *data, + unsigned int len); + +extern int sha256_export(struct shash_desc *desc, void *out); + +extern int sha256_import(struct shash_desc *desc, const void *in); + +#endif /* _CRYPTO_SHA256_GLUE_H */ diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c new file mode 100644 index 000000000000..c4da10090eee --- /dev/null +++ b/arch/arm/crypto/sha256_neon_glue.c @@ -0,0 +1,172 @@ +/* + * Glue code for the SHA256 Secure Hash Algorithm assembly implementation + * using NEON instructions. + * + * Copyright © 2015 Google Inc. + * + * This file is based on sha512_neon_glue.c: + * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <crypto/internal/hash.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <linux/string.h> +#include <crypto/sha.h> +#include <asm/byteorder.h> +#include <asm/simd.h> +#include <asm/neon.h> +#include "sha256_glue.h" + +asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data, + unsigned int num_blks); + + +static int __sha256_neon_update(struct shash_desc *desc, const u8 *data, + unsigned int len, unsigned int partial) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int done = 0; + + sctx->count += len; + + if (partial) { + done = SHA256_BLOCK_SIZE - partial; + memcpy(sctx->buf + partial, data, done); + sha256_block_data_order_neon(sctx->state, sctx->buf, 1); + } + + if (len - done >= SHA256_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE; + + sha256_block_data_order_neon(sctx->state, data + done, rounds); + done += rounds * SHA256_BLOCK_SIZE; + } + + memcpy(sctx->buf, data + done, len - done); + + return 0; +} + +static int sha256_neon_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; + int res; + + /* Handle the fast case right here */ + if (partial + len < SHA256_BLOCK_SIZE) { + sctx->count += len; + memcpy(sctx->buf + partial, data, len); + + return 0; + } + + if (!may_use_simd()) { + res = __sha256_update(desc, data, len, partial); + } else { + kernel_neon_begin(); + res = __sha256_neon_update(desc, data, len, partial); + kernel_neon_end(); + } + + return res; +} + +/* Add padding and return the message digest. */ +static int sha256_neon_final(struct shash_desc *desc, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + __be32 *dst = (__be32 *)out; + __be64 bits; + static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; + + /* save number of bits */ + bits = cpu_to_be64(sctx->count << 3); + + /* Pad out to 56 mod 64 and append length */ + index = sctx->count % SHA256_BLOCK_SIZE; + padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index); + + if (!may_use_simd()) { + sha256_update(desc, padding, padlen); + sha256_update(desc, (const u8 *)&bits, sizeof(bits)); + } else { + kernel_neon_begin(); + /* We need to fill a whole block for __sha256_neon_update() */ + if (padlen <= 56) { + sctx->count += padlen; + memcpy(sctx->buf + index, padding, padlen); + } else { + __sha256_neon_update(desc, padding, padlen, index); + } + __sha256_neon_update(desc, (const u8 *)&bits, + sizeof(bits), 56); + kernel_neon_end(); + } + + /* Store state in digest */ + for (i = 0; i < 8; i++) + dst[i] = cpu_to_be32(sctx->state[i]); + + /* Wipe context */ + memzero_explicit(sctx, sizeof(*sctx)); + + return 0; +} + +static int sha224_neon_final(struct shash_desc *desc, u8 *out) +{ + u8 D[SHA256_DIGEST_SIZE]; + + sha256_neon_final(desc, D); + + memcpy(out, D, SHA224_DIGEST_SIZE); + memzero_explicit(D, SHA256_DIGEST_SIZE); + + return 0; +} + +struct shash_alg sha256_neon_algs[] = { { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_init, + .update = sha256_neon_update, + .final = sha256_neon_final, + .export = sha256_export, + .import = sha256_import, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-neon", + .cra_priority = 250, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_init, + .update = sha256_neon_update, + .final = sha224_neon_final, + .export = sha256_export, + .import = sha256_import, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-neon", + .cra_priority = 250, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 10e78d00a0bb..2d46862e7bef 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -487,6 +487,16 @@ int set_memory_rw(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); +#ifdef CONFIG_DEBUG_RODATA +void mark_rodata_ro(void); +void set_kernel_text_rw(void); +void set_kernel_text_ro(void); +#else +static inline void set_kernel_text_rw(void) { } +static inline void set_kernel_text_ro(void) { } +#endif + void flush_uprobe_xol_access(struct page *page, unsigned long uaddr, void *kaddr, unsigned long len); + #endif diff --git a/arch/arm/include/asm/fiq_glue.h b/arch/arm/include/asm/fiq_glue.h new file mode 100644 index 000000000000..a9e244f9f197 --- /dev/null +++ b/arch/arm/include/asm/fiq_glue.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2010 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __ASM_FIQ_GLUE_H +#define __ASM_FIQ_GLUE_H + +struct fiq_glue_handler { + void (*fiq)(struct fiq_glue_handler *h, void *regs, void *svc_sp); + void (*resume)(struct fiq_glue_handler *h); +}; +typedef void (*fiq_return_handler_t)(void); + +int fiq_glue_register_handler(struct fiq_glue_handler *handler); +int fiq_glue_set_return_handler(fiq_return_handler_t fiq_return); +int fiq_glue_clear_return_handler(fiq_return_handler_t fiq_return); + +#ifdef CONFIG_FIQ_GLUE +void fiq_glue_resume(void); +#else +static inline void fiq_glue_resume(void) {} +#endif + +#endif diff --git a/arch/arm/include/asm/fixmap.h b/arch/arm/include/asm/fixmap.h index 74124b0d0d79..0415eae1df27 100644 --- a/arch/arm/include/asm/fixmap.h +++ b/arch/arm/include/asm/fixmap.h @@ -2,27 +2,24 @@ #define _ASM_FIXMAP_H #define FIXADDR_START 0xffc00000UL -#define FIXADDR_TOP 0xffe00000UL -#define FIXADDR_SIZE (FIXADDR_TOP - FIXADDR_START) +#define FIXADDR_END 0xfff00000UL +#define FIXADDR_TOP (FIXADDR_END - PAGE_SIZE) -#define FIX_KMAP_NR_PTES (FIXADDR_SIZE >> PAGE_SHIFT) +#include <asm/kmap_types.h> -#define __fix_to_virt(x) (FIXADDR_START + ((x) << PAGE_SHIFT)) -#define __virt_to_fix(x) (((x) - FIXADDR_START) >> PAGE_SHIFT) +enum fixed_addresses { + FIX_KMAP_BEGIN, + FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS) - 1, -extern void __this_fixmap_does_not_exist(void); + /* Support writing RO kernel text via kprobes, jump labels, etc. */ + FIX_TEXT_POKE0, + FIX_TEXT_POKE1, -static inline unsigned long fix_to_virt(const unsigned int idx) -{ - if (idx >= FIX_KMAP_NR_PTES) - __this_fixmap_does_not_exist(); - return __fix_to_virt(idx); -} + __end_of_fixed_addresses +}; -static inline unsigned int virt_to_fix(const unsigned long vaddr) -{ - BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); - return __virt_to_fix(vaddr); -} +void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot); + +#include <asm-generic/fixmap.h> #endif diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h index fe3ea776dc34..5df33e30ae1b 100644 --- a/arch/arm/include/asm/hardirq.h +++ b/arch/arm/include/asm/hardirq.h @@ -5,7 +5,7 @@ #include <linux/threads.h> #include <asm/irq.h> -#define NR_IPI 8 +#define NR_IPI 9 typedef struct { unsigned int __softirq_pending; diff --git a/arch/arm/include/asm/hardware/coresight.h b/arch/arm/include/asm/hardware/coresight.h index ad774f37c47c..b3507650b41c 100644 --- a/arch/arm/include/asm/hardware/coresight.h +++ b/arch/arm/include/asm/hardware/coresight.h @@ -17,15 +17,23 @@ #define TRACER_ACCESSED_BIT 0 #define TRACER_RUNNING_BIT 1 #define TRACER_CYCLE_ACC_BIT 2 +#define TRACER_TRACE_DATA_BIT 3 +#define TRACER_TIMESTAMP_BIT 4 +#define TRACER_BRANCHOUTPUT_BIT 5 +#define TRACER_RETURN_STACK_BIT 6 #define TRACER_ACCESSED BIT(TRACER_ACCESSED_BIT) #define TRACER_RUNNING BIT(TRACER_RUNNING_BIT) #define TRACER_CYCLE_ACC BIT(TRACER_CYCLE_ACC_BIT) +#define TRACER_TRACE_DATA BIT(TRACER_TRACE_DATA_BIT) +#define TRACER_TIMESTAMP BIT(TRACER_TIMESTAMP_BIT) +#define TRACER_BRANCHOUTPUT BIT(TRACER_BRANCHOUTPUT_BIT) +#define TRACER_RETURN_STACK BIT(TRACER_RETURN_STACK_BIT) #define TRACER_TIMEOUT 10000 -#define etm_writel(t, v, x) \ - (writel_relaxed((v), (t)->etm_regs + (x))) -#define etm_readl(t, x) (readl_relaxed((t)->etm_regs + (x))) +#define etm_writel(t, id, v, x) \ + (writel_relaxed((v), (t)->etm_regs[(id)] + (x))) +#define etm_readl(t, id, x) (readl_relaxed((t)->etm_regs[(id)] + (x))) /* CoreSight Management Registers */ #define CSMR_LOCKACCESS 0xfb0 @@ -43,7 +51,7 @@ #define ETMCTRL_POWERDOWN 1 #define ETMCTRL_PROGRAM (1 << 10) #define ETMCTRL_PORTSEL (1 << 11) -#define ETMCTRL_DO_CONTEXTID (3 << 14) +#define ETMCTRL_CONTEXTIDSIZE(x) (((x) & 3) << 14) #define ETMCTRL_PORTMASK1 (7 << 4) #define ETMCTRL_PORTMASK2 (1 << 21) #define ETMCTRL_PORTMASK (ETMCTRL_PORTMASK1 | ETMCTRL_PORTMASK2) @@ -55,9 +63,12 @@ #define ETMCTRL_DATA_DO_BOTH (ETMCTRL_DATA_DO_DATA | ETMCTRL_DATA_DO_ADDR) #define ETMCTRL_BRANCH_OUTPUT (1 << 8) #define ETMCTRL_CYCLEACCURATE (1 << 12) +#define ETMCTRL_TIMESTAMP_EN (1 << 28) +#define ETMCTRL_RETURN_STACK_EN (1 << 29) /* ETM configuration code register */ #define ETMR_CONFCODE (0x04) +#define ETMCCR_ETMIDR_PRESENT BIT(31) /* ETM trace start/stop resource control register */ #define ETMR_TRACESSCTRL (0x18) @@ -113,10 +124,25 @@ #define ETMR_TRACEENCTRL 0x24 #define ETMTE_INCLEXCL BIT(24) #define ETMR_TRACEENEVT 0x20 -#define ETMCTRL_OPTS (ETMCTRL_DO_CPRT | \ - ETMCTRL_DATA_DO_ADDR | \ - ETMCTRL_BRANCH_OUTPUT | \ - ETMCTRL_DO_CONTEXTID) + +#define ETMR_VIEWDATAEVT 0x30 +#define ETMR_VIEWDATACTRL1 0x34 +#define ETMR_VIEWDATACTRL2 0x38 +#define ETMR_VIEWDATACTRL3 0x3c +#define ETMVDC3_EXCLONLY BIT(16) + +#define ETMCTRL_OPTS (ETMCTRL_DO_CPRT) + +#define ETMR_ID 0x1e4 +#define ETMIDR_VERSION(x) (((x) >> 4) & 0xff) +#define ETMIDR_VERSION_3_1 0x21 +#define ETMIDR_VERSION_PFT_1_0 0x30 + +#define ETMR_CCE 0x1e8 +#define ETMCCER_RETURN_STACK_IMPLEMENTED BIT(23) +#define ETMCCER_TIMESTAMPING_IMPLEMENTED BIT(22) + +#define ETMR_TRACEIDR 0x200 /* ETM management registers, "ETM Architecture", 3.5.24 */ #define ETMMR_OSLAR 0x300 @@ -140,14 +166,16 @@ #define ETBFF_TRIGIN BIT(8) #define ETBFF_TRIGEVT BIT(9) #define ETBFF_TRIGFL BIT(10) +#define ETBFF_STOPFL BIT(12) #define etb_writel(t, v, x) \ (writel_relaxed((v), (t)->etb_regs + (x))) #define etb_readl(t, x) (readl_relaxed((t)->etb_regs + (x))) -#define etm_lock(t) do { etm_writel((t), 0, CSMR_LOCKACCESS); } while (0) -#define etm_unlock(t) \ - do { etm_writel((t), CS_LAR_KEY, CSMR_LOCKACCESS); } while (0) +#define etm_lock(t, id) \ + do { etm_writel((t), (id), 0, CSMR_LOCKACCESS); } while (0) +#define etm_unlock(t, id) \ + do { etm_writel((t), (id), CS_LAR_KEY, CSMR_LOCKACCESS); } while (0) #define etb_lock(t) do { etb_writel((t), 0, CSMR_LOCKACCESS); } while (0) #define etb_unlock(t) \ diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h index 53c15dec7af6..809203a4b71b 100644 --- a/arch/arm/include/asm/irq.h +++ b/arch/arm/include/asm/irq.h @@ -35,6 +35,9 @@ extern void (*handle_arch_irq)(struct pt_regs *); extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); #endif +void arch_trigger_all_cpu_backtrace(void); +#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace + #endif #endif diff --git a/arch/arm/include/asm/mach/mmc.h b/arch/arm/include/asm/mach/mmc.h new file mode 100644 index 000000000000..bca864ac945f --- /dev/null +++ b/arch/arm/include/asm/mach/mmc.h @@ -0,0 +1,28 @@ +/* + * arch/arm/include/asm/mach/mmc.h + */ +#ifndef ASMARM_MACH_MMC_H +#define ASMARM_MACH_MMC_H + +#include <linux/mmc/host.h> +#include <linux/mmc/card.h> +#include <linux/mmc/sdio_func.h> + +struct embedded_sdio_data { + struct sdio_cis cis; + struct sdio_cccr cccr; + struct sdio_embedded_func *funcs; + int num_funcs; +}; + +struct mmc_platform_data { + unsigned int ocr_mask; /* available voltages */ + int built_in; /* built-in device flag */ + int card_present; /* card detect state */ + u32 (*translate_vdd)(struct device *, unsigned int); + unsigned int (*status)(struct device *); + struct embedded_sdio_data *embedded_sdio; + int (*register_status_notify)(void (*callback)(int card_present, void *dev_id), void *dev_id); +}; + +#endif diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index 18f5a554134f..7f31b4fd4180 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -81,6 +81,8 @@ extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask); extern int register_ipi_completion(struct completion *completion, int cpu); +extern void smp_send_all_cpu_backtrace(void); + struct smp_operations { #ifdef CONFIG_SMP /* diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 2ecc7d15bc09..e03714662cb6 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -67,7 +67,7 @@ test-kprobes-objs += kprobes-test-arm.o endif obj-$(CONFIG_OABI_COMPAT) += sys_oabi-compat.o obj-$(CONFIG_ARM_THUMBEE) += thumbee.o -obj-$(CONFIG_KGDB) += kgdb.o +obj-$(CONFIG_KGDB) += kgdb.o patch.o obj-$(CONFIG_ARM_UNWIND) += unwind.o obj-$(CONFIG_HAVE_TCM) += tcm.o obj-$(CONFIG_OF) += devtree.o diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 2d2d6087b9b1..713e807621d2 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -10,7 +10,6 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include <linux/compiler.h> #include <linux/sched.h> #include <linux/mm.h> #include <linux/dma-mapping.h> @@ -40,19 +39,10 @@ * GCC 3.2.x: miscompiles NEW_AUX_ENT in fs/binfmt_elf.c * (http://gcc.gnu.org/PR8896) and incorrect structure * initialisation in fs/jffs2/erase.c - * GCC 4.8.0-4.8.2: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58854 - * miscompiles find_get_entry(), and can result in EXT3 and EXT4 - * filesystem corruption (possibly other FS too). */ -#ifdef __GNUC__ #if (__GNUC__ == 3 && __GNUC_MINOR__ < 3) #error Your compiler is too buggy; it is known to miscompile kernels. -#error Known good compilers: 3.3, 4.x -#endif -#if GCC_VERSION >= 40800 && GCC_VERSION < 40803 -#error Your compiler is too buggy; it is known to miscompile kernels -#error and result in filesystem corruption and oopses. -#endif +#error Known good compilers: 3.3 #endif int main(void) diff --git a/arch/arm/kernel/etm.c b/arch/arm/kernel/etm.c index 131a6ab5f355..a634e0ef6d1f 100644 --- a/arch/arm/kernel/etm.c +++ b/arch/arm/kernel/etm.c @@ -15,6 +15,7 @@ #include <linux/init.h> #include <linux/types.h> #include <linux/io.h> +#include <linux/slab.h> #include <linux/sysrq.h> #include <linux/device.h> #include <linux/clk.h> @@ -37,26 +38,37 @@ MODULE_AUTHOR("Alexander Shishkin"); struct tracectx { unsigned int etb_bufsz; void __iomem *etb_regs; - void __iomem *etm_regs; + void __iomem **etm_regs; + int etm_regs_count; unsigned long flags; int ncmppairs; int etm_portsz; + int etm_contextid_size; + u32 etb_fc; + unsigned long range_start; + unsigned long range_end; + unsigned long data_range_start; + unsigned long data_range_end; + bool dump_initial_etb; struct device *dev; struct clk *emu_clk; struct mutex mutex; }; -static struct tracectx tracer; +static struct tracectx tracer = { + .range_start = (unsigned long)_stext, + .range_end = (unsigned long)_etext, +}; static inline bool trace_isrunning(struct tracectx *t) { return !!(t->flags & TRACER_RUNNING); } -static int etm_setup_address_range(struct tracectx *t, int n, +static int etm_setup_address_range(struct tracectx *t, int id, int n, unsigned long start, unsigned long end, int exclude, int data) { - u32 flags = ETMAAT_ARM | ETMAAT_IGNCONTEXTID | ETMAAT_NSONLY | \ + u32 flags = ETMAAT_ARM | ETMAAT_IGNCONTEXTID | ETMAAT_IGNSECURITY | ETMAAT_NOVALCMP; if (n < 1 || n > t->ncmppairs) @@ -72,95 +84,185 @@ static int etm_setup_address_range(struct tracectx *t, int n, flags |= ETMAAT_IEXEC; /* first comparator for the range */ - etm_writel(t, flags, ETMR_COMP_ACC_TYPE(n * 2)); - etm_writel(t, start, ETMR_COMP_VAL(n * 2)); + etm_writel(t, id, flags, ETMR_COMP_ACC_TYPE(n * 2)); + etm_writel(t, id, start, ETMR_COMP_VAL(n * 2)); /* second comparator is right next to it */ - etm_writel(t, flags, ETMR_COMP_ACC_TYPE(n * 2 + 1)); - etm_writel(t, end, ETMR_COMP_VAL(n * 2 + 1)); - - flags = exclude ? ETMTE_INCLEXCL : 0; - etm_writel(t, flags | (1 << n), ETMR_TRACEENCTRL); + etm_writel(t, id, flags, ETMR_COMP_ACC_TYPE(n * 2 + 1)); + etm_writel(t, id, end, ETMR_COMP_VAL(n * 2 + 1)); + + if (data) { + flags = exclude ? ETMVDC3_EXCLONLY : 0; + if (exclude) + n += 8; + etm_writel(t, id, flags | BIT(n), ETMR_VIEWDATACTRL3); + } else { + flags = exclude ? ETMTE_INCLEXCL : 0; + etm_writel(t, id, flags | (1 << n), ETMR_TRACEENCTRL); + } return 0; } -static int trace_start(struct tracectx *t) +static int trace_start_etm(struct tracectx *t, int id) { u32 v; unsigned long timeout = TRACER_TIMEOUT; - etb_unlock(t); - - etb_writel(t, 0, ETBR_FORMATTERCTRL); - etb_writel(t, 1, ETBR_CTRL); - - etb_lock(t); - - /* configure etm */ v = ETMCTRL_OPTS | ETMCTRL_PROGRAM | ETMCTRL_PORTSIZE(t->etm_portsz); + v |= ETMCTRL_CONTEXTIDSIZE(t->etm_contextid_size); if (t->flags & TRACER_CYCLE_ACC) v |= ETMCTRL_CYCLEACCURATE; - etm_unlock(t); + if (t->flags & TRACER_BRANCHOUTPUT) + v |= ETMCTRL_BRANCH_OUTPUT; + + if (t->flags & TRACER_TRACE_DATA) + v |= ETMCTRL_DATA_DO_ADDR; + + if (t->flags & TRACER_TIMESTAMP) + v |= ETMCTRL_TIMESTAMP_EN; + + if (t->flags & TRACER_RETURN_STACK) + v |= ETMCTRL_RETURN_STACK_EN; - etm_writel(t, v, ETMR_CTRL); + etm_unlock(t, id); - while (!(etm_readl(t, ETMR_CTRL) & ETMCTRL_PROGRAM) && --timeout) + etm_writel(t, id, v, ETMR_CTRL); + + while (!(etm_readl(t, id, ETMR_CTRL) & ETMCTRL_PROGRAM) && --timeout) ; if (!timeout) { dev_dbg(t->dev, "Waiting for progbit to assert timed out\n"); - etm_lock(t); + etm_lock(t, id); return -EFAULT; } - etm_setup_address_range(t, 1, (unsigned long)_stext, - (unsigned long)_etext, 0, 0); - etm_writel(t, 0, ETMR_TRACEENCTRL2); - etm_writel(t, 0, ETMR_TRACESSCTRL); - etm_writel(t, 0x6f, ETMR_TRACEENEVT); + if (t->range_start || t->range_end) + etm_setup_address_range(t, id, 1, + t->range_start, t->range_end, 0, 0); + else + etm_writel(t, id, ETMTE_INCLEXCL, ETMR_TRACEENCTRL); + + etm_writel(t, id, 0, ETMR_TRACEENCTRL2); + etm_writel(t, id, 0, ETMR_TRACESSCTRL); + etm_writel(t, id, 0x6f, ETMR_TRACEENEVT); + + etm_writel(t, id, 0, ETMR_VIEWDATACTRL1); + etm_writel(t, id, 0, ETMR_VIEWDATACTRL2); + + if (t->data_range_start || t->data_range_end) + etm_setup_address_range(t, id, 2, t->data_range_start, + t->data_range_end, 0, 1); + else + etm_writel(t, id, ETMVDC3_EXCLONLY, ETMR_VIEWDATACTRL3); + + etm_writel(t, id, 0x6f, ETMR_VIEWDATAEVT); v &= ~ETMCTRL_PROGRAM; v |= ETMCTRL_PORTSEL; - etm_writel(t, v, ETMR_CTRL); + etm_writel(t, id, v, ETMR_CTRL); timeout = TRACER_TIMEOUT; - while (etm_readl(t, ETMR_CTRL) & ETMCTRL_PROGRAM && --timeout) + while (etm_readl(t, id, ETMR_CTRL) & ETMCTRL_PROGRAM && --timeout) ; if (!timeout) { dev_dbg(t->dev, "Waiting for progbit to deassert timed out\n"); - etm_lock(t); + etm_lock(t, id); return -EFAULT; } - etm_lock(t); + etm_lock(t, id); + return 0; +} + +static int trace_start(struct tracectx *t) +{ + int ret; + int id; + u32 etb_fc = t->etb_fc; + + etb_unlock(t); + + t->dump_initial_etb = false; + etb_writel(t, 0, ETBR_WRITEADDR); + etb_writel(t, etb_fc, ETBR_FORMATTERCTRL); + etb_writel(t, 1, ETBR_CTRL); + + etb_lock(t); + + /* configure etm(s) */ + for (id = 0; id < t->etm_regs_count; id++) { + ret = trace_start_etm(t, id); + if (ret) + return ret; + } t->flags |= TRACER_RUNNING; return 0; } -static int trace_stop(struct tracectx *t) +static int trace_stop_etm(struct tracectx *t, int id) { unsigned long timeout = TRACER_TIMEOUT; - etm_unlock(t); + etm_unlock(t, id); - etm_writel(t, 0x440, ETMR_CTRL); - while (!(etm_readl(t, ETMR_CTRL) & ETMCTRL_PROGRAM) && --timeout) + etm_writel(t, id, 0x440, ETMR_CTRL); + while (!(etm_readl(t, id, ETMR_CTRL) & ETMCTRL_PROGRAM) && --timeout) ; if (!timeout) { - dev_dbg(t->dev, "Waiting for progbit to assert timed out\n"); - etm_lock(t); + dev_err(t->dev, + "etm%d: Waiting for progbit to assert timed out\n", + id); + etm_lock(t, id); return -EFAULT; } - etm_lock(t); + etm_lock(t, id); + return 0; +} + +static int trace_power_down_etm(struct tracectx *t, int id) +{ + unsigned long timeout = TRACER_TIMEOUT; + etm_unlock(t, id); + while (!(etm_readl(t, id, ETMR_STATUS) & ETMST_PROGBIT) && --timeout) + ; + if (!timeout) { + dev_err(t->dev, "etm%d: Waiting for status progbit to assert timed out\n", + id); + etm_lock(t, id); + return -EFAULT; + } + + etm_writel(t, id, 0x441, ETMR_CTRL); + + etm_lock(t, id); + return 0; +} + +static int trace_stop(struct tracectx *t) +{ + int id; + unsigned long timeout = TRACER_TIMEOUT; + u32 etb_fc = t->etb_fc; + + for (id = 0; id < t->etm_regs_count; id++) + trace_stop_etm(t, id); + + for (id = 0; id < t->etm_regs_count; id++) + trace_power_down_etm(t, id); etb_unlock(t); - etb_writel(t, ETBFF_MANUAL_FLUSH, ETBR_FORMATTERCTRL); + if (etb_fc) { + etb_fc |= ETBFF_STOPFL; + etb_writel(t, t->etb_fc, ETBR_FORMATTERCTRL); + } + etb_writel(t, etb_fc | ETBFF_MANUAL_FLUSH, ETBR_FORMATTERCTRL); timeout = TRACER_TIMEOUT; while (etb_readl(t, ETBR_FORMATTERCTRL) & @@ -185,24 +287,15 @@ static int trace_stop(struct tracectx *t) static int etb_getdatalen(struct tracectx *t) { u32 v; - int rp, wp; + int wp; v = etb_readl(t, ETBR_STATUS); if (v & 1) return t->etb_bufsz; - rp = etb_readl(t, ETBR_READADDR); wp = etb_readl(t, ETBR_WRITEADDR); - - if (rp > wp) { - etb_writel(t, 0, ETBR_READADDR); - etb_writel(t, 0, ETBR_WRITEADDR); - - return 0; - } - - return wp - rp; + return wp; } /* sysrq+v will always stop the running trace and leave it at that */ @@ -235,21 +328,18 @@ static void etm_dump(void) printk("%08x", cpu_to_be32(etb_readl(t, ETBR_READMEM))); printk(KERN_INFO "\n--- ETB buffer end ---\n"); - /* deassert the overflow bit */ - etb_writel(t, 1, ETBR_CTRL); - etb_writel(t, 0, ETBR_CTRL); - - etb_writel(t, 0, ETBR_TRIGGERCOUNT); - etb_writel(t, 0, ETBR_READADDR); - etb_writel(t, 0, ETBR_WRITEADDR); - etb_lock(t); } static void sysrq_etm_dump(int key) { + if (!mutex_trylock(&tracer.mutex)) { + printk(KERN_INFO "Tracing hardware busy\n"); + return; + } dev_dbg(tracer.dev, "Dumping ETB buffer\n"); etm_dump(); + mutex_unlock(&tracer.mutex); } static struct sysrq_key_op sysrq_etm_op = { @@ -276,6 +366,10 @@ static ssize_t etb_read(struct file *file, char __user *data, struct tracectx *t = file->private_data; u32 first = 0; u32 *buf; + int wpos; + int skip; + long wlength; + loff_t pos = *ppos; mutex_lock(&t->mutex); @@ -287,31 +381,39 @@ static ssize_t etb_read(struct file *file, char __user *data, etb_unlock(t); total = etb_getdatalen(t); + if (total == 0 && t->dump_initial_etb) + total = t->etb_bufsz; if (total == t->etb_bufsz) first = etb_readl(t, ETBR_WRITEADDR); + if (pos > total * 4) { + skip = 0; + wpos = total; + } else { + skip = (int)pos % 4; + wpos = (int)pos / 4; + } + total -= wpos; + first = (first + wpos) % t->etb_bufsz; + etb_writel(t, first, ETBR_READADDR); - length = min(total * 4, (int)len); - buf = vmalloc(length); + wlength = min(total, DIV_ROUND_UP(skip + (int)len, 4)); + length = min(total * 4 - skip, (int)len); + buf = vmalloc(wlength * 4); - dev_dbg(t->dev, "ETB buffer length: %d\n", total); + dev_dbg(t->dev, "ETB read %ld bytes to %lld from %ld words at %d\n", + length, pos, wlength, first); + dev_dbg(t->dev, "ETB buffer length: %d\n", total + wpos); dev_dbg(t->dev, "ETB status reg: %x\n", etb_readl(t, ETBR_STATUS)); - for (i = 0; i < length / 4; i++) + for (i = 0; i < wlength; i++) buf[i] = etb_readl(t, ETBR_READMEM); - /* the only way to deassert overflow bit in ETB status is this */ - etb_writel(t, 1, ETBR_CTRL); - etb_writel(t, 0, ETBR_CTRL); - - etb_writel(t, 0, ETBR_WRITEADDR); - etb_writel(t, 0, ETBR_READADDR); - etb_writel(t, 0, ETBR_TRIGGERCOUNT); - etb_lock(t); - length -= copy_to_user(data, buf, length); + length -= copy_to_user(data, (u8 *)buf + skip, length); vfree(buf); + *ppos = pos + length; out: mutex_unlock(&t->mutex); @@ -348,28 +450,17 @@ static int etb_probe(struct amba_device *dev, const struct amba_id *id) if (ret) goto out; + mutex_lock(&t->mutex); t->etb_regs = ioremap_nocache(dev->res.start, resource_size(&dev->res)); if (!t->etb_regs) { ret = -ENOMEM; goto out_release; } + t->dev = &dev->dev; + t->dump_initial_etb = true; amba_set_drvdata(dev, t); - etb_miscdev.parent = &dev->dev; - - ret = misc_register(&etb_miscdev); - if (ret) - goto out_unmap; - - t->emu_clk = clk_get(&dev->dev, "emu_src_ck"); - if (IS_ERR(t->emu_clk)) { - dev_dbg(&dev->dev, "Failed to obtain emu_src_ck.\n"); - return -EFAULT; - } - - clk_enable(t->emu_clk); - etb_unlock(t); t->etb_bufsz = etb_readl(t, ETBR_DEPTH); dev_dbg(&dev->dev, "Size: %x\n", t->etb_bufsz); @@ -378,6 +469,20 @@ static int etb_probe(struct amba_device *dev, const struct amba_id *id) etb_writel(t, 0, ETBR_CTRL); etb_writel(t, 0x1000, ETBR_FORMATTERCTRL); etb_lock(t); + mutex_unlock(&t->mutex); + + etb_miscdev.parent = &dev->dev; + + ret = misc_register(&etb_miscdev); + if (ret) + goto out_unmap; + + /* Get optional clock. Currently used to select clock source on omap3 */ + t->emu_clk = clk_get(&dev->dev, "emu_src_ck"); + if (IS_ERR(t->emu_clk)) + dev_dbg(&dev->dev, "Failed to obtain emu_src_ck.\n"); + else + clk_enable(t->emu_clk); dev_dbg(&dev->dev, "ETB AMBA driver initialized.\n"); @@ -385,9 +490,12 @@ out: return ret; out_unmap: + mutex_lock(&t->mutex); iounmap(t->etb_regs); + t->etb_regs = NULL; out_release: + mutex_unlock(&t->mutex); amba_release_regions(dev); return ret; @@ -400,8 +508,10 @@ static int etb_remove(struct amba_device *dev) iounmap(t->etb_regs); t->etb_regs = NULL; - clk_disable(t->emu_clk); - clk_put(t->emu_clk); + if (!IS_ERR(t->emu_clk)) { + clk_disable(t->emu_clk); + clk_put(t->emu_clk); + } amba_release_regions(dev); @@ -445,7 +555,10 @@ static ssize_t trace_running_store(struct kobject *kobj, return -EINVAL; mutex_lock(&tracer.mutex); - ret = value ? trace_start(&tracer) : trace_stop(&tracer); + if (!tracer.etb_regs) + ret = -ENODEV; + else + ret = value ? trace_start(&tracer) : trace_stop(&tracer); mutex_unlock(&tracer.mutex); return ret ? : n; @@ -460,36 +573,50 @@ static ssize_t trace_info_show(struct kobject *kobj, { u32 etb_wa, etb_ra, etb_st, etb_fc, etm_ctrl, etm_st; int datalen; + int id; + int ret; - etb_unlock(&tracer); - datalen = etb_getdatalen(&tracer); - etb_wa = etb_readl(&tracer, ETBR_WRITEADDR); - etb_ra = etb_readl(&tracer, ETBR_READADDR); - etb_st = etb_readl(&tracer, ETBR_STATUS); - etb_fc = etb_readl(&tracer, ETBR_FORMATTERCTRL); - etb_lock(&tracer); - - etm_unlock(&tracer); - etm_ctrl = etm_readl(&tracer, ETMR_CTRL); - etm_st = etm_readl(&tracer, ETMR_STATUS); - etm_lock(&tracer); + mutex_lock(&tracer.mutex); + if (tracer.etb_regs) { + etb_unlock(&tracer); + datalen = etb_getdatalen(&tracer); + etb_wa = etb_readl(&tracer, ETBR_WRITEADDR); + etb_ra = etb_readl(&tracer, ETBR_READADDR); + etb_st = etb_readl(&tracer, ETBR_STATUS); + etb_fc = etb_readl(&tracer, ETBR_FORMATTERCTRL); + etb_lock(&tracer); + } else { + etb_wa = etb_ra = etb_st = etb_fc = ~0; + datalen = -1; + } - return sprintf(buf, "Trace buffer len: %d\nComparator pairs: %d\n" + ret = sprintf(buf, "Trace buffer len: %d\nComparator pairs: %d\n" "ETBR_WRITEADDR:\t%08x\n" "ETBR_READADDR:\t%08x\n" "ETBR_STATUS:\t%08x\n" - "ETBR_FORMATTERCTRL:\t%08x\n" - "ETMR_CTRL:\t%08x\n" - "ETMR_STATUS:\t%08x\n", + "ETBR_FORMATTERCTRL:\t%08x\n", datalen, tracer.ncmppairs, etb_wa, etb_ra, etb_st, - etb_fc, + etb_fc + ); + + for (id = 0; id < tracer.etm_regs_count; id++) { + etm_unlock(&tracer, id); + etm_ctrl = etm_readl(&tracer, id, ETMR_CTRL); + etm_st = etm_readl(&tracer, id, ETMR_STATUS); + etm_lock(&tracer, id); + ret += sprintf(buf + ret, "ETMR_CTRL:\t%08x\n" + "ETMR_STATUS:\t%08x\n", etm_ctrl, etm_st ); + } + mutex_unlock(&tracer.mutex); + + return ret; } static struct kobj_attribute trace_info_attr = @@ -528,42 +655,260 @@ static ssize_t trace_mode_store(struct kobject *kobj, static struct kobj_attribute trace_mode_attr = __ATTR(trace_mode, 0644, trace_mode_show, trace_mode_store); +static ssize_t trace_contextid_size_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + /* 0: No context id tracing, 1: One byte, 2: Two bytes, 3: Four bytes */ + return sprintf(buf, "%d\n", (1 << tracer.etm_contextid_size) >> 1); +} + +static ssize_t trace_contextid_size_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned int contextid_size; + + if (sscanf(buf, "%u", &contextid_size) != 1) + return -EINVAL; + + if (contextid_size == 3 || contextid_size > 4) + return -EINVAL; + + mutex_lock(&tracer.mutex); + tracer.etm_contextid_size = fls(contextid_size); + mutex_unlock(&tracer.mutex); + + return n; +} + +static struct kobj_attribute trace_contextid_size_attr = + __ATTR(trace_contextid_size, 0644, + trace_contextid_size_show, trace_contextid_size_store); + +static ssize_t trace_branch_output_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", !!(tracer.flags & TRACER_BRANCHOUTPUT)); +} + +static ssize_t trace_branch_output_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned int branch_output; + + if (sscanf(buf, "%u", &branch_output) != 1) + return -EINVAL; + + mutex_lock(&tracer.mutex); + if (branch_output) { + tracer.flags |= TRACER_BRANCHOUTPUT; + /* Branch broadcasting is incompatible with the return stack */ + tracer.flags &= ~TRACER_RETURN_STACK; + } else { + tracer.flags &= ~TRACER_BRANCHOUTPUT; + } + mutex_unlock(&tracer.mutex); + + return n; +} + +static struct kobj_attribute trace_branch_output_attr = + __ATTR(trace_branch_output, 0644, + trace_branch_output_show, trace_branch_output_store); + +static ssize_t trace_return_stack_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", !!(tracer.flags & TRACER_RETURN_STACK)); +} + +static ssize_t trace_return_stack_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned int return_stack; + + if (sscanf(buf, "%u", &return_stack) != 1) + return -EINVAL; + + mutex_lock(&tracer.mutex); + if (return_stack) { + tracer.flags |= TRACER_RETURN_STACK; + /* Return stack is incompatible with branch broadcasting */ + tracer.flags &= ~TRACER_BRANCHOUTPUT; + } else { + tracer.flags &= ~TRACER_RETURN_STACK; + } + mutex_unlock(&tracer.mutex); + + return n; +} + +static struct kobj_attribute trace_return_stack_attr = + __ATTR(trace_return_stack, 0644, + trace_return_stack_show, trace_return_stack_store); + +static ssize_t trace_timestamp_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", !!(tracer.flags & TRACER_TIMESTAMP)); +} + +static ssize_t trace_timestamp_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned int timestamp; + + if (sscanf(buf, "%u", ×tamp) != 1) + return -EINVAL; + + mutex_lock(&tracer.mutex); + if (timestamp) + tracer.flags |= TRACER_TIMESTAMP; + else + tracer.flags &= ~TRACER_TIMESTAMP; + mutex_unlock(&tracer.mutex); + + return n; +} + +static struct kobj_attribute trace_timestamp_attr = + __ATTR(trace_timestamp, 0644, + trace_timestamp_show, trace_timestamp_store); + +static ssize_t trace_range_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%08lx %08lx\n", + tracer.range_start, tracer.range_end); +} + +static ssize_t trace_range_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned long range_start, range_end; + + if (sscanf(buf, "%lx %lx", &range_start, &range_end) != 2) + return -EINVAL; + + mutex_lock(&tracer.mutex); + tracer.range_start = range_start; + tracer.range_end = range_end; + mutex_unlock(&tracer.mutex); + + return n; +} + + +static struct kobj_attribute trace_range_attr = + __ATTR(trace_range, 0644, trace_range_show, trace_range_store); + +static ssize_t trace_data_range_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + unsigned long range_start; + u64 range_end; + mutex_lock(&tracer.mutex); + range_start = tracer.data_range_start; + range_end = tracer.data_range_end; + if (!range_end && (tracer.flags & TRACER_TRACE_DATA)) + range_end = 0x100000000ULL; + mutex_unlock(&tracer.mutex); + return sprintf(buf, "%08lx %08llx\n", range_start, range_end); +} + +static ssize_t trace_data_range_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned long range_start; + u64 range_end; + + if (sscanf(buf, "%lx %llx", &range_start, &range_end) != 2) + return -EINVAL; + + mutex_lock(&tracer.mutex); + tracer.data_range_start = range_start; + tracer.data_range_end = (unsigned long)range_end; + if (range_end) + tracer.flags |= TRACER_TRACE_DATA; + else + tracer.flags &= ~TRACER_TRACE_DATA; + mutex_unlock(&tracer.mutex); + + return n; +} + + +static struct kobj_attribute trace_data_range_attr = + __ATTR(trace_data_range, 0644, + trace_data_range_show, trace_data_range_store); + static int etm_probe(struct amba_device *dev, const struct amba_id *id) { struct tracectx *t = &tracer; int ret = 0; + void __iomem **new_regs; + int new_count; + u32 etmccr; + u32 etmidr; + u32 etmccer = 0; + u8 etm_version = 0; + + mutex_lock(&t->mutex); + new_count = t->etm_regs_count + 1; + new_regs = krealloc(t->etm_regs, + sizeof(t->etm_regs[0]) * new_count, GFP_KERNEL); - if (t->etm_regs) { - dev_dbg(&dev->dev, "ETM already initialized\n"); - ret = -EBUSY; + if (!new_regs) { + dev_dbg(&dev->dev, "Failed to allocate ETM register array\n"); + ret = -ENOMEM; goto out; } + t->etm_regs = new_regs; ret = amba_request_regions(dev, NULL); if (ret) goto out; - t->etm_regs = ioremap_nocache(dev->res.start, resource_size(&dev->res)); - if (!t->etm_regs) { + t->etm_regs[t->etm_regs_count] = + ioremap_nocache(dev->res.start, resource_size(&dev->res)); + if (!t->etm_regs[t->etm_regs_count]) { ret = -ENOMEM; goto out_release; } - amba_set_drvdata(dev, t); + amba_set_drvdata(dev, t->etm_regs[t->etm_regs_count]); - mutex_init(&t->mutex); - t->dev = &dev->dev; - t->flags = TRACER_CYCLE_ACC; + t->flags = TRACER_CYCLE_ACC | TRACER_TRACE_DATA | TRACER_BRANCHOUTPUT; t->etm_portsz = 1; + t->etm_contextid_size = 3; - etm_unlock(t); - (void)etm_readl(t, ETMMR_PDSR); + etm_unlock(t, t->etm_regs_count); + (void)etm_readl(t, t->etm_regs_count, ETMMR_PDSR); /* dummy first read */ - (void)etm_readl(&tracer, ETMMR_OSSRR); - - t->ncmppairs = etm_readl(t, ETMR_CONFCODE) & 0xf; - etm_writel(t, 0x440, ETMR_CTRL); - etm_lock(t); + (void)etm_readl(&tracer, t->etm_regs_count, ETMMR_OSSRR); + + etmccr = etm_readl(t, t->etm_regs_count, ETMR_CONFCODE); + t->ncmppairs = etmccr & 0xf; + if (etmccr & ETMCCR_ETMIDR_PRESENT) { + etmidr = etm_readl(t, t->etm_regs_count, ETMR_ID); + etm_version = ETMIDR_VERSION(etmidr); + if (etm_version >= ETMIDR_VERSION_3_1) + etmccer = etm_readl(t, t->etm_regs_count, ETMR_CCE); + } + etm_writel(t, t->etm_regs_count, 0x441, ETMR_CTRL); + etm_writel(t, t->etm_regs_count, new_count, ETMR_TRACEIDR); + etm_lock(t, t->etm_regs_count); ret = sysfs_create_file(&dev->dev.kobj, &trace_running_attr.attr); @@ -579,32 +924,97 @@ static int etm_probe(struct amba_device *dev, const struct amba_id *id) if (ret) dev_dbg(&dev->dev, "Failed to create trace_mode in sysfs\n"); - dev_dbg(t->dev, "ETM AMBA driver initialized.\n"); + ret = sysfs_create_file(&dev->dev.kobj, + &trace_contextid_size_attr.attr); + if (ret) + dev_dbg(&dev->dev, + "Failed to create trace_contextid_size in sysfs\n"); + + ret = sysfs_create_file(&dev->dev.kobj, + &trace_branch_output_attr.attr); + if (ret) + dev_dbg(&dev->dev, + "Failed to create trace_branch_output in sysfs\n"); + + if (etmccer & ETMCCER_RETURN_STACK_IMPLEMENTED) { + ret = sysfs_create_file(&dev->dev.kobj, + &trace_return_stack_attr.attr); + if (ret) + dev_dbg(&dev->dev, + "Failed to create trace_return_stack in sysfs\n"); + } + + if (etmccer & ETMCCER_TIMESTAMPING_IMPLEMENTED) { + ret = sysfs_create_file(&dev->dev.kobj, + &trace_timestamp_attr.attr); + if (ret) + dev_dbg(&dev->dev, + "Failed to create trace_timestamp in sysfs\n"); + } + + ret = sysfs_create_file(&dev->dev.kobj, &trace_range_attr.attr); + if (ret) + dev_dbg(&dev->dev, "Failed to create trace_range in sysfs\n"); + + if (etm_version < ETMIDR_VERSION_PFT_1_0) { + ret = sysfs_create_file(&dev->dev.kobj, + &trace_data_range_attr.attr); + if (ret) + dev_dbg(&dev->dev, + "Failed to create trace_data_range in sysfs\n"); + } else { + tracer.flags &= ~TRACER_TRACE_DATA; + } + + dev_dbg(&dev->dev, "ETM AMBA driver initialized.\n"); + + /* Enable formatter if there are multiple trace sources */ + if (new_count > 1) + t->etb_fc = ETBFF_ENFCONT | ETBFF_ENFTC; + + t->etm_regs_count = new_count; out: + mutex_unlock(&t->mutex); return ret; out_unmap: - iounmap(t->etm_regs); + iounmap(t->etm_regs[t->etm_regs_count]); out_release: amba_release_regions(dev); + mutex_unlock(&t->mutex); return ret; } static int etm_remove(struct amba_device *dev) { - struct tracectx *t = amba_get_drvdata(dev); - - iounmap(t->etm_regs); - t->etm_regs = NULL; - - amba_release_regions(dev); + int i; + struct tracectx *t = &tracer; + void __iomem *etm_regs = amba_get_drvdata(dev); sysfs_remove_file(&dev->dev.kobj, &trace_running_attr.attr); sysfs_remove_file(&dev->dev.kobj, &trace_info_attr.attr); sysfs_remove_file(&dev->dev.kobj, &trace_mode_attr.attr); + sysfs_remove_file(&dev->dev.kobj, &trace_range_attr.attr); + sysfs_remove_file(&dev->dev.kobj, &trace_data_range_attr.attr); + + mutex_lock(&t->mutex); + for (i = 0; i < t->etm_regs_count; i++) + if (t->etm_regs[i] == etm_regs) + break; + for (; i < t->etm_regs_count - 1; i++) + t->etm_regs[i] = t->etm_regs[i + 1]; + t->etm_regs_count--; + if (!t->etm_regs_count) { + kfree(t->etm_regs); + t->etm_regs = NULL; + } + mutex_unlock(&t->mutex); + + iounmap(etm_regs); + amba_release_regions(dev); return 0; } @@ -614,6 +1024,10 @@ static struct amba_id etm_ids[] = { .id = 0x0003b921, .mask = 0x0007ffff, }, + { + .id = 0x0003b950, + .mask = 0x0007ffff, + }, { 0, 0 }, }; @@ -631,6 +1045,8 @@ static int __init etm_init(void) { int retval; + mutex_init(&tracer.mutex); + retval = amba_driver_register(&etb_driver); if (retval) { printk(KERN_ERR "Failed to register etb\n"); diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index af9a8a927a4e..b8c75e45a950 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -15,6 +15,7 @@ #include <linux/ftrace.h> #include <linux/uaccess.h> #include <linux/module.h> +#include <linux/stop_machine.h> #include <asm/cacheflush.h> #include <asm/opcodes.h> @@ -35,6 +36,22 @@ #define OLD_NOP 0xe1a00000 /* mov r0, r0 */ +static int __ftrace_modify_code(void *data) +{ + int *command = data; + + set_kernel_text_rw(); + ftrace_modify_all_code(*command); + set_kernel_text_ro(); + + return 0; +} + +void arch_ftrace_update_code(int command) +{ + stop_machine(__ftrace_modify_code, &command, NULL); +} + static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec) { return rec->arch.old_mcount ? OLD_NOP : NOP; @@ -73,6 +90,8 @@ int ftrace_arch_code_modify_prepare(void) int ftrace_arch_code_modify_post_process(void) { set_all_modules_text_ro(); + /* Make sure any TLB misses during machine stop are cleared. */ + flush_tlb_all(); return 0; } diff --git a/arch/arm/kernel/jump_label.c b/arch/arm/kernel/jump_label.c index 4ce4f789446d..afeeb9ea6f43 100644 --- a/arch/arm/kernel/jump_label.c +++ b/arch/arm/kernel/jump_label.c @@ -19,7 +19,7 @@ static void __arch_jump_label_transform(struct jump_entry *entry, insn = arm_gen_nop(); if (is_static) - __patch_text(addr, insn); + __patch_text_early(addr, insn); else patch_text(addr, insn); } diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index a74b53c1b7df..07f5059dac5e 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -12,8 +12,12 @@ #include <linux/irq.h> #include <linux/kdebug.h> #include <linux/kgdb.h> +#include <linux/uaccess.h> + #include <asm/traps.h> +#include "patch.h" + struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { { "r0", 4, offsetof(struct pt_regs, ARM_r0)}, @@ -144,6 +148,8 @@ int kgdb_arch_handle_exception(int exception_vector, int signo, static int kgdb_brk_fn(struct pt_regs *regs, unsigned int instr) { + if (user_mode(regs)) + return -1; kgdb_handle_exception(1, SIGTRAP, 0, regs); return 0; @@ -151,6 +157,8 @@ static int kgdb_brk_fn(struct pt_regs *regs, unsigned int instr) static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr) { + if (user_mode(regs)) + return -1; compiled_break = 1; kgdb_handle_exception(1, SIGTRAP, 0, regs); @@ -244,6 +252,33 @@ void kgdb_arch_exit(void) unregister_die_notifier(&kgdb_notifier); } +int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) +{ + int err; + + /* patch_text() only supports int-sized breakpoints */ + BUILD_BUG_ON(sizeof(int) != BREAK_INSTR_SIZE); + + err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + BREAK_INSTR_SIZE); + if (err) + return err; + + /* Machine is already stopped, so we can use __patch_text() directly */ + __patch_text((void *)bpt->bpt_addr, + *(unsigned int *)arch_kgdb_ops.gdb_bpt_instr); + + return err; +} + +int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) +{ + /* Machine is already stopped, so we can use __patch_text() directly */ + __patch_text((void *)bpt->bpt_addr, *(unsigned int *)bpt->saved_instr); + + return 0; +} + /* * Register our undef instruction hooks with ARM undef core. * We regsiter a hook specifically looking for the KGB break inst diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 8cf0996aa1a8..4423a565ef6f 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -29,6 +29,7 @@ extern unsigned long kexec_boot_atags; static atomic_t waiting_for_crash_ipi; +static unsigned long dt_mem; /* * Provide a dummy crash_notes definition while crash dump arrives to arm. * This prevents breakage of crash_notes attribute in kernel/ksysfs.c. @@ -64,7 +65,7 @@ int machine_kexec_prepare(struct kimage *image) return err; if (be32_to_cpu(header) == OF_DT_HEADER) - kexec_boot_atags = current_segment->mem; + dt_mem = current_segment->mem; } return 0; } @@ -163,12 +164,12 @@ void machine_kexec(struct kimage *image) reboot_code_buffer = page_address(image->control_code_page); /* Prepare parameters for reboot_code_buffer*/ + set_kernel_text_rw(); kexec_start_address = image->start; kexec_indirection_page = page_list; kexec_mach_type = machine_arch_type; - if (!kexec_boot_atags) - kexec_boot_atags = image->start - KEXEC_ARM_ZIMAGE_OFFSET + KEXEC_ARM_ATAGS_OFFSET; - + kexec_boot_atags = dt_mem ?: image->start - KEXEC_ARM_ZIMAGE_OFFSET + + KEXEC_ARM_ATAGS_OFFSET; /* copy our kernel relocation code to the control code page */ reboot_entry = fncpy(reboot_code_buffer, diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c index 07314af47733..5038960e3c55 100644 --- a/arch/arm/kernel/patch.c +++ b/arch/arm/kernel/patch.c @@ -1,8 +1,11 @@ #include <linux/kernel.h> +#include <linux/spinlock.h> #include <linux/kprobes.h> +#include <linux/mm.h> #include <linux/stop_machine.h> #include <asm/cacheflush.h> +#include <asm/fixmap.h> #include <asm/smp_plat.h> #include <asm/opcodes.h> @@ -13,21 +16,77 @@ struct patch { unsigned int insn; }; -void __kprobes __patch_text(void *addr, unsigned int insn) +static DEFINE_SPINLOCK(patch_lock); + +static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) + __acquires(&patch_lock) +{ + unsigned int uintaddr = (uintptr_t) addr; + bool module = !core_kernel_text(uintaddr); + struct page *page; + + if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX)) + page = vmalloc_to_page(addr); + else if (!module && IS_ENABLED(CONFIG_DEBUG_RODATA)) + page = virt_to_page(addr); + else + return addr; + + if (flags) + spin_lock_irqsave(&patch_lock, *flags); + else + __acquire(&patch_lock); + + set_fixmap(fixmap, page_to_phys(page)); + + return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK)); +} + +static void __kprobes patch_unmap(int fixmap, unsigned long *flags) + __releases(&patch_lock) +{ + clear_fixmap(fixmap); + + if (flags) + spin_unlock_irqrestore(&patch_lock, *flags); + else + __release(&patch_lock); +} + +void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap) { bool thumb2 = IS_ENABLED(CONFIG_THUMB2_KERNEL); + unsigned int uintaddr = (uintptr_t) addr; + bool twopage = false; + unsigned long flags; + void *waddr = addr; int size; + if (remap) + waddr = patch_map(addr, FIX_TEXT_POKE0, &flags); + else + __acquire(&patch_lock); + if (thumb2 && __opcode_is_thumb16(insn)) { - *(u16 *)addr = __opcode_to_mem_thumb16(insn); + *(u16 *)waddr = __opcode_to_mem_thumb16(insn); size = sizeof(u16); - } else if (thumb2 && ((uintptr_t)addr & 2)) { + } else if (thumb2 && (uintaddr & 2)) { u16 first = __opcode_thumb32_first(insn); u16 second = __opcode_thumb32_second(insn); - u16 *addrh = addr; + u16 *addrh0 = waddr; + u16 *addrh1 = waddr + 2; + + twopage = (uintaddr & ~PAGE_MASK) == PAGE_SIZE - 2; + if (twopage && remap) + addrh1 = patch_map(addr + 2, FIX_TEXT_POKE1, NULL); + + *addrh0 = __opcode_to_mem_thumb16(first); + *addrh1 = __opcode_to_mem_thumb16(second); - addrh[0] = __opcode_to_mem_thumb16(first); - addrh[1] = __opcode_to_mem_thumb16(second); + if (twopage && addrh1 != addr + 2) { + flush_kernel_vmap_range(addrh1, 2); + patch_unmap(FIX_TEXT_POKE1, NULL); + } size = sizeof(u32); } else { @@ -36,10 +95,16 @@ void __kprobes __patch_text(void *addr, unsigned int insn) else insn = __opcode_to_mem_arm(insn); - *(u32 *)addr = insn; + *(u32 *)waddr = insn; size = sizeof(u32); } + if (waddr != addr) { + flush_kernel_vmap_range(waddr, twopage ? size / 2 : size); + patch_unmap(FIX_TEXT_POKE0, &flags); + } else + __release(&patch_lock); + flush_icache_range((uintptr_t)(addr), (uintptr_t)(addr) + size); } @@ -60,16 +125,5 @@ void __kprobes patch_text(void *addr, unsigned int insn) .insn = insn, }; - if (cache_ops_need_broadcast()) { - stop_machine(patch_text_stop_machine, &patch, cpu_online_mask); - } else { - bool straddles_word = IS_ENABLED(CONFIG_THUMB2_KERNEL) - && __opcode_is_thumb32(insn) - && ((uintptr_t)addr & 2); - - if (straddles_word) - stop_machine(patch_text_stop_machine, &patch, NULL); - else - __patch_text(addr, insn); - } + stop_machine(patch_text_stop_machine, &patch, NULL); } diff --git a/arch/arm/kernel/patch.h b/arch/arm/kernel/patch.h index b4731f2dac38..77e054c2f6cd 100644 --- a/arch/arm/kernel/patch.h +++ b/arch/arm/kernel/patch.h @@ -2,6 +2,16 @@ #define _ARM_KERNEL_PATCH_H void patch_text(void *addr, unsigned int insn); -void __patch_text(void *addr, unsigned int insn); +void __patch_text_real(void *addr, unsigned int insn, bool remap); + +static inline void __patch_text(void *addr, unsigned int insn) +{ + __patch_text_real(addr, insn, true); +} + +static inline void __patch_text_early(void *addr, unsigned int insn) +{ + __patch_text_real(addr, insn, false); +} #endif diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index ecefea4e2929..617ec4ab4d81 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -32,6 +32,7 @@ #include <linux/hw_breakpoint.h> #include <linux/leds.h> #include <linux/reboot.h> +#include <linux/console.h> #include <asm/cacheflush.h> #include <asm/idmap.h> @@ -60,9 +61,46 @@ static const char *isa_modes[] __maybe_unused = { "ARM" , "Thumb" , "Jazelle", "ThumbEE" }; +#ifdef CONFIG_SMP +void arch_trigger_all_cpu_backtrace(void) +{ + smp_send_all_cpu_backtrace(); +} +#else +void arch_trigger_all_cpu_backtrace(void) +{ + dump_stack(); +} +#endif + extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); typedef void (*phys_reset_t)(unsigned long); +#ifdef CONFIG_ARM_FLUSH_CONSOLE_ON_RESTART +void arm_machine_flush_console(void) +{ + printk("\n"); + pr_emerg("Restarting %s\n", linux_banner); + if (console_trylock()) { + console_unlock(); + return; + } + + mdelay(50); + + local_irq_disable(); + if (!console_trylock()) + pr_emerg("arm_restart: Console was locked! Busting\n"); + else + pr_emerg("arm_restart: Console was locked!\n"); + console_unlock(); +} +#else +void arm_machine_flush_console(void) +{ +} +#endif + /* * A temporary stack to use for CPU reset. This is static so that we * don't clobber it with the identity mapping. When running with this @@ -154,6 +192,7 @@ void arch_cpu_idle_prepare(void) void arch_cpu_idle_enter(void) { + idle_notifier_call_chain(IDLE_START); ledtrig_cpu(CPU_LED_IDLE_START); #ifdef CONFIG_PL310_ERRATA_769419 wmb(); @@ -163,6 +202,7 @@ void arch_cpu_idle_enter(void) void arch_cpu_idle_exit(void) { ledtrig_cpu(CPU_LED_IDLE_END); + idle_notifier_call_chain(IDLE_END); } #ifdef CONFIG_HOTPLUG_CPU @@ -183,6 +223,16 @@ void arch_cpu_idle_dead(void) */ void machine_shutdown(void) { +#ifdef CONFIG_SMP + /* + * Disable preemption so we're guaranteed to + * run to power off or reboot and prevent + * the possibility of switching to another + * thread that might wind up blocking on + * one of the stopped CPUs. + */ + preempt_disable(); +#endif disable_nonboot_cpus(); } @@ -231,6 +281,11 @@ void machine_restart(char *cmd) local_irq_disable(); smp_send_stop(); + + /* Flush the console to make sure all the relevant messages make it + * out to the console drivers */ + arm_machine_flush_console(); + if (arm_pm_restart) arm_pm_restart(reboot_mode, cmd); else @@ -245,6 +300,77 @@ void machine_restart(char *cmd) while (1); } +/* + * dump a block of kernel memory from around the given address + */ +static void show_data(unsigned long addr, int nbytes, const char *name) +{ + int i, j; + int nlines; + u32 *p; + + /* + * don't attempt to dump non-kernel addresses or + * values that are probably just small negative numbers + */ + if (addr < PAGE_OFFSET || addr > -256UL) + return; + + printk("\n%s: %#lx:\n", name, addr); + + /* + * round address down to a 32 bit boundary + * and always dump a multiple of 32 bytes + */ + p = (u32 *)(addr & ~(sizeof(u32) - 1)); + nbytes += (addr & (sizeof(u32) - 1)); + nlines = (nbytes + 31) / 32; + + + for (i = 0; i < nlines; i++) { + /* + * just display low 16 bits of address to keep + * each line of the dump < 80 characters + */ + printk("%04lx ", (unsigned long)p & 0xffff); + for (j = 0; j < 8; j++) { + u32 data; + if (probe_kernel_address(p, data)) { + printk(" ********"); + } else { + printk(" %08x", data); + } + ++p; + } + printk("\n"); + } +} + +static void show_extra_register_data(struct pt_regs *regs, int nbytes) +{ + mm_segment_t fs; + + fs = get_fs(); + set_fs(KERNEL_DS); + show_data(regs->ARM_pc - nbytes, nbytes * 2, "PC"); + show_data(regs->ARM_lr - nbytes, nbytes * 2, "LR"); + show_data(regs->ARM_sp - nbytes, nbytes * 2, "SP"); + show_data(regs->ARM_ip - nbytes, nbytes * 2, "IP"); + show_data(regs->ARM_fp - nbytes, nbytes * 2, "FP"); + show_data(regs->ARM_r0 - nbytes, nbytes * 2, "R0"); + show_data(regs->ARM_r1 - nbytes, nbytes * 2, "R1"); + show_data(regs->ARM_r2 - nbytes, nbytes * 2, "R2"); + show_data(regs->ARM_r3 - nbytes, nbytes * 2, "R3"); + show_data(regs->ARM_r4 - nbytes, nbytes * 2, "R4"); + show_data(regs->ARM_r5 - nbytes, nbytes * 2, "R5"); + show_data(regs->ARM_r6 - nbytes, nbytes * 2, "R6"); + show_data(regs->ARM_r7 - nbytes, nbytes * 2, "R7"); + show_data(regs->ARM_r8 - nbytes, nbytes * 2, "R8"); + show_data(regs->ARM_r9 - nbytes, nbytes * 2, "R9"); + show_data(regs->ARM_r10 - nbytes, nbytes * 2, "R10"); + set_fs(fs); +} + void __show_regs(struct pt_regs *regs) { unsigned long flags; @@ -306,6 +432,8 @@ void __show_regs(struct pt_regs *regs) printk("Control: %08x%s\n", ctrl, buf); } #endif + + show_extra_register_data(regs, 128); } void show_regs(struct pt_regs * regs) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index a8e32aaf0383..9ce02e6e3963 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -72,6 +72,7 @@ enum ipi_msg_type { IPI_CPU_STOP, IPI_IRQ_WORK, IPI_COMPLETION, + IPI_CPU_BACKTRACE, }; static DECLARE_COMPLETION(cpu_running); @@ -456,6 +457,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = { S(IPI_CPU_STOP, "CPU stop interrupts"), S(IPI_IRQ_WORK, "IRQ work interrupts"), S(IPI_COMPLETION, "completion interrupts"), + S(IPI_CPU_BACKTRACE, "CPU backtrace"), }; static void smp_cross_call(const struct cpumask *target, unsigned int ipinr) @@ -557,6 +559,58 @@ static void ipi_complete(unsigned int cpu) complete(per_cpu(cpu_completion, cpu)); } +static cpumask_t backtrace_mask; +static DEFINE_RAW_SPINLOCK(backtrace_lock); + +/* "in progress" flag of arch_trigger_all_cpu_backtrace */ +static unsigned long backtrace_flag; + +void smp_send_all_cpu_backtrace(void) +{ + unsigned int this_cpu = smp_processor_id(); + int i; + + if (test_and_set_bit(0, &backtrace_flag)) + /* + * If there is already a trigger_all_cpu_backtrace() in progress + * (backtrace_flag == 1), don't output double cpu dump infos. + */ + return; + + cpumask_copy(&backtrace_mask, cpu_online_mask); + cpu_clear(this_cpu, backtrace_mask); + + pr_info("Backtrace for cpu %d (current):\n", this_cpu); + dump_stack(); + + pr_info("\nsending IPI to all other CPUs:\n"); + smp_cross_call(&backtrace_mask, IPI_CPU_BACKTRACE); + + /* Wait for up to 10 seconds for all other CPUs to do the backtrace */ + for (i = 0; i < 10 * 1000; i++) { + if (cpumask_empty(&backtrace_mask)) + break; + mdelay(1); + } + + clear_bit(0, &backtrace_flag); + smp_mb__after_atomic(); +} + +/* + * ipi_cpu_backtrace - handle IPI from smp_send_all_cpu_backtrace() + */ +static void ipi_cpu_backtrace(unsigned int cpu, struct pt_regs *regs) +{ + if (cpu_isset(cpu, backtrace_mask)) { + raw_spin_lock(&backtrace_lock); + pr_warning("IPI backtrace for cpu %d\n", cpu); + show_regs(regs); + raw_spin_unlock(&backtrace_lock); + cpu_clear(cpu, backtrace_mask); + } +} + /* * Main handler for inter-processor interrupts */ @@ -623,6 +677,10 @@ void handle_IPI(int ipinr, struct pt_regs *regs) irq_exit(); break; + case IPI_CPU_BACKTRACE: + ipi_cpu_backtrace(cpu, regs); + break; + default: printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 8e95aa47457a..3afcb6c2cf06 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -8,6 +8,9 @@ #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/page.h> +#ifdef CONFIG_ARM_KERNMEM_PERMS +#include <asm/pgtable.h> +#endif #define PROC_INFO \ . = ALIGN(4); \ @@ -90,6 +93,11 @@ SECTIONS _text = .; HEAD_TEXT } + +#ifdef CONFIG_ARM_KERNMEM_PERMS + . = ALIGN(1<<SECTION_SHIFT); +#endif + .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ __exception_text_start = .; @@ -112,6 +120,9 @@ SECTIONS ARM_CPU_KEEP(PROC_INFO) } +#ifdef CONFIG_DEBUG_RODATA + . = ALIGN(1<<SECTION_SHIFT); +#endif RO_DATA(PAGE_SIZE) . = ALIGN(4); @@ -145,7 +156,11 @@ SECTIONS _etext = .; /* End of text and rodata section */ #ifndef CONFIG_XIP_KERNEL +# ifdef CONFIG_ARM_KERNMEM_PERMS + . = ALIGN(1<<SECTION_SHIFT); +# else . = ALIGN(PAGE_SIZE); +# endif __init_begin = .; #endif /* @@ -219,8 +234,12 @@ SECTIONS __data_loc = ALIGN(4); /* location in binary */ . = PAGE_OFFSET + TEXT_OFFSET; #else - . = ALIGN(THREAD_SIZE); __init_end = .; +#ifdef CONFIG_ARM_KERNMEM_PERMS + . = ALIGN(1<<SECTION_SHIFT); +#else + . = ALIGN(THREAD_SIZE); +#endif __data_loc = .; #endif diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 7eb94e6fc376..bc219b303bc7 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -1009,3 +1009,24 @@ config ARCH_SUPPORTS_BIG_ENDIAN help This option specifies the architecture can support big endian operation. + +config ARM_KERNMEM_PERMS + bool "Restrict kernel memory permissions" + help + If this is set, kernel memory other than kernel text (and rodata) + will be made non-executable. The tradeoff is that each region is + padded to section-size (1MiB) boundaries (because their permissions + are different and splitting the 1M pages into 4K ones causes TLB + performance problems), wasting memory. + +config DEBUG_RODATA + bool "Make kernel text and rodata read-only" + depends on ARM_KERNMEM_PERMS + default y + help + If this is set, kernel text and rodata will be made read-only. This + is to help catch accidental or malicious attempts to change the + kernel's executable code. Additionally splits rodata from kernel + text so it can be made explicitly non-executable. This creates + another section-size padded region, so it can waste more memory + space while gaining the read-only protections. diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S index 24659952c278..11da0f50a1fe 100644 --- a/arch/arm/mm/cache-v6.S +++ b/arch/arm/mm/cache-v6.S @@ -270,6 +270,11 @@ v6_dma_clean_range: * - end - virtual end address of region */ ENTRY(v6_dma_flush_range) +#ifdef CONFIG_CACHE_FLUSH_RANGE_LIMIT + sub r2, r1, r0 + cmp r2, #CONFIG_CACHE_FLUSH_RANGE_LIMIT + bhi v6_dma_flush_dcache_all +#endif #ifdef CONFIG_DMA_CACHE_RWFO ldrb r2, [r0] @ read for ownership strb r2, [r0] @ write for ownership @@ -292,6 +297,18 @@ ENTRY(v6_dma_flush_range) mcr p15, 0, r0, c7, c10, 4 @ drain write buffer ret lr +#ifdef CONFIG_CACHE_FLUSH_RANGE_LIMIT +v6_dma_flush_dcache_all: + mov r0, #0 +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c14, 0 @ D cache clean+invalidate +#else + mcr p15, 0, r0, c7, c15, 0 @ Cache clean+invalidate +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mov pc, lr +#endif + /* * dma_map_area(start, size, dir) * - start - kernel virtual start address diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index eb8830a4c5ed..b784579545e8 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -274,10 +274,10 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) local_irq_enable(); /* - * If we're in an interrupt or have no user + * If we're in an interrupt, or have no irqs, or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (in_atomic() || irqs_disabled() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index e17ed00828d7..b98895d9fe57 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -18,19 +18,20 @@ #include <asm/tlbflush.h> #include "mm.h" -pte_t *fixmap_page_table; - static inline void set_fixmap_pte(int idx, pte_t pte) { unsigned long vaddr = __fix_to_virt(idx); - set_pte_ext(fixmap_page_table + idx, pte, 0); + pte_t *ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr); + + set_pte_ext(ptep, pte, 0); local_flush_tlb_kernel_page(vaddr); } static inline pte_t get_fixmap_pte(unsigned long vaddr) { - unsigned long idx = __virt_to_fix(vaddr); - return *(fixmap_page_table + idx); + pte_t *ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr); + + return *ptep; } void *kmap(struct page *page) @@ -84,7 +85,7 @@ void *kmap_atomic(struct page *page) * With debugging enabled, kunmap_atomic forces that entry to 0. * Make sure it was indeed properly unmapped. */ - BUG_ON(!pte_none(*(fixmap_page_table + idx))); + BUG_ON(!pte_none(get_fixmap_pte(vaddr))); #endif /* * When debugging is off, kunmap_atomic leaves the previous mapping @@ -137,7 +138,7 @@ void *kmap_atomic_pfn(unsigned long pfn) idx = type + KM_TYPE_NR * smp_processor_id(); vaddr = __fix_to_virt(idx); #ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(!pte_none(*(fixmap_page_table + idx))); + BUG_ON(!pte_none(get_fixmap_pte(vaddr))); #endif set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot)); diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 9481f85c56e6..4fe8ecf6a520 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -29,6 +29,7 @@ #include <asm/prom.h> #include <asm/sections.h> #include <asm/setup.h> +#include <asm/system_info.h> #include <asm/tlb.h> #include <asm/fixmap.h> @@ -570,7 +571,7 @@ void __init mem_init(void) MLK(DTCM_OFFSET, (unsigned long) dtcm_end), MLK(ITCM_OFFSET, (unsigned long) itcm_end), #endif - MLK(FIXADDR_START, FIXADDR_TOP), + MLK(FIXADDR_START, FIXADDR_END), MLM(VMALLOC_START, VMALLOC_END), MLM(PAGE_OFFSET, (unsigned long)high_memory), #ifdef CONFIG_HIGHMEM @@ -615,7 +616,145 @@ void __init mem_init(void) } } -void free_initmem(void) +#ifdef CONFIG_ARM_KERNMEM_PERMS +struct section_perm { + unsigned long start; + unsigned long end; + pmdval_t mask; + pmdval_t prot; + pmdval_t clear; +}; + +static struct section_perm nx_perms[] = { + /* Make pages tables, etc before _stext RW (set NX). */ + { + .start = PAGE_OFFSET, + .end = (unsigned long)_stext, + .mask = ~PMD_SECT_XN, + .prot = PMD_SECT_XN, + }, + /* Make init RW (set NX). */ + { + .start = (unsigned long)__init_begin, + .end = (unsigned long)_sdata, + .mask = ~PMD_SECT_XN, + .prot = PMD_SECT_XN, + }, +#ifdef CONFIG_DEBUG_RODATA + /* Make rodata NX (set RO in ro_perms below). */ + { + .start = (unsigned long)__start_rodata, + .end = (unsigned long)__init_begin, + .mask = ~PMD_SECT_XN, + .prot = PMD_SECT_XN, + }, +#endif +}; + +#ifdef CONFIG_DEBUG_RODATA +static struct section_perm ro_perms[] = { + /* Make kernel code and rodata RX (set RO). */ + { + .start = (unsigned long)_stext, + .end = (unsigned long)__init_begin, +#ifdef CONFIG_ARM_LPAE + .mask = ~PMD_SECT_RDONLY, + .prot = PMD_SECT_RDONLY, +#else + .mask = ~(PMD_SECT_APX | PMD_SECT_AP_WRITE), + .prot = PMD_SECT_APX | PMD_SECT_AP_WRITE, + .clear = PMD_SECT_AP_WRITE, +#endif + }, +}; +#endif + +/* + * Updates section permissions only for the current mm (sections are + * copied into each mm). During startup, this is the init_mm. Is only + * safe to be called with preemption disabled, as under stop_machine(). + */ +static inline void section_update(unsigned long addr, pmdval_t mask, + pmdval_t prot) +{ + struct mm_struct *mm; + pmd_t *pmd; + + mm = current->active_mm; + pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr); + +#ifdef CONFIG_ARM_LPAE + pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot); +#else + if (addr & SECTION_SIZE) + pmd[1] = __pmd((pmd_val(pmd[1]) & mask) | prot); + else + pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot); +#endif + flush_pmd_entry(pmd); + local_flush_tlb_kernel_range(addr, addr + SECTION_SIZE); +} + +/* Make sure extended page tables are in use. */ +static inline bool arch_has_strict_perms(void) +{ + if (cpu_architecture() < CPU_ARCH_ARMv6) + return false; + + return !!(get_cr() & CR_XP); +} + +#define set_section_perms(perms, field) { \ + size_t i; \ + unsigned long addr; \ + \ + if (!arch_has_strict_perms()) \ + return; \ + \ + for (i = 0; i < ARRAY_SIZE(perms); i++) { \ + if (!IS_ALIGNED(perms[i].start, SECTION_SIZE) || \ + !IS_ALIGNED(perms[i].end, SECTION_SIZE)) { \ + pr_err("BUG: section %lx-%lx not aligned to %lx\n", \ + perms[i].start, perms[i].end, \ + SECTION_SIZE); \ + continue; \ + } \ + \ + for (addr = perms[i].start; \ + addr < perms[i].end; \ + addr += SECTION_SIZE) \ + section_update(addr, perms[i].mask, \ + perms[i].field); \ + } \ +} + +static inline void fix_kernmem_perms(void) +{ + set_section_perms(nx_perms, prot); +} + +#ifdef CONFIG_DEBUG_RODATA +void mark_rodata_ro(void) +{ + set_section_perms(ro_perms, prot); +} + +void set_kernel_text_rw(void) +{ + set_section_perms(ro_perms, clear); +} + +void set_kernel_text_ro(void) +{ + set_section_perms(ro_perms, prot); +} +#endif /* CONFIG_DEBUG_RODATA */ + +#else +static inline void fix_kernmem_perms(void) { } +#endif /* CONFIG_ARM_KERNMEM_PERMS */ + +void free_tcmmem(void) { #ifdef CONFIG_HAVE_TCM extern char __tcm_start, __tcm_end; @@ -623,6 +762,12 @@ void free_initmem(void) poison_init_mem(&__tcm_start, &__tcm_end - &__tcm_start); free_reserved_area(&__tcm_start, &__tcm_end, -1, "TCM link"); #endif +} + +void free_initmem(void) +{ + fix_kernmem_perms(); + free_tcmmem(); poison_init_mem(__init_begin, __init_end - __init_begin); if (!machine_is_integrator() && !machine_is_cintegrator()) diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 5e85ed371364..290cbb310486 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -11,6 +11,10 @@ #include <linux/random.h> #include <asm/cachetype.h> +int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN; +int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX; +int mmap_rnd_bits = CONFIG_ARCH_MMAP_RND_BITS; + #define COLOUR_ALIGN(addr,pgoff) \ ((((addr)+SHMLBA-1)&~(SHMLBA-1)) + \ (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1))) @@ -173,10 +177,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm) { unsigned long random_factor = 0UL; - /* 8 bits of randomness in 20 address space bits */ if ((current->flags & PF_RANDOMIZE) && !(current->personality & ADDR_NO_RANDOMIZE)) - random_factor = (get_random_int() % (1 << 8)) << PAGE_SHIFT; + random_factor = (get_random_int() % (1 << mmap_rnd_bits)) << PAGE_SHIFT; if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index fb9d305c874b..d7c0e974695c 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -22,6 +22,7 @@ #include <asm/cputype.h> #include <asm/sections.h> #include <asm/cachetype.h> +#include <asm/fixmap.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/smp_plat.h> @@ -393,6 +394,29 @@ SET_MEMORY_FN(x, pte_set_x) SET_MEMORY_FN(nx, pte_set_nx) /* + * To avoid TLB flush broadcasts, this uses local_flush_tlb_kernel_range(). + * As a result, this can only be called with preemption disabled, as under + * stop_machine(). + */ +void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) +{ + unsigned long vaddr = __fix_to_virt(idx); + pte_t *pte = pte_offset_kernel(pmd_off_k(vaddr), vaddr); + + /* Make sure fixmap region does not exceed available allocation. */ + BUILD_BUG_ON(FIXADDR_START + (__end_of_fixed_addresses * PAGE_SIZE) > + FIXADDR_END); + BUG_ON(idx >= __end_of_fixed_addresses); + + if (pgprot_val(prot)) + set_pte_at(NULL, vaddr, pte, + pfn_pte(phys >> PAGE_SHIFT, prot)); + else + pte_clear(NULL, vaddr, pte); + local_flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE); +} + +/* * Adjust the PMD section entries according to the CPU in use. */ static void __init build_mem_type_table(void) @@ -1326,10 +1350,10 @@ static void __init kmap_init(void) #ifdef CONFIG_HIGHMEM pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE), PKMAP_BASE, _PAGE_KERNEL_TABLE); - - fixmap_page_table = early_pte_alloc(pmd_off_k(FIXADDR_START), - FIXADDR_START, _PAGE_KERNEL_TABLE); #endif + + early_pte_alloc(pmd_off_k(FIXADDR_START), FIXADDR_START, + _PAGE_KERNEL_TABLE); } static void __init map_lowmem(void) @@ -1349,13 +1373,20 @@ static void __init map_lowmem(void) if (start >= end) break; - if (end < kernel_x_start || start >= kernel_x_end) { + if (end < kernel_x_start) { map.pfn = __phys_to_pfn(start); map.virtual = __phys_to_virt(start); map.length = end - start; map.type = MT_MEMORY_RWX; create_mapping(&map); + } else if (start >= kernel_x_end) { + map.pfn = __phys_to_pfn(start); + map.virtual = __phys_to_virt(start); + map.length = end - start; + map.type = MT_MEMORY_RW; + + create_mapping(&map); } else { /* This better cover the entire kernel */ if (start < kernel_x_start) { diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6595637c62c6..c624b08d0de2 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -39,6 +39,7 @@ config ARM64 select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KGDB + select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_BPF_JIT select HAVE_C_RECORDMCOUNT @@ -90,6 +91,10 @@ config MMU config NO_IOPORT_MAP def_bool y if !PCI +config ILLEGAL_POINTER_VALUE + hex + default 0xdead000000000000 + config STACKTRACE_SUPPORT def_bool y @@ -480,6 +485,19 @@ config ARCH_HAS_CACHE_LINE_SIZE source "mm/Kconfig" +config SECCOMP + bool "Enable seccomp to safely compute untrusted bytecode" + ---help--- + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + config XEN_DOM0 def_bool y depends on XEN @@ -575,6 +593,7 @@ config SETEND_EMULATION be unexpected results in the applications. If unsure, say Y + endif endmenu @@ -589,6 +608,23 @@ config CMDLINE entering them here. As a minimum, you should specify the the root device (e.g. root=/dev/nfs). +choice + prompt "Kernel command line type" if CMDLINE != "" + default CMDLINE_FROM_BOOTLOADER + +config CMDLINE_FROM_BOOTLOADER + bool "Use bootloader kernel arguments if available" + help + Uses the command-line options passed by the boot loader. If + the boot loader doesn't provide any, the default kernel command + string provided in CMDLINE will be used. + +config CMDLINE_EXTEND + bool "Extend bootloader kernel arguments" + help + The command-line arguments provided by the boot loader will be + appended to the default kernel command string. + config CMDLINE_FORCE bool "Always use the default kernel command string" help @@ -596,6 +632,7 @@ config CMDLINE_FORCE loader passes other arguments to the kernel. This is useful if you cannot or don't want to change the command-line options your boot loader passes to the kernel. +endchoice config EFI_STUB bool @@ -617,6 +654,32 @@ config EFI allow the kernel to be booted as an EFI application. This is only useful on systems that have UEFI firmware. +config BUILD_ARM64_APPENDED_DTB_IMAGE + bool "Build a concatenated Image.gz/dtb by default" + depends on OF + help + Enabling this option will cause a concatenated Image.gz and list of + DTBs to be built by default (instead of a standalone Image.gz.) + The image will built in arch/arm64/boot/Image.gz-dtb + +config BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES + string "Default dtb names" + depends on BUILD_ARM64_APPENDED_DTB_IMAGE + help + Space separated list of names of dtbs to append when + building a concatenated Image.gz-dtb. + +config DMI + bool "Enable support for SMBIOS (DMI) tables" + depends on EFI + default y + help + This enables SMBIOS/DMI feature for systems. + + This option is only useful on systems that have UEFI firmware. + However, even with this option, the resultant kernel should + continue to boot on existing non-UEFI platforms. + endmenu menu "Userspace binary formats" diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index 8dd3a551c170..d6285ef9b5f9 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -6,6 +6,18 @@ config FRAME_POINTER bool default y +config ARM64_PTDUMP + bool "Export kernel pagetable layout to userspace via debugfs" + depends on DEBUG_KERNEL + select DEBUG_FS + help + Say Y here if you want to show the kernel pagetable layout in a + debugfs file. This information is only useful for kernel developers + who are working in architecture specific areas of the kernel. + It is probably not a good idea to enable this feature in a production + kernel. + If in doubt, say "N" + config STRICT_DEVMEM bool "Filter access to /dev/mem" depends on MMU @@ -54,6 +66,29 @@ config DEBUG_SET_MODULE_RONX against certain classes of kernel exploits. If in doubt, say "N". +config DEBUG_RODATA + bool "Make kernel text and rodata read-only" + help + If this is set, kernel text and rodata will be made read-only. This + is to help catch accidental or malicious attempts to change the + kernel's executable code. Additionally splits rodata from kernel + text so it can be made explicitly non-executable. + + If in doubt, say Y + +config DEBUG_ALIGN_RODATA + depends on DEBUG_RODATA && !ARM64_64K_PAGES + bool "Align linker sections up to SECTION_SIZE" + help + If this option is enabled, sections that may potentially be marked as + read only or non-executable will be aligned up to the section size of + the kernel. This prevents sections from being split into pages and + avoids a potential TLB penalty. The downside is an increase in + alignment and potentially wasted space. Turn on this option if + performance is more important than memory pressure. + + If in doubt, say N + source "drivers/hwtracing/coresight/Kconfig" endmenu diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7cf8a29169ea..9403ec6133bd 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -20,6 +20,7 @@ LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) KBUILD_DEFCONFIG := defconfig KBUILD_CFLAGS += -mgeneral-regs-only +KBUILD_CFLAGS += -fno-pic ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian AS += -EB @@ -58,7 +59,12 @@ libs-y += $(LIBGCC) libs-$(CONFIG_EFI_STUB) += drivers/firmware/efi/libstub/ # Default target when executing plain make +ifeq ($(CONFIG_BUILD_ARM64_APPENDED_DTB_IMAGE),y) +KBUILD_IMAGE := Image.gz-dtb +else KBUILD_IMAGE := Image.gz +endif + KBUILD_DTBS := dtbs all: $(KBUILD_IMAGE) $(KBUILD_DTBS) @@ -82,6 +88,9 @@ dtbs: prepare scripts dtbs_install: $(Q)$(MAKE) $(dtbinst)=$(boot)/dts +Image.gz-dtb: vmlinux scripts dtbs + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@ diff --git a/arch/arm64/boot/.gitignore b/arch/arm64/boot/.gitignore index 8dab0bb6ae66..eb3551131b1e 100644 --- a/arch/arm64/boot/.gitignore +++ b/arch/arm64/boot/.gitignore @@ -1,2 +1,3 @@ Image Image.gz +Image.gz-dtb diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index 5a0e3ab854a5..71f513face11 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -14,14 +14,27 @@ # Based on the ia64 boot/Makefile. # +include $(srctree)/arch/arm64/boot/dts/Makefile + targets := Image Image.gz +DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES)) +ifneq ($(DTB_NAMES),) +DTB_LIST := $(addsuffix .dtb,$(DTB_NAMES)) +DTB_OBJS := $(addprefix $(obj)/dts/,$(DTB_LIST)) +else +DTB_OBJS := $(shell find $(obj)/dts/ -name \*.dtb) +endif + $(obj)/Image: vmlinux FORCE $(call if_changed,objcopy) $(obj)/Image.gz: $(obj)/Image FORCE $(call if_changed,gzip) +$(obj)/Image.gz-dtb: $(obj)/Image.gz $(DTB_OBJS) FORCE + $(call if_changed,cat) + install: $(obj)/Image $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ $(obj)/Image System.map "$(INSTALL_PATH)" diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 689b6379188c..81a5e4df8fec 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -152,4 +152,9 @@ int set_memory_ro(unsigned long addr, int numpages); int set_memory_rw(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); + +#ifdef CONFIG_DEBUG_RODATA +void mark_rodata_ro(void); +#endif + #endif diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 56de5aadede2..3fb053fa6e98 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -205,6 +205,13 @@ typedef struct compat_siginfo { compat_long_t _band; /* POLL_IN, POLL_OUT, POLL_MSG */ int _fd; } _sigpoll; + + /* SIGSYS */ + struct { + compat_uptr_t _call_addr; /* calling user insn */ + int _syscall; /* triggering system call number */ + compat_uint_t _arch; /* AUDIT_ARCH_* of syscall */ + } _sigsys; } _sifields; } compat_siginfo_t; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index d71140b76773..343feb33d07b 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -86,6 +86,9 @@ void check_local_cpu_features(void); bool cpu_supports_mixed_endian_el0(void); bool system_supports_mixed_endian_el0(void); +bool cpu_supports_mixed_endian_el0(void); +bool system_supports_mixed_endian_el0(void); + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/dmi.h b/arch/arm64/include/asm/dmi.h new file mode 100644 index 000000000000..69d37d87b159 --- /dev/null +++ b/arch/arm64/include/asm/dmi.h @@ -0,0 +1,31 @@ +/* + * arch/arm64/include/asm/dmi.h + * + * Copyright (C) 2013 Linaro Limited. + * Written by: Yi Li (yi.li@linaro.org) + * + * based on arch/ia64/include/asm/dmi.h + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#ifndef __ASM_DMI_H +#define __ASM_DMI_H + +#include <linux/io.h> +#include <linux/slab.h> + +/* + * According to section 2.3.6 of the UEFI spec, the firmware should not + * request a virtual mapping for configuration tables such as SMBIOS. + * This means we have to map them before use. + */ +#define dmi_early_remap(x, l) ioremap_cache(x, l) +#define dmi_early_unmap(x, l) iounmap(x) +#define dmi_remap(x, l) ioremap_cache(x, l) +#define dmi_unmap(x) iounmap(x) +#define dmi_alloc(l) kzalloc(l, GFP_KERNEL) + +#endif diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index a34fd3b12e2b..7baf2cc04e1e 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -6,29 +6,35 @@ #ifdef CONFIG_EFI extern void efi_init(void); -extern void efi_idmap_init(void); +extern void efi_virtmap_init(void); #else #define efi_init() -#define efi_idmap_init() +#define efi_virtmap_init() #endif #define efi_call_virt(f, ...) \ ({ \ - efi_##f##_t *__f = efi.systab->runtime->f; \ + efi_##f##_t *__f; \ efi_status_t __s; \ \ kernel_neon_begin(); \ + efi_virtmap_load(); \ + __f = efi.systab->runtime->f; \ __s = __f(__VA_ARGS__); \ + efi_virtmap_unload(); \ kernel_neon_end(); \ __s; \ }) #define __efi_call_virt(f, ...) \ ({ \ - efi_##f##_t *__f = efi.systab->runtime->f; \ + efi_##f##_t *__f; \ \ kernel_neon_begin(); \ + efi_virtmap_load(); \ + __f = efi.systab->runtime->f; \ __f(__VA_ARGS__); \ + efi_virtmap_unload(); \ kernel_neon_end(); \ }) @@ -44,4 +50,28 @@ extern void efi_idmap_init(void); #define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) +#define EFI_ALLOC_ALIGN SZ_64K + +/* + * On ARM systems, virtually remapped UEFI runtime services are set up in three + * distinct stages: + * - The stub retrieves the final version of the memory map from UEFI, populates + * the virt_addr fields and calls the SetVirtualAddressMap() [SVAM] runtime + * service to communicate the new mapping to the firmware (Note that the new + * mapping is not live at this time) + * - During early boot, the page tables are allocated and populated based on the + * virt_addr fields in the memory map, but only if all descriptors with the + * EFI_MEMORY_RUNTIME attribute have a non-zero value for virt_addr. If this + * succeeds, the EFI_VIRTMAP flag is set to indicate that the virtual mappings + * have been installed successfully. + * - During an early initcall(), the UEFI Runtime Services are enabled and the + * EFI_RUNTIME_SERVICES bit set if some conditions are met, i.e., we need a + * non-early mapping of the UEFI system table, and we need to have the virtmap + * installed. + */ +#define EFI_VIRTMAP EFI_ARCH_1 + +void efi_virtmap_load(void); +void efi_virtmap_unload(void); + #endif /* _ASM_EFI_H */ diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 5f7bfe6df723..defa0ff98250 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -31,6 +31,7 @@ * */ enum fixed_addresses { + FIX_HOLE, FIX_EARLYCON_MEM_BASE, __end_of_permanent_fixed_addresses, @@ -48,6 +49,7 @@ enum fixed_addresses { FIX_BTMAP_END = __end_of_permanent_fixed_addresses, FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1, + FIX_TEXT_POKE0, __end_of_fixed_addresses }; @@ -56,10 +58,11 @@ enum fixed_addresses { #define FIXMAP_PAGE_IO __pgprot(PROT_DEVICE_nGnRE) -extern void __early_set_fixmap(enum fixed_addresses idx, - phys_addr_t phys, pgprot_t flags); +void __init early_fixmap_init(void); -#define __set_fixmap __early_set_fixmap +#define __early_set_fixmap __set_fixmap + +extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot); #include <asm-generic/fixmap.h> diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index e1f7ecdde11f..1eebf5bb0b58 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -3,7 +3,6 @@ #include <asm-generic/irq.h> -extern void (*handle_arch_irq)(struct pt_regs *); extern void migrate_irqs(void); extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index c2f006c48bdb..3d311761e3c2 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -31,7 +31,8 @@ extern void paging_init(void); extern void setup_mm_for_reboot(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); extern void init_mem_pgprot(void); -/* create an identity mapping for memory (or io if map_io is true) */ -extern void create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io); +extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot); #endif diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 41a43bf26492..1abe4d08725b 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -264,6 +264,11 @@ static inline pmd_t pte_pmd(pte_t pte) return __pmd(pte_val(pte)); } +static inline pgprot_t mk_sect_prot(pgprot_t prot) +{ + return __pgprot(pgprot_val(prot) & ~PTE_TABLE_BIT); +} + /* * THP definitions. */ diff --git a/arch/arm64/include/asm/seccomp.h b/arch/arm64/include/asm/seccomp.h new file mode 100644 index 000000000000..c76fac979629 --- /dev/null +++ b/arch/arm64/include/asm/seccomp.h @@ -0,0 +1,25 @@ +/* + * arch/arm64/include/asm/seccomp.h + * + * Copyright (C) 2014 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _ASM_SECCOMP_H +#define _ASM_SECCOMP_H + +#include <asm/unistd.h> + +#ifdef CONFIG_COMPAT +#define __NR_seccomp_read_32 __NR_compat_read +#define __NR_seccomp_write_32 __NR_compat_write +#define __NR_seccomp_exit_32 __NR_compat_exit +#define __NR_seccomp_sigreturn_32 __NR_compat_rt_sigreturn +#endif /* CONFIG_COMPAT */ + +#include <asm-generic/seccomp.h> + +#endif /* _ASM_SECCOMP_H */ diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 6d2bf419431d..49c9aefd24a5 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -31,6 +31,9 @@ * Compat syscall numbers used by the AArch64 kernel. */ #define __NR_compat_restart_syscall 0 +#define __NR_compat_exit 1 +#define __NR_compat_read 3 +#define __NR_compat_write 4 #define __NR_compat_sigreturn 119 #define __NR_compat_rt_sigreturn 173 diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 9dfdac4a74a1..8893cebcea5b 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -787,7 +787,8 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr) __SYSCALL(__NR_sched_getattr, sys_sched_getattr) #define __NR_renameat2 382 __SYSCALL(__NR_renameat2, sys_renameat2) - /* 383 for seccomp */ +#define __NR_seccomp 383 +__SYSCALL(__NR_seccomp, sys_seccomp) #define __NR_getrandom 384 __SYSCALL(__NR_getrandom, sys_getrandom) #define __NR_memfd_create 385 diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index a31374cb8627..f0e250b1b1ea 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -19,7 +19,7 @@ arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ return_address.o cpuinfo.o cpu_errata.o cpufeature.o alternative.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ - sys_compat.o \ + sys_compat.o \ ../../arm/kernel/opcodes.o arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index d18a44940968..8ce9b0577442 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -61,7 +61,8 @@ ENTRY(efi_stub_entry) */ mov x20, x0 // DTB address ldr x0, [sp, #16] // relocated _text address - mov x21, x0 + ldr x21, =stext_offset + add x21, x0, x21 /* * Calculate size of the kernel Image (same for original and copy). diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 1d85a7c5a850..a98415b5979c 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -11,24 +11,31 @@ * */ +#include <linux/atomic.h> +#include <linux/dmi.h> #include <linux/efi.h> #include <linux/export.h> #include <linux/memblock.h> +#include <linux/mm_types.h> #include <linux/bootmem.h> #include <linux/of.h> #include <linux/of_fdt.h> +#include <linux/preempt.h> +#include <linux/rbtree.h> +#include <linux/rwsem.h> #include <linux/sched.h> #include <linux/slab.h> +#include <linux/spinlock.h> #include <asm/cacheflush.h> #include <asm/efi.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> struct efi_memory_map memmap; -static efi_runtime_services_t *runtime; - static u64 efi_system_table; static int uefi_debug __initdata; @@ -47,30 +54,33 @@ static int __init is_normal_ram(efi_memory_desc_t *md) return 0; } -static void __init efi_setup_idmap(void) +/* + * Translate a EFI virtual address into a physical address: this is necessary, + * as some data members of the EFI system table are virtually remapped after + * SetVirtualAddressMap() has been called. + */ +static phys_addr_t efi_to_phys(unsigned long addr) { - struct memblock_region *r; efi_memory_desc_t *md; - u64 paddr, npages, size; - - for_each_memblock(memory, r) - create_id_mapping(r->base, r->size, 0); - /* map runtime io spaces */ for_each_efi_memory_desc(&memmap, md) { - if (!(md->attribute & EFI_MEMORY_RUNTIME) || is_normal_ram(md)) + if (!(md->attribute & EFI_MEMORY_RUNTIME)) continue; - paddr = md->phys_addr; - npages = md->num_pages; - memrange_efi_to_native(&paddr, &npages); - size = npages << PAGE_SHIFT; - create_id_mapping(paddr, size, 1); + if (md->virt_addr == 0) + /* no virtual mapping has been installed by the stub */ + break; + if (md->virt_addr <= addr && + (addr - md->virt_addr) < (md->num_pages << EFI_PAGE_SHIFT)) + return md->phys_addr + addr - md->virt_addr; } + return addr; } static int __init uefi_init(void) { efi_char16_t *c16; + void *config_tables; + u64 table_size; char vendor[100] = "unknown"; int i, retval; @@ -98,7 +108,7 @@ static int __init uefi_init(void) efi.systab->hdr.revision & 0xffff); /* Show what we know for posterity */ - c16 = early_memremap(efi.systab->fw_vendor, + c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor), sizeof(vendor)); if (c16) { for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) @@ -111,10 +121,14 @@ static int __init uefi_init(void) efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor); - retval = efi_config_init(NULL); - if (retval == 0) - set_bit(EFI_CONFIG_TABLES, &efi.flags); + table_size = sizeof(efi_config_table_64_t) * efi.systab->nr_tables; + config_tables = early_memremap(efi_to_phys(efi.systab->tables), + table_size); + + retval = efi_config_parse_tables(config_tables, efi.systab->nr_tables, + sizeof(efi_config_table_64_t), NULL); + early_memunmap(config_tables, table_size); out: early_memunmap(efi.systab, sizeof(efi_system_table_t)); return retval; @@ -125,17 +139,17 @@ out: */ static __init int is_reserve_region(efi_memory_desc_t *md) { - if (!is_normal_ram(md)) + switch (md->type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: return 0; - - if (md->attribute & EFI_MEMORY_RUNTIME) - return 1; - - if (md->type == EFI_ACPI_RECLAIM_MEMORY || - md->type == EFI_RESERVED_TYPE) - return 1; - - return 0; + default: + break; + } + return is_normal_ram(md); } static __init void reserve_regions(void) @@ -164,9 +178,7 @@ static __init void reserve_regions(void) if (is_normal_ram(md)) early_init_dt_add_memory_arch(paddr, size); - if (is_reserve_region(md) || - md->type == EFI_BOOT_SERVICES_CODE || - md->type == EFI_BOOT_SERVICES_DATA) { + if (is_reserve_region(md)) { memblock_reserve(paddr, size); if (uefi_debug) pr_cont("*"); @@ -179,123 +191,6 @@ static __init void reserve_regions(void) set_bit(EFI_MEMMAP, &efi.flags); } - -static u64 __init free_one_region(u64 start, u64 end) -{ - u64 size = end - start; - - if (uefi_debug) - pr_info(" EFI freeing: 0x%012llx-0x%012llx\n", start, end - 1); - - free_bootmem_late(start, size); - return size; -} - -static u64 __init free_region(u64 start, u64 end) -{ - u64 map_start, map_end, total = 0; - - if (end <= start) - return total; - - map_start = (u64)memmap.phys_map; - map_end = PAGE_ALIGN(map_start + (memmap.map_end - memmap.map)); - map_start &= PAGE_MASK; - - if (start < map_end && end > map_start) { - /* region overlaps UEFI memmap */ - if (start < map_start) - total += free_one_region(start, map_start); - - if (map_end < end) - total += free_one_region(map_end, end); - } else - total += free_one_region(start, end); - - return total; -} - -static void __init free_boot_services(void) -{ - u64 total_freed = 0; - u64 keep_end, free_start, free_end; - efi_memory_desc_t *md; - - /* - * If kernel uses larger pages than UEFI, we have to be careful - * not to inadvertantly free memory we want to keep if there is - * overlap at the kernel page size alignment. We do not want to - * free is_reserve_region() memory nor the UEFI memmap itself. - * - * The memory map is sorted, so we keep track of the end of - * any previous region we want to keep, remember any region - * we want to free and defer freeing it until we encounter - * the next region we want to keep. This way, before freeing - * it, we can clip it as needed to avoid freeing memory we - * want to keep for UEFI. - */ - - keep_end = 0; - free_start = 0; - - for_each_efi_memory_desc(&memmap, md) { - u64 paddr, npages, size; - - if (is_reserve_region(md)) { - /* - * We don't want to free any memory from this region. - */ - if (free_start) { - /* adjust free_end then free region */ - if (free_end > md->phys_addr) - free_end -= PAGE_SIZE; - total_freed += free_region(free_start, free_end); - free_start = 0; - } - keep_end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); - continue; - } - - if (md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) { - /* no need to free this region */ - continue; - } - - /* - * We want to free memory from this region. - */ - paddr = md->phys_addr; - npages = md->num_pages; - memrange_efi_to_native(&paddr, &npages); - size = npages << PAGE_SHIFT; - - if (free_start) { - if (paddr <= free_end) - free_end = paddr + size; - else { - total_freed += free_region(free_start, free_end); - free_start = paddr; - free_end = paddr + size; - } - } else { - free_start = paddr; - free_end = paddr + size; - } - if (free_start < keep_end) { - free_start += PAGE_SIZE; - if (free_start >= free_end) - free_start = 0; - } - } - if (free_start) - total_freed += free_region(free_start, free_end); - - if (total_freed) - pr_info("Freed 0x%llx bytes of EFI boot services memory", - total_freed); -} - void __init efi_init(void) { struct efi_fdt_params params; @@ -320,61 +215,14 @@ void __init efi_init(void) reserve_regions(); } -void __init efi_idmap_init(void) -{ - if (!efi_enabled(EFI_BOOT)) - return; - - /* boot time idmap_pg_dir is incomplete, so fill in missing parts */ - efi_setup_idmap(); - early_memunmap(memmap.map, memmap.map_end - memmap.map); -} - -static int __init remap_region(efi_memory_desc_t *md, void **new) -{ - u64 paddr, vaddr, npages, size; - - paddr = md->phys_addr; - npages = md->num_pages; - memrange_efi_to_native(&paddr, &npages); - size = npages << PAGE_SHIFT; - - if (is_normal_ram(md)) - vaddr = (__force u64)ioremap_cache(paddr, size); - else - vaddr = (__force u64)ioremap(paddr, size); - - if (!vaddr) { - pr_err("Unable to remap 0x%llx pages @ %p\n", - npages, (void *)paddr); - return 0; - } - - /* adjust for any rounding when EFI and system pagesize differs */ - md->virt_addr = vaddr + (md->phys_addr - paddr); - - if (uefi_debug) - pr_info(" EFI remap 0x%012llx => %p\n", - md->phys_addr, (void *)md->virt_addr); - - memcpy(*new, md, memmap.desc_size); - *new += memmap.desc_size; - - return 1; -} - /* - * Switch UEFI from an identity map to a kernel virtual map + * Enable the UEFI Runtime Services if all prerequisites are in place, i.e., + * non-early mapping of the UEFI system table and virtual mappings for all + * EFI_MEMORY_RUNTIME regions. */ -static int __init arm64_enter_virtual_mode(void) +static int __init arm64_enable_runtime_services(void) { - efi_memory_desc_t *md; - phys_addr_t virtmap_phys; - void *virtmap, *virt_md; - efi_status_t status; u64 mapsize; - int count = 0; - unsigned long flags; if (!efi_enabled(EFI_BOOT)) { pr_info("EFI services will not be available.\n"); @@ -396,78 +244,116 @@ static int __init arm64_enter_virtual_mode(void) efi.memmap = &memmap; - /* Map the runtime regions */ - virtmap = kmalloc(mapsize, GFP_KERNEL); - if (!virtmap) { - pr_err("Failed to allocate EFI virtual memmap\n"); + efi.systab = (__force void *)ioremap_cache(efi_system_table, + sizeof(efi_system_table_t)); + if (!efi.systab) { + pr_err("Failed to remap EFI System Table\n"); return -1; } - virtmap_phys = virt_to_phys(virtmap); - virt_md = virtmap; + set_bit(EFI_SYSTEM_TABLES, &efi.flags); - for_each_efi_memory_desc(&memmap, md) { - if (!(md->attribute & EFI_MEMORY_RUNTIME)) - continue; - if (!remap_region(md, &virt_md)) - goto err_unmap; - ++count; + if (!efi_enabled(EFI_VIRTMAP)) { + pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n"); + return -1; } - efi.systab = (__force void *)efi_lookup_mapped_addr(efi_system_table); - if (!efi.systab) { - /* - * If we have no virtual mapping for the System Table at this - * point, the memory map doesn't cover the physical offset where - * it resides. This means the System Table will be inaccessible - * to Runtime Services themselves once the virtual mapping is - * installed. - */ - pr_err("Failed to remap EFI System Table -- buggy firmware?\n"); - goto err_unmap; - } - set_bit(EFI_SYSTEM_TABLES, &efi.flags); + /* Set up runtime services function pointers */ + efi_native_runtime_setup(); + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); - local_irq_save(flags); - cpu_switch_mm(idmap_pg_dir, &init_mm); + efi.runtime_version = efi.systab->hdr.revision; - /* Call SetVirtualAddressMap with the physical address of the map */ - runtime = efi.systab->runtime; - efi.set_virtual_address_map = runtime->set_virtual_address_map; + return 0; +} +early_initcall(arm64_enable_runtime_services); - status = efi.set_virtual_address_map(count * memmap.desc_size, - memmap.desc_size, - memmap.desc_version, - (efi_memory_desc_t *)virtmap_phys); - cpu_set_reserved_ttbr0(); +static int __init arm64_dmi_init(void) +{ + /* + * On arm64, DMI depends on UEFI, and dmi_scan_machine() needs to + * be called early because dmi_id_init(), which is an arch_initcall + * itself, depends on dmi_scan_machine() having been called already. + */ + dmi_scan_machine(); + if (dmi_available) + dmi_set_dump_stack_arch_desc(); + return 0; +} +core_initcall(arm64_dmi_init); + +static pgd_t efi_pgd[PTRS_PER_PGD] __page_aligned_bss; + +static struct mm_struct efi_mm = { + .mm_rb = RB_ROOT, + .pgd = efi_pgd, + .mm_users = ATOMIC_INIT(2), + .mm_count = ATOMIC_INIT(1), + .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), + .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), + .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), + INIT_MM_CONTEXT(efi_mm) +}; + +static void efi_set_pgd(struct mm_struct *mm) +{ + cpu_switch_mm(mm->pgd, mm); flush_tlb_all(); - local_irq_restore(flags); + if (icache_is_aivivt()) + __flush_icache_all(); +} - kfree(virtmap); +void efi_virtmap_load(void) +{ + preempt_disable(); + efi_set_pgd(&efi_mm); +} - free_boot_services(); +void efi_virtmap_unload(void) +{ + efi_set_pgd(current->active_mm); + preempt_enable(); +} - if (status != EFI_SUCCESS) { - pr_err("Failed to set EFI virtual address map! [%lx]\n", - status); - return -1; - } +void __init efi_virtmap_init(void) +{ + efi_memory_desc_t *md; - /* Set up runtime services function pointers */ - runtime = efi.systab->runtime; - efi_native_runtime_setup(); - set_bit(EFI_RUNTIME_SERVICES, &efi.flags); + if (!efi_enabled(EFI_BOOT)) + return; - efi.runtime_version = efi.systab->hdr.revision; + for_each_efi_memory_desc(&memmap, md) { + u64 paddr, npages, size; + pgprot_t prot; - return 0; + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; + if (WARN(md->virt_addr == 0, + "UEFI virtual mapping incomplete or missing -- no entry found for 0x%llx\n", + md->phys_addr)) + return; -err_unmap: - /* unmap all mappings that succeeded: there are 'count' of those */ - for (virt_md = virtmap; count--; virt_md += memmap.desc_size) { - md = virt_md; - iounmap((__force void __iomem *)md->virt_addr); + paddr = md->phys_addr; + npages = md->num_pages; + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + + pr_info(" EFI remap 0x%016llx => %p\n", + md->phys_addr, (void *)md->virt_addr); + + /* + * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be + * executable, everything else can be mapped with the XN bits + * set. + */ + if (!is_normal_ram(md)) + prot = __pgprot(PROT_DEVICE_nGnRE); + else if (md->type == EFI_RUNTIME_SERVICES_CODE) + prot = PAGE_KERNEL_EXEC; + else + prot = PAGE_KERNEL; + + create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, prot); } - kfree(virtmap); - return -1; + set_bit(EFI_VIRTMAP, &efi.flags); + early_memunmap(memmap.map, memmap.map_end - memmap.map); } -early_initcall(arm64_enter_virtual_mode); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b5d2f23e4f86..1b1aa13e6b67 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -188,7 +188,8 @@ tsk .req x28 // current thread_info * Interrupt handling. */ .macro irq_handler - ldr x1, handle_arch_irq + adrp x1, handle_arch_irq + ldr x1, [x1, #:lo12:handle_arch_irq] mov x0, sp blr x1 .endm @@ -475,8 +476,8 @@ el0_da: bic x0, x26, #(0xff << 56) mov x1, x25 mov x2, sp - adr lr, ret_to_user - b do_mem_abort + bl do_mem_abort + b ret_to_user el0_ia: /* * Instruction abort handling @@ -488,8 +489,8 @@ el0_ia: mov x0, x26 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp - adr lr, ret_to_user - b do_mem_abort + bl do_mem_abort + b ret_to_user el0_fpsimd_acc: /* * Floating Point or Advanced SIMD access @@ -498,8 +499,8 @@ el0_fpsimd_acc: ct_user_exit mov x0, x25 mov x1, sp - adr lr, ret_to_user - b do_fpsimd_acc + bl do_fpsimd_acc + b ret_to_user el0_fpsimd_exc: /* * Floating Point or Advanced SIMD exception @@ -508,8 +509,8 @@ el0_fpsimd_exc: ct_user_exit mov x0, x25 mov x1, sp - adr lr, ret_to_user - b do_fpsimd_exc + bl do_fpsimd_exc + b ret_to_user el0_sp_pc: /* * Stack or PC alignment exception handling @@ -521,8 +522,8 @@ el0_sp_pc: mov x0, x26 mov x1, x25 mov x2, sp - adr lr, ret_to_user - b do_sp_pc_abort + bl do_sp_pc_abort + b ret_to_user el0_undef: /* * Undefined instruction @@ -531,8 +532,8 @@ el0_undef: enable_dbg_and_irq ct_user_exit mov x0, sp - adr lr, ret_to_user - b do_undefinstr + bl do_undefinstr + b ret_to_user el0_dbg: /* * Debug exception handling @@ -551,8 +552,8 @@ el0_inv: mov x0, sp mov x1, #BAD_SYNC mrs x2, esr_el1 - adr lr, ret_to_user - b bad_mode + bl bad_mode + b ret_to_user ENDPROC(el0_sync) .align 6 @@ -674,14 +675,15 @@ el0_svc_naked: // compat entry point ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks tst x16, #_TIF_SYSCALL_WORK b.ne __sys_trace - adr lr, ret_fast_syscall // return address cmp scno, sc_nr // check upper syscall limit b.hs ni_sys ldr x16, [stbl, scno, lsl #3] // address in the syscall table - br x16 // call sys_* routine + blr x16 // call sys_* routine + b ret_fast_syscall ni_sys: mov x0, sp - b do_ni_syscall + bl do_ni_syscall + b ret_fast_syscall ENDPROC(el0_svc) /* @@ -689,26 +691,38 @@ ENDPROC(el0_svc) * switches, and waiting for our parent to respond. */ __sys_trace: - mov x0, sp + mov w0, #-1 // set default errno for + cmp scno, x0 // user-issued syscall(-1) + b.ne 1f + mov x0, #-ENOSYS + str x0, [sp, #S_X0] +1: mov x0, sp bl syscall_trace_enter - adr lr, __sys_trace_return // return address + cmp w0, #-1 // skip the syscall? + b.eq __sys_trace_return_skipped uxtw scno, w0 // syscall number (possibly new) mov x1, sp // pointer to regs cmp scno, sc_nr // check upper syscall limit - b.hs ni_sys + b.hs __ni_sys_trace ldp x0, x1, [sp] // restore the syscall args ldp x2, x3, [sp, #S_X2] ldp x4, x5, [sp, #S_X4] ldp x6, x7, [sp, #S_X6] ldr x16, [stbl, scno, lsl #3] // address in the syscall table - br x16 // call sys_* routine + blr x16 // call sys_* routine __sys_trace_return: - str x0, [sp] // save returned x0 + str x0, [sp, #S_X0] // save returned x0 +__sys_trace_return_skipped: mov x0, sp bl syscall_trace_exit b ret_to_user +__ni_sys_trace: + mov x0, sp + bl do_ni_syscall + b __sys_trace_return + /* * Special system call wrappers. */ @@ -716,6 +730,3 @@ ENTRY(sys_rt_sigreturn_wrapper) mov x0, sp b sys_rt_sigreturn ENDPROC(sys_rt_sigreturn_wrapper) - -ENTRY(handle_arch_irq) - .quad 0 diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 2877dd818977..53f3c8ed53a9 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -132,6 +132,8 @@ efi_head: #endif #ifdef CONFIG_EFI + .globl stext_offset + .set stext_offset, stext - efi_head .align 3 pe_header: .ascii "PE" @@ -155,12 +157,12 @@ optional_header: .long 0 // SizeOfInitializedData .long 0 // SizeOfUninitializedData .long efi_stub_entry - efi_head // AddressOfEntryPoint - .long stext - efi_head // BaseOfCode + .long stext_offset // BaseOfCode extra_header_fields: .quad 0 // ImageBase - .long 0x20 // SectionAlignment - .long 0x8 // FileAlignment + .long 0x1000 // SectionAlignment + .long PECOFF_FILE_ALIGNMENT // FileAlignment .short 0 // MajorOperatingSystemVersion .short 0 // MinorOperatingSystemVersion .short 0 // MajorImageVersion @@ -172,7 +174,7 @@ extra_header_fields: .long _end - efi_head // SizeOfImage // Everything before the kernel image is considered part of the header - .long stext - efi_head // SizeOfHeaders + .long stext_offset // SizeOfHeaders .long 0 // CheckSum .short 0xa // Subsystem (EFI application) .short 0 // DllCharacteristics @@ -217,16 +219,24 @@ section_table: .byte 0 .byte 0 // end of 0 padding of section name .long _end - stext // VirtualSize - .long stext - efi_head // VirtualAddress + .long stext_offset // VirtualAddress .long _edata - stext // SizeOfRawData - .long stext - efi_head // PointerToRawData + .long stext_offset // PointerToRawData .long 0 // PointerToRelocations (0 for executables) .long 0 // PointerToLineNumbers (0 for executables) .short 0 // NumberOfRelocations (0 for executables) .short 0 // NumberOfLineNumbers (0 for executables) .long 0xe0500020 // Characteristics (section flags) - .align 5 + + /* + * EFI will load stext onwards at the 4k section alignment + * described in the PE/COFF header. To ensure that instruction + * sequences using an adrp and a :lo12: immediate will function + * correctly at this alignment, we must ensure that stext is + * placed at a 4k boundary in the Image to begin with. + */ + .align 12 #endif ENTRY(stext) @@ -238,7 +248,13 @@ ENTRY(stext) mov x0, x22 bl lookup_processor_type mov x23, x0 // x23=current cpu_table - cbz x23, __error_p // invalid processor (x23=0)? + /* + * __error_p may end up out of range for cbz if text areas are + * aligned up to section sizes. + */ + cbnz x23, 1f // invalid processor (x23=0)? + b __error_p +1: bl __vet_fdt bl __create_page_tables // x25=TTBR0, x26=TTBR1 /* @@ -250,13 +266,214 @@ ENTRY(stext) */ ldr x27, __switch_data // address to jump to after // MMU has been enabled - adr lr, __enable_mmu // return (PIC) address + adrp lr, __enable_mmu // return (PIC) address + add lr, lr, #:lo12:__enable_mmu ldr x12, [x23, #CPU_INFO_SETUP] add x12, x12, x28 // __virt_to_phys br x12 // initialise processor ENDPROC(stext) /* + * Determine validity of the x21 FDT pointer. + * The dtb must be 8-byte aligned and live in the first 512M of memory. + */ +__vet_fdt: + tst x21, #0x7 + b.ne 1f + cmp x21, x24 + b.lt 1f + mov x0, #(1 << 29) + add x0, x0, x24 + cmp x21, x0 + b.ge 1f + ret +1: + mov x21, #0 + ret +ENDPROC(__vet_fdt) +/* + * Macro to create a table entry to the next page. + * + * tbl: page table address + * virt: virtual address + * shift: #imm page table shift + * ptrs: #imm pointers per table page + * + * Preserves: virt + * Corrupts: tmp1, tmp2 + * Returns: tbl -> next level table page address + */ + .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 + lsr \tmp1, \virt, #\shift + and \tmp1, \tmp1, #\ptrs - 1 // table index + add \tmp2, \tbl, #PAGE_SIZE + orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type + str \tmp2, [\tbl, \tmp1, lsl #3] + add \tbl, \tbl, #PAGE_SIZE // next level table page + .endm + +/* + * Macro to populate the PGD (and possibily PUD) for the corresponding + * block entry in the next level (tbl) for the given virtual address. + * + * Preserves: tbl, next, virt + * Corrupts: tmp1, tmp2 + */ + .macro create_pgd_entry, tbl, virt, tmp1, tmp2 + create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 +#if SWAPPER_PGTABLE_LEVELS == 3 + create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 +#endif + .endm + +/* + * Macro to populate block entries in the page table for the start..end + * virtual range (inclusive). + * + * Preserves: tbl, flags + * Corrupts: phys, start, end, pstate + */ + .macro create_block_map, tbl, flags, phys, start, end + lsr \phys, \phys, #BLOCK_SHIFT + lsr \start, \start, #BLOCK_SHIFT + and \start, \start, #PTRS_PER_PTE - 1 // table index + orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry + lsr \end, \end, #BLOCK_SHIFT + and \end, \end, #PTRS_PER_PTE - 1 // table end index +9999: str \phys, [\tbl, \start, lsl #3] // store the entry + add \start, \start, #1 // next entry + add \phys, \phys, #BLOCK_SIZE // next block + cmp \start, \end + b.ls 9999b + .endm + +/* + * Setup the initial page tables. We only setup the barest amount which is + * required to get the kernel running. The following sections are required: + * - identity mapping to enable the MMU (low address, TTBR0) + * - first few MB of the kernel linear mapping to jump to once the MMU has + * been enabled, including the FDT blob (TTBR1) + * - pgd entry for fixed mappings (TTBR1) + */ +__create_page_tables: + pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses + mov x27, lr + + /* + * Invalidate the idmap and swapper page tables to avoid potential + * dirty cache lines being evicted. + */ + mov x0, x25 + add x1, x26, #SWAPPER_DIR_SIZE + bl __inval_cache_range + + /* + * Clear the idmap and swapper page tables. + */ + mov x0, x25 + add x6, x26, #SWAPPER_DIR_SIZE +1: stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + cmp x0, x6 + b.lo 1b + + ldr x7, =MM_MMUFLAGS + + /* + * Create the identity mapping. + */ + mov x0, x25 // idmap_pg_dir + ldr x3, =KERNEL_START + add x3, x3, x28 // __pa(KERNEL_START) + create_pgd_entry x0, x3, x5, x6 + ldr x6, =KERNEL_END + mov x5, x3 // __pa(KERNEL_START) + add x6, x6, x28 // __pa(KERNEL_END) + create_block_map x0, x7, x3, x5, x6 + + /* + * Map the kernel image (starting with PHYS_OFFSET). + */ + mov x0, x26 // swapper_pg_dir + mov x5, #PAGE_OFFSET + create_pgd_entry x0, x5, x3, x6 + ldr x6, =KERNEL_END + mov x3, x24 // phys offset + create_block_map x0, x7, x3, x5, x6 + + /* + * Map the FDT blob (maximum 2MB; must be within 512MB of + * PHYS_OFFSET). + */ + mov x3, x21 // FDT phys address + and x3, x3, #~((1 << 21) - 1) // 2MB aligned + mov x6, #PAGE_OFFSET + sub x5, x3, x24 // subtract PHYS_OFFSET + tst x5, #~((1 << 29) - 1) // within 512MB? + csel x21, xzr, x21, ne // zero the FDT pointer + b.ne 1f + add x5, x5, x6 // __va(FDT blob) + add x6, x5, #1 << 21 // 2MB for the FDT blob + sub x6, x6, #1 // inclusive range + create_block_map x0, x7, x3, x5, x6 +1: + /* + * Since the page tables have been populated with non-cacheable + * accesses (MMU disabled), invalidate the idmap and swapper page + * tables again to remove any speculatively loaded cache lines. + */ + mov x0, x25 + add x1, x26, #SWAPPER_DIR_SIZE + bl __inval_cache_range + + mov lr, x27 + ret +ENDPROC(__create_page_tables) + .ltorg + + .align 3 + .type __switch_data, %object +__switch_data: + .quad __mmap_switched + .quad __bss_start // x6 + .quad __bss_stop // x7 + .quad processor_id // x4 + .quad __fdt_pointer // x5 + .quad memstart_addr // x6 + .quad init_thread_union + THREAD_START_SP // sp + +/* + * The following fragment of code is executed with the MMU on in MMU mode, and + * uses absolute addresses; this is not position independent. + */ +__mmap_switched: + adr x3, __switch_data + 8 + + ldp x6, x7, [x3], #16 +1: cmp x6, x7 + b.hs 2f + str xzr, [x6], #8 // Clear BSS + b 1b +2: + ldp x4, x5, [x3], #16 + ldr x6, [x3], #8 + ldr x16, [x3] + mov sp, x16 + str x22, [x4] // Save processor ID + str x21, [x5] // Save FDT pointer + str x24, [x6] // Save PHYS_OFFSET + mov x29, #0 + b start_kernel +ENDPROC(__mmap_switched) + +/* + * end early head section, begin head code that is also used for + * hotplug and needs to have the same protections as the text region + */ + .section ".text","ax" +/* * If we're fortunate enough to boot at EL2, ensure that the world is * sane before dropping to EL1. * @@ -336,7 +553,8 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr vttbr_el2, xzr /* Hypervisor stub */ - adr x0, __hyp_stub_vectors + adrp x0, __hyp_stub_vectors + add x0, x0, #:lo12:__hyp_stub_vectors msr vbar_el2, x0 /* spsr */ @@ -497,183 +715,6 @@ ENDPROC(__calc_phys_offset) .quad PAGE_OFFSET /* - * Macro to create a table entry to the next page. - * - * tbl: page table address - * virt: virtual address - * shift: #imm page table shift - * ptrs: #imm pointers per table page - * - * Preserves: virt - * Corrupts: tmp1, tmp2 - * Returns: tbl -> next level table page address - */ - .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 - lsr \tmp1, \virt, #\shift - and \tmp1, \tmp1, #\ptrs - 1 // table index - add \tmp2, \tbl, #PAGE_SIZE - orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type - str \tmp2, [\tbl, \tmp1, lsl #3] - add \tbl, \tbl, #PAGE_SIZE // next level table page - .endm - -/* - * Macro to populate the PGD (and possibily PUD) for the corresponding - * block entry in the next level (tbl) for the given virtual address. - * - * Preserves: tbl, next, virt - * Corrupts: tmp1, tmp2 - */ - .macro create_pgd_entry, tbl, virt, tmp1, tmp2 - create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 -#if SWAPPER_PGTABLE_LEVELS == 3 - create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 -#endif - .endm - -/* - * Macro to populate block entries in the page table for the start..end - * virtual range (inclusive). - * - * Preserves: tbl, flags - * Corrupts: phys, start, end, pstate - */ - .macro create_block_map, tbl, flags, phys, start, end - lsr \phys, \phys, #BLOCK_SHIFT - lsr \start, \start, #BLOCK_SHIFT - and \start, \start, #PTRS_PER_PTE - 1 // table index - orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry - lsr \end, \end, #BLOCK_SHIFT - and \end, \end, #PTRS_PER_PTE - 1 // table end index -9999: str \phys, [\tbl, \start, lsl #3] // store the entry - add \start, \start, #1 // next entry - add \phys, \phys, #BLOCK_SIZE // next block - cmp \start, \end - b.ls 9999b - .endm - -/* - * Setup the initial page tables. We only setup the barest amount which is - * required to get the kernel running. The following sections are required: - * - identity mapping to enable the MMU (low address, TTBR0) - * - first few MB of the kernel linear mapping to jump to once the MMU has - * been enabled, including the FDT blob (TTBR1) - * - pgd entry for fixed mappings (TTBR1) - */ -__create_page_tables: - pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses - mov x27, lr - - /* - * Invalidate the idmap and swapper page tables to avoid potential - * dirty cache lines being evicted. - */ - mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE - bl __inval_cache_range - - /* - * Clear the idmap and swapper page tables. - */ - mov x0, x25 - add x6, x26, #SWAPPER_DIR_SIZE -1: stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - cmp x0, x6 - b.lo 1b - - ldr x7, =MM_MMUFLAGS - - /* - * Create the identity mapping. - */ - mov x0, x25 // idmap_pg_dir - ldr x3, =KERNEL_START - add x3, x3, x28 // __pa(KERNEL_START) - create_pgd_entry x0, x3, x5, x6 - ldr x6, =KERNEL_END - mov x5, x3 // __pa(KERNEL_START) - add x6, x6, x28 // __pa(KERNEL_END) - create_block_map x0, x7, x3, x5, x6 - - /* - * Map the kernel image (starting with PHYS_OFFSET). - */ - mov x0, x26 // swapper_pg_dir - mov x5, #PAGE_OFFSET - create_pgd_entry x0, x5, x3, x6 - ldr x6, =KERNEL_END - mov x3, x24 // phys offset - create_block_map x0, x7, x3, x5, x6 - - /* - * Map the FDT blob (maximum 2MB; must be within 512MB of - * PHYS_OFFSET). - */ - mov x3, x21 // FDT phys address - and x3, x3, #~((1 << 21) - 1) // 2MB aligned - mov x6, #PAGE_OFFSET - sub x5, x3, x24 // subtract PHYS_OFFSET - tst x5, #~((1 << 29) - 1) // within 512MB? - csel x21, xzr, x21, ne // zero the FDT pointer - b.ne 1f - add x5, x5, x6 // __va(FDT blob) - add x6, x5, #1 << 21 // 2MB for the FDT blob - sub x6, x6, #1 // inclusive range - create_block_map x0, x7, x3, x5, x6 -1: - /* - * Since the page tables have been populated with non-cacheable - * accesses (MMU disabled), invalidate the idmap and swapper page - * tables again to remove any speculatively loaded cache lines. - */ - mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE - bl __inval_cache_range - - mov lr, x27 - ret -ENDPROC(__create_page_tables) - .ltorg - - .align 3 - .type __switch_data, %object -__switch_data: - .quad __mmap_switched - .quad __bss_start // x6 - .quad __bss_stop // x7 - .quad processor_id // x4 - .quad __fdt_pointer // x5 - .quad memstart_addr // x6 - .quad init_thread_union + THREAD_START_SP // sp - -/* - * The following fragment of code is executed with the MMU on in MMU mode, and - * uses absolute addresses; this is not position independent. - */ -__mmap_switched: - adr x3, __switch_data + 8 - - ldp x6, x7, [x3], #16 -1: cmp x6, x7 - b.hs 2f - str xzr, [x6], #8 // Clear BSS - b 1b -2: - ldp x4, x5, [x3], #16 - ldr x6, [x3], #8 - ldr x16, [x3] - mov sp, x16 - str x22, [x4] // Save processor ID - str x21, [x5] // Save FDT pointer - str x24, [x6] // Save PHYS_OFFSET - mov x29, #0 - b start_kernel -ENDPROC(__mmap_switched) - -/* * Exception handling. Something went wrong and we can't proceed. We ought to * tell the user, but since we don't have any guarantee that we're even * running on the right architecture, we do virtually nothing. @@ -720,22 +761,3 @@ __lookup_processor_type_data: .quad . .quad cpu_table .size __lookup_processor_type_data, . - __lookup_processor_type_data - -/* - * Determine validity of the x21 FDT pointer. - * The dtb must be 8-byte aligned and live in the first 512M of memory. - */ -__vet_fdt: - tst x21, #0x7 - b.ne 1f - cmp x21, x24 - b.lt 1f - mov x0, #(1 << 29) - add x0, x0, x24 - cmp x21, x0 - b.ge 1f - ret -1: - mov x21, #0 - ret -ENDPROC(__vet_fdt) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 7e9327a0986d..27d4864577e5 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -17,14 +17,19 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <linux/bitops.h> +#include <linux/bug.h> #include <linux/compiler.h> #include <linux/kernel.h> +#include <linux/mm.h> #include <linux/smp.h> +#include <linux/spinlock.h> #include <linux/stop_machine.h> +#include <linux/types.h> #include <linux/uaccess.h> #include <asm/cacheflush.h> #include <asm/debug-monitors.h> +#include <asm/fixmap.h> #include <asm/insn.h> #define AARCH64_INSN_SF_BIT BIT(31) @@ -72,6 +77,29 @@ bool __kprobes aarch64_insn_is_nop(u32 insn) } } +static DEFINE_SPINLOCK(patch_lock); + +static void __kprobes *patch_map(void *addr, int fixmap) +{ + unsigned long uintaddr = (uintptr_t) addr; + bool module = !core_kernel_text(uintaddr); + struct page *page; + + if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX)) + page = vmalloc_to_page(addr); + else + page = virt_to_page(addr); + + BUG_ON(!page); + set_fixmap(fixmap, page_to_phys(page)); + + return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK)); +} + +static void __kprobes patch_unmap(int fixmap) +{ + clear_fixmap(fixmap); +} /* * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always * little-endian. @@ -88,10 +116,27 @@ int __kprobes aarch64_insn_read(void *addr, u32 *insnp) return ret; } +static int __kprobes __aarch64_insn_write(void *addr, u32 insn) +{ + void *waddr = addr; + unsigned long flags = 0; + int ret; + + spin_lock_irqsave(&patch_lock, flags); + waddr = patch_map(addr, FIX_TEXT_POKE0); + + ret = probe_kernel_write(waddr, &insn, AARCH64_INSN_SIZE); + + patch_unmap(FIX_TEXT_POKE0); + spin_unlock_irqrestore(&patch_lock, flags); + + return ret; +} + int __kprobes aarch64_insn_write(void *addr, u32 insn) { insn = cpu_to_le32(insn); - return probe_kernel_write(addr, &insn, AARCH64_INSN_SIZE); + return __aarch64_insn_write(addr, insn); } static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 071a6ec13bd8..240b75c0e94f 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -40,6 +40,8 @@ int arch_show_interrupts(struct seq_file *p, int prec) return 0; } +void (*handle_arch_irq)(struct pt_regs *) = NULL; + void __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) { if (handle_arch_irq) diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c index 263a166291fb..4f1fec7a46db 100644 --- a/arch/arm64/kernel/jump_label.c +++ b/arch/arm64/kernel/jump_label.c @@ -22,9 +22,8 @@ #ifdef HAVE_JUMP_LABEL -static void __arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type, - bool is_static) +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) { void *addr = (void *)entry->code; u32 insn; @@ -37,22 +36,18 @@ static void __arch_jump_label_transform(struct jump_entry *entry, insn = aarch64_insn_gen_nop(); } - if (is_static) - aarch64_insn_patch_text_nosync(addr, insn); - else - aarch64_insn_patch_text(&addr, &insn, 1); -} - -void arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type) -{ - __arch_jump_label_transform(entry, type, false); + aarch64_insn_patch_text(&addr, &insn, 1); } void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type) { - __arch_jump_label_transform(entry, type, true); + /* + * We use the architected A64 NOP in arch_static_branch, so there's no + * need to patch an identical A64 NOP over the top of it here. The core + * will call arch_jump_label_transform from a module notifier if the + * NOP needs to be replaced by a branch. + */ } #endif /* HAVE_JUMP_LABEL */ diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index fde9923af859..07c4c53d7504 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -163,6 +163,70 @@ void machine_restart(char *cmd) while (1); } +/* + * dump a block of kernel memory from around the given address + */ +static void show_data(unsigned long addr, int nbytes, const char *name) +{ + int i, j; + int nlines; + u32 *p; + + /* + * don't attempt to dump non-kernel addresses or + * values that are probably just small negative numbers + */ + if (addr < PAGE_OFFSET || addr > -256UL) + return; + + printk("\n%s: %#lx:\n", name, addr); + + /* + * round address down to a 32 bit boundary + * and always dump a multiple of 32 bytes + */ + p = (u32 *)(addr & ~(sizeof(u32) - 1)); + nbytes += (addr & (sizeof(u32) - 1)); + nlines = (nbytes + 31) / 32; + + + for (i = 0; i < nlines; i++) { + /* + * just display low 16 bits of address to keep + * each line of the dump < 80 characters + */ + printk("%04lx ", (unsigned long)p & 0xffff); + for (j = 0; j < 8; j++) { + u32 data; + if (probe_kernel_address(p, data)) { + printk(" ********"); + } else { + printk(" %08x", data); + } + ++p; + } + printk("\n"); + } +} + +static void show_extra_register_data(struct pt_regs *regs, int nbytes) +{ + mm_segment_t fs; + unsigned int i; + + fs = get_fs(); + set_fs(KERNEL_DS); + show_data(regs->pc - nbytes, nbytes * 2, "PC"); + show_data(regs->regs[30] - nbytes, nbytes * 2, "LR"); + show_data(regs->sp - nbytes, nbytes * 2, "SP"); + for (i = 0; i < 30; i++) { + char name[4]; + snprintf(name, sizeof(name), "X%u", i); + show_data(regs->regs[i] - nbytes, nbytes * 2, name); + } + set_fs(fs); +} + void __show_regs(struct pt_regs *regs) { int i, top_reg; @@ -189,6 +253,8 @@ void __show_regs(struct pt_regs *regs) if (i % 2 == 0) printk("\n"); } + if (!user_mode(regs)) + show_extra_register_data(regs, 128); printk("\n"); } diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 8a4ae8e73213..d882b833dbdb 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -27,6 +27,7 @@ #include <linux/smp.h> #include <linux/ptrace.h> #include <linux/user.h> +#include <linux/seccomp.h> #include <linux/security.h> #include <linux/init.h> #include <linux/signal.h> @@ -551,6 +552,32 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset, return ret; } +static int system_call_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int syscallno = task_pt_regs(target)->syscallno; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &syscallno, 0, -1); +} + +static int system_call_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int syscallno, ret; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &syscallno, 0, -1); + if (ret) + return ret; + + task_pt_regs(target)->syscallno = syscallno; + return ret; +} + enum aarch64_regset { REGSET_GPR, REGSET_FPR, @@ -559,6 +586,7 @@ enum aarch64_regset { REGSET_HW_BREAK, REGSET_HW_WATCH, #endif + REGSET_SYSTEM_CALL, }; static const struct user_regset aarch64_regsets[] = { @@ -608,6 +636,14 @@ static const struct user_regset aarch64_regsets[] = { .set = hw_break_set, }, #endif + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_ARM_SYSTEM_CALL, + .n = 1, + .size = sizeof(int), + .align = sizeof(int), + .get = system_call_get, + .set = system_call_set, + }, }; static const struct user_regset_view user_aarch64_view = { @@ -1114,6 +1150,10 @@ static void tracehook_report_syscall(struct pt_regs *regs, asmlinkage int syscall_trace_enter(struct pt_regs *regs) { + /* Do the secure computing check first; failures should be fast. */ + if (secure_computing() == -1) + return -1; + if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index ef961ab8a224..3db3a60ab216 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -314,6 +314,8 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) while (true) cpu_relax(); } + + dump_stack_set_arch_desc("%s (DT)", of_flat_dt_get_machine_name()); } /* @@ -378,6 +380,7 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; + early_fixmap_init(); early_ioremap_init(); parse_early_param(); @@ -394,7 +397,7 @@ void __init setup_arch(char **cmdline_p) paging_init(); request_standard_resources(); - efi_idmap_init(); + efi_virtmap_init(); early_ioremap_reset(); unflatten_device_tree(); diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index b6da20fa7a48..1a158233bfb8 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -186,6 +186,12 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user(from->si_int, &to->si_int); break; + case __SI_SYS: + err |= __put_user((compat_uptr_t)(unsigned long) + from->si_call_addr, &to->si_call_addr); + err |= __put_user(from->si_syscall, &to->si_syscall); + err |= __put_user(from->si_arch, &to->si_arch); + break; default: /* this is just in case for now ... */ err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 2f600294e8ca..5f11ebe652a4 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -8,6 +8,7 @@ #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/page.h> +#include <asm/pgtable.h> #include "image.h" @@ -32,6 +33,30 @@ jiffies = jiffies_64; *(.hyp.text) \ VMLINUX_SYMBOL(__hyp_text_end) = .; +/* + * The size of the PE/COFF section that covers the kernel image, which + * runs from stext to _edata, must be a round multiple of the PE/COFF + * FileAlignment, which we set to its minimum value of 0x200. 'stext' + * itself is 4 KB aligned, so padding out _edata to a 0x200 aligned + * boundary should be sufficient. + */ +PECOFF_FILE_ALIGNMENT = 0x200; + +#ifdef CONFIG_EFI +#define PECOFF_EDATA_PADDING \ + .pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); } +#else +#define PECOFF_EDATA_PADDING +#endif + +#ifdef CONFIG_DEBUG_ALIGN_RODATA +#define ALIGN_DEBUG_RO . = ALIGN(1<<SECTION_SHIFT); +#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO +#else +#define ALIGN_DEBUG_RO +#define ALIGN_DEBUG_RO_MIN(min) . = ALIGN(min); +#endif + SECTIONS { /* @@ -54,6 +79,7 @@ SECTIONS _text = .; HEAD_TEXT } + ALIGN_DEBUG_RO .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ __exception_text_start = .; @@ -70,19 +96,22 @@ SECTIONS *(.got) /* Global offset table */ } + ALIGN_DEBUG_RO RO_DATA(PAGE_SIZE) EXCEPTION_TABLE(8) NOTES + ALIGN_DEBUG_RO _etext = .; /* End of text and rodata section */ - . = ALIGN(PAGE_SIZE); + ALIGN_DEBUG_RO_MIN(PAGE_SIZE) __init_begin = .; INIT_TEXT_SECTION(8) .exit.text : { ARM_EXIT_KEEP(EXIT_TEXT) } - . = ALIGN(16); + + ALIGN_DEBUG_RO_MIN(16) .init.data : { INIT_DATA INIT_SETUP(16) @@ -114,6 +143,7 @@ SECTIONS _data = .; _sdata = .; RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE) + PECOFF_EDATA_PADDING _edata = .; BSS_SECTION(0, 0, 0) diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index c56179ed2c09..773d37a14039 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -3,3 +3,4 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ ioremap.o mmap.o pgd.o mmu.o \ context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_ARM64_PTDUMP) += dump.o diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 6efbb52cb92e..6ee26f82966b 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -170,7 +170,7 @@ static void *__dma_alloc_noncoherent(struct device *dev, size_t size, coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP, __get_dma_pgprot(attrs, __pgprot(PROT_NORMAL_NC), false), - NULL); + __builtin_return_address(0)); if (!coherent_ptr) goto no_map; diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c new file mode 100644 index 000000000000..bf69601be546 --- /dev/null +++ b/arch/arm64/mm/dump.c @@ -0,0 +1,332 @@ +/* + * Copyright (c) 2014, The Linux Foundation. All rights reserved. + * Debug helper to dump the current kernel pagetables of the system + * so that we can see what the various memory ranges are set to. + * + * Derived from x86 and arm implementation: + * (C) Copyright 2008 Intel Corporation + * + * Author: Arjan van de Ven <arjan@linux.intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#include <linux/debugfs.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/seq_file.h> + +#include <asm/fixmap.h> +#include <asm/pgtable.h> + +#define LOWEST_ADDR (UL(0xffffffffffffffff) << VA_BITS) + +struct addr_marker { + unsigned long start_address; + const char *name; +}; + +enum address_markers_idx { + VMALLOC_START_NR = 0, + VMALLOC_END_NR, +#ifdef CONFIG_SPARSEMEM_VMEMMAP + VMEMMAP_START_NR, + VMEMMAP_END_NR, +#endif + PCI_START_NR, + PCI_END_NR, + FIXADDR_START_NR, + FIXADDR_END_NR, + MODULES_START_NR, + MODUELS_END_NR, + KERNEL_SPACE_NR, +}; + +static struct addr_marker address_markers[] = { + { VMALLOC_START, "vmalloc() Area" }, + { VMALLOC_END, "vmalloc() End" }, +#ifdef CONFIG_SPARSEMEM_VMEMMAP + { 0, "vmemmap start" }, + { 0, "vmemmap end" }, +#endif + { (unsigned long) PCI_IOBASE, "PCI I/O start" }, + { (unsigned long) PCI_IOBASE + SZ_16M, "PCI I/O end" }, + { FIXADDR_START, "Fixmap start" }, + { FIXADDR_TOP, "Fixmap end" }, + { MODULES_VADDR, "Modules start" }, + { MODULES_END, "Modules end" }, + { PAGE_OFFSET, "Kernel Mapping" }, + { -1, NULL }, +}; + +struct pg_state { + struct seq_file *seq; + const struct addr_marker *marker; + unsigned long start_address; + unsigned level; + u64 current_prot; +}; + +struct prot_bits { + u64 mask; + u64 val; + const char *set; + const char *clear; +}; + +static const struct prot_bits pte_bits[] = { + { + .mask = PTE_USER, + .val = PTE_USER, + .set = "USR", + .clear = " ", + }, { + .mask = PTE_RDONLY, + .val = PTE_RDONLY, + .set = "ro", + .clear = "RW", + }, { + .mask = PTE_PXN, + .val = PTE_PXN, + .set = "NX", + .clear = "x ", + }, { + .mask = PTE_SHARED, + .val = PTE_SHARED, + .set = "SHD", + .clear = " ", + }, { + .mask = PTE_AF, + .val = PTE_AF, + .set = "AF", + .clear = " ", + }, { + .mask = PTE_NG, + .val = PTE_NG, + .set = "NG", + .clear = " ", + }, { + .mask = PTE_UXN, + .val = PTE_UXN, + .set = "UXN", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_DEVICE_nGnRnE), + .set = "DEVICE/nGnRnE", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_DEVICE_nGnRE), + .set = "DEVICE/nGnRE", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_DEVICE_GRE), + .set = "DEVICE/GRE", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_NORMAL_NC), + .set = "MEM/NORMAL-NC", + }, { + .mask = PTE_ATTRINDX_MASK, + .val = PTE_ATTRINDX(MT_NORMAL), + .set = "MEM/NORMAL", + } +}; + +struct pg_level { + const struct prot_bits *bits; + size_t num; + u64 mask; +}; + +static struct pg_level pg_level[] = { + { + }, { /* pgd */ + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, { /* pud */ + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, { /* pmd */ + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, { /* pte */ + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, +}; + +static void dump_prot(struct pg_state *st, const struct prot_bits *bits, + size_t num) +{ + unsigned i; + + for (i = 0; i < num; i++, bits++) { + const char *s; + + if ((st->current_prot & bits->mask) == bits->val) + s = bits->set; + else + s = bits->clear; + + if (s) + seq_printf(st->seq, " %s", s); + } +} + +static void note_page(struct pg_state *st, unsigned long addr, unsigned level, + u64 val) +{ + static const char units[] = "KMGTPE"; + u64 prot = val & pg_level[level].mask; + + if (addr < LOWEST_ADDR) + return; + + if (!st->level) { + st->level = level; + st->current_prot = prot; + st->start_address = addr; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } else if (prot != st->current_prot || level != st->level || + addr >= st->marker[1].start_address) { + const char *unit = units; + unsigned long delta; + + if (st->current_prot) { + seq_printf(st->seq, "0x%16lx-0x%16lx ", + st->start_address, addr); + + delta = (addr - st->start_address) >> 10; + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; + } + seq_printf(st->seq, "%9lu%c", delta, *unit); + if (pg_level[st->level].bits) + dump_prot(st, pg_level[st->level].bits, + pg_level[st->level].num); + seq_puts(st->seq, "\n"); + } + + if (addr >= st->marker[1].start_address) { + st->marker++; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } + + st->start_address = addr; + st->current_prot = prot; + st->level = level; + } + + if (addr >= st->marker[1].start_address) { + st->marker++; + seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } + +} + +static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) +{ + pte_t *pte = pte_offset_kernel(pmd, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PTE; i++, pte++) { + addr = start + i * PAGE_SIZE; + note_page(st, addr, 4, pte_val(*pte)); + } +} + +static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) +{ + pmd_t *pmd = pmd_offset(pud, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { + addr = start + i * PMD_SIZE; + if (pmd_none(*pmd) || pmd_sect(*pmd) || pmd_bad(*pmd)) + note_page(st, addr, 3, pmd_val(*pmd)); + else + walk_pte(st, pmd, addr); + } +} + +static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) +{ + pud_t *pud = pud_offset(pgd, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PUD; i++, pud++) { + addr = start + i * PUD_SIZE; + if (pud_none(*pud) || pud_sect(*pud) || pud_bad(*pud)) + note_page(st, addr, 2, pud_val(*pud)); + else + walk_pmd(st, pud, addr); + } +} + +static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long start) +{ + pgd_t *pgd = pgd_offset(mm, 0); + unsigned i; + unsigned long addr; + + for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { + addr = start + i * PGDIR_SIZE; + if (pgd_none(*pgd) || pgd_bad(*pgd)) + note_page(st, addr, 1, pgd_val(*pgd)); + else + walk_pud(st, pgd, addr); + } +} + +static int ptdump_show(struct seq_file *m, void *v) +{ + struct pg_state st = { + .seq = m, + .marker = address_markers, + }; + + walk_pgd(&st, &init_mm, LOWEST_ADDR); + + note_page(&st, 0, 0, 0); + return 0; +} + +static int ptdump_open(struct inode *inode, struct file *file) +{ + return single_open(file, ptdump_show, NULL); +} + +static const struct file_operations ptdump_fops = { + .open = ptdump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ptdump_init(void) +{ + struct dentry *pe; + unsigned i, j; + + for (i = 0; i < ARRAY_SIZE(pg_level); i++) + if (pg_level[i].bits) + for (j = 0; j < pg_level[i].num; j++) + pg_level[i].mask |= pg_level[i].bits[j].mask; + + address_markers[VMEMMAP_START_NR].start_address = + (unsigned long)virt_to_page(PAGE_OFFSET); + address_markers[VMEMMAP_END_NR].start_address = + (unsigned long)virt_to_page(high_memory); + + pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, + &ptdump_fops); + return pe ? 0 : -ENOMEM; +} +device_initcall(ptdump_init); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f752943f75d2..b24ed5e112ec 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -114,9 +114,11 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) } #ifdef CONFIG_HAVE_ARCH_PFN_VALID +#define PFN_MASK ((1UL << (64 - PAGE_SHIFT)) - 1) + int pfn_valid(unsigned long pfn) { - return memblock_is_memory(pfn << PAGE_SHIFT); + return (pfn & PFN_MASK) == pfn && memblock_is_memory(pfn << PAGE_SHIFT); } EXPORT_SYMBOL(pfn_valid); #endif @@ -325,6 +327,7 @@ void __init mem_init(void) void free_initmem(void) { + fixup_init(); free_initmem_default(0); free_alternatives_memory(); } diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index 4a07630a6616..cbb99c8f1e04 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -103,97 +103,10 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) } EXPORT_SYMBOL(ioremap_cache); -static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; -#if CONFIG_ARM64_PGTABLE_LEVELS > 2 -static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; -#endif -#if CONFIG_ARM64_PGTABLE_LEVELS > 3 -static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss; -#endif - -static inline pud_t * __init early_ioremap_pud(unsigned long addr) -{ - pgd_t *pgd; - - pgd = pgd_offset_k(addr); - BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd)); - - return pud_offset(pgd, addr); -} - -static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) -{ - pud_t *pud = early_ioremap_pud(addr); - - BUG_ON(pud_none(*pud) || pud_bad(*pud)); - - return pmd_offset(pud, addr); -} - -static inline pte_t * __init early_ioremap_pte(unsigned long addr) -{ - pmd_t *pmd = early_ioremap_pmd(addr); - - BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd)); - - return pte_offset_kernel(pmd, addr); -} - +/* + * Must be called after early_fixmap_init + */ void __init early_ioremap_init(void) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - unsigned long addr = fix_to_virt(FIX_BTMAP_BEGIN); - - pgd = pgd_offset_k(addr); - pgd_populate(&init_mm, pgd, bm_pud); - pud = pud_offset(pgd, addr); - pud_populate(&init_mm, pud, bm_pmd); - pmd = pmd_offset(pud, addr); - pmd_populate_kernel(&init_mm, pmd, bm_pte); - - /* - * The boot-ioremap range spans multiple pmds, for which - * we are not prepared: - */ - BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) - != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); - - if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) { - WARN_ON(1); - pr_warn("pmd %p != %p\n", - pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))); - pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", - fix_to_virt(FIX_BTMAP_BEGIN)); - pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n", - fix_to_virt(FIX_BTMAP_END)); - - pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END); - pr_warn("FIX_BTMAP_BEGIN: %d\n", - FIX_BTMAP_BEGIN); - } - early_ioremap_setup(); } - -void __init __early_set_fixmap(enum fixed_addresses idx, - phys_addr_t phys, pgprot_t flags) -{ - unsigned long addr = __fix_to_virt(idx); - pte_t *pte; - - if (idx >= __end_of_fixed_addresses) { - BUG(); - return; - } - - pte = early_ioremap_pte(addr); - - if (pgprot_val(flags)) - set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); - else { - pte_clear(&init_mm, addr, pte); - flush_tlb_kernel_range(addr, addr+PAGE_SIZE); - } -} diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h index d519f4f50c8c..ef47d99b5cbc 100644 --- a/arch/arm64/mm/mm.h +++ b/arch/arm64/mm/mm.h @@ -1,2 +1,3 @@ extern void __init bootmem_init(void); -extern void __init arm64_swiotlb_init(void); + +void fixup_init(void); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index f4f8b500f74c..e83d30e544cc 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -26,8 +26,10 @@ #include <linux/memblock.h> #include <linux/fs.h> #include <linux/io.h> +#include <linux/stop_machine.h> #include <asm/cputype.h> +#include <asm/fixmap.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/sizes.h> @@ -132,21 +134,43 @@ EXPORT_SYMBOL(phys_mem_access_prot); static void __init *early_alloc(unsigned long sz) { void *ptr = __va(memblock_alloc(sz, sz)); + BUG_ON(!ptr); memset(ptr, 0, sz); return ptr; } -static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, +/* + * remap a PMD into pages + */ +static void split_pmd(pmd_t *pmd, pte_t *pte) +{ + unsigned long pfn = pmd_pfn(*pmd); + int i = 0; + + do { + /* + * Need to have the least restrictive permissions available + * permissions will be fixed up later + */ + set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); + pfn++; + } while (pte++, i++, i < PTRS_PER_PTE); +} + +static void alloc_init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, - pgprot_t prot) + pgprot_t prot, + void *(*alloc)(unsigned long size)) { pte_t *pte; - if (pmd_none(*pmd)) { - pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t)); + if (pmd_none(*pmd) || pmd_bad(*pmd)) { + pte = alloc(PTRS_PER_PTE * sizeof(pte_t)); + if (pmd_sect(*pmd)) + split_pmd(pmd, pte); __pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE); + flush_tlb_all(); } - BUG_ON(pmd_bad(*pmd)); pte = pte_offset_kernel(pmd, addr); do { @@ -155,29 +179,40 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, } while (pte++, addr += PAGE_SIZE, addr != end); } -static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, - unsigned long end, phys_addr_t phys, - int map_io) +void split_pud(pud_t *old_pud, pmd_t *pmd) +{ + unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT; + pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr); + int i = 0; + + do { + set_pmd(pmd, __pmd(addr | prot)); + addr += PMD_SIZE; + } while (pmd++, i++, i < PTRS_PER_PMD); +} + +static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, + unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + void *(*alloc)(unsigned long size)) { pmd_t *pmd; unsigned long next; - pmdval_t prot_sect; - pgprot_t prot_pte; - - if (map_io) { - prot_sect = PROT_SECT_DEVICE_nGnRE; - prot_pte = __pgprot(PROT_DEVICE_nGnRE); - } else { - prot_sect = PROT_SECT_NORMAL_EXEC; - prot_pte = PAGE_KERNEL_EXEC; - } /* * Check for initial section mappings in the pgd/pud and remove them. */ if (pud_none(*pud) || pud_bad(*pud)) { - pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t)); - pud_populate(&init_mm, pud, pmd); + pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t)); + if (pud_sect(*pud)) { + /* + * need to have the 1G of mappings continue to be + * present + */ + split_pud(pud, pmd); + } + pud_populate(mm, pud, pmd); + flush_tlb_all(); } pmd = pmd_offset(pud, addr); @@ -186,7 +221,8 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, /* try section mapping first */ if (((addr | next | phys) & ~SECTION_MASK) == 0) { pmd_t old_pmd =*pmd; - set_pmd(pmd, __pmd(phys | prot_sect)); + set_pmd(pmd, __pmd(phys | + pgprot_val(mk_sect_prot(prot)))); /* * Check for previous table entries created during * boot (__create_page_tables) and flush them. @@ -195,22 +231,35 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, flush_tlb_all(); } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), - prot_pte); + prot, alloc); } phys += next - addr; } while (pmd++, addr = next, addr != end); } -static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, - unsigned long end, phys_addr_t phys, - int map_io) +static inline bool use_1G_block(unsigned long addr, unsigned long next, + unsigned long phys) +{ + if (PAGE_SHIFT != 12) + return false; + + if (((addr | next | phys) & ~PUD_MASK) != 0) + return false; + + return true; +} + +static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + void *(*alloc)(unsigned long size)) { pud_t *pud; unsigned long next; if (pgd_none(*pgd)) { - pud = early_alloc(PTRS_PER_PUD * sizeof(pud_t)); - pgd_populate(&init_mm, pgd, pud); + pud = alloc(PTRS_PER_PUD * sizeof(pud_t)); + pgd_populate(mm, pgd, pud); } BUG_ON(pgd_bad(*pgd)); @@ -221,10 +270,10 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, /* * For 4K granule only, attempt to put down a 1GB block */ - if (!map_io && (PAGE_SHIFT == 12) && - ((addr | next | phys) & ~PUD_MASK) == 0) { + if (use_1G_block(addr, next, phys)) { pud_t old_pud = *pud; - set_pud(pud, __pud(phys | PROT_SECT_NORMAL_EXEC)); + set_pud(pud, __pud(phys | + pgprot_val(mk_sect_prot(prot)))); /* * If we have an old value for a pud, it will @@ -239,7 +288,7 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, flush_tlb_all(); } } else { - alloc_init_pmd(pud, addr, next, phys, map_io); + alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc); } phys += next - addr; } while (pud++, addr = next, addr != end); @@ -249,9 +298,10 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, * Create the page directory entries and any necessary page tables for the * mapping specified by 'md'. */ -static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys, - unsigned long virt, phys_addr_t size, - int map_io) +static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, + phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot, + void *(*alloc)(unsigned long size)) { unsigned long addr, length, end, next; @@ -261,32 +311,96 @@ static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys, end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(pgd, addr, next, phys, map_io); + alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc); phys += next - addr; } while (pgd++, addr = next, addr != end); } -static void __init create_mapping(phys_addr_t phys, unsigned long virt, - phys_addr_t size) +static void *late_alloc(unsigned long size) +{ + void *ptr; + + BUG_ON(size > PAGE_SIZE); + ptr = (void *)__get_free_page(PGALLOC_GFP); + BUG_ON(!ptr); + return ptr; +} + +static void __ref create_mapping(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) { if (virt < VMALLOC_START) { pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; } - __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt, size, 0); + __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt, + size, prot, early_alloc); +} + +void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot) +{ + __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot, + early_alloc); } -void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io) +static void create_mapping_late(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) { - if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) { - pr_warn("BUG: not creating id mapping for %pa\n", &addr); + if (virt < VMALLOC_START) { + pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", + &phys, virt); return; } - __create_mapping(&idmap_pg_dir[pgd_index(addr)], - addr, addr, size, map_io); + + return __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), + phys, virt, size, prot, late_alloc); } +#ifdef CONFIG_DEBUG_RODATA +static void __init __map_memblock(phys_addr_t start, phys_addr_t end) +{ + /* + * Set up the executable regions using the existing section mappings + * for now. This will get more fine grained later once all memory + * is mapped + */ + unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE); + unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); + + if (end < kernel_x_start) { + create_mapping(start, __phys_to_virt(start), + end - start, PAGE_KERNEL); + } else if (start >= kernel_x_end) { + create_mapping(start, __phys_to_virt(start), + end - start, PAGE_KERNEL); + } else { + if (start < kernel_x_start) + create_mapping(start, __phys_to_virt(start), + kernel_x_start - start, + PAGE_KERNEL); + create_mapping(kernel_x_start, + __phys_to_virt(kernel_x_start), + kernel_x_end - kernel_x_start, + PAGE_KERNEL_EXEC); + if (kernel_x_end < end) + create_mapping(kernel_x_end, + __phys_to_virt(kernel_x_end), + end - kernel_x_end, + PAGE_KERNEL); + } + +} +#else +static void __init __map_memblock(phys_addr_t start, phys_addr_t end) +{ + create_mapping(start, __phys_to_virt(start), end - start, + PAGE_KERNEL_EXEC); +} +#endif + static void __init map_mem(void) { struct memblock_region *reg; @@ -331,14 +445,53 @@ static void __init map_mem(void) memblock_set_current_limit(limit); } #endif - - create_mapping(start, __phys_to_virt(start), end - start); + __map_memblock(start, end); } /* Limit no longer required. */ memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); } +void __init fixup_executable(void) +{ +#ifdef CONFIG_DEBUG_RODATA + /* now that we are actually fully mapped, make the start/end more fine grained */ + if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) { + unsigned long aligned_start = round_down(__pa(_stext), + SECTION_SIZE); + + create_mapping(aligned_start, __phys_to_virt(aligned_start), + __pa(_stext) - aligned_start, + PAGE_KERNEL); + } + + if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) { + unsigned long aligned_end = round_up(__pa(__init_end), + SECTION_SIZE); + create_mapping(__pa(__init_end), (unsigned long)__init_end, + aligned_end - __pa(__init_end), + PAGE_KERNEL); + } +#endif +} + +#ifdef CONFIG_DEBUG_RODATA +void mark_rodata_ro(void) +{ + create_mapping_late(__pa(_stext), (unsigned long)_stext, + (unsigned long)_etext - (unsigned long)_stext, + PAGE_KERNEL_EXEC | PTE_RDONLY); + +} +#endif + +void fixup_init(void) +{ + create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin, + (unsigned long)__init_end - (unsigned long)__init_begin, + PAGE_KERNEL); +} + /* * paging_init() sets up the page tables, initialises the zone memory * maps and sets up the zero page. @@ -348,6 +501,7 @@ void __init paging_init(void) void *zero_page; map_mem(); + fixup_executable(); /* * Finally flush the caches and tlb to ensure that we're in a @@ -463,3 +617,96 @@ void vmemmap_free(unsigned long start, unsigned long end) { } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ + +static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; +#endif +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss; +#endif + +static inline pud_t * fixmap_pud(unsigned long addr) +{ + pgd_t *pgd = pgd_offset_k(addr); + + BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd)); + + return pud_offset(pgd, addr); +} + +static inline pmd_t * fixmap_pmd(unsigned long addr) +{ + pud_t *pud = fixmap_pud(addr); + + BUG_ON(pud_none(*pud) || pud_bad(*pud)); + + return pmd_offset(pud, addr); +} + +static inline pte_t * fixmap_pte(unsigned long addr) +{ + pmd_t *pmd = fixmap_pmd(addr); + + BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd)); + + return pte_offset_kernel(pmd, addr); +} + +void __init early_fixmap_init(void) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + unsigned long addr = FIXADDR_START; + + pgd = pgd_offset_k(addr); + pgd_populate(&init_mm, pgd, bm_pud); + pud = pud_offset(pgd, addr); + pud_populate(&init_mm, pud, bm_pmd); + pmd = pmd_offset(pud, addr); + pmd_populate_kernel(&init_mm, pmd, bm_pte); + + /* + * The boot-ioremap range spans multiple pmds, for which + * we are not preparted: + */ + BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) + != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); + + if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN))) + || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) { + WARN_ON(1); + pr_warn("pmd %p != %p, %p\n", + pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)), + fixmap_pmd(fix_to_virt(FIX_BTMAP_END))); + pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", + fix_to_virt(FIX_BTMAP_BEGIN)); + pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n", + fix_to_virt(FIX_BTMAP_END)); + + pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END); + pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN); + } +} + +void __set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) +{ + unsigned long addr = __fix_to_virt(idx); + pte_t *pte; + + if (idx >= __end_of_fixed_addresses) { + BUG(); + return; + } + + pte = fixmap_pte(addr); + + if (pgprot_val(flags)) { + set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); + } else { + pte_clear(&init_mm, addr, pte); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE); + } +} diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h index c5d1785373ed..02bab09707f2 100644 --- a/arch/x86/include/asm/idle.h +++ b/arch/x86/include/asm/idle.h @@ -1,13 +1,6 @@ #ifndef _ASM_X86_IDLE_H #define _ASM_X86_IDLE_H -#define IDLE_START 1 -#define IDLE_END 2 - -struct notifier_block; -void idle_notifier_register(struct notifier_block *n); -void idle_notifier_unregister(struct notifier_block *n); - #ifdef CONFIG_X86_64 void enter_idle(void); void exit_idle(void); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index a388bb883128..0885df57ce7a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -42,19 +42,6 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; #ifdef CONFIG_X86_64 static DEFINE_PER_CPU(unsigned char, is_idle); -static ATOMIC_NOTIFIER_HEAD(idle_notifier); - -void idle_notifier_register(struct notifier_block *n) -{ - atomic_notifier_chain_register(&idle_notifier, n); -} -EXPORT_SYMBOL_GPL(idle_notifier_register); - -void idle_notifier_unregister(struct notifier_block *n) -{ - atomic_notifier_chain_unregister(&idle_notifier, n); -} -EXPORT_SYMBOL_GPL(idle_notifier_unregister); #endif struct kmem_cache *task_xstate_cachep; @@ -262,14 +249,14 @@ static inline void play_dead(void) void enter_idle(void) { this_cpu_write(is_idle, 1); - atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); + idle_notifier_call_chain(IDLE_START); } static void __exit_idle(void) { if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) return; - atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); + idle_notifier_call_chain(IDLE_END); } /* Called from interrupts to signify idle end */ |