author    Kevin Hilman <khilman@linaro.org>  2015-10-20 11:26:33 -0700
committer Kevin Hilman <khilman@linaro.org>  2015-10-20 11:26:33 -0700
commit    09147a74376428080eddfed0cbce2cab205c2508
tree      7c903b82831eb03498dfc89c615e2acd5e1d9d6f
parent    e4bec9707eb7446e0cd1961f91e6948819327933
parent    c2fab09b2ec32f252029757ba3353ea3d308c811
Merge branch 'test/linux-linaro-lsk-v3.18-android' of git://android.git.linaro.org/kernel/linaro-android into linux-linaro-lsk-v3.18-android
-rw-r--r--  Documentation/arm/memory.txt | 2
-rw-r--r--  Documentation/device-mapper/verity.txt | 12
-rw-r--r--  Documentation/filesystems/squashfs.txt | 8
-rw-r--r--  Documentation/networking/ip-sysctl.txt | 7
-rw-r--r--  arch/arm/crypto/Makefile | 8
-rw-r--r--  arch/arm/crypto/sha256-armv4.pl | 716
-rw-r--r--  arch/arm/crypto/sha256-core.S_shipped | 2808
-rw-r--r--  arch/arm/crypto/sha256_glue.c | 246
-rw-r--r--  arch/arm/crypto/sha256_glue.h | 23
-rw-r--r--  arch/arm/crypto/sha256_neon_glue.c | 172
-rw-r--r--  arch/arm/include/asm/cacheflush.h | 10
-rw-r--r--  arch/arm/include/asm/fixmap.h | 31
-rw-r--r--  arch/arm/kernel/Makefile | 2
-rw-r--r--  arch/arm/kernel/ftrace.c | 19
-rw-r--r--  arch/arm/kernel/jump_label.c | 2
-rw-r--r--  arch/arm/kernel/kgdb.c | 31
-rw-r--r--  arch/arm/kernel/machine_kexec.c | 9
-rw-r--r--  arch/arm/kernel/patch.c | 92
-rw-r--r--  arch/arm/kernel/patch.h | 12
-rw-r--r--  arch/arm/kernel/vmlinux.lds.S | 21
-rw-r--r--  arch/arm/mm/Kconfig | 21
-rw-r--r--  arch/arm/mm/highmem.c | 15
-rw-r--r--  arch/arm/mm/init.c | 149
-rw-r--r--  arch/arm/mm/mmu.c | 39
-rw-r--r--  arch/arm64/Kconfig | 11
-rw-r--r--  arch/arm64/Kconfig.debug | 35
-rw-r--r--  arch/arm64/include/asm/cacheflush.h | 5
-rw-r--r--  arch/arm64/include/asm/dmi.h | 31
-rw-r--r--  arch/arm64/include/asm/efi.h | 38
-rw-r--r--  arch/arm64/include/asm/fixmap.h | 9
-rw-r--r--  arch/arm64/include/asm/irq.h | 1
-rw-r--r--  arch/arm64/include/asm/mmu.h | 5
-rw-r--r--  arch/arm64/include/asm/pgtable.h | 5
-rw-r--r--  arch/arm64/kernel/efi-entry.S | 3
-rw-r--r--  arch/arm64/kernel/efi.c | 400
-rw-r--r--  arch/arm64/kernel/entry.S | 6
-rw-r--r--  arch/arm64/kernel/head.S | 434
-rw-r--r--  arch/arm64/kernel/insn.c | 47
-rw-r--r--  arch/arm64/kernel/irq.c | 2
-rw-r--r--  arch/arm64/kernel/jump_label.c | 23
-rw-r--r--  arch/arm64/kernel/setup.c | 3
-rw-r--r--  arch/arm64/kernel/vmlinux.lds.S | 34
-rw-r--r--  arch/arm64/mm/Makefile | 1
-rw-r--r--  arch/arm64/mm/dma-mapping.c | 2
-rw-r--r--  arch/arm64/mm/dump.c | 332
-rw-r--r--  arch/arm64/mm/fault.c | 1
-rw-r--r--  arch/arm64/mm/init.c | 1
-rw-r--r--  arch/arm64/mm/ioremap.c | 93
-rw-r--r--  arch/arm64/mm/mm.h | 3
-rw-r--r--  arch/arm64/mm/mmu.c | 337
-rw-r--r--  crypto/Kconfig | 8
-rw-r--r--  drivers/firmware/dmi_scan.c | 79
-rw-r--r--  drivers/firmware/efi/efi.c | 60
-rw-r--r--  drivers/firmware/efi/libstub/arm-stub.c | 70
-rw-r--r--  drivers/firmware/efi/libstub/efi-stub-helper.c | 25
-rw-r--r--  drivers/firmware/efi/libstub/efistub.h | 4
-rw-r--r--  drivers/firmware/efi/libstub/fdt.c | 62
-rw-r--r--  drivers/md/dm-verity.c | 102
-rw-r--r--  drivers/staging/android/lowmemorykiller.c | 12
-rw-r--r--  drivers/staging/android/trace/lowmemorykiller.h | 41
-rw-r--r--  drivers/usb/gadget/udc/udc-core.c | 10
-rw-r--r--  drivers/xen/efi.c | 1
-rw-r--r--  fs/squashfs/Kconfig | 15
-rw-r--r--  fs/squashfs/Makefile | 1
-rw-r--r--  fs/squashfs/decompressor.c | 7
-rw-r--r--  fs/squashfs/decompressor.h | 4
-rw-r--r--  fs/squashfs/lz4_wrapper.c | 142
-rw-r--r--  fs/squashfs/squashfs_fs.h | 1
-rw-r--r--  include/linux/efi.h | 8
-rw-r--r--  include/linux/ipv6.h | 1
-rw-r--r--  include/net/addrconf.h | 3
-rw-r--r--  include/net/route.h | 3
-rw-r--r--  include/uapi/linux/ipv6.h | 1
-rw-r--r--  ipc/msg.c | 14
-rw-r--r--  ipc/shm.c | 13
-rw-r--r--  ipc/util.c | 8
-rw-r--r--  lib/lz4/lz4_decompress.c | 30
-rw-r--r--  net/ipv6/addrconf.c | 244
-rw-r--r--  net/ipv6/ndisc.c | 4
-rw-r--r--  security/selinux/hooks.c | 3
80 files changed, 6399 insertions(+), 894 deletions(-)
diff --git a/Documentation/arm/memory.txt b/Documentation/arm/memory.txt
index 38dc06d0a791..4178ebda6e66 100644
--- a/Documentation/arm/memory.txt
+++ b/Documentation/arm/memory.txt
@@ -41,7 +41,7 @@ fffe8000 fffeffff DTCM mapping area for platforms with
fffe0000 fffe7fff ITCM mapping area for platforms with
ITCM mounted inside the CPU.
-ffc00000 ffdfffff Fixmap mapping region. Addresses provided
+ffc00000 ffefffff Fixmap mapping region. Addresses provided
by fix_to_virt() will be located here.
fee00000 feffffff Mapping of PCI I/O space. This is a static
diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt
index 0075f70cd3f9..ac71f1f63218 100644
--- a/Documentation/device-mapper/verity.txt
+++ b/Documentation/device-mapper/verity.txt
@@ -10,7 +10,7 @@ Construction Parameters
<version> <dev> <hash_dev>
<data_block_size> <hash_block_size>
<num_data_blocks> <hash_start_block>
- <algorithm> <digest> <salt>
+ <algorithm> <digest> <salt> <mode>
<version>
This is the type of the on-disk hash format.
@@ -62,6 +62,16 @@ Construction Parameters
<salt>
The hexadecimal encoding of the salt value.
+<mode>
+ Optional. The mode of operation.
+
+ 0 is the normal mode of operation where a corrupted block will result in an
+ I/O error.
+
+ 1 is logging mode where corrupted blocks are logged and a uevent is sent to
+ notify user space.
+
+
Theory of operation
===================
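
A minimal usage sketch for the new <mode> parameter (device names, sizes,
digest and salt below are illustrative, not taken from this patch): a
dm-verity table covering 262144 4K data blocks, with the trailing "1"
selecting logging mode instead of the default I/O-error behaviour:

  dmsetup create vroot --table "0 2097152 verity 1 /dev/sda1 /dev/sda2 \
      4096 4096 262144 1 sha256 <digest-hex> <salt-hex> 1"

Omitting the final field keeps mode 0, i.e. corrupted blocks return EIO.
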
diff --git a/Documentation/filesystems/squashfs.txt b/Documentation/filesystems/squashfs.txt
index 403c090aca39..e5274f84dc56 100644
--- a/Documentation/filesystems/squashfs.txt
+++ b/Documentation/filesystems/squashfs.txt
@@ -2,10 +2,10 @@ SQUASHFS 4.0 FILESYSTEM
=======================
Squashfs is a compressed read-only filesystem for Linux.
-It uses zlib/lzo/xz compression to compress files, inodes and directories.
-Inodes in the system are very small and all blocks are packed to minimise
-data overhead. Block sizes greater than 4K are supported up to a maximum
-of 1Mbytes (default block size 128K).
+It uses zlib, lz4, lzo, or xz compression to compress files, inodes and
+directories. Inodes in the system are very small and all blocks are packed to
+minimise data overhead. Block sizes greater than 4K are supported up to a
+maximum of 1Mbytes (default block size 128K).
Squashfs is intended for general read-only filesystem use, for archival
use (i.e. in cases where a .tar.gz file may be used), and in constrained
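
As a usage sketch of the new lz4 option (assuming a squashfs-tools build
with lz4 support; exact flags can vary between versions), an lz4-compressed
image can be created and mounted with:

  mksquashfs rootfs/ rootfs.sqsh -comp lz4
  mount -t squashfs -o loop rootfs.sqsh /mnt
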
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 64a0fc5985d6..a9e8802b729c 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1378,6 +1378,13 @@ router_solicitations - INTEGER
routers are present.
Default: 3
+use_oif_addrs_only - BOOLEAN
+ When enabled, the candidate source addresses for destinations
+ routed via this interface are restricted to the set of addresses
+ configured on this interface (vis. RFC 6724, section 4).
+
+ Default: false
+
use_tempaddr - INTEGER
Preference for Privacy Extensions (RFC3041).
<= 0 : disable Privacy Extensions
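
Usage sketch for the new knob (the interface name is illustrative): it is a
per-interface boolean under net.ipv6.conf, so for example

  sysctl -w net.ipv6.conf.eth0.use_oif_addrs_only=1

restricts source-address selection for destinations routed via eth0 to the
addresses configured on eth0 itself.
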
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index b48fa341648d..2cee53b27238 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -6,12 +6,15 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
+obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o
aes-arm-y := aes-armv4.o aes_glue.o
aes-arm-bs-y := aesbs-core.o aesbs-glue.o
sha1-arm-y := sha1-armv4-large.o sha1_glue.o
sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
+sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
+sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o
quiet_cmd_perl = PERL $@
@@ -20,4 +23,7 @@ quiet_cmd_perl = PERL $@
$(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl
$(call cmd,perl)
-.PRECIOUS: $(obj)/aesbs-core.S
+$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
+ $(call cmd,perl)
+
+.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S
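
The new rule mirrors the existing aesbs-core.S one: sha256-core.S_shipped is
a checked-in artifact regenerated from the perlasm source only when the .pl
file changes. Since the script's header takes the output file name as its
first argument (see the "$output=shift" line in sha256-armv4.pl below), a
hedged sketch of regenerating it by hand is:

  perl arch/arm/crypto/sha256-armv4.pl arch/arm/crypto/sha256-core.S_shipped
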
diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl
new file mode 100644
index 000000000000..fac0533ea633
--- /dev/null
+++ b/arch/arm/crypto/sha256-armv4.pl
@@ -0,0 +1,716 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Permission to use under GPL terms is granted.
+# ====================================================================
+
+# SHA256 block procedure for ARMv4. May 2007.
+
+# Performance is ~2x better than gcc 3.4 generated code and in "abso-
+# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
+# byte [on single-issue Xscale PXA250 core].
+
+# July 2010.
+#
+# Rescheduling for dual-issue pipeline resulted in 22% improvement on
+# Cortex A8 core and ~20 cycles per processed byte.
+
+# February 2011.
+#
+# Profiler-assisted and platform-specific optimization resulted in 16%
+# improvement on Cortex A8 core and ~15.4 cycles per processed byte.
+
+# September 2013.
+#
+# Add NEON implementation. On Cortex A8 it was measured to process one
+# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
+# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
+# code (meaning that latter performs sub-optimally, nothing was done
+# about it).
+
+# May 2014.
+#
+# Add ARMv8 code path performing at 2.0 cpb on Apple A7.
+
+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
+
+$ctx="r0"; $t0="r0";
+$inp="r1"; $t4="r1";
+$len="r2"; $t1="r2";
+$T1="r3"; $t3="r3";
+$A="r4";
+$B="r5";
+$C="r6";
+$D="r7";
+$E="r8";
+$F="r9";
+$G="r10";
+$H="r11";
+@V=($A,$B,$C,$D,$E,$F,$G,$H);
+$t2="r12";
+$Ktbl="r14";
+
+@Sigma0=( 2,13,22);
+@Sigma1=( 6,11,25);
+@sigma0=( 7,18, 3);
+@sigma1=(17,19,10);
+
+sub BODY_00_15 {
+my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
+
+$code.=<<___ if ($i<16);
+#if __ARM_ARCH__>=7
+ @ ldr $t1,[$inp],#4 @ $i
+# if $i==15
+ str $inp,[sp,#17*4] @ make room for $t4
+# endif
+ eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
+ add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
+ eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
+# ifndef __ARMEB__
+ rev $t1,$t1
+# endif
+#else
+ @ ldrb $t1,[$inp,#3] @ $i
+ add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
+ ldrb $t2,[$inp,#2]
+ ldrb $t0,[$inp,#1]
+ orr $t1,$t1,$t2,lsl#8
+ ldrb $t2,[$inp],#4
+ orr $t1,$t1,$t0,lsl#16
+# if $i==15
+ str $inp,[sp,#17*4] @ make room for $t4
+# endif
+ eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
+ orr $t1,$t1,$t2,lsl#24
+ eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
+#endif
+___
+$code.=<<___;
+ ldr $t2,[$Ktbl],#4 @ *K256++
+ add $h,$h,$t1 @ h+=X[i]
+ str $t1,[sp,#`$i%16`*4]
+ eor $t1,$f,$g
+ add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e)
+ and $t1,$t1,$e
+ add $h,$h,$t2 @ h+=K256[i]
+ eor $t1,$t1,$g @ Ch(e,f,g)
+ eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]`
+ add $h,$h,$t1 @ h+=Ch(e,f,g)
+#if $i==31
+ and $t2,$t2,#0xff
+ cmp $t2,#0xf2 @ done?
+#endif
+#if $i<15
+# if __ARM_ARCH__>=7
+ ldr $t1,[$inp],#4 @ prefetch
+# else
+ ldrb $t1,[$inp,#3]
+# endif
+ eor $t2,$a,$b @ a^b, b^c in next round
+#else
+ ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx
+ eor $t2,$a,$b @ a^b, b^c in next round
+ ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx
+#endif
+ eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a)
+ and $t3,$t3,$t2 @ (b^c)&=(a^b)
+ add $d,$d,$h @ d+=h
+ eor $t3,$t3,$b @ Maj(a,b,c)
+ add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a)
+ @ add $h,$h,$t3 @ h+=Maj(a,b,c)
+___
+ ($t2,$t3)=($t3,$t2);
+}
+
+sub BODY_16_XX {
+my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
+
+$code.=<<___;
+ @ ldr $t1,[sp,#`($i+1)%16`*4] @ $i
+ @ ldr $t4,[sp,#`($i+14)%16`*4]
+ mov $t0,$t1,ror#$sigma0[0]
+ add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
+ mov $t2,$t4,ror#$sigma1[0]
+ eor $t0,$t0,$t1,ror#$sigma0[1]
+ eor $t2,$t2,$t4,ror#$sigma1[1]
+ eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1])
+ ldr $t1,[sp,#`($i+0)%16`*4]
+ eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14])
+ ldr $t4,[sp,#`($i+9)%16`*4]
+
+ add $t2,$t2,$t0
+ eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15
+ add $t1,$t1,$t2
+ eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
+ add $t1,$t1,$t4 @ X[i]
+___
+ &BODY_00_15(@_);
+}
+
+$code=<<___;
+#ifndef __KERNEL__
+# include "arm_arch.h"
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
+#endif
+
+.text
+#if __ARM_ARCH__<7
+.code 32
+#else
+.syntax unified
+# ifdef __thumb2__
+# define adrl adr
+.thumb
+# else
+.code 32
+# endif
+#endif
+
+.type K256,%object
+.align 5
+K256:
+.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.size K256,.-K256
+.word 0 @ terminator
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.LOPENSSL_armcap:
+.word OPENSSL_armcap_P-sha256_block_data_order
+#endif
+.align 5
+
+.global sha256_block_data_order
+.type sha256_block_data_order,%function
+sha256_block_data_order:
+#if __ARM_ARCH__<7
+ sub r3,pc,#8 @ sha256_block_data_order
+#else
+ adr r3,sha256_block_data_order
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ ldr r12,.LOPENSSL_armcap
+ ldr r12,[r3,r12] @ OPENSSL_armcap_P
+ tst r12,#ARMV8_SHA256
+ bne .LARMv8
+ tst r12,#ARMV7_NEON
+ bne .LNEON
+#endif
+ add $len,$inp,$len,lsl#6 @ len to point at the end of inp
+ stmdb sp!,{$ctx,$inp,$len,r4-r11,lr}
+ ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
+ sub $Ktbl,r3,#256+32 @ K256
+ sub sp,sp,#16*4 @ alloca(X[16])
+.Loop:
+# if __ARM_ARCH__>=7
+ ldr $t1,[$inp],#4
+# else
+ ldrb $t1,[$inp,#3]
+# endif
+ eor $t3,$B,$C @ magic
+ eor $t2,$t2,$t2
+___
+for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
+$code.=".Lrounds_16_xx:\n";
+for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+#if __ARM_ARCH__>=7
+ ite eq @ Thumb2 thing, sanity check in ARM
+#endif
+ ldreq $t3,[sp,#16*4] @ pull ctx
+ bne .Lrounds_16_xx
+
+ add $A,$A,$t2 @ h+=Maj(a,b,c) from the past
+ ldr $t0,[$t3,#0]
+ ldr $t1,[$t3,#4]
+ ldr $t2,[$t3,#8]
+ add $A,$A,$t0
+ ldr $t0,[$t3,#12]
+ add $B,$B,$t1
+ ldr $t1,[$t3,#16]
+ add $C,$C,$t2
+ ldr $t2,[$t3,#20]
+ add $D,$D,$t0
+ ldr $t0,[$t3,#24]
+ add $E,$E,$t1
+ ldr $t1,[$t3,#28]
+ add $F,$F,$t2
+ ldr $inp,[sp,#17*4] @ pull inp
+ ldr $t2,[sp,#18*4] @ pull inp+len
+ add $G,$G,$t0
+ add $H,$H,$t1
+ stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H}
+ cmp $inp,$t2
+ sub $Ktbl,$Ktbl,#256 @ rewind Ktbl
+ bne .Loop
+
+ add sp,sp,#`16+3`*4 @ destroy frame
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r11,pc}
+#else
+ ldmia sp!,{r4-r11,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+#endif
+.size sha256_block_data_order,.-sha256_block_data_order
+___
+######################################################################
+# NEON stuff
+#
+{{{
+my @X=map("q$_",(0..3));
+my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25");
+my $Xfer=$t4;
+my $j=0;
+
+sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
+sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
+
+sub AUTOLOAD() # thunk [simplified] x86-style perlasm
+{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
+ my $arg = pop;
+ $arg = "#$arg" if ($arg*1 eq $arg);
+ $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
+}
+
+sub Xupdate()
+{ use integer;
+ my $body = shift;
+ my @insns = (&$body,&$body,&$body,&$body);
+ my ($a,$b,$c,$d,$e,$f,$g,$h);
+
+ &vext_8 ($T0,@X[0],@X[1],4); # X[1..4]
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vext_8 ($T1,@X[2],@X[3],4); # X[9..12]
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T2,$T0,$sigma0[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12]
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T1,$T0,$sigma0[2]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T2,$T0,32-$sigma0[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T3,$T0,$sigma0[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T1,$T1,$T2);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T3,$T0,32-$sigma0[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T1,$T1,$T3); # sigma0(X[1..4])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T5,$T5,$T4);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T5,$T5,$T4); # sigma1(X[14..15])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T5,$T5,$T4);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vld1_32 ("{$T0}","[$Ktbl,:128]!");
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T5,$T5,$T4); # sigma1(X[16..17])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 ($T0,$T0,@X[0]);
+ while($#insns>=2) { eval(shift(@insns)); }
+ &vst1_32 ("{$T0}","[$Xfer,:128]!");
+ eval(shift(@insns));
+ eval(shift(@insns));
+
+ push(@X,shift(@X)); # "rotate" X[]
+}
+
+sub Xpreload()
+{ use integer;
+ my $body = shift;
+ my @insns = (&$body,&$body,&$body,&$body);
+ my ($a,$b,$c,$d,$e,$f,$g,$h);
+
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vld1_32 ("{$T0}","[$Ktbl,:128]!");
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vrev32_8 (@X[0],@X[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 ($T0,$T0,@X[0]);
+ foreach (@insns) { eval; } # remaining instructions
+ &vst1_32 ("{$T0}","[$Xfer,:128]!");
+
+ push(@X,shift(@X)); # "rotate" X[]
+}
+
+sub body_00_15 () {
+ (
+ '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
+ '&add ($h,$h,$t1)', # h+=X[i]+K[i]
+ '&eor ($t1,$f,$g)',
+ '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
+ '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past
+ '&and ($t1,$t1,$e)',
+ '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e)
+ '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
+ '&eor ($t1,$t1,$g)', # Ch(e,f,g)
+ '&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e)
+ '&eor ($t2,$a,$b)', # a^b, b^c in next round
+ '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a)
+ '&add ($h,$h,$t1)', # h+=Ch(e,f,g)
+ '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'.
+ '&ldr ($t1,"[$Ktbl]") if ($j==15);'.
+ '&ldr ($t1,"[sp,#64]") if ($j==31)',
+ '&and ($t3,$t3,$t2)', # (b^c)&=(a^b)
+ '&add ($d,$d,$h)', # d+=h
+ '&add ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a)
+ '&eor ($t3,$t3,$b)', # Maj(a,b,c)
+ '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
+ )
+}
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
+.fpu neon
+
+.global sha256_block_data_order_neon
+.type sha256_block_data_order_neon,%function
+.align 4
+sha256_block_data_order_neon:
+.LNEON:
+ stmdb sp!,{r4-r12,lr}
+
+ sub $H,sp,#16*4+16
+ adrl $Ktbl,K256
+ bic $H,$H,#15 @ align for 128-bit stores
+ mov $t2,sp
+ mov sp,$H @ alloca
+ add $len,$inp,$len,lsl#6 @ len to point at the end of inp
+
+ vld1.8 {@X[0]},[$inp]!
+ vld1.8 {@X[1]},[$inp]!
+ vld1.8 {@X[2]},[$inp]!
+ vld1.8 {@X[3]},[$inp]!
+ vld1.32 {$T0},[$Ktbl,:128]!
+ vld1.32 {$T1},[$Ktbl,:128]!
+ vld1.32 {$T2},[$Ktbl,:128]!
+ vld1.32 {$T3},[$Ktbl,:128]!
+ vrev32.8 @X[0],@X[0] @ yes, even on
+ str $ctx,[sp,#64]
+ vrev32.8 @X[1],@X[1] @ big-endian
+ str $inp,[sp,#68]
+ mov $Xfer,sp
+ vrev32.8 @X[2],@X[2]
+ str $len,[sp,#72]
+ vrev32.8 @X[3],@X[3]
+ str $t2,[sp,#76] @ save original sp
+ vadd.i32 $T0,$T0,@X[0]
+ vadd.i32 $T1,$T1,@X[1]
+ vst1.32 {$T0},[$Xfer,:128]!
+ vadd.i32 $T2,$T2,@X[2]
+ vst1.32 {$T1},[$Xfer,:128]!
+ vadd.i32 $T3,$T3,@X[3]
+ vst1.32 {$T2},[$Xfer,:128]!
+ vst1.32 {$T3},[$Xfer,:128]!
+
+ ldmia $ctx,{$A-$H}
+ sub $Xfer,$Xfer,#64
+ ldr $t1,[sp,#0]
+ eor $t2,$t2,$t2
+ eor $t3,$B,$C
+ b .L_00_48
+
+.align 4
+.L_00_48:
+___
+ &Xupdate(\&body_00_15);
+ &Xupdate(\&body_00_15);
+ &Xupdate(\&body_00_15);
+ &Xupdate(\&body_00_15);
+$code.=<<___;
+ teq $t1,#0 @ check for K256 terminator
+ ldr $t1,[sp,#0]
+ sub $Xfer,$Xfer,#64
+ bne .L_00_48
+
+ ldr $inp,[sp,#68]
+ ldr $t0,[sp,#72]
+ sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl
+ teq $inp,$t0
+ it eq
+ subeq $inp,$inp,#64 @ avoid SEGV
+ vld1.8 {@X[0]},[$inp]! @ load next input block
+ vld1.8 {@X[1]},[$inp]!
+ vld1.8 {@X[2]},[$inp]!
+ vld1.8 {@X[3]},[$inp]!
+ it ne
+ strne $inp,[sp,#68]
+ mov $Xfer,sp
+___
+ &Xpreload(\&body_00_15);
+ &Xpreload(\&body_00_15);
+ &Xpreload(\&body_00_15);
+ &Xpreload(\&body_00_15);
+$code.=<<___;
+ ldr $t0,[$t1,#0]
+ add $A,$A,$t2 @ h+=Maj(a,b,c) from the past
+ ldr $t2,[$t1,#4]
+ ldr $t3,[$t1,#8]
+ ldr $t4,[$t1,#12]
+ add $A,$A,$t0 @ accumulate
+ ldr $t0,[$t1,#16]
+ add $B,$B,$t2
+ ldr $t2,[$t1,#20]
+ add $C,$C,$t3
+ ldr $t3,[$t1,#24]
+ add $D,$D,$t4
+ ldr $t4,[$t1,#28]
+ add $E,$E,$t0
+ str $A,[$t1],#4
+ add $F,$F,$t2
+ str $B,[$t1],#4
+ add $G,$G,$t3
+ str $C,[$t1],#4
+ add $H,$H,$t4
+ str $D,[$t1],#4
+ stmia $t1,{$E-$H}
+
+ ittte ne
+ movne $Xfer,sp
+ ldrne $t1,[sp,#0]
+ eorne $t2,$t2,$t2
+ ldreq sp,[sp,#76] @ restore original sp
+ itt ne
+ eorne $t3,$B,$C
+ bne .L_00_48
+
+ ldmia sp!,{r4-r12,pc}
+.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
+#endif
+___
+}}}
+######################################################################
+# ARMv8 stuff
+#
+{{{
+my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2));
+my @MSG=map("q$_",(8..11));
+my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
+my $Ktbl="r3";
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+
+# ifdef __thumb2__
+# define INST(a,b,c,d) .byte c,d|0xc,a,b
+# else
+# define INST(a,b,c,d) .byte a,b,c,d
+# endif
+
+.type sha256_block_data_order_armv8,%function
+.align 5
+sha256_block_data_order_armv8:
+.LARMv8:
+ vld1.32 {$ABCD,$EFGH},[$ctx]
+# ifdef __thumb2__
+ adr $Ktbl,.LARMv8
+ sub $Ktbl,$Ktbl,#.LARMv8-K256
+# else
+ adrl $Ktbl,K256
+# endif
+ add $len,$inp,$len,lsl#6 @ len to point at the end of inp
+
+.Loop_v8:
+ vld1.8 {@MSG[0]-@MSG[1]},[$inp]!
+ vld1.8 {@MSG[2]-@MSG[3]},[$inp]!
+ vld1.32 {$W0},[$Ktbl]!
+ vrev32.8 @MSG[0],@MSG[0]
+ vrev32.8 @MSG[1],@MSG[1]
+ vrev32.8 @MSG[2],@MSG[2]
+ vrev32.8 @MSG[3],@MSG[3]
+ vmov $ABCD_SAVE,$ABCD @ offload
+ vmov $EFGH_SAVE,$EFGH
+ teq $inp,$len
+___
+for($i=0;$i<12;$i++) {
+$code.=<<___;
+ vld1.32 {$W1},[$Ktbl]!
+ vadd.i32 $W0,$W0,@MSG[0]
+ sha256su0 @MSG[0],@MSG[1]
+ vmov $abcd,$ABCD
+ sha256h $ABCD,$EFGH,$W0
+ sha256h2 $EFGH,$abcd,$W0
+ sha256su1 @MSG[0],@MSG[2],@MSG[3]
+___
+ ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG));
+}
+$code.=<<___;
+ vld1.32 {$W1},[$Ktbl]!
+ vadd.i32 $W0,$W0,@MSG[0]
+ vmov $abcd,$ABCD
+ sha256h $ABCD,$EFGH,$W0
+ sha256h2 $EFGH,$abcd,$W0
+
+ vld1.32 {$W0},[$Ktbl]!
+ vadd.i32 $W1,$W1,@MSG[1]
+ vmov $abcd,$ABCD
+ sha256h $ABCD,$EFGH,$W1
+ sha256h2 $EFGH,$abcd,$W1
+
+ vld1.32 {$W1},[$Ktbl]
+ vadd.i32 $W0,$W0,@MSG[2]
+ sub $Ktbl,$Ktbl,#256-16 @ rewind
+ vmov $abcd,$ABCD
+ sha256h $ABCD,$EFGH,$W0
+ sha256h2 $EFGH,$abcd,$W0
+
+ vadd.i32 $W1,$W1,@MSG[3]
+ vmov $abcd,$ABCD
+ sha256h $ABCD,$EFGH,$W1
+ sha256h2 $EFGH,$abcd,$W1
+
+ vadd.i32 $ABCD,$ABCD,$ABCD_SAVE
+ vadd.i32 $EFGH,$EFGH,$EFGH_SAVE
+ it ne
+ bne .Loop_v8
+
+ vst1.32 {$ABCD,$EFGH},[$ctx]
+
+ ret @ bx lr
+.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
+#endif
+___
+}}}
+$code.=<<___;
+.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.comm OPENSSL_armcap_P,4,4
+#endif
+___
+
+open SELF,$0;
+while(<SELF>) {
+ next if (/^#!/);
+ last if (!s/^#/@/ and !/^$/);
+ print;
+}
+close SELF;
+
+{ my %opcode = (
+ "sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40,
+ "sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 );
+
+ sub unsha256 {
+ my ($mnemonic,$arg)=@_;
+
+ if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
+ my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
+ |(($2&7)<<17)|(($2&8)<<4)
+ |(($3&7)<<1) |(($3&8)<<2);
+ # since ARMv7 instructions are always encoded little-endian.
+ # correct solution is to use .inst directive, but older
+ # assemblers don't implement it:-(
+ sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
+ $word&0xff,($word>>8)&0xff,
+ ($word>>16)&0xff,($word>>24)&0xff,
+ $mnemonic,$arg;
+ }
+ }
+}
+
+foreach (split($/,$code)) {
+
+ s/\`([^\`]*)\`/eval $1/geo;
+
+ s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo;
+
+ s/\bret\b/bx lr/go or
+ s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
+
+ print $_,"\n";
+}
+
+close STDOUT; # enforce flush
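
The unsha256 post-processor above exists because older assemblers do not
know the ARMv8 sha256* mnemonics, so the script hand-assembles them into
.byte sequences via the INST macro. A small standalone Perl sketch of the
same computation (register numbers chosen arbitrarily for illustration):

  # worked example: encode "sha256h q0,q1,q2" the way unsha256 does
  my $word = 0xf3000c40                          # base opcode for sha256h
           | ((0 & 7) << 13) | ((0 & 8) << 19)   # Vd <- q0
           | ((1 & 7) << 17) | ((1 & 8) << 4)    # Vn <- q1
           | ((2 & 7) << 1)  | ((2 & 8) << 2);   # Vm <- q2
  printf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\n",  # prints INST(0x44,0x0c,0x02,0xf3)
         $word & 0xff, ($word >> 8) & 0xff,
         ($word >> 16) & 0xff, ($word >> 24) & 0xff;

Emitting the four bytes explicitly keeps the little-endian encoding correct
in ARM mode, while the Thumb2 variant of INST swaps the halfwords (and fixes
up the top bits) to match.
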
diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped
new file mode 100644
index 000000000000..555a1a8eec90
--- /dev/null
+++ b/arch/arm/crypto/sha256-core.S_shipped
@@ -0,0 +1,2808 @@
+
+@ ====================================================================
+@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+@ project. The module is, however, dual licensed under OpenSSL and
+@ CRYPTOGAMS licenses depending on where you obtain it. For further
+@ details see http://www.openssl.org/~appro/cryptogams/.
+@
+@ Permission to use under GPL terms is granted.
+@ ====================================================================
+
+@ SHA256 block procedure for ARMv4. May 2007.
+
+@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
+@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
+@ byte [on single-issue Xscale PXA250 core].
+
+@ July 2010.
+@
+@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
+@ Cortex A8 core and ~20 cycles per processed byte.
+
+@ February 2011.
+@
+@ Profiler-assisted and platform-specific optimization resulted in 16%
+@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
+
+@ September 2013.
+@
+@ Add NEON implementation. On Cortex A8 it was measured to process one
+@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
+@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
+@ code (meaning that latter performs sub-optimally, nothing was done
+@ about it).
+
+@ May 2014.
+@
+@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
+
+#ifndef __KERNEL__
+# include "arm_arch.h"
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
+#endif
+
+.text
+#if __ARM_ARCH__<7
+.code 32
+#else
+.syntax unified
+# ifdef __thumb2__
+# define adrl adr
+.thumb
+# else
+.code 32
+# endif
+#endif
+
+.type K256,%object
+.align 5
+K256:
+.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.size K256,.-K256
+.word 0 @ terminator
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.LOPENSSL_armcap:
+.word OPENSSL_armcap_P-sha256_block_data_order
+#endif
+.align 5
+
+.global sha256_block_data_order
+.type sha256_block_data_order,%function
+sha256_block_data_order:
+#if __ARM_ARCH__<7
+ sub r3,pc,#8 @ sha256_block_data_order
+#else
+ adr r3,sha256_block_data_order
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ ldr r12,.LOPENSSL_armcap
+ ldr r12,[r3,r12] @ OPENSSL_armcap_P
+ tst r12,#ARMV8_SHA256
+ bne .LARMv8
+ tst r12,#ARMV7_NEON
+ bne .LNEON
+#endif
+ add r2,r1,r2,lsl#6 @ len to point at the end of inp
+ stmdb sp!,{r0,r1,r2,r4-r11,lr}
+ ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
+ sub r14,r3,#256+32 @ K256
+ sub sp,sp,#16*4 @ alloca(X[16])
+.Loop:
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ magic
+ eor r12,r12,r12
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 0
+# if 0==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r8,r8,ror#5
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 0
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 0==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r8,r8,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r11,r11,r2 @ h+=X[i]
+ str r2,[sp,#0*4]
+ eor r2,r9,r10
+ add r11,r11,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r8
+ add r11,r11,r12 @ h+=K256[i]
+ eor r2,r2,r10 @ Ch(e,f,g)
+ eor r0,r4,r4,ror#11
+ add r11,r11,r2 @ h+=Ch(e,f,g)
+#if 0==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 0<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r4,r5 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#2*4] @ from future BODY_16_xx
+ eor r12,r4,r5 @ a^b, b^c in next round
+ ldr r1,[sp,#15*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r4,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r7,r7,r11 @ d+=h
+ eor r3,r3,r5 @ Maj(a,b,c)
+ add r11,r11,r0,ror#2 @ h+=Sigma0(a)
+ @ add r11,r11,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 1
+# if 1==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r7,r7,ror#5
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 1
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 1==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r7,r7,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r10,r10,r2 @ h+=X[i]
+ str r2,[sp,#1*4]
+ eor r2,r8,r9
+ add r10,r10,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r7
+ add r10,r10,r3 @ h+=K256[i]
+ eor r2,r2,r9 @ Ch(e,f,g)
+ eor r0,r11,r11,ror#11
+ add r10,r10,r2 @ h+=Ch(e,f,g)
+#if 1==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 1<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r11,r4 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#3*4] @ from future BODY_16_xx
+ eor r3,r11,r4 @ a^b, b^c in next round
+ ldr r1,[sp,#0*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r11,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r6,r6,r10 @ d+=h
+ eor r12,r12,r4 @ Maj(a,b,c)
+ add r10,r10,r0,ror#2 @ h+=Sigma0(a)
+ @ add r10,r10,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 2
+# if 2==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 2
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 2==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r6,r6,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r9,r9,r2 @ h+=X[i]
+ str r2,[sp,#2*4]
+ eor r2,r7,r8
+ add r9,r9,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r6
+ add r9,r9,r12 @ h+=K256[i]
+ eor r2,r2,r8 @ Ch(e,f,g)
+ eor r0,r10,r10,ror#11
+ add r9,r9,r2 @ h+=Ch(e,f,g)
+#if 2==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 2<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r10,r11 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#4*4] @ from future BODY_16_xx
+ eor r12,r10,r11 @ a^b, b^c in next round
+ ldr r1,[sp,#1*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r10,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r5,r5,r9 @ d+=h
+ eor r3,r3,r11 @ Maj(a,b,c)
+ add r9,r9,r0,ror#2 @ h+=Sigma0(a)
+ @ add r9,r9,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 3
+# if 3==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r5,r5,ror#5
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 3
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 3==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r5,r5,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r8,r8,r2 @ h+=X[i]
+ str r2,[sp,#3*4]
+ eor r2,r6,r7
+ add r8,r8,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r5
+ add r8,r8,r3 @ h+=K256[i]
+ eor r2,r2,r7 @ Ch(e,f,g)
+ eor r0,r9,r9,ror#11
+ add r8,r8,r2 @ h+=Ch(e,f,g)
+#if 3==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 3<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r9,r10 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#5*4] @ from future BODY_16_xx
+ eor r3,r9,r10 @ a^b, b^c in next round
+ ldr r1,[sp,#2*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r9,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r4,r4,r8 @ d+=h
+ eor r12,r12,r10 @ Maj(a,b,c)
+ add r8,r8,r0,ror#2 @ h+=Sigma0(a)
+ @ add r8,r8,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 4
+# if 4==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r4,r4,ror#5
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 4
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 4==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r4,r4,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r7,r7,r2 @ h+=X[i]
+ str r2,[sp,#4*4]
+ eor r2,r5,r6
+ add r7,r7,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r4
+ add r7,r7,r12 @ h+=K256[i]
+ eor r2,r2,r6 @ Ch(e,f,g)
+ eor r0,r8,r8,ror#11
+ add r7,r7,r2 @ h+=Ch(e,f,g)
+#if 4==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 4<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r8,r9 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#6*4] @ from future BODY_16_xx
+ eor r12,r8,r9 @ a^b, b^c in next round
+ ldr r1,[sp,#3*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r8,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r11,r11,r7 @ d+=h
+ eor r3,r3,r9 @ Maj(a,b,c)
+ add r7,r7,r0,ror#2 @ h+=Sigma0(a)
+ @ add r7,r7,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 5
+# if 5==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r11,r11,ror#5
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 5
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 5==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r11,r11,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r6,r6,r2 @ h+=X[i]
+ str r2,[sp,#5*4]
+ eor r2,r4,r5
+ add r6,r6,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r11
+ add r6,r6,r3 @ h+=K256[i]
+ eor r2,r2,r5 @ Ch(e,f,g)
+ eor r0,r7,r7,ror#11
+ add r6,r6,r2 @ h+=Ch(e,f,g)
+#if 5==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 5<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r7,r8 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#7*4] @ from future BODY_16_xx
+ eor r3,r7,r8 @ a^b, b^c in next round
+ ldr r1,[sp,#4*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r7,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r10,r10,r6 @ d+=h
+ eor r12,r12,r8 @ Maj(a,b,c)
+ add r6,r6,r0,ror#2 @ h+=Sigma0(a)
+ @ add r6,r6,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 6
+# if 6==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 6
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 6==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r10,r10,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r5,r5,r2 @ h+=X[i]
+ str r2,[sp,#6*4]
+ eor r2,r11,r4
+ add r5,r5,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r10
+ add r5,r5,r12 @ h+=K256[i]
+ eor r2,r2,r4 @ Ch(e,f,g)
+ eor r0,r6,r6,ror#11
+ add r5,r5,r2 @ h+=Ch(e,f,g)
+#if 6==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 6<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r6,r7 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#8*4] @ from future BODY_16_xx
+ eor r12,r6,r7 @ a^b, b^c in next round
+ ldr r1,[sp,#5*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r6,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r9,r9,r5 @ d+=h
+ eor r3,r3,r7 @ Maj(a,b,c)
+ add r5,r5,r0,ror#2 @ h+=Sigma0(a)
+ @ add r5,r5,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 7
+# if 7==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r9,r9,ror#5
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 7
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 7==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r9,r9,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r4,r4,r2 @ h+=X[i]
+ str r2,[sp,#7*4]
+ eor r2,r10,r11
+ add r4,r4,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r9
+ add r4,r4,r3 @ h+=K256[i]
+ eor r2,r2,r11 @ Ch(e,f,g)
+ eor r0,r5,r5,ror#11
+ add r4,r4,r2 @ h+=Ch(e,f,g)
+#if 7==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 7<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#9*4] @ from future BODY_16_xx
+ eor r3,r5,r6 @ a^b, b^c in next round
+ ldr r1,[sp,#6*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r5,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r8,r8,r4 @ d+=h
+ eor r12,r12,r6 @ Maj(a,b,c)
+ add r4,r4,r0,ror#2 @ h+=Sigma0(a)
+ @ add r4,r4,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 8
+# if 8==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r8,r8,ror#5
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 8
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 8==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r8,r8,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r11,r11,r2 @ h+=X[i]
+ str r2,[sp,#8*4]
+ eor r2,r9,r10
+ add r11,r11,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r8
+ add r11,r11,r12 @ h+=K256[i]
+ eor r2,r2,r10 @ Ch(e,f,g)
+ eor r0,r4,r4,ror#11
+ add r11,r11,r2 @ h+=Ch(e,f,g)
+#if 8==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 8<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r4,r5 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#10*4] @ from future BODY_16_xx
+ eor r12,r4,r5 @ a^b, b^c in next round
+ ldr r1,[sp,#7*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r4,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r7,r7,r11 @ d+=h
+ eor r3,r3,r5 @ Maj(a,b,c)
+ add r11,r11,r0,ror#2 @ h+=Sigma0(a)
+ @ add r11,r11,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 9
+# if 9==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r7,r7,ror#5
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 9
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 9==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r7,r7,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r10,r10,r2 @ h+=X[i]
+ str r2,[sp,#9*4]
+ eor r2,r8,r9
+ add r10,r10,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r7
+ add r10,r10,r3 @ h+=K256[i]
+ eor r2,r2,r9 @ Ch(e,f,g)
+ eor r0,r11,r11,ror#11
+ add r10,r10,r2 @ h+=Ch(e,f,g)
+#if 9==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 9<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r11,r4 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#11*4] @ from future BODY_16_xx
+ eor r3,r11,r4 @ a^b, b^c in next round
+ ldr r1,[sp,#8*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r11,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r6,r6,r10 @ d+=h
+ eor r12,r12,r4 @ Maj(a,b,c)
+ add r10,r10,r0,ror#2 @ h+=Sigma0(a)
+ @ add r10,r10,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 10
+# if 10==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 10
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 10==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r6,r6,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r9,r9,r2 @ h+=X[i]
+ str r2,[sp,#10*4]
+ eor r2,r7,r8
+ add r9,r9,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r6
+ add r9,r9,r12 @ h+=K256[i]
+ eor r2,r2,r8 @ Ch(e,f,g)
+ eor r0,r10,r10,ror#11
+ add r9,r9,r2 @ h+=Ch(e,f,g)
+#if 10==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 10<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r10,r11 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#12*4] @ from future BODY_16_xx
+ eor r12,r10,r11 @ a^b, b^c in next round
+ ldr r1,[sp,#9*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r10,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r5,r5,r9 @ d+=h
+ eor r3,r3,r11 @ Maj(a,b,c)
+ add r9,r9,r0,ror#2 @ h+=Sigma0(a)
+ @ add r9,r9,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 11
+# if 11==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r5,r5,ror#5
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 11
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 11==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r5,r5,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r8,r8,r2 @ h+=X[i]
+ str r2,[sp,#11*4]
+ eor r2,r6,r7
+ add r8,r8,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r5
+ add r8,r8,r3 @ h+=K256[i]
+ eor r2,r2,r7 @ Ch(e,f,g)
+ eor r0,r9,r9,ror#11
+ add r8,r8,r2 @ h+=Ch(e,f,g)
+#if 11==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 11<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r9,r10 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#13*4] @ from future BODY_16_xx
+ eor r3,r9,r10 @ a^b, b^c in next round
+ ldr r1,[sp,#10*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r9,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r4,r4,r8 @ d+=h
+ eor r12,r12,r10 @ Maj(a,b,c)
+ add r8,r8,r0,ror#2 @ h+=Sigma0(a)
+ @ add r8,r8,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 12
+# if 12==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r4,r4,ror#5
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 12
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 12==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r4,r4,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r7,r7,r2 @ h+=X[i]
+ str r2,[sp,#12*4]
+ eor r2,r5,r6
+ add r7,r7,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r4
+ add r7,r7,r12 @ h+=K256[i]
+ eor r2,r2,r6 @ Ch(e,f,g)
+ eor r0,r8,r8,ror#11
+ add r7,r7,r2 @ h+=Ch(e,f,g)
+#if 12==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 12<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r8,r9 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#14*4] @ from future BODY_16_xx
+ eor r12,r8,r9 @ a^b, b^c in next round
+ ldr r1,[sp,#11*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r8,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r11,r11,r7 @ d+=h
+ eor r3,r3,r9 @ Maj(a,b,c)
+ add r7,r7,r0,ror#2 @ h+=Sigma0(a)
+ @ add r7,r7,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 13
+# if 13==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r11,r11,ror#5
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 13
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 13==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r11,r11,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r6,r6,r2 @ h+=X[i]
+ str r2,[sp,#13*4]
+ eor r2,r4,r5
+ add r6,r6,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r11
+ add r6,r6,r3 @ h+=K256[i]
+ eor r2,r2,r5 @ Ch(e,f,g)
+ eor r0,r7,r7,ror#11
+ add r6,r6,r2 @ h+=Ch(e,f,g)
+#if 13==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 13<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r7,r8 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#15*4] @ from future BODY_16_xx
+ eor r3,r7,r8 @ a^b, b^c in next round
+ ldr r1,[sp,#12*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r7,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r10,r10,r6 @ d+=h
+ eor r12,r12,r8 @ Maj(a,b,c)
+ add r6,r6,r0,ror#2 @ h+=Sigma0(a)
+ @ add r6,r6,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 14
+# if 14==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 14
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 14==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r10,r10,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r5,r5,r2 @ h+=X[i]
+ str r2,[sp,#14*4]
+ eor r2,r11,r4
+ add r5,r5,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r10
+ add r5,r5,r12 @ h+=K256[i]
+ eor r2,r2,r4 @ Ch(e,f,g)
+ eor r0,r6,r6,ror#11
+ add r5,r5,r2 @ h+=Ch(e,f,g)
+#if 14==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 14<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r6,r7 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#0*4] @ from future BODY_16_xx
+ eor r12,r6,r7 @ a^b, b^c in next round
+ ldr r1,[sp,#13*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r6,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r9,r9,r5 @ d+=h
+ eor r3,r3,r7 @ Maj(a,b,c)
+ add r5,r5,r0,ror#2 @ h+=Sigma0(a)
+ @ add r5,r5,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 15
+# if 15==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r9,r9,ror#5
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 15
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 15==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r9,r9,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r4,r4,r2 @ h+=X[i]
+ str r2,[sp,#15*4]
+ eor r2,r10,r11
+ add r4,r4,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r9
+ add r4,r4,r3 @ h+=K256[i]
+ eor r2,r2,r11 @ Ch(e,f,g)
+ eor r0,r5,r5,ror#11
+ add r4,r4,r2 @ h+=Ch(e,f,g)
+#if 15==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 15<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#1*4] @ from future BODY_16_xx
+ eor r3,r5,r6 @ a^b, b^c in next round
+ ldr r1,[sp,#14*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r5,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r8,r8,r4 @ d+=h
+ eor r12,r12,r6 @ Maj(a,b,c)
+ add r4,r4,r0,ror#2 @ h+=Sigma0(a)
+ @ add r4,r4,r12 @ h+=Maj(a,b,c)
+.Lrounds_16_xx:
+ @ ldr r2,[sp,#1*4] @ 16
+ @ ldr r1,[sp,#14*4]
+ mov r0,r2,ror#7
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#0*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#9*4]
+
+ add r12,r12,r0
+ eor r0,r8,r8,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r11,r11,r2 @ h+=X[i]
+ str r2,[sp,#0*4]
+ eor r2,r9,r10
+ add r11,r11,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r8
+ add r11,r11,r12 @ h+=K256[i]
+ eor r2,r2,r10 @ Ch(e,f,g)
+ eor r0,r4,r4,ror#11
+ add r11,r11,r2 @ h+=Ch(e,f,g)
+#if 16==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 16<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r4,r5 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#2*4] @ from future BODY_16_xx
+ eor r12,r4,r5 @ a^b, b^c in next round
+ ldr r1,[sp,#15*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r4,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r7,r7,r11 @ d+=h
+ eor r3,r3,r5 @ Maj(a,b,c)
+ add r11,r11,r0,ror#2 @ h+=Sigma0(a)
+ @ add r11,r11,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#2*4] @ 17
+ @ ldr r1,[sp,#15*4]
+ mov r0,r2,ror#7
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#1*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#10*4]
+
+ add r3,r3,r0
+ eor r0,r7,r7,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r10,r10,r2 @ h+=X[i]
+ str r2,[sp,#1*4]
+ eor r2,r8,r9
+ add r10,r10,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r7
+ add r10,r10,r3 @ h+=K256[i]
+ eor r2,r2,r9 @ Ch(e,f,g)
+ eor r0,r11,r11,ror#11
+ add r10,r10,r2 @ h+=Ch(e,f,g)
+#if 17==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 17<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r11,r4 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#3*4] @ from future BODY_16_xx
+ eor r3,r11,r4 @ a^b, b^c in next round
+ ldr r1,[sp,#0*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r11,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r6,r6,r10 @ d+=h
+ eor r12,r12,r4 @ Maj(a,b,c)
+ add r10,r10,r0,ror#2 @ h+=Sigma0(a)
+ @ add r10,r10,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#3*4] @ 18
+ @ ldr r1,[sp,#0*4]
+ mov r0,r2,ror#7
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#2*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#11*4]
+
+ add r12,r12,r0
+ eor r0,r6,r6,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r9,r9,r2 @ h+=X[i]
+ str r2,[sp,#2*4]
+ eor r2,r7,r8
+ add r9,r9,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r6
+ add r9,r9,r12 @ h+=K256[i]
+ eor r2,r2,r8 @ Ch(e,f,g)
+ eor r0,r10,r10,ror#11
+ add r9,r9,r2 @ h+=Ch(e,f,g)
+#if 18==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 18<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r10,r11 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#4*4] @ from future BODY_16_xx
+ eor r12,r10,r11 @ a^b, b^c in next round
+ ldr r1,[sp,#1*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r10,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r5,r5,r9 @ d+=h
+ eor r3,r3,r11 @ Maj(a,b,c)
+ add r9,r9,r0,ror#2 @ h+=Sigma0(a)
+ @ add r9,r9,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#4*4] @ 19
+ @ ldr r1,[sp,#1*4]
+ mov r0,r2,ror#7
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#3*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#12*4]
+
+ add r3,r3,r0
+ eor r0,r5,r5,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r8,r8,r2 @ h+=X[i]
+ str r2,[sp,#3*4]
+ eor r2,r6,r7
+ add r8,r8,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r5
+ add r8,r8,r3 @ h+=K256[i]
+ eor r2,r2,r7 @ Ch(e,f,g)
+ eor r0,r9,r9,ror#11
+ add r8,r8,r2 @ h+=Ch(e,f,g)
+#if 19==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 19<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r9,r10 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#5*4] @ from future BODY_16_xx
+ eor r3,r9,r10 @ a^b, b^c in next round
+ ldr r1,[sp,#2*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r9,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r4,r4,r8 @ d+=h
+ eor r12,r12,r10 @ Maj(a,b,c)
+ add r8,r8,r0,ror#2 @ h+=Sigma0(a)
+ @ add r8,r8,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#5*4] @ 20
+ @ ldr r1,[sp,#2*4]
+ mov r0,r2,ror#7
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#4*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#13*4]
+
+ add r12,r12,r0
+ eor r0,r4,r4,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r7,r7,r2 @ h+=X[i]
+ str r2,[sp,#4*4]
+ eor r2,r5,r6
+ add r7,r7,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r4
+ add r7,r7,r12 @ h+=K256[i]
+ eor r2,r2,r6 @ Ch(e,f,g)
+ eor r0,r8,r8,ror#11
+ add r7,r7,r2 @ h+=Ch(e,f,g)
+#if 20==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 20<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r8,r9 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#6*4] @ from future BODY_16_xx
+ eor r12,r8,r9 @ a^b, b^c in next round
+ ldr r1,[sp,#3*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r8,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r11,r11,r7 @ d+=h
+ eor r3,r3,r9 @ Maj(a,b,c)
+ add r7,r7,r0,ror#2 @ h+=Sigma0(a)
+ @ add r7,r7,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#6*4] @ 21
+ @ ldr r1,[sp,#3*4]
+ mov r0,r2,ror#7
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#5*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#14*4]
+
+ add r3,r3,r0
+ eor r0,r11,r11,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r6,r6,r2 @ h+=X[i]
+ str r2,[sp,#5*4]
+ eor r2,r4,r5
+ add r6,r6,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r11
+ add r6,r6,r3 @ h+=K256[i]
+ eor r2,r2,r5 @ Ch(e,f,g)
+ eor r0,r7,r7,ror#11
+ add r6,r6,r2 @ h+=Ch(e,f,g)
+#if 21==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 21<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r7,r8 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#7*4] @ from future BODY_16_xx
+ eor r3,r7,r8 @ a^b, b^c in next round
+ ldr r1,[sp,#4*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r7,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r10,r10,r6 @ d+=h
+ eor r12,r12,r8 @ Maj(a,b,c)
+ add r6,r6,r0,ror#2 @ h+=Sigma0(a)
+ @ add r6,r6,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#7*4] @ 22
+ @ ldr r1,[sp,#4*4]
+ mov r0,r2,ror#7
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#6*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#15*4]
+
+ add r12,r12,r0
+ eor r0,r10,r10,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r5,r5,r2 @ h+=X[i]
+ str r2,[sp,#6*4]
+ eor r2,r11,r4
+ add r5,r5,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r10
+ add r5,r5,r12 @ h+=K256[i]
+ eor r2,r2,r4 @ Ch(e,f,g)
+ eor r0,r6,r6,ror#11
+ add r5,r5,r2 @ h+=Ch(e,f,g)
+#if 22==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 22<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r6,r7 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#8*4] @ from future BODY_16_xx
+ eor r12,r6,r7 @ a^b, b^c in next round
+ ldr r1,[sp,#5*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r6,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r9,r9,r5 @ d+=h
+ eor r3,r3,r7 @ Maj(a,b,c)
+ add r5,r5,r0,ror#2 @ h+=Sigma0(a)
+ @ add r5,r5,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#8*4] @ 23
+ @ ldr r1,[sp,#5*4]
+ mov r0,r2,ror#7
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#7*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#0*4]
+
+ add r3,r3,r0
+ eor r0,r9,r9,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r4,r4,r2 @ h+=X[i]
+ str r2,[sp,#7*4]
+ eor r2,r10,r11
+ add r4,r4,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r9
+ add r4,r4,r3 @ h+=K256[i]
+ eor r2,r2,r11 @ Ch(e,f,g)
+ eor r0,r5,r5,ror#11
+ add r4,r4,r2 @ h+=Ch(e,f,g)
+#if 23==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 23<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#9*4] @ from future BODY_16_xx
+ eor r3,r5,r6 @ a^b, b^c in next round
+ ldr r1,[sp,#6*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r5,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r8,r8,r4 @ d+=h
+ eor r12,r12,r6 @ Maj(a,b,c)
+ add r4,r4,r0,ror#2 @ h+=Sigma0(a)
+ @ add r4,r4,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#9*4] @ 24
+ @ ldr r1,[sp,#6*4]
+ mov r0,r2,ror#7
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#8*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#1*4]
+
+ add r12,r12,r0
+ eor r0,r8,r8,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r11,r11,r2 @ h+=X[i]
+ str r2,[sp,#8*4]
+ eor r2,r9,r10
+ add r11,r11,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r8
+ add r11,r11,r12 @ h+=K256[i]
+ eor r2,r2,r10 @ Ch(e,f,g)
+ eor r0,r4,r4,ror#11
+ add r11,r11,r2 @ h+=Ch(e,f,g)
+#if 24==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 24<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r4,r5 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#10*4] @ from future BODY_16_xx
+ eor r12,r4,r5 @ a^b, b^c in next round
+ ldr r1,[sp,#7*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r4,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r7,r7,r11 @ d+=h
+ eor r3,r3,r5 @ Maj(a,b,c)
+ add r11,r11,r0,ror#2 @ h+=Sigma0(a)
+ @ add r11,r11,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#10*4] @ 25
+ @ ldr r1,[sp,#7*4]
+ mov r0,r2,ror#7
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#9*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#2*4]
+
+ add r3,r3,r0
+ eor r0,r7,r7,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r10,r10,r2 @ h+=X[i]
+ str r2,[sp,#9*4]
+ eor r2,r8,r9
+ add r10,r10,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r7
+ add r10,r10,r3 @ h+=K256[i]
+ eor r2,r2,r9 @ Ch(e,f,g)
+ eor r0,r11,r11,ror#11
+ add r10,r10,r2 @ h+=Ch(e,f,g)
+#if 25==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 25<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r11,r4 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#11*4] @ from future BODY_16_xx
+ eor r3,r11,r4 @ a^b, b^c in next round
+ ldr r1,[sp,#8*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r11,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r6,r6,r10 @ d+=h
+ eor r12,r12,r4 @ Maj(a,b,c)
+ add r10,r10,r0,ror#2 @ h+=Sigma0(a)
+ @ add r10,r10,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#11*4] @ 26
+ @ ldr r1,[sp,#8*4]
+ mov r0,r2,ror#7
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#10*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#3*4]
+
+ add r12,r12,r0
+ eor r0,r6,r6,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r9,r9,r2 @ h+=X[i]
+ str r2,[sp,#10*4]
+ eor r2,r7,r8
+ add r9,r9,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r6
+ add r9,r9,r12 @ h+=K256[i]
+ eor r2,r2,r8 @ Ch(e,f,g)
+ eor r0,r10,r10,ror#11
+ add r9,r9,r2 @ h+=Ch(e,f,g)
+#if 26==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 26<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r10,r11 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#12*4] @ from future BODY_16_xx
+ eor r12,r10,r11 @ a^b, b^c in next round
+ ldr r1,[sp,#9*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r10,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r5,r5,r9 @ d+=h
+ eor r3,r3,r11 @ Maj(a,b,c)
+ add r9,r9,r0,ror#2 @ h+=Sigma0(a)
+ @ add r9,r9,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#12*4] @ 27
+ @ ldr r1,[sp,#9*4]
+ mov r0,r2,ror#7
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#11*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#4*4]
+
+ add r3,r3,r0
+ eor r0,r5,r5,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r8,r8,r2 @ h+=X[i]
+ str r2,[sp,#11*4]
+ eor r2,r6,r7
+ add r8,r8,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r5
+ add r8,r8,r3 @ h+=K256[i]
+ eor r2,r2,r7 @ Ch(e,f,g)
+ eor r0,r9,r9,ror#11
+ add r8,r8,r2 @ h+=Ch(e,f,g)
+#if 27==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 27<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r9,r10 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#13*4] @ from future BODY_16_xx
+ eor r3,r9,r10 @ a^b, b^c in next round
+ ldr r1,[sp,#10*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r9,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r4,r4,r8 @ d+=h
+ eor r12,r12,r10 @ Maj(a,b,c)
+ add r8,r8,r0,ror#2 @ h+=Sigma0(a)
+ @ add r8,r8,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#13*4] @ 28
+ @ ldr r1,[sp,#10*4]
+ mov r0,r2,ror#7
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#12*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#5*4]
+
+ add r12,r12,r0
+ eor r0,r4,r4,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r7,r7,r2 @ h+=X[i]
+ str r2,[sp,#12*4]
+ eor r2,r5,r6
+ add r7,r7,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r4
+ add r7,r7,r12 @ h+=K256[i]
+ eor r2,r2,r6 @ Ch(e,f,g)
+ eor r0,r8,r8,ror#11
+ add r7,r7,r2 @ h+=Ch(e,f,g)
+#if 28==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 28<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r8,r9 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#14*4] @ from future BODY_16_xx
+ eor r12,r8,r9 @ a^b, b^c in next round
+ ldr r1,[sp,#11*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r8,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r11,r11,r7 @ d+=h
+ eor r3,r3,r9 @ Maj(a,b,c)
+ add r7,r7,r0,ror#2 @ h+=Sigma0(a)
+ @ add r7,r7,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#14*4] @ 29
+ @ ldr r1,[sp,#11*4]
+ mov r0,r2,ror#7
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#13*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#6*4]
+
+ add r3,r3,r0
+ eor r0,r11,r11,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r6,r6,r2 @ h+=X[i]
+ str r2,[sp,#13*4]
+ eor r2,r4,r5
+ add r6,r6,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r11
+ add r6,r6,r3 @ h+=K256[i]
+ eor r2,r2,r5 @ Ch(e,f,g)
+ eor r0,r7,r7,ror#11
+ add r6,r6,r2 @ h+=Ch(e,f,g)
+#if 29==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 29<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r7,r8 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#15*4] @ from future BODY_16_xx
+ eor r3,r7,r8 @ a^b, b^c in next round
+ ldr r1,[sp,#12*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r7,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r10,r10,r6 @ d+=h
+ eor r12,r12,r8 @ Maj(a,b,c)
+ add r6,r6,r0,ror#2 @ h+=Sigma0(a)
+ @ add r6,r6,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#15*4] @ 30
+ @ ldr r1,[sp,#12*4]
+ mov r0,r2,ror#7
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#14*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#7*4]
+
+ add r12,r12,r0
+ eor r0,r10,r10,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r5,r5,r2 @ h+=X[i]
+ str r2,[sp,#14*4]
+ eor r2,r11,r4
+ add r5,r5,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r10
+ add r5,r5,r12 @ h+=K256[i]
+ eor r2,r2,r4 @ Ch(e,f,g)
+ eor r0,r6,r6,ror#11
+ add r5,r5,r2 @ h+=Ch(e,f,g)
+#if 30==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 30<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r6,r7 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#0*4] @ from future BODY_16_xx
+ eor r12,r6,r7 @ a^b, b^c in next round
+ ldr r1,[sp,#13*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r6,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r9,r9,r5 @ d+=h
+ eor r3,r3,r7 @ Maj(a,b,c)
+ add r5,r5,r0,ror#2 @ h+=Sigma0(a)
+ @ add r5,r5,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#0*4] @ 31
+ @ ldr r1,[sp,#13*4]
+ mov r0,r2,ror#7
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#15*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#8*4]
+
+ add r3,r3,r0
+ eor r0,r9,r9,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r4,r4,r2 @ h+=X[i]
+ str r2,[sp,#15*4]
+ eor r2,r10,r11
+ add r4,r4,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r9
+ add r4,r4,r3 @ h+=K256[i]
+ eor r2,r2,r11 @ Ch(e,f,g)
+ eor r0,r5,r5,ror#11
+ add r4,r4,r2 @ h+=Ch(e,f,g)
+#if 31==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 31<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#1*4] @ from future BODY_16_xx
+ eor r3,r5,r6 @ a^b, b^c in next round
+ ldr r1,[sp,#14*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r5,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r8,r8,r4 @ d+=h
+ eor r12,r12,r6 @ Maj(a,b,c)
+ add r4,r4,r0,ror#2 @ h+=Sigma0(a)
+ @ add r4,r4,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ ite eq @ Thumb2 thing, sanity check in ARM
+#endif
+ ldreq r3,[sp,#16*4] @ pull ctx
+ bne .Lrounds_16_xx
+
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ ldr r0,[r3,#0]
+ ldr r2,[r3,#4]
+ ldr r12,[r3,#8]
+ add r4,r4,r0
+ ldr r0,[r3,#12]
+ add r5,r5,r2
+ ldr r2,[r3,#16]
+ add r6,r6,r12
+ ldr r12,[r3,#20]
+ add r7,r7,r0
+ ldr r0,[r3,#24]
+ add r8,r8,r2
+ ldr r2,[r3,#28]
+ add r9,r9,r12
+ ldr r1,[sp,#17*4] @ pull inp
+ ldr r12,[sp,#18*4] @ pull inp+len
+ add r10,r10,r0
+ add r11,r11,r2
+ stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11}
+ cmp r1,r12
+ sub r14,r14,#256 @ rewind Ktbl
+ bne .Loop
+
+ add sp,sp,#19*4 @ destroy frame
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r11,pc}
+#else
+ ldmia sp!,{r4-r11,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size sha256_block_data_order,.-sha256_block_data_order
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
+.fpu neon
+
+.global sha256_block_data_order_neon
+.type sha256_block_data_order_neon,%function
+.align 4
+sha256_block_data_order_neon:
+.LNEON:
+ stmdb sp!,{r4-r12,lr}
+
+ sub r11,sp,#16*4+16
+ adrl r14,K256
+ bic r11,r11,#15 @ align for 128-bit stores
+ mov r12,sp
+ mov sp,r11 @ alloca
+ add r2,r1,r2,lsl#6 @ len to point at the end of inp
+
+ vld1.8 {q0},[r1]!
+ vld1.8 {q1},[r1]!
+ vld1.8 {q2},[r1]!
+ vld1.8 {q3},[r1]!
+ vld1.32 {q8},[r14,:128]!
+ vld1.32 {q9},[r14,:128]!
+ vld1.32 {q10},[r14,:128]!
+ vld1.32 {q11},[r14,:128]!
+ vrev32.8 q0,q0 @ yes, even on
+ str r0,[sp,#64]
+ vrev32.8 q1,q1 @ big-endian
+ str r1,[sp,#68]
+ mov r1,sp
+ vrev32.8 q2,q2
+ str r2,[sp,#72]
+ vrev32.8 q3,q3
+ str r12,[sp,#76] @ save original sp
+ vadd.i32 q8,q8,q0
+ vadd.i32 q9,q9,q1
+ vst1.32 {q8},[r1,:128]!
+ vadd.i32 q10,q10,q2
+ vst1.32 {q9},[r1,:128]!
+ vadd.i32 q11,q11,q3
+ vst1.32 {q10},[r1,:128]!
+ vst1.32 {q11},[r1,:128]!
+
+ ldmia r0,{r4-r11}
+ sub r1,r1,#64
+ ldr r2,[sp,#0]
+ eor r12,r12,r12
+ eor r3,r5,r6
+ b .L_00_48
+
+.align 4
+.L_00_48:
+ vext.8 q8,q0,q1,#4
+ add r11,r11,r2
+ eor r2,r9,r10
+ eor r0,r8,r8,ror#5
+ vext.8 q9,q2,q3,#4
+ add r4,r4,r12
+ and r2,r2,r8
+ eor r12,r0,r8,ror#19
+ vshr.u32 q10,q8,#7
+ eor r0,r4,r4,ror#11
+ eor r2,r2,r10
+ vadd.i32 q0,q0,q9
+ add r11,r11,r12,ror#6
+ eor r12,r4,r5
+ vshr.u32 q9,q8,#3
+ eor r0,r0,r4,ror#20
+ add r11,r11,r2
+ vsli.32 q10,q8,#25
+ ldr r2,[sp,#4]
+ and r3,r3,r12
+ vshr.u32 q11,q8,#18
+ add r7,r7,r11
+ add r11,r11,r0,ror#2
+ eor r3,r3,r5
+ veor q9,q9,q10
+ add r10,r10,r2
+ vsli.32 q11,q8,#14
+ eor r2,r8,r9
+ eor r0,r7,r7,ror#5
+ vshr.u32 d24,d7,#17
+ add r11,r11,r3
+ and r2,r2,r7
+ veor q9,q9,q11
+ eor r3,r0,r7,ror#19
+ eor r0,r11,r11,ror#11
+ vsli.32 d24,d7,#15
+ eor r2,r2,r9
+ add r10,r10,r3,ror#6
+ vshr.u32 d25,d7,#10
+ eor r3,r11,r4
+ eor r0,r0,r11,ror#20
+ vadd.i32 q0,q0,q9
+ add r10,r10,r2
+ ldr r2,[sp,#8]
+ veor d25,d25,d24
+ and r12,r12,r3
+ add r6,r6,r10
+ vshr.u32 d24,d7,#19
+ add r10,r10,r0,ror#2
+ eor r12,r12,r4
+ vsli.32 d24,d7,#13
+ add r9,r9,r2
+ eor r2,r7,r8
+ veor d25,d25,d24
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12
+ vadd.i32 d0,d0,d25
+ and r2,r2,r6
+ eor r12,r0,r6,ror#19
+ vshr.u32 d24,d0,#17
+ eor r0,r10,r10,ror#11
+ eor r2,r2,r8
+ vsli.32 d24,d0,#15
+ add r9,r9,r12,ror#6
+ eor r12,r10,r11
+ vshr.u32 d25,d0,#10
+ eor r0,r0,r10,ror#20
+ add r9,r9,r2
+ veor d25,d25,d24
+ ldr r2,[sp,#12]
+ and r3,r3,r12
+ vshr.u32 d24,d0,#19
+ add r5,r5,r9
+ add r9,r9,r0,ror#2
+ eor r3,r3,r11
+ vld1.32 {q8},[r14,:128]!
+ add r8,r8,r2
+ vsli.32 d24,d0,#13
+ eor r2,r6,r7
+ eor r0,r5,r5,ror#5
+ veor d25,d25,d24
+ add r9,r9,r3
+ and r2,r2,r5
+ vadd.i32 d1,d1,d25
+ eor r3,r0,r5,ror#19
+ eor r0,r9,r9,ror#11
+ vadd.i32 q8,q8,q0
+ eor r2,r2,r7
+ add r8,r8,r3,ror#6
+ eor r3,r9,r10
+ eor r0,r0,r9,ror#20
+ add r8,r8,r2
+ ldr r2,[sp,#16]
+ and r12,r12,r3
+ add r4,r4,r8
+ vst1.32 {q8},[r1,:128]!
+ add r8,r8,r0,ror#2
+ eor r12,r12,r10
+ vext.8 q8,q1,q2,#4
+ add r7,r7,r2
+ eor r2,r5,r6
+ eor r0,r4,r4,ror#5
+ vext.8 q9,q3,q0,#4
+ add r8,r8,r12
+ and r2,r2,r4
+ eor r12,r0,r4,ror#19
+ vshr.u32 q10,q8,#7
+ eor r0,r8,r8,ror#11
+ eor r2,r2,r6
+ vadd.i32 q1,q1,q9
+ add r7,r7,r12,ror#6
+ eor r12,r8,r9
+ vshr.u32 q9,q8,#3
+ eor r0,r0,r8,ror#20
+ add r7,r7,r2
+ vsli.32 q10,q8,#25
+ ldr r2,[sp,#20]
+ and r3,r3,r12
+ vshr.u32 q11,q8,#18
+ add r11,r11,r7
+ add r7,r7,r0,ror#2
+ eor r3,r3,r9
+ veor q9,q9,q10
+ add r6,r6,r2
+ vsli.32 q11,q8,#14
+ eor r2,r4,r5
+ eor r0,r11,r11,ror#5
+ vshr.u32 d24,d1,#17
+ add r7,r7,r3
+ and r2,r2,r11
+ veor q9,q9,q11
+ eor r3,r0,r11,ror#19
+ eor r0,r7,r7,ror#11
+ vsli.32 d24,d1,#15
+ eor r2,r2,r5
+ add r6,r6,r3,ror#6
+ vshr.u32 d25,d1,#10
+ eor r3,r7,r8
+ eor r0,r0,r7,ror#20
+ vadd.i32 q1,q1,q9
+ add r6,r6,r2
+ ldr r2,[sp,#24]
+ veor d25,d25,d24
+ and r12,r12,r3
+ add r10,r10,r6
+ vshr.u32 d24,d1,#19
+ add r6,r6,r0,ror#2
+ eor r12,r12,r8
+ vsli.32 d24,d1,#13
+ add r5,r5,r2
+ eor r2,r11,r4
+ veor d25,d25,d24
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12
+ vadd.i32 d2,d2,d25
+ and r2,r2,r10
+ eor r12,r0,r10,ror#19
+ vshr.u32 d24,d2,#17
+ eor r0,r6,r6,ror#11
+ eor r2,r2,r4
+ vsli.32 d24,d2,#15
+ add r5,r5,r12,ror#6
+ eor r12,r6,r7
+ vshr.u32 d25,d2,#10
+ eor r0,r0,r6,ror#20
+ add r5,r5,r2
+ veor d25,d25,d24
+ ldr r2,[sp,#28]
+ and r3,r3,r12
+ vshr.u32 d24,d2,#19
+ add r9,r9,r5
+ add r5,r5,r0,ror#2
+ eor r3,r3,r7
+ vld1.32 {q8},[r14,:128]!
+ add r4,r4,r2
+ vsli.32 d24,d2,#13
+ eor r2,r10,r11
+ eor r0,r9,r9,ror#5
+ veor d25,d25,d24
+ add r5,r5,r3
+ and r2,r2,r9
+ vadd.i32 d3,d3,d25
+ eor r3,r0,r9,ror#19
+ eor r0,r5,r5,ror#11
+ vadd.i32 q8,q8,q1
+ eor r2,r2,r11
+ add r4,r4,r3,ror#6
+ eor r3,r5,r6
+ eor r0,r0,r5,ror#20
+ add r4,r4,r2
+ ldr r2,[sp,#32]
+ and r12,r12,r3
+ add r8,r8,r4
+ vst1.32 {q8},[r1,:128]!
+ add r4,r4,r0,ror#2
+ eor r12,r12,r6
+ vext.8 q8,q2,q3,#4
+ add r11,r11,r2
+ eor r2,r9,r10
+ eor r0,r8,r8,ror#5
+ vext.8 q9,q0,q1,#4
+ add r4,r4,r12
+ and r2,r2,r8
+ eor r12,r0,r8,ror#19
+ vshr.u32 q10,q8,#7
+ eor r0,r4,r4,ror#11
+ eor r2,r2,r10
+ vadd.i32 q2,q2,q9
+ add r11,r11,r12,ror#6
+ eor r12,r4,r5
+ vshr.u32 q9,q8,#3
+ eor r0,r0,r4,ror#20
+ add r11,r11,r2
+ vsli.32 q10,q8,#25
+ ldr r2,[sp,#36]
+ and r3,r3,r12
+ vshr.u32 q11,q8,#18
+ add r7,r7,r11
+ add r11,r11,r0,ror#2
+ eor r3,r3,r5
+ veor q9,q9,q10
+ add r10,r10,r2
+ vsli.32 q11,q8,#14
+ eor r2,r8,r9
+ eor r0,r7,r7,ror#5
+ vshr.u32 d24,d3,#17
+ add r11,r11,r3
+ and r2,r2,r7
+ veor q9,q9,q11
+ eor r3,r0,r7,ror#19
+ eor r0,r11,r11,ror#11
+ vsli.32 d24,d3,#15
+ eor r2,r2,r9
+ add r10,r10,r3,ror#6
+ vshr.u32 d25,d3,#10
+ eor r3,r11,r4
+ eor r0,r0,r11,ror#20
+ vadd.i32 q2,q2,q9
+ add r10,r10,r2
+ ldr r2,[sp,#40]
+ veor d25,d25,d24
+ and r12,r12,r3
+ add r6,r6,r10
+ vshr.u32 d24,d3,#19
+ add r10,r10,r0,ror#2
+ eor r12,r12,r4
+ vsli.32 d24,d3,#13
+ add r9,r9,r2
+ eor r2,r7,r8
+ veor d25,d25,d24
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12
+ vadd.i32 d4,d4,d25
+ and r2,r2,r6
+ eor r12,r0,r6,ror#19
+ vshr.u32 d24,d4,#17
+ eor r0,r10,r10,ror#11
+ eor r2,r2,r8
+ vsli.32 d24,d4,#15
+ add r9,r9,r12,ror#6
+ eor r12,r10,r11
+ vshr.u32 d25,d4,#10
+ eor r0,r0,r10,ror#20
+ add r9,r9,r2
+ veor d25,d25,d24
+ ldr r2,[sp,#44]
+ and r3,r3,r12
+ vshr.u32 d24,d4,#19
+ add r5,r5,r9
+ add r9,r9,r0,ror#2
+ eor r3,r3,r11
+ vld1.32 {q8},[r14,:128]!
+ add r8,r8,r2
+ vsli.32 d24,d4,#13
+ eor r2,r6,r7
+ eor r0,r5,r5,ror#5
+ veor d25,d25,d24
+ add r9,r9,r3
+ and r2,r2,r5
+ vadd.i32 d5,d5,d25
+ eor r3,r0,r5,ror#19
+ eor r0,r9,r9,ror#11
+ vadd.i32 q8,q8,q2
+ eor r2,r2,r7
+ add r8,r8,r3,ror#6
+ eor r3,r9,r10
+ eor r0,r0,r9,ror#20
+ add r8,r8,r2
+ ldr r2,[sp,#48]
+ and r12,r12,r3
+ add r4,r4,r8
+ vst1.32 {q8},[r1,:128]!
+ add r8,r8,r0,ror#2
+ eor r12,r12,r10
+ vext.8 q8,q3,q0,#4
+ add r7,r7,r2
+ eor r2,r5,r6
+ eor r0,r4,r4,ror#5
+ vext.8 q9,q1,q2,#4
+ add r8,r8,r12
+ and r2,r2,r4
+ eor r12,r0,r4,ror#19
+ vshr.u32 q10,q8,#7
+ eor r0,r8,r8,ror#11
+ eor r2,r2,r6
+ vadd.i32 q3,q3,q9
+ add r7,r7,r12,ror#6
+ eor r12,r8,r9
+ vshr.u32 q9,q8,#3
+ eor r0,r0,r8,ror#20
+ add r7,r7,r2
+ vsli.32 q10,q8,#25
+ ldr r2,[sp,#52]
+ and r3,r3,r12
+ vshr.u32 q11,q8,#18
+ add r11,r11,r7
+ add r7,r7,r0,ror#2
+ eor r3,r3,r9
+ veor q9,q9,q10
+ add r6,r6,r2
+ vsli.32 q11,q8,#14
+ eor r2,r4,r5
+ eor r0,r11,r11,ror#5
+ vshr.u32 d24,d5,#17
+ add r7,r7,r3
+ and r2,r2,r11
+ veor q9,q9,q11
+ eor r3,r0,r11,ror#19
+ eor r0,r7,r7,ror#11
+ vsli.32 d24,d5,#15
+ eor r2,r2,r5
+ add r6,r6,r3,ror#6
+ vshr.u32 d25,d5,#10
+ eor r3,r7,r8
+ eor r0,r0,r7,ror#20
+ vadd.i32 q3,q3,q9
+ add r6,r6,r2
+ ldr r2,[sp,#56]
+ veor d25,d25,d24
+ and r12,r12,r3
+ add r10,r10,r6
+ vshr.u32 d24,d5,#19
+ add r6,r6,r0,ror#2
+ eor r12,r12,r8
+ vsli.32 d24,d5,#13
+ add r5,r5,r2
+ eor r2,r11,r4
+ veor d25,d25,d24
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12
+ vadd.i32 d6,d6,d25
+ and r2,r2,r10
+ eor r12,r0,r10,ror#19
+ vshr.u32 d24,d6,#17
+ eor r0,r6,r6,ror#11
+ eor r2,r2,r4
+ vsli.32 d24,d6,#15
+ add r5,r5,r12,ror#6
+ eor r12,r6,r7
+ vshr.u32 d25,d6,#10
+ eor r0,r0,r6,ror#20
+ add r5,r5,r2
+ veor d25,d25,d24
+ ldr r2,[sp,#60]
+ and r3,r3,r12
+ vshr.u32 d24,d6,#19
+ add r9,r9,r5
+ add r5,r5,r0,ror#2
+ eor r3,r3,r7
+ vld1.32 {q8},[r14,:128]!
+ add r4,r4,r2
+ vsli.32 d24,d6,#13
+ eor r2,r10,r11
+ eor r0,r9,r9,ror#5
+ veor d25,d25,d24
+ add r5,r5,r3
+ and r2,r2,r9
+ vadd.i32 d7,d7,d25
+ eor r3,r0,r9,ror#19
+ eor r0,r5,r5,ror#11
+ vadd.i32 q8,q8,q3
+ eor r2,r2,r11
+ add r4,r4,r3,ror#6
+ eor r3,r5,r6
+ eor r0,r0,r5,ror#20
+ add r4,r4,r2
+ ldr r2,[r14]
+ and r12,r12,r3
+ add r8,r8,r4
+ vst1.32 {q8},[r1,:128]!
+ add r4,r4,r0,ror#2
+ eor r12,r12,r6
+ teq r2,#0 @ check for K256 terminator
+ ldr r2,[sp,#0]
+ sub r1,r1,#64
+ bne .L_00_48
+
+ ldr r1,[sp,#68]
+ ldr r0,[sp,#72]
+ sub r14,r14,#256 @ rewind r14
+ teq r1,r0
+ it eq
+ subeq r1,r1,#64 @ avoid SEGV
+ vld1.8 {q0},[r1]! @ load next input block
+ vld1.8 {q1},[r1]!
+ vld1.8 {q2},[r1]!
+ vld1.8 {q3},[r1]!
+ it ne
+ strne r1,[sp,#68]
+ mov r1,sp
+ add r11,r11,r2
+ eor r2,r9,r10
+ eor r0,r8,r8,ror#5
+ add r4,r4,r12
+ vld1.32 {q8},[r14,:128]!
+ and r2,r2,r8
+ eor r12,r0,r8,ror#19
+ eor r0,r4,r4,ror#11
+ eor r2,r2,r10
+ vrev32.8 q0,q0
+ add r11,r11,r12,ror#6
+ eor r12,r4,r5
+ eor r0,r0,r4,ror#20
+ add r11,r11,r2
+ vadd.i32 q8,q8,q0
+ ldr r2,[sp,#4]
+ and r3,r3,r12
+ add r7,r7,r11
+ add r11,r11,r0,ror#2
+ eor r3,r3,r5
+ add r10,r10,r2
+ eor r2,r8,r9
+ eor r0,r7,r7,ror#5
+ add r11,r11,r3
+ and r2,r2,r7
+ eor r3,r0,r7,ror#19
+ eor r0,r11,r11,ror#11
+ eor r2,r2,r9
+ add r10,r10,r3,ror#6
+ eor r3,r11,r4
+ eor r0,r0,r11,ror#20
+ add r10,r10,r2
+ ldr r2,[sp,#8]
+ and r12,r12,r3
+ add r6,r6,r10
+ add r10,r10,r0,ror#2
+ eor r12,r12,r4
+ add r9,r9,r2
+ eor r2,r7,r8
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12
+ and r2,r2,r6
+ eor r12,r0,r6,ror#19
+ eor r0,r10,r10,ror#11
+ eor r2,r2,r8
+ add r9,r9,r12,ror#6
+ eor r12,r10,r11
+ eor r0,r0,r10,ror#20
+ add r9,r9,r2
+ ldr r2,[sp,#12]
+ and r3,r3,r12
+ add r5,r5,r9
+ add r9,r9,r0,ror#2
+ eor r3,r3,r11
+ add r8,r8,r2
+ eor r2,r6,r7
+ eor r0,r5,r5,ror#5
+ add r9,r9,r3
+ and r2,r2,r5
+ eor r3,r0,r5,ror#19
+ eor r0,r9,r9,ror#11
+ eor r2,r2,r7
+ add r8,r8,r3,ror#6
+ eor r3,r9,r10
+ eor r0,r0,r9,ror#20
+ add r8,r8,r2
+ ldr r2,[sp,#16]
+ and r12,r12,r3
+ add r4,r4,r8
+ add r8,r8,r0,ror#2
+ eor r12,r12,r10
+ vst1.32 {q8},[r1,:128]!
+ add r7,r7,r2
+ eor r2,r5,r6
+ eor r0,r4,r4,ror#5
+ add r8,r8,r12
+ vld1.32 {q8},[r14,:128]!
+ and r2,r2,r4
+ eor r12,r0,r4,ror#19
+ eor r0,r8,r8,ror#11
+ eor r2,r2,r6
+ vrev32.8 q1,q1
+ add r7,r7,r12,ror#6
+ eor r12,r8,r9
+ eor r0,r0,r8,ror#20
+ add r7,r7,r2
+ vadd.i32 q8,q8,q1
+ ldr r2,[sp,#20]
+ and r3,r3,r12
+ add r11,r11,r7
+ add r7,r7,r0,ror#2
+ eor r3,r3,r9
+ add r6,r6,r2
+ eor r2,r4,r5
+ eor r0,r11,r11,ror#5
+ add r7,r7,r3
+ and r2,r2,r11
+ eor r3,r0,r11,ror#19
+ eor r0,r7,r7,ror#11
+ eor r2,r2,r5
+ add r6,r6,r3,ror#6
+ eor r3,r7,r8
+ eor r0,r0,r7,ror#20
+ add r6,r6,r2
+ ldr r2,[sp,#24]
+ and r12,r12,r3
+ add r10,r10,r6
+ add r6,r6,r0,ror#2
+ eor r12,r12,r8
+ add r5,r5,r2
+ eor r2,r11,r4
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12
+ and r2,r2,r10
+ eor r12,r0,r10,ror#19
+ eor r0,r6,r6,ror#11
+ eor r2,r2,r4
+ add r5,r5,r12,ror#6
+ eor r12,r6,r7
+ eor r0,r0,r6,ror#20
+ add r5,r5,r2
+ ldr r2,[sp,#28]
+ and r3,r3,r12
+ add r9,r9,r5
+ add r5,r5,r0,ror#2
+ eor r3,r3,r7
+ add r4,r4,r2
+ eor r2,r10,r11
+ eor r0,r9,r9,ror#5
+ add r5,r5,r3
+ and r2,r2,r9
+ eor r3,r0,r9,ror#19
+ eor r0,r5,r5,ror#11
+ eor r2,r2,r11
+ add r4,r4,r3,ror#6
+ eor r3,r5,r6
+ eor r0,r0,r5,ror#20
+ add r4,r4,r2
+ ldr r2,[sp,#32]
+ and r12,r12,r3
+ add r8,r8,r4
+ add r4,r4,r0,ror#2
+ eor r12,r12,r6
+ vst1.32 {q8},[r1,:128]!
+ add r11,r11,r2
+ eor r2,r9,r10
+ eor r0,r8,r8,ror#5
+ add r4,r4,r12
+ vld1.32 {q8},[r14,:128]!
+ and r2,r2,r8
+ eor r12,r0,r8,ror#19
+ eor r0,r4,r4,ror#11
+ eor r2,r2,r10
+ vrev32.8 q2,q2
+ add r11,r11,r12,ror#6
+ eor r12,r4,r5
+ eor r0,r0,r4,ror#20
+ add r11,r11,r2
+ vadd.i32 q8,q8,q2
+ ldr r2,[sp,#36]
+ and r3,r3,r12
+ add r7,r7,r11
+ add r11,r11,r0,ror#2
+ eor r3,r3,r5
+ add r10,r10,r2
+ eor r2,r8,r9
+ eor r0,r7,r7,ror#5
+ add r11,r11,r3
+ and r2,r2,r7
+ eor r3,r0,r7,ror#19
+ eor r0,r11,r11,ror#11
+ eor r2,r2,r9
+ add r10,r10,r3,ror#6
+ eor r3,r11,r4
+ eor r0,r0,r11,ror#20
+ add r10,r10,r2
+ ldr r2,[sp,#40]
+ and r12,r12,r3
+ add r6,r6,r10
+ add r10,r10,r0,ror#2
+ eor r12,r12,r4
+ add r9,r9,r2
+ eor r2,r7,r8
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12
+ and r2,r2,r6
+ eor r12,r0,r6,ror#19
+ eor r0,r10,r10,ror#11
+ eor r2,r2,r8
+ add r9,r9,r12,ror#6
+ eor r12,r10,r11
+ eor r0,r0,r10,ror#20
+ add r9,r9,r2
+ ldr r2,[sp,#44]
+ and r3,r3,r12
+ add r5,r5,r9
+ add r9,r9,r0,ror#2
+ eor r3,r3,r11
+ add r8,r8,r2
+ eor r2,r6,r7
+ eor r0,r5,r5,ror#5
+ add r9,r9,r3
+ and r2,r2,r5
+ eor r3,r0,r5,ror#19
+ eor r0,r9,r9,ror#11
+ eor r2,r2,r7
+ add r8,r8,r3,ror#6
+ eor r3,r9,r10
+ eor r0,r0,r9,ror#20
+ add r8,r8,r2
+ ldr r2,[sp,#48]
+ and r12,r12,r3
+ add r4,r4,r8
+ add r8,r8,r0,ror#2
+ eor r12,r12,r10
+ vst1.32 {q8},[r1,:128]!
+ add r7,r7,r2
+ eor r2,r5,r6
+ eor r0,r4,r4,ror#5
+ add r8,r8,r12
+ vld1.32 {q8},[r14,:128]!
+ and r2,r2,r4
+ eor r12,r0,r4,ror#19
+ eor r0,r8,r8,ror#11
+ eor r2,r2,r6
+ vrev32.8 q3,q3
+ add r7,r7,r12,ror#6
+ eor r12,r8,r9
+ eor r0,r0,r8,ror#20
+ add r7,r7,r2
+ vadd.i32 q8,q8,q3
+ ldr r2,[sp,#52]
+ and r3,r3,r12
+ add r11,r11,r7
+ add r7,r7,r0,ror#2
+ eor r3,r3,r9
+ add r6,r6,r2
+ eor r2,r4,r5
+ eor r0,r11,r11,ror#5
+ add r7,r7,r3
+ and r2,r2,r11
+ eor r3,r0,r11,ror#19
+ eor r0,r7,r7,ror#11
+ eor r2,r2,r5
+ add r6,r6,r3,ror#6
+ eor r3,r7,r8
+ eor r0,r0,r7,ror#20
+ add r6,r6,r2
+ ldr r2,[sp,#56]
+ and r12,r12,r3
+ add r10,r10,r6
+ add r6,r6,r0,ror#2
+ eor r12,r12,r8
+ add r5,r5,r2
+ eor r2,r11,r4
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12
+ and r2,r2,r10
+ eor r12,r0,r10,ror#19
+ eor r0,r6,r6,ror#11
+ eor r2,r2,r4
+ add r5,r5,r12,ror#6
+ eor r12,r6,r7
+ eor r0,r0,r6,ror#20
+ add r5,r5,r2
+ ldr r2,[sp,#60]
+ and r3,r3,r12
+ add r9,r9,r5
+ add r5,r5,r0,ror#2
+ eor r3,r3,r7
+ add r4,r4,r2
+ eor r2,r10,r11
+ eor r0,r9,r9,ror#5
+ add r5,r5,r3
+ and r2,r2,r9
+ eor r3,r0,r9,ror#19
+ eor r0,r5,r5,ror#11
+ eor r2,r2,r11
+ add r4,r4,r3,ror#6
+ eor r3,r5,r6
+ eor r0,r0,r5,ror#20
+ add r4,r4,r2
+ ldr r2,[sp,#64]
+ and r12,r12,r3
+ add r8,r8,r4
+ add r4,r4,r0,ror#2
+ eor r12,r12,r6
+ vst1.32 {q8},[r1,:128]!
+ ldr r0,[r2,#0]
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ ldr r12,[r2,#4]
+ ldr r3,[r2,#8]
+ ldr r1,[r2,#12]
+ add r4,r4,r0 @ accumulate
+ ldr r0,[r2,#16]
+ add r5,r5,r12
+ ldr r12,[r2,#20]
+ add r6,r6,r3
+ ldr r3,[r2,#24]
+ add r7,r7,r1
+ ldr r1,[r2,#28]
+ add r8,r8,r0
+ str r4,[r2],#4
+ add r9,r9,r12
+ str r5,[r2],#4
+ add r10,r10,r3
+ str r6,[r2],#4
+ add r11,r11,r1
+ str r7,[r2],#4
+ stmia r2,{r8-r11}
+
+ ittte ne
+ movne r1,sp
+ ldrne r2,[sp,#0]
+ eorne r12,r12,r12
+ ldreq sp,[sp,#76] @ restore original sp
+ itt ne
+ eorne r3,r5,r6
+ bne .L_00_48
+
+ ldmia sp!,{r4-r12,pc}
+.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+
+# ifdef __thumb2__
+# define INST(a,b,c,d) .byte c,d|0xc,a,b
+# else
+# define INST(a,b,c,d) .byte a,b,c,d
+# endif
+
+.type sha256_block_data_order_armv8,%function
+.align 5
+sha256_block_data_order_armv8:
+.LARMv8:
+ vld1.32 {q0,q1},[r0]
+# ifdef __thumb2__
+ adr r3,.LARMv8
+ sub r3,r3,#.LARMv8-K256
+# else
+ adrl r3,K256
+# endif
+ add r2,r1,r2,lsl#6 @ len to point at the end of inp
+
+.Loop_v8:
+ vld1.8 {q8-q9},[r1]!
+ vld1.8 {q10-q11},[r1]!
+ vld1.32 {q12},[r3]!
+ vrev32.8 q8,q8
+ vrev32.8 q9,q9
+ vrev32.8 q10,q10
+ vrev32.8 q11,q11
+ vmov q14,q0 @ offload
+ vmov q15,q1
+ teq r1,r2
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q8
+ INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q9
+ INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q10
+ INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q11
+ INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q8
+ INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q9
+ INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q10
+ INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q11
+ INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q8
+ INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q9
+ INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q10
+ INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q11
+ INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q8
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q9
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+
+ vld1.32 {q13},[r3]
+ vadd.i32 q12,q12,q10
+ sub r3,r3,#256-16 @ rewind
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+
+ vadd.i32 q13,q13,q11
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+
+ vadd.i32 q0,q0,q14
+ vadd.i32 q1,q1,q15
+ it ne
+ bne .Loop_v8
+
+ vst1.32 {q0,q1},[r0]
+
+ bx lr @ bx lr
+.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
+#endif
+.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
+.align 2
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.comm OPENSSL_armcap_P,4,4
+#endif
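
The unrolled rounds above are generated output (from sha256-armv4.pl); each round evaluates the standard FIPS 180-4 round functions plus the message schedule, with the ror/eor/and sequences computing them in register-starved form. As a reading aid, a minimal C sketch of those primitives — the function names here are illustrative, not kernel symbols:

    #include <stdint.h>

    static inline uint32_t ror32(uint32_t x, unsigned int n)
    {
            return (x >> n) | (x << (32 - n));
    }

    /* Big Sigma functions, applied to the working variables a and e. */
    static inline uint32_t Sigma0(uint32_t a) { return ror32(a, 2) ^ ror32(a, 13) ^ ror32(a, 22); }
    static inline uint32_t Sigma1(uint32_t e) { return ror32(e, 6) ^ ror32(e, 11) ^ ror32(e, 25); }

    /* Small sigma functions, used by the message schedule X[i]. */
    static inline uint32_t sigma0(uint32_t x) { return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3); }
    static inline uint32_t sigma1(uint32_t x) { return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); }

    /* Ch and Maj, written the way the assembly computes them. The
     * "h+=Maj(a,b,c) from the past" comments come from deferring the
     * h += Maj(a,b,c) addition into the following round, which
     * shortens each round's dependency chain. */
    static inline uint32_t Ch(uint32_t e, uint32_t f, uint32_t g)  { return ((f ^ g) & e) ^ g; }
    static inline uint32_t Maj(uint32_t a, uint32_t b, uint32_t c) { return ((a ^ b) & (b ^ c)) ^ b; }
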
diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c
new file mode 100644
index 000000000000..ff90ced528a1
--- /dev/null
+++ b/arch/arm/crypto/sha256_glue.c
@@ -0,0 +1,246 @@
+/*
+ * Glue code for the SHA256 Secure Hash Algorithm assembly implementation
+ * using optimized ARM assembler and NEON instructions.
+ *
+ * Copyright © 2015 Google Inc.
+ *
+ * This file is based on sha256_ssse3_glue.c:
+ * Copyright (C) 2013 Intel Corporation
+ * Author: Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <crypto/sha.h>
+#include <asm/byteorder.h>
+#include <asm/simd.h>
+#include <asm/neon.h>
+#include "sha256_glue.h"
+
+asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
+ unsigned int num_blks);
+
+
+int sha256_init(struct shash_desc *desc)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA256_H0;
+ sctx->state[1] = SHA256_H1;
+ sctx->state[2] = SHA256_H2;
+ sctx->state[3] = SHA256_H3;
+ sctx->state[4] = SHA256_H4;
+ sctx->state[5] = SHA256_H5;
+ sctx->state[6] = SHA256_H6;
+ sctx->state[7] = SHA256_H7;
+ sctx->count = 0;
+
+ return 0;
+}
+
+int sha224_init(struct shash_desc *desc)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA224_H0;
+ sctx->state[1] = SHA224_H1;
+ sctx->state[2] = SHA224_H2;
+ sctx->state[3] = SHA224_H3;
+ sctx->state[4] = SHA224_H4;
+ sctx->state[5] = SHA224_H5;
+ sctx->state[6] = SHA224_H6;
+ sctx->state[7] = SHA224_H7;
+ sctx->count = 0;
+
+ return 0;
+}
+
+int __sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len,
+ unsigned int partial)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ unsigned int done = 0;
+
+ sctx->count += len;
+
+ if (partial) {
+ done = SHA256_BLOCK_SIZE - partial;
+ memcpy(sctx->buf + partial, data, done);
+ sha256_block_data_order(sctx->state, sctx->buf, 1);
+ }
+
+ if (len - done >= SHA256_BLOCK_SIZE) {
+ const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
+
+ sha256_block_data_order(sctx->state, data + done, rounds);
+ done += rounds * SHA256_BLOCK_SIZE;
+ }
+
+ memcpy(sctx->buf, data + done, len - done);
+
+ return 0;
+}
+
+int sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
+
+ /* Handle the fast case right here */
+ if (partial + len < SHA256_BLOCK_SIZE) {
+ sctx->count += len;
+ memcpy(sctx->buf + partial, data, len);
+
+ return 0;
+ }
+
+ return __sha256_update(desc, data, len, partial);
+}
+
+/* Add padding and return the message digest. */
+static int sha256_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ unsigned int i, index, padlen;
+ __be32 *dst = (__be32 *)out;
+ __be64 bits;
+ static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
+
+ /* save number of bits */
+ bits = cpu_to_be64(sctx->count << 3);
+
+ /* Pad out to 56 mod 64 and append length */
+ index = sctx->count % SHA256_BLOCK_SIZE;
+ padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);
+
+ /* We need to fill a whole block for __sha256_update() */
+ if (padlen <= 56) {
+ sctx->count += padlen;
+ memcpy(sctx->buf + index, padding, padlen);
+ } else {
+ __sha256_update(desc, padding, padlen, index);
+ }
+ __sha256_update(desc, (const u8 *)&bits, sizeof(bits), 56);
+
+ /* Store state in digest */
+ for (i = 0; i < 8; i++)
+ dst[i] = cpu_to_be32(sctx->state[i]);
+
+ /* Wipe context */
+ memset(sctx, 0, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha224_final(struct shash_desc *desc, u8 *out)
+{
+ u8 D[SHA256_DIGEST_SIZE];
+
+ sha256_final(desc, D);
+
+ memcpy(out, D, SHA224_DIGEST_SIZE);
+ memzero_explicit(D, SHA256_DIGEST_SIZE);
+
+ return 0;
+}
+
+int sha256_export(struct shash_desc *desc, void *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(out, sctx, sizeof(*sctx));
+
+ return 0;
+}
+
+int sha256_import(struct shash_desc *desc, const void *in)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(sctx, in, sizeof(*sctx));
+
+ return 0;
+}
+
+static struct shash_alg algs[] = { {
+ .digestsize = SHA256_DIGEST_SIZE,
+ .init = sha256_init,
+ .update = sha256_update,
+ .final = sha256_final,
+ .export = sha256_export,
+ .import = sha256_import,
+ .descsize = sizeof(struct sha256_state),
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha256",
+ .cra_driver_name = "sha256-asm",
+ .cra_priority = 150,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+}, {
+ .digestsize = SHA224_DIGEST_SIZE,
+ .init = sha224_init,
+ .update = sha256_update,
+ .final = sha224_final,
+ .export = sha256_export,
+ .import = sha256_import,
+ .descsize = sizeof(struct sha256_state),
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha224",
+ .cra_driver_name = "sha224-asm",
+ .cra_priority = 150,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA224_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+} };
+
+static int __init sha256_mod_init(void)
+{
+ int res = crypto_register_shashes(algs, ARRAY_SIZE(algs));
+
+ if (res < 0)
+ return res;
+
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) {
+ res = crypto_register_shashes(sha256_neon_algs,
+ ARRAY_SIZE(sha256_neon_algs));
+
+ if (res < 0)
+ crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+ }
+
+ return res;
+}
+
+static void __exit sha256_mod_fini(void)
+{
+ crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon())
+ crypto_unregister_shashes(sha256_neon_algs,
+ ARRAY_SIZE(sha256_neon_algs));
+}
+
+module_init(sha256_mod_init);
+module_exit(sha256_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm (ARM), including NEON");
+
+MODULE_ALIAS("sha256");
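
For context, a hedged sketch of how a kernel caller would reach the algorithms this file registers, via the shash API: the crypto core picks the highest-priority "sha256" provider, so "sha256-neon" at priority 250 wins over "sha256-asm" at 150 when both register. On kernels of this vintage struct shash_desc also carries a flags field, which kzalloc() leaves zeroed here; the function name is illustrative:

    #include <crypto/hash.h>
    #include <crypto/sha.h>
    #include <linux/err.h>
    #include <linux/slab.h>

    static int sha256_digest_example(const u8 *buf, unsigned int len,
                                     u8 out[SHA256_DIGEST_SIZE])
    {
            struct crypto_shash *tfm;
            struct shash_desc *desc;
            int ret;

            tfm = crypto_alloc_shash("sha256", 0, 0);
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            /* descsize covers the per-request state (struct sha256_state). */
            desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
            if (!desc) {
                    crypto_free_shash(tfm);
                    return -ENOMEM;
            }
            desc->tfm = tfm;

            ret = crypto_shash_digest(desc, buf, len, out); /* init+update+final */

            kfree(desc);
            crypto_free_shash(tfm);
            return ret;
    }
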
diff --git a/arch/arm/crypto/sha256_glue.h b/arch/arm/crypto/sha256_glue.h
new file mode 100644
index 000000000000..0312f4ffe8cc
--- /dev/null
+++ b/arch/arm/crypto/sha256_glue.h
@@ -0,0 +1,23 @@
+#ifndef _CRYPTO_SHA256_GLUE_H
+#define _CRYPTO_SHA256_GLUE_H
+
+#include <linux/crypto.h>
+#include <crypto/sha.h>
+
+extern struct shash_alg sha256_neon_algs[2];
+
+extern int sha256_init(struct shash_desc *desc);
+
+extern int sha224_init(struct shash_desc *desc);
+
+extern int __sha256_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len, unsigned int partial);
+
+extern int sha256_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len);
+
+extern int sha256_export(struct shash_desc *desc, void *out);
+
+extern int sha256_import(struct shash_desc *desc, const void *in);
+
+#endif /* _CRYPTO_SHA256_GLUE_H */
diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c
new file mode 100644
index 000000000000..c4da10090eee
--- /dev/null
+++ b/arch/arm/crypto/sha256_neon_glue.c
@@ -0,0 +1,172 @@
+/*
+ * Glue code for the SHA256 Secure Hash Algorithm assembly implementation
+ * using NEON instructions.
+ *
+ * Copyright © 2015 Google Inc.
+ *
+ * This file is based on sha512_neon_glue.c:
+ * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <crypto/sha.h>
+#include <asm/byteorder.h>
+#include <asm/simd.h>
+#include <asm/neon.h>
+#include "sha256_glue.h"
+
+asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data,
+ unsigned int num_blks);
+
+
+static int __sha256_neon_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len, unsigned int partial)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ unsigned int done = 0;
+
+ sctx->count += len;
+
+ if (partial) {
+ done = SHA256_BLOCK_SIZE - partial;
+ memcpy(sctx->buf + partial, data, done);
+ sha256_block_data_order_neon(sctx->state, sctx->buf, 1);
+ }
+
+ if (len - done >= SHA256_BLOCK_SIZE) {
+ const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
+
+ sha256_block_data_order_neon(sctx->state, data + done, rounds);
+ done += rounds * SHA256_BLOCK_SIZE;
+ }
+
+ memcpy(sctx->buf, data + done, len - done);
+
+ return 0;
+}
+
+static int sha256_neon_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
+ int res;
+
+ /* Handle the fast case right here */
+ if (partial + len < SHA256_BLOCK_SIZE) {
+ sctx->count += len;
+ memcpy(sctx->buf + partial, data, len);
+
+ return 0;
+ }
+
+ if (!may_use_simd()) {
+ res = __sha256_update(desc, data, len, partial);
+ } else {
+ kernel_neon_begin();
+ res = __sha256_neon_update(desc, data, len, partial);
+ kernel_neon_end();
+ }
+
+ return res;
+}
+
+/* Add padding and return the message digest. */
+static int sha256_neon_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ unsigned int i, index, padlen;
+ __be32 *dst = (__be32 *)out;
+ __be64 bits;
+ static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
+
+ /* save number of bits */
+ bits = cpu_to_be64(sctx->count << 3);
+
+ /* Pad out to 56 mod 64 and append length */
+ index = sctx->count % SHA256_BLOCK_SIZE;
+ padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);
+
+ if (!may_use_simd()) {
+ sha256_update(desc, padding, padlen);
+ sha256_update(desc, (const u8 *)&bits, sizeof(bits));
+ } else {
+ kernel_neon_begin();
+ /* We need to fill a whole block for __sha256_neon_update() */
+ if (padlen <= 56) {
+ sctx->count += padlen;
+ memcpy(sctx->buf + index, padding, padlen);
+ } else {
+ __sha256_neon_update(desc, padding, padlen, index);
+ }
+ __sha256_neon_update(desc, (const u8 *)&bits,
+ sizeof(bits), 56);
+ kernel_neon_end();
+ }
+
+ /* Store state in digest */
+ for (i = 0; i < 8; i++)
+ dst[i] = cpu_to_be32(sctx->state[i]);
+
+ /* Wipe context */
+ memzero_explicit(sctx, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha224_neon_final(struct shash_desc *desc, u8 *out)
+{
+ u8 D[SHA256_DIGEST_SIZE];
+
+ sha256_neon_final(desc, D);
+
+ memcpy(out, D, SHA224_DIGEST_SIZE);
+ memzero_explicit(D, SHA256_DIGEST_SIZE);
+
+ return 0;
+}
+
+struct shash_alg sha256_neon_algs[] = { {
+ .digestsize = SHA256_DIGEST_SIZE,
+ .init = sha256_init,
+ .update = sha256_neon_update,
+ .final = sha256_neon_final,
+ .export = sha256_export,
+ .import = sha256_import,
+ .descsize = sizeof(struct sha256_state),
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha256",
+ .cra_driver_name = "sha256-neon",
+ .cra_priority = 250,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+}, {
+ .digestsize = SHA224_DIGEST_SIZE,
+ .init = sha224_init,
+ .update = sha256_neon_update,
+ .final = sha224_neon_final,
+ .export = sha256_export,
+ .import = sha256_import,
+ .descsize = sizeof(struct sha256_state),
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha224",
+ .cra_driver_name = "sha224-neon",
+ .cra_priority = 250,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA224_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+} };
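
The guard pattern above is the heart of this file: NEON register state is not preserved across kernel entry, so kernel-mode NEON must be bracketed by kernel_neon_begin()/kernel_neon_end(), and a scalar fallback is required in contexts where SIMD is unusable (may_use_simd() returns false in hard IRQ context, for instance). Distilled into a hedged sketch, assuming both block functions are declared in scope:

    #include <asm/neon.h>
    #include <asm/simd.h>
    #include <linux/types.h>

    static void sha256_do_blocks(u32 *state, const u8 *data, unsigned int blocks)
    {
            if (may_use_simd()) {
                    kernel_neon_begin();    /* claim the NEON unit */
                    sha256_block_data_order_neon(state, data, blocks);
                    kernel_neon_end();      /* release it again */
            } else {
                    sha256_block_data_order(state, data, blocks); /* scalar path */
            }
    }
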
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 10e78d00a0bb..2d46862e7bef 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -487,6 +487,16 @@ int set_memory_rw(unsigned long addr, int numpages);
int set_memory_x(unsigned long addr, int numpages);
int set_memory_nx(unsigned long addr, int numpages);
+#ifdef CONFIG_DEBUG_RODATA
+void mark_rodata_ro(void);
+void set_kernel_text_rw(void);
+void set_kernel_text_ro(void);
+#else
+static inline void set_kernel_text_rw(void) { }
+static inline void set_kernel_text_ro(void) { }
+#endif
+
void flush_uprobe_xol_access(struct page *page, unsigned long uaddr,
void *kaddr, unsigned long len);
+
#endif
diff --git a/arch/arm/include/asm/fixmap.h b/arch/arm/include/asm/fixmap.h
index 74124b0d0d79..0415eae1df27 100644
--- a/arch/arm/include/asm/fixmap.h
+++ b/arch/arm/include/asm/fixmap.h
@@ -2,27 +2,24 @@
#define _ASM_FIXMAP_H
#define FIXADDR_START 0xffc00000UL
-#define FIXADDR_TOP 0xffe00000UL
-#define FIXADDR_SIZE (FIXADDR_TOP - FIXADDR_START)
+#define FIXADDR_END 0xfff00000UL
+#define FIXADDR_TOP (FIXADDR_END - PAGE_SIZE)
-#define FIX_KMAP_NR_PTES (FIXADDR_SIZE >> PAGE_SHIFT)
+#include <asm/kmap_types.h>
-#define __fix_to_virt(x) (FIXADDR_START + ((x) << PAGE_SHIFT))
-#define __virt_to_fix(x) (((x) - FIXADDR_START) >> PAGE_SHIFT)
+enum fixed_addresses {
+ FIX_KMAP_BEGIN,
+ FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS) - 1,
-extern void __this_fixmap_does_not_exist(void);
+ /* Support writing RO kernel text via kprobes, jump labels, etc. */
+ FIX_TEXT_POKE0,
+ FIX_TEXT_POKE1,
-static inline unsigned long fix_to_virt(const unsigned int idx)
-{
- if (idx >= FIX_KMAP_NR_PTES)
- __this_fixmap_does_not_exist();
- return __fix_to_virt(idx);
-}
+ __end_of_fixed_addresses
+};
-static inline unsigned int virt_to_fix(const unsigned long vaddr)
-{
- BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
- return __virt_to_fix(vaddr);
-}
+void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot);
+
+#include <asm-generic/fixmap.h>
#endif
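
This converts ARM from a hand-rolled fixmap to the generic one: each enum fixed_addresses value names one page-sized slot, and <asm-generic/fixmap.h> derives the virtual address from the index. A hedged standalone sketch of that mapping, assuming the generic top-down layout:

    /* Slots are handed out downward from FIXADDR_TOP, one page each,
     * so FIX_KMAP_BEGIN (index 0) gets the highest slot. */
    #define PAGE_SHIFT      12
    #define PAGE_SIZE       (1UL << PAGE_SHIFT)
    #define FIXADDR_END     0xfff00000UL
    #define FIXADDR_TOP     (FIXADDR_END - PAGE_SIZE)

    static unsigned long fix_to_virt_sketch(unsigned int idx)
    {
            return FIXADDR_TOP - ((unsigned long)idx << PAGE_SHIFT);
    }
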
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 2ecc7d15bc09..e03714662cb6 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -67,7 +67,7 @@ test-kprobes-objs += kprobes-test-arm.o
endif
obj-$(CONFIG_OABI_COMPAT) += sys_oabi-compat.o
obj-$(CONFIG_ARM_THUMBEE) += thumbee.o
-obj-$(CONFIG_KGDB) += kgdb.o
+obj-$(CONFIG_KGDB) += kgdb.o patch.o
obj-$(CONFIG_ARM_UNWIND) += unwind.o
obj-$(CONFIG_HAVE_TCM) += tcm.o
obj-$(CONFIG_OF) += devtree.o
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index af9a8a927a4e..b8c75e45a950 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -15,6 +15,7 @@
#include <linux/ftrace.h>
#include <linux/uaccess.h>
#include <linux/module.h>
+#include <linux/stop_machine.h>
#include <asm/cacheflush.h>
#include <asm/opcodes.h>
@@ -35,6 +36,22 @@
#define OLD_NOP 0xe1a00000 /* mov r0, r0 */
+static int __ftrace_modify_code(void *data)
+{
+ int *command = data;
+
+ set_kernel_text_rw();
+ ftrace_modify_all_code(*command);
+ set_kernel_text_ro();
+
+ return 0;
+}
+
+void arch_ftrace_update_code(int command)
+{
+ stop_machine(__ftrace_modify_code, &command, NULL);
+}
+
static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec)
{
return rec->arch.old_mcount ? OLD_NOP : NOP;
@@ -73,6 +90,8 @@ int ftrace_arch_code_modify_prepare(void)
int ftrace_arch_code_modify_post_process(void)
{
set_all_modules_text_ro();
+ /* Make sure any TLB misses during machine stop are cleared. */
+ flush_tlb_all();
return 0;
}
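
Routing the update through stop_machine() means the permission flip and the text rewrite happen while every other CPU spins with interrupts disabled, so no core can fetch a half-patched instruction. A hedged sketch of the same pattern with a hypothetical callback (permission handling elided; see __ftrace_modify_code() above):

    #include <linux/stop_machine.h>
    #include <linux/types.h>

    static int rewrite_one_site(void *data)
    {
            /* Runs on one CPU while all others are held in the stopper. */
            *(u32 *)data = 0xe1a00000;      /* ARM NOP: mov r0, r0 */
            return 0;
    }

    static void patch_site(u32 *site)
    {
            /* NULL cpumask: run the callback on a single CPU. */
            stop_machine(rewrite_one_site, site, NULL);
    }
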
diff --git a/arch/arm/kernel/jump_label.c b/arch/arm/kernel/jump_label.c
index 4ce4f789446d..afeeb9ea6f43 100644
--- a/arch/arm/kernel/jump_label.c
+++ b/arch/arm/kernel/jump_label.c
@@ -19,7 +19,7 @@ static void __arch_jump_label_transform(struct jump_entry *entry,
insn = arm_gen_nop();
if (is_static)
- __patch_text(addr, insn);
+ __patch_text_early(addr, insn);
else
patch_text(addr, insn);
}
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index 241255835abb..07f5059dac5e 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -12,8 +12,12 @@
#include <linux/irq.h>
#include <linux/kdebug.h>
#include <linux/kgdb.h>
+#include <linux/uaccess.h>
+
#include <asm/traps.h>
+#include "patch.h"
+
struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
{
{ "r0", 4, offsetof(struct pt_regs, ARM_r0)},
@@ -248,6 +252,33 @@ void kgdb_arch_exit(void)
unregister_die_notifier(&kgdb_notifier);
}
+int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
+{
+ int err;
+
+ /* patch_text() only supports int-sized breakpoints */
+ BUILD_BUG_ON(sizeof(int) != BREAK_INSTR_SIZE);
+
+ err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
+ BREAK_INSTR_SIZE);
+ if (err)
+ return err;
+
+ /* Machine is already stopped, so we can use __patch_text() directly */
+ __patch_text((void *)bpt->bpt_addr,
+ *(unsigned int *)arch_kgdb_ops.gdb_bpt_instr);
+
+ return err;
+}
+
+int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
+{
+ /* Machine is already stopped, so we can use __patch_text() directly */
+ __patch_text((void *)bpt->bpt_addr, *(unsigned int *)bpt->saved_instr);
+
+ return 0;
+}
+
/*
* Register our undef instruction hooks with ARM undef core.
* We register a hook specifically looking for the KGDB break inst
diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index 8cf0996aa1a8..4423a565ef6f 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -29,6 +29,7 @@ extern unsigned long kexec_boot_atags;
static atomic_t waiting_for_crash_ipi;
+static unsigned long dt_mem;
/*
* Provide a dummy crash_notes definition while crash dump arrives to arm.
* This prevents breakage of crash_notes attribute in kernel/ksysfs.c.
@@ -64,7 +65,7 @@ int machine_kexec_prepare(struct kimage *image)
return err;
if (be32_to_cpu(header) == OF_DT_HEADER)
- kexec_boot_atags = current_segment->mem;
+ dt_mem = current_segment->mem;
}
return 0;
}
@@ -163,12 +164,12 @@ void machine_kexec(struct kimage *image)
reboot_code_buffer = page_address(image->control_code_page);
/* Prepare parameters for reboot_code_buffer*/
+ set_kernel_text_rw();
kexec_start_address = image->start;
kexec_indirection_page = page_list;
kexec_mach_type = machine_arch_type;
- if (!kexec_boot_atags)
- kexec_boot_atags = image->start - KEXEC_ARM_ZIMAGE_OFFSET + KEXEC_ARM_ATAGS_OFFSET;
-
+ kexec_boot_atags = dt_mem ?: image->start - KEXEC_ARM_ZIMAGE_OFFSET
+ + KEXEC_ARM_ATAGS_OFFSET;
/* copy our kernel relocation code to the control code page */
reboot_entry = fncpy(reboot_code_buffer,
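
The dt_mem ?: ... assignment above uses the GNU C omitted-middle-operand extension: a ?: b evaluates to a when a is nonzero and to b otherwise, evaluating a only once. A plain-C equivalent, with a hypothetical helper name:

    static unsigned long pick_boot_atags(unsigned long dt_mem, unsigned long fallback)
    {
            return dt_mem ? dt_mem : fallback; /* dt_mem == 0 means no DTB segment */
    }
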
diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c
index 07314af47733..5038960e3c55 100644
--- a/arch/arm/kernel/patch.c
+++ b/arch/arm/kernel/patch.c
@@ -1,8 +1,11 @@
#include <linux/kernel.h>
+#include <linux/spinlock.h>
#include <linux/kprobes.h>
+#include <linux/mm.h>
#include <linux/stop_machine.h>
#include <asm/cacheflush.h>
+#include <asm/fixmap.h>
#include <asm/smp_plat.h>
#include <asm/opcodes.h>
@@ -13,21 +16,77 @@ struct patch {
unsigned int insn;
};
-void __kprobes __patch_text(void *addr, unsigned int insn)
+static DEFINE_SPINLOCK(patch_lock);
+
+static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags)
+ __acquires(&patch_lock)
+{
+ unsigned int uintaddr = (uintptr_t) addr;
+ bool module = !core_kernel_text(uintaddr);
+ struct page *page;
+
+ if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX))
+ page = vmalloc_to_page(addr);
+ else if (!module && IS_ENABLED(CONFIG_DEBUG_RODATA))
+ page = virt_to_page(addr);
+ else
+ return addr;
+
+ if (flags)
+ spin_lock_irqsave(&patch_lock, *flags);
+ else
+ __acquire(&patch_lock);
+
+ set_fixmap(fixmap, page_to_phys(page));
+
+ return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK));
+}
+
+static void __kprobes patch_unmap(int fixmap, unsigned long *flags)
+ __releases(&patch_lock)
+{
+ clear_fixmap(fixmap);
+
+ if (flags)
+ spin_unlock_irqrestore(&patch_lock, *flags);
+ else
+ __release(&patch_lock);
+}
+
+void __kprobes __patch_text_real(void *addr, unsigned int insn, bool remap)
{
bool thumb2 = IS_ENABLED(CONFIG_THUMB2_KERNEL);
+ unsigned int uintaddr = (uintptr_t) addr;
+ bool twopage = false;
+ unsigned long flags;
+ void *waddr = addr;
int size;
+ if (remap)
+ waddr = patch_map(addr, FIX_TEXT_POKE0, &flags);
+ else
+ __acquire(&patch_lock);
+
if (thumb2 && __opcode_is_thumb16(insn)) {
- *(u16 *)addr = __opcode_to_mem_thumb16(insn);
+ *(u16 *)waddr = __opcode_to_mem_thumb16(insn);
size = sizeof(u16);
- } else if (thumb2 && ((uintptr_t)addr & 2)) {
+ } else if (thumb2 && (uintaddr & 2)) {
u16 first = __opcode_thumb32_first(insn);
u16 second = __opcode_thumb32_second(insn);
- u16 *addrh = addr;
+ u16 *addrh0 = waddr;
+ u16 *addrh1 = waddr + 2;
+
+ twopage = (uintaddr & ~PAGE_MASK) == PAGE_SIZE - 2;
+ if (twopage && remap)
+ addrh1 = patch_map(addr + 2, FIX_TEXT_POKE1, NULL);
+
+ *addrh0 = __opcode_to_mem_thumb16(first);
+ *addrh1 = __opcode_to_mem_thumb16(second);
- addrh[0] = __opcode_to_mem_thumb16(first);
- addrh[1] = __opcode_to_mem_thumb16(second);
+ if (twopage && addrh1 != addr + 2) {
+ flush_kernel_vmap_range(addrh1, 2);
+ patch_unmap(FIX_TEXT_POKE1, NULL);
+ }
size = sizeof(u32);
} else {
@@ -36,10 +95,16 @@ void __kprobes __patch_text(void *addr, unsigned int insn)
else
insn = __opcode_to_mem_arm(insn);
- *(u32 *)addr = insn;
+ *(u32 *)waddr = insn;
size = sizeof(u32);
}
+ if (waddr != addr) {
+ flush_kernel_vmap_range(waddr, twopage ? size / 2 : size);
+ patch_unmap(FIX_TEXT_POKE0, &flags);
+ } else
+ __release(&patch_lock);
+
flush_icache_range((uintptr_t)(addr),
(uintptr_t)(addr) + size);
}
@@ -60,16 +125,5 @@ void __kprobes patch_text(void *addr, unsigned int insn)
.insn = insn,
};
- if (cache_ops_need_broadcast()) {
- stop_machine(patch_text_stop_machine, &patch, cpu_online_mask);
- } else {
- bool straddles_word = IS_ENABLED(CONFIG_THUMB2_KERNEL)
- && __opcode_is_thumb32(insn)
- && ((uintptr_t)addr & 2);
-
- if (straddles_word)
- stop_machine(patch_text_stop_machine, &patch, NULL);
- else
- __patch_text(addr, insn);
- }
+ stop_machine(patch_text_stop_machine, &patch, NULL);
}
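
With DEBUG_RODATA the kernel text is read-only in the linear map, so __patch_text_real() writes through a temporary RW fixmap alias instead. A condensed, hedged sketch of that sequence for the simple word-aligned ARM case (Thumb2 and page-straddling handling omitted; patch_map()/patch_unmap() as defined above):

    static void poke_text_sketch(void *addr, unsigned int insn)
    {
            unsigned long flags;
            void *waddr = patch_map(addr, FIX_TEXT_POKE0, &flags); /* RW alias */

            *(u32 *)waddr = __opcode_to_mem_arm(insn);

            if (waddr != addr) {    /* wrote via the fixmap alias */
                    flush_kernel_vmap_range(waddr, sizeof(u32));
                    patch_unmap(FIX_TEXT_POKE0, &flags);
            }
            flush_icache_range((uintptr_t)addr, (uintptr_t)addr + sizeof(u32));
    }
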
diff --git a/arch/arm/kernel/patch.h b/arch/arm/kernel/patch.h
index b4731f2dac38..77e054c2f6cd 100644
--- a/arch/arm/kernel/patch.h
+++ b/arch/arm/kernel/patch.h
@@ -2,6 +2,16 @@
#define _ARM_KERNEL_PATCH_H
void patch_text(void *addr, unsigned int insn);
-void __patch_text(void *addr, unsigned int insn);
+void __patch_text_real(void *addr, unsigned int insn, bool remap);
+
+static inline void __patch_text(void *addr, unsigned int insn)
+{
+ __patch_text_real(addr, insn, true);
+}
+
+static inline void __patch_text_early(void *addr, unsigned int insn)
+{
+ __patch_text_real(addr, insn, false);
+}
#endif
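
The two inline wrappers encode a lifecycle rule: __patch_text() remaps through the fixmap and is the safe default once rodata is write-protected, while __patch_text_early() writes directly and is only valid before the strict section permissions are applied. A hedged sketch of the split (the 'early' flag and helper are hypothetical):

/* Hypothetical helper illustrating when each variant applies. */
static void example_apply(void *addr, unsigned int insn, bool early)
{
	if (early)
		__patch_text_early(addr, insn);	/* direct write, pre-mark_rodata_ro() */
	else
		__patch_text(addr, insn);	/* via the FIX_TEXT_POKE0 alias */
}
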
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 8e95aa47457a..3afcb6c2cf06 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -8,6 +8,9 @@
#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/page.h>
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+#include <asm/pgtable.h>
+#endif
#define PROC_INFO \
. = ALIGN(4); \
@@ -90,6 +93,11 @@ SECTIONS
_text = .;
HEAD_TEXT
}
+
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+ . = ALIGN(1<<SECTION_SHIFT);
+#endif
+
.text : { /* Real text segment */
_stext = .; /* Text and read-only data */
__exception_text_start = .;
@@ -112,6 +120,9 @@ SECTIONS
ARM_CPU_KEEP(PROC_INFO)
}
+#ifdef CONFIG_DEBUG_RODATA
+ . = ALIGN(1<<SECTION_SHIFT);
+#endif
RO_DATA(PAGE_SIZE)
. = ALIGN(4);
@@ -145,7 +156,11 @@ SECTIONS
_etext = .; /* End of text and rodata section */
#ifndef CONFIG_XIP_KERNEL
+# ifdef CONFIG_ARM_KERNMEM_PERMS
+ . = ALIGN(1<<SECTION_SHIFT);
+# else
. = ALIGN(PAGE_SIZE);
+# endif
__init_begin = .;
#endif
/*
@@ -219,8 +234,12 @@ SECTIONS
__data_loc = ALIGN(4); /* location in binary */
. = PAGE_OFFSET + TEXT_OFFSET;
#else
- . = ALIGN(THREAD_SIZE);
__init_end = .;
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+ . = ALIGN(1<<SECTION_SHIFT);
+#else
+ . = ALIGN(THREAD_SIZE);
+#endif
__data_loc = .;
#endif
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 7eb94e6fc376..bc219b303bc7 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -1009,3 +1009,24 @@ config ARCH_SUPPORTS_BIG_ENDIAN
help
This option specifies the architecture can support big endian
operation.
+
+config ARM_KERNMEM_PERMS
+ bool "Restrict kernel memory permissions"
+ help
+ If this is set, kernel memory other than kernel text (and rodata)
+ will be made non-executable. The tradeoff is that each region is
+ padded to section-size (1MiB) boundaries (because their permissions
+ are different and splitting the 1M pages into 4K ones causes TLB
+ performance problems), wasting memory.
+
+config DEBUG_RODATA
+ bool "Make kernel text and rodata read-only"
+ depends on ARM_KERNMEM_PERMS
+ default y
+ help
+ If this is set, kernel text and rodata will be made read-only. This
+ is to help catch accidental or malicious attempts to change the
+ kernel's executable code. Additionally splits rodata from kernel
+ text so it can be made explicitly non-executable. This creates
+ another section-size padded region, so it can waste more memory
+ space while gaining the read-only protections.
diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
index e17ed00828d7..b98895d9fe57 100644
--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -18,19 +18,20 @@
#include <asm/tlbflush.h>
#include "mm.h"
-pte_t *fixmap_page_table;
-
static inline void set_fixmap_pte(int idx, pte_t pte)
{
unsigned long vaddr = __fix_to_virt(idx);
- set_pte_ext(fixmap_page_table + idx, pte, 0);
+ pte_t *ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr);
+
+ set_pte_ext(ptep, pte, 0);
local_flush_tlb_kernel_page(vaddr);
}
static inline pte_t get_fixmap_pte(unsigned long vaddr)
{
- unsigned long idx = __virt_to_fix(vaddr);
- return *(fixmap_page_table + idx);
+ pte_t *ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr);
+
+ return *ptep;
}
void *kmap(struct page *page)
@@ -84,7 +85,7 @@ void *kmap_atomic(struct page *page)
* With debugging enabled, kunmap_atomic forces that entry to 0.
* Make sure it was indeed properly unmapped.
*/
- BUG_ON(!pte_none(*(fixmap_page_table + idx)));
+ BUG_ON(!pte_none(get_fixmap_pte(vaddr)));
#endif
/*
* When debugging is off, kunmap_atomic leaves the previous mapping
@@ -137,7 +138,7 @@ void *kmap_atomic_pfn(unsigned long pfn)
idx = type + KM_TYPE_NR * smp_processor_id();
vaddr = __fix_to_virt(idx);
#ifdef CONFIG_DEBUG_HIGHMEM
- BUG_ON(!pte_none(*(fixmap_page_table + idx)));
+ BUG_ON(!pte_none(get_fixmap_pte(vaddr)));
#endif
set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot));
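
The highmem conversion drops the cached fixmap_page_table pointer and instead walks the kernel page tables on every access, which works because the fixmap PTEs now live in the ordinary init_mm tables. A hedged sketch of the lookup pattern the new code repeats (the helper name is hypothetical):

/* Illustration: resolve the kernel PTE backing a fixmap slot.
 * pmd_off_k() walks init_mm's pgd/pud to the PMD covering vaddr;
 * pte_offset_kernel() then indexes the PTE within that PMD. */
static pte_t *example_fixmap_ptep(int idx)
{
	unsigned long vaddr = __fix_to_virt(idx);

	return pte_offset_kernel(pmd_off_k(vaddr), vaddr);
}
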
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 9481f85c56e6..4fe8ecf6a520 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -29,6 +29,7 @@
#include <asm/prom.h>
#include <asm/sections.h>
#include <asm/setup.h>
+#include <asm/system_info.h>
#include <asm/tlb.h>
#include <asm/fixmap.h>
@@ -570,7 +571,7 @@ void __init mem_init(void)
MLK(DTCM_OFFSET, (unsigned long) dtcm_end),
MLK(ITCM_OFFSET, (unsigned long) itcm_end),
#endif
- MLK(FIXADDR_START, FIXADDR_TOP),
+ MLK(FIXADDR_START, FIXADDR_END),
MLM(VMALLOC_START, VMALLOC_END),
MLM(PAGE_OFFSET, (unsigned long)high_memory),
#ifdef CONFIG_HIGHMEM
@@ -615,7 +616,145 @@ void __init mem_init(void)
}
}
-void free_initmem(void)
+#ifdef CONFIG_ARM_KERNMEM_PERMS
+struct section_perm {
+ unsigned long start;
+ unsigned long end;
+ pmdval_t mask;
+ pmdval_t prot;
+ pmdval_t clear;
+};
+
+static struct section_perm nx_perms[] = {
+ /* Make page tables, etc. before _stext RW (set NX). */
+ {
+ .start = PAGE_OFFSET,
+ .end = (unsigned long)_stext,
+ .mask = ~PMD_SECT_XN,
+ .prot = PMD_SECT_XN,
+ },
+ /* Make init RW (set NX). */
+ {
+ .start = (unsigned long)__init_begin,
+ .end = (unsigned long)_sdata,
+ .mask = ~PMD_SECT_XN,
+ .prot = PMD_SECT_XN,
+ },
+#ifdef CONFIG_DEBUG_RODATA
+ /* Make rodata NX (set RO in ro_perms below). */
+ {
+ .start = (unsigned long)__start_rodata,
+ .end = (unsigned long)__init_begin,
+ .mask = ~PMD_SECT_XN,
+ .prot = PMD_SECT_XN,
+ },
+#endif
+};
+
+#ifdef CONFIG_DEBUG_RODATA
+static struct section_perm ro_perms[] = {
+ /* Make kernel code and rodata RX (set RO). */
+ {
+ .start = (unsigned long)_stext,
+ .end = (unsigned long)__init_begin,
+#ifdef CONFIG_ARM_LPAE
+ .mask = ~PMD_SECT_RDONLY,
+ .prot = PMD_SECT_RDONLY,
+#else
+ .mask = ~(PMD_SECT_APX | PMD_SECT_AP_WRITE),
+ .prot = PMD_SECT_APX | PMD_SECT_AP_WRITE,
+ .clear = PMD_SECT_AP_WRITE,
+#endif
+ },
+};
+#endif
+
+/*
+ * Updates section permissions only for the current mm (sections are
+ * copied into each mm). During startup, this is the init_mm. It is only
+ * safe to call this with preemption disabled, as under stop_machine().
+ */
+static inline void section_update(unsigned long addr, pmdval_t mask,
+ pmdval_t prot)
+{
+ struct mm_struct *mm;
+ pmd_t *pmd;
+
+ mm = current->active_mm;
+ pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr);
+
+#ifdef CONFIG_ARM_LPAE
+ pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot);
+#else
+ if (addr & SECTION_SIZE)
+ pmd[1] = __pmd((pmd_val(pmd[1]) & mask) | prot);
+ else
+ pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot);
+#endif
+ flush_pmd_entry(pmd);
+ local_flush_tlb_kernel_range(addr, addr + SECTION_SIZE);
+}
+
+/* Make sure extended page tables are in use. */
+static inline bool arch_has_strict_perms(void)
+{
+ if (cpu_architecture() < CPU_ARCH_ARMv6)
+ return false;
+
+ return !!(get_cr() & CR_XP);
+}
+
+#define set_section_perms(perms, field) { \
+ size_t i; \
+ unsigned long addr; \
+ \
+ if (!arch_has_strict_perms()) \
+ return; \
+ \
+ for (i = 0; i < ARRAY_SIZE(perms); i++) { \
+ if (!IS_ALIGNED(perms[i].start, SECTION_SIZE) || \
+ !IS_ALIGNED(perms[i].end, SECTION_SIZE)) { \
+ pr_err("BUG: section %lx-%lx not aligned to %lx\n", \
+ perms[i].start, perms[i].end, \
+ SECTION_SIZE); \
+ continue; \
+ } \
+ \
+ for (addr = perms[i].start; \
+ addr < perms[i].end; \
+ addr += SECTION_SIZE) \
+ section_update(addr, perms[i].mask, \
+ perms[i].field); \
+ } \
+}
+
+static inline void fix_kernmem_perms(void)
+{
+ set_section_perms(nx_perms, prot);
+}
+
+#ifdef CONFIG_DEBUG_RODATA
+void mark_rodata_ro(void)
+{
+ set_section_perms(ro_perms, prot);
+}
+
+void set_kernel_text_rw(void)
+{
+ set_section_perms(ro_perms, clear);
+}
+
+void set_kernel_text_ro(void)
+{
+ set_section_perms(ro_perms, prot);
+}
+#endif /* CONFIG_DEBUG_RODATA */
+
+#else
+static inline void fix_kernmem_perms(void) { }
+#endif /* CONFIG_ARM_KERNMEM_PERMS */
+
+void free_tcmmem(void)
{
#ifdef CONFIG_HAVE_TCM
extern char __tcm_start, __tcm_end;
@@ -623,6 +762,12 @@ void free_initmem(void)
poison_init_mem(&__tcm_start, &__tcm_end - &__tcm_start);
free_reserved_area(&__tcm_start, &__tcm_end, -1, "TCM link");
#endif
+}
+
+void free_initmem(void)
+{
+ fix_kernmem_perms();
+ free_tcmmem();
poison_init_mem(__init_begin, __init_end - __init_begin);
if (!machine_is_integrator() && !machine_is_cintegrator())
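
Because set_section_perms() takes the struct field name as a bare token, one permission table drives both directions: passing 'prot' applies a protection and passing 'clear' undoes it. A hedged sketch of what set_kernel_text_rw() effectively expands to:

/* Conceptual expansion of set_section_perms(ro_perms, clear):
 * on non-LPAE this clears PMD_SECT_APX and sets PMD_SECT_AP_WRITE,
 * restoring write access to the kernel text sections. */
size_t i;
unsigned long addr;

for (i = 0; i < ARRAY_SIZE(ro_perms); i++)
	for (addr = ro_perms[i].start; addr < ro_perms[i].end;
	     addr += SECTION_SIZE)
		section_update(addr, ro_perms[i].mask, ro_perms[i].clear);
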
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index fb9d305c874b..d7c0e974695c 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -22,6 +22,7 @@
#include <asm/cputype.h>
#include <asm/sections.h>
#include <asm/cachetype.h>
+#include <asm/fixmap.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/smp_plat.h>
@@ -393,6 +394,29 @@ SET_MEMORY_FN(x, pte_set_x)
SET_MEMORY_FN(nx, pte_set_nx)
/*
+ * To avoid TLB flush broadcasts, this uses local_flush_tlb_kernel_range().
+ * As a result, this can only be called with preemption disabled, as under
+ * stop_machine().
+ */
+void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
+{
+ unsigned long vaddr = __fix_to_virt(idx);
+ pte_t *pte = pte_offset_kernel(pmd_off_k(vaddr), vaddr);
+
+ /* Make sure fixmap region does not exceed available allocation. */
+ BUILD_BUG_ON(FIXADDR_START + (__end_of_fixed_addresses * PAGE_SIZE) >
+ FIXADDR_END);
+ BUG_ON(idx >= __end_of_fixed_addresses);
+
+ if (pgprot_val(prot))
+ set_pte_at(NULL, vaddr, pte,
+ pfn_pte(phys >> PAGE_SHIFT, prot));
+ else
+ pte_clear(NULL, vaddr, pte);
+ local_flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE);
+}
+
+/*
* Adjust the PMD section entries according to the CPU in use.
*/
static void __init build_mem_type_table(void)
@@ -1326,10 +1350,10 @@ static void __init kmap_init(void)
#ifdef CONFIG_HIGHMEM
pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE),
PKMAP_BASE, _PAGE_KERNEL_TABLE);
-
- fixmap_page_table = early_pte_alloc(pmd_off_k(FIXADDR_START),
- FIXADDR_START, _PAGE_KERNEL_TABLE);
#endif
+
+ early_pte_alloc(pmd_off_k(FIXADDR_START), FIXADDR_START,
+ _PAGE_KERNEL_TABLE);
}
static void __init map_lowmem(void)
@@ -1349,13 +1373,20 @@ static void __init map_lowmem(void)
if (start >= end)
break;
- if (end < kernel_x_start || start >= kernel_x_end) {
+ if (end < kernel_x_start) {
map.pfn = __phys_to_pfn(start);
map.virtual = __phys_to_virt(start);
map.length = end - start;
map.type = MT_MEMORY_RWX;
create_mapping(&map);
+ } else if (start >= kernel_x_end) {
+ map.pfn = __phys_to_pfn(start);
+ map.virtual = __phys_to_virt(start);
+ map.length = end - start;
+ map.type = MT_MEMORY_RW;
+
+ create_mapping(&map);
} else {
/* This better cover the entire kernel */
if (start < kernel_x_start) {
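
With __set_fixmap() in place, ARM picks up the generic set_fixmap()/clear_fixmap() helpers from asm-generic/fixmap.h, which is what patch_map()/patch_unmap() above build on. A hedged usage sketch ('page', 'buf' and 'len' are placeholders):

/* Illustration: write through a temporary fixmap alias of a page
 * whose linear mapping may be read-only. Preemption must be off,
 * since __set_fixmap() only performs a local TLB flush. */
void *alias = (void *)__fix_to_virt(FIX_TEXT_POKE0);

set_fixmap(FIX_TEXT_POKE0, page_to_phys(page));	/* install the PTE */
memcpy(alias, buf, len);			/* write via the alias */
clear_fixmap(FIX_TEXT_POKE0);			/* tear the alias down */
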
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 43fe1434f34e..7b03c24c85d8 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -649,6 +649,17 @@ config BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES
Space separated list of names of dtbs to append when
building a concatenated Image.gz-dtb.
+config DMI
+ bool "Enable support for SMBIOS (DMI) tables"
+ depends on EFI
+ default y
+ help
+ This enables the SMBIOS/DMI feature for systems.
+
+ This option is only useful on systems that have UEFI firmware.
+ However, even with this option, the resultant kernel should
+ continue to boot on existing non-UEFI platforms.
+
endmenu
menu "Userspace binary formats"
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index 8dd3a551c170..d6285ef9b5f9 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -6,6 +6,18 @@ config FRAME_POINTER
bool
default y
+config ARM64_PTDUMP
+ bool "Export kernel pagetable layout to userspace via debugfs"
+ depends on DEBUG_KERNEL
+ select DEBUG_FS
+ help
+ Say Y here if you want to show the kernel pagetable layout in a
+ debugfs file. This information is only useful for kernel developers
+ who are working in architecture specific areas of the kernel.
+ It is probably not a good idea to enable this feature in a production
+ kernel.
+ If in doubt, say "N"
+
config STRICT_DEVMEM
bool "Filter access to /dev/mem"
depends on MMU
@@ -54,6 +66,29 @@ config DEBUG_SET_MODULE_RONX
against certain classes of kernel exploits.
If in doubt, say "N".
+config DEBUG_RODATA
+ bool "Make kernel text and rodata read-only"
+ help
+ If this is set, kernel text and rodata will be made read-only. This
+ is to help catch accidental or malicious attempts to change the
+ kernel's executable code. Additionally splits rodata from kernel
+ text so it can be made explicitly non-executable.
+
+ If in doubt, say Y
+
+config DEBUG_ALIGN_RODATA
+ depends on DEBUG_RODATA && !ARM64_64K_PAGES
+ bool "Align linker sections up to SECTION_SIZE"
+ help
+ If this option is enabled, sections that may potentially be marked as
+ read only or non-executable will be aligned up to the section size of
+ the kernel. This prevents sections from being split into pages and
+ avoids a potential TLB penalty. The downside is an increase in
+ alignment and potentially wasted space. Turn on this option if
+ performance is more important than memory pressure.
+
+ If in doubt, say N
+
source "drivers/hwtracing/coresight/Kconfig"
endmenu
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 689b6379188c..81a5e4df8fec 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -152,4 +152,9 @@ int set_memory_ro(unsigned long addr, int numpages);
int set_memory_rw(unsigned long addr, int numpages);
int set_memory_x(unsigned long addr, int numpages);
int set_memory_nx(unsigned long addr, int numpages);
+
+#ifdef CONFIG_DEBUG_RODATA
+void mark_rodata_ro(void);
+#endif
+
#endif
diff --git a/arch/arm64/include/asm/dmi.h b/arch/arm64/include/asm/dmi.h
new file mode 100644
index 000000000000..69d37d87b159
--- /dev/null
+++ b/arch/arm64/include/asm/dmi.h
@@ -0,0 +1,31 @@
+/*
+ * arch/arm64/include/asm/dmi.h
+ *
+ * Copyright (C) 2013 Linaro Limited.
+ * Written by: Yi Li (yi.li@linaro.org)
+ *
+ * based on arch/ia64/include/asm/dmi.h
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef __ASM_DMI_H
+#define __ASM_DMI_H
+
+#include <linux/io.h>
+#include <linux/slab.h>
+
+/*
+ * According to section 2.3.6 of the UEFI spec, the firmware should not
+ * request a virtual mapping for configuration tables such as SMBIOS.
+ * This means we have to map them before use.
+ */
+#define dmi_early_remap(x, l) ioremap_cache(x, l)
+#define dmi_early_unmap(x, l) iounmap(x)
+#define dmi_remap(x, l) ioremap_cache(x, l)
+#define dmi_unmap(x) iounmap(x)
+#define dmi_alloc(l) kzalloc(l, GFP_KERNEL)
+
+#endif
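
These five mappings are all the architecture glue the generic DMI scanner needs; once dmi_scan_machine() has run (see arm64_dmi_init() further down), the usual consumer API works unchanged. An illustrative, hypothetical consumer:

#include <linux/dmi.h>
#include <linux/printk.h>

/* Illustration: query a string harvested from the SMBIOS table. */
static void example_print_vendor(void)
{
	const char *vendor = dmi_get_system_info(DMI_SYS_VENDOR);

	if (vendor)
		pr_info("System vendor: %s\n", vendor);
}
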
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index a34fd3b12e2b..7baf2cc04e1e 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -6,29 +6,35 @@
#ifdef CONFIG_EFI
extern void efi_init(void);
-extern void efi_idmap_init(void);
+extern void efi_virtmap_init(void);
#else
#define efi_init()
-#define efi_idmap_init()
+#define efi_virtmap_init()
#endif
#define efi_call_virt(f, ...) \
({ \
- efi_##f##_t *__f = efi.systab->runtime->f; \
+ efi_##f##_t *__f; \
efi_status_t __s; \
\
kernel_neon_begin(); \
+ efi_virtmap_load(); \
+ __f = efi.systab->runtime->f; \
__s = __f(__VA_ARGS__); \
+ efi_virtmap_unload(); \
kernel_neon_end(); \
__s; \
})
#define __efi_call_virt(f, ...) \
({ \
- efi_##f##_t *__f = efi.systab->runtime->f; \
+ efi_##f##_t *__f; \
\
kernel_neon_begin(); \
+ efi_virtmap_load(); \
+ __f = efi.systab->runtime->f; \
__f(__VA_ARGS__); \
+ efi_virtmap_unload(); \
kernel_neon_end(); \
})
@@ -44,4 +50,28 @@ extern void efi_idmap_init(void);
#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__)
+#define EFI_ALLOC_ALIGN SZ_64K
+
+/*
+ * On ARM systems, virtually remapped UEFI runtime services are set up in three
+ * distinct stages:
+ * - The stub retrieves the final version of the memory map from UEFI, populates
+ * the virt_addr fields and calls the SetVirtualAddressMap() [SVAM] runtime
+ * service to communicate the new mapping to the firmware (Note that the new
+ * mapping is not live at this time)
+ * - During early boot, the page tables are allocated and populated based on the
+ * virt_addr fields in the memory map, but only if all descriptors with the
+ * EFI_MEMORY_RUNTIME attribute have a non-zero value for virt_addr. If this
+ * succeeds, the EFI_VIRTMAP flag is set to indicate that the virtual mappings
+ * have been installed successfully.
+ * - During an early initcall(), the UEFI Runtime Services are enabled and the
+ * EFI_RUNTIME_SERVICES bit set if some conditions are met, i.e., we need a
+ * non-early mapping of the UEFI system table, and we need to have the virtmap
+ * installed.
+ */
+#define EFI_VIRTMAP EFI_ARCH_1
+
+void efi_virtmap_load(void);
+void efi_virtmap_unload(void);
+
#endif /* _ASM_EFI_H */
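
Since efi_virtmap_load()/efi_virtmap_unload() are hidden inside the efi_call_virt() expansion, callers of the runtime services see no change from the new scheme. A hedged caller sketch (error handling trimmed):

/* Illustration: read the UEFI RTC. efi.get_time is installed by
 * efi_native_runtime_setup(); the pgd switch to efi_mm happens
 * transparently inside the efi_call_virt() wrapper. */
efi_time_t tm;

if (efi.get_time(&tm, NULL) == EFI_SUCCESS)
	pr_info("UEFI time: %04u-%02u-%02u\n", tm.year, tm.month, tm.day);
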
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 5f7bfe6df723..defa0ff98250 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -31,6 +31,7 @@
*
*/
enum fixed_addresses {
+ FIX_HOLE,
FIX_EARLYCON_MEM_BASE,
__end_of_permanent_fixed_addresses,
@@ -48,6 +49,7 @@ enum fixed_addresses {
FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
+ FIX_TEXT_POKE0,
__end_of_fixed_addresses
};
@@ -56,10 +58,11 @@ enum fixed_addresses {
#define FIXMAP_PAGE_IO __pgprot(PROT_DEVICE_nGnRE)
-extern void __early_set_fixmap(enum fixed_addresses idx,
- phys_addr_t phys, pgprot_t flags);
+void __init early_fixmap_init(void);
-#define __set_fixmap __early_set_fixmap
+#define __early_set_fixmap __set_fixmap
+
+extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot);
#include <asm-generic/fixmap.h>
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index e1f7ecdde11f..1eebf5bb0b58 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -3,7 +3,6 @@
#include <asm-generic/irq.h>
-extern void (*handle_arch_irq)(struct pt_regs *);
extern void migrate_irqs(void);
extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index c2f006c48bdb..3d311761e3c2 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -31,7 +31,8 @@ extern void paging_init(void);
extern void setup_mm_for_reboot(void);
extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
extern void init_mem_pgprot(void);
-/* create an identity mapping for memory (or io if map_io is true) */
-extern void create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io);
+extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
+ unsigned long virt, phys_addr_t size,
+ pgprot_t prot);
#endif
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 41a43bf26492..1abe4d08725b 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -264,6 +264,11 @@ static inline pmd_t pte_pmd(pte_t pte)
return __pmd(pte_val(pte));
}
+static inline pgprot_t mk_sect_prot(pgprot_t prot)
+{
+ return __pgprot(pgprot_val(prot) & ~PTE_TABLE_BIT);
+}
+
/*
* THP definitions.
*/
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index d18a44940968..8ce9b0577442 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -61,7 +61,8 @@ ENTRY(efi_stub_entry)
*/
mov x20, x0 // DTB address
ldr x0, [sp, #16] // relocated _text address
- mov x21, x0
+ ldr x21, =stext_offset
+ add x21, x0, x21
/*
* Calculate size of the kernel Image (same for original and copy).
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 1d85a7c5a850..a98415b5979c 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -11,24 +11,31 @@
*
*/
+#include <linux/atomic.h>
+#include <linux/dmi.h>
#include <linux/efi.h>
#include <linux/export.h>
#include <linux/memblock.h>
+#include <linux/mm_types.h>
#include <linux/bootmem.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
+#include <linux/preempt.h>
+#include <linux/rbtree.h>
+#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/spinlock.h>
#include <asm/cacheflush.h>
#include <asm/efi.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
struct efi_memory_map memmap;
-static efi_runtime_services_t *runtime;
-
static u64 efi_system_table;
static int uefi_debug __initdata;
@@ -47,30 +54,33 @@ static int __init is_normal_ram(efi_memory_desc_t *md)
return 0;
}
-static void __init efi_setup_idmap(void)
+/*
+ * Translate an EFI virtual address into a physical address: this is necessary,
+ * as some data members of the EFI system table are virtually remapped after
+ * SetVirtualAddressMap() has been called.
+ */
+static phys_addr_t efi_to_phys(unsigned long addr)
{
- struct memblock_region *r;
efi_memory_desc_t *md;
- u64 paddr, npages, size;
-
- for_each_memblock(memory, r)
- create_id_mapping(r->base, r->size, 0);
- /* map runtime io spaces */
for_each_efi_memory_desc(&memmap, md) {
- if (!(md->attribute & EFI_MEMORY_RUNTIME) || is_normal_ram(md))
+ if (!(md->attribute & EFI_MEMORY_RUNTIME))
continue;
- paddr = md->phys_addr;
- npages = md->num_pages;
- memrange_efi_to_native(&paddr, &npages);
- size = npages << PAGE_SHIFT;
- create_id_mapping(paddr, size, 1);
+ if (md->virt_addr == 0)
+ /* no virtual mapping has been installed by the stub */
+ break;
+ if (md->virt_addr <= addr &&
+ (addr - md->virt_addr) < (md->num_pages << EFI_PAGE_SHIFT))
+ return md->phys_addr + addr - md->virt_addr;
}
+ return addr;
}
static int __init uefi_init(void)
{
efi_char16_t *c16;
+ void *config_tables;
+ u64 table_size;
char vendor[100] = "unknown";
int i, retval;
@@ -98,7 +108,7 @@ static int __init uefi_init(void)
efi.systab->hdr.revision & 0xffff);
/* Show what we know for posterity */
- c16 = early_memremap(efi.systab->fw_vendor,
+ c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor),
sizeof(vendor));
if (c16) {
for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i)
@@ -111,10 +121,14 @@ static int __init uefi_init(void)
efi.systab->hdr.revision >> 16,
efi.systab->hdr.revision & 0xffff, vendor);
- retval = efi_config_init(NULL);
- if (retval == 0)
- set_bit(EFI_CONFIG_TABLES, &efi.flags);
+ table_size = sizeof(efi_config_table_64_t) * efi.systab->nr_tables;
+ config_tables = early_memremap(efi_to_phys(efi.systab->tables),
+ table_size);
+
+ retval = efi_config_parse_tables(config_tables, efi.systab->nr_tables,
+ sizeof(efi_config_table_64_t), NULL);
+ early_memunmap(config_tables, table_size);
out:
early_memunmap(efi.systab, sizeof(efi_system_table_t));
return retval;
@@ -125,17 +139,17 @@ out:
*/
static __init int is_reserve_region(efi_memory_desc_t *md)
{
- if (!is_normal_ram(md))
+ switch (md->type) {
+ case EFI_LOADER_CODE:
+ case EFI_LOADER_DATA:
+ case EFI_BOOT_SERVICES_CODE:
+ case EFI_BOOT_SERVICES_DATA:
+ case EFI_CONVENTIONAL_MEMORY:
return 0;
-
- if (md->attribute & EFI_MEMORY_RUNTIME)
- return 1;
-
- if (md->type == EFI_ACPI_RECLAIM_MEMORY ||
- md->type == EFI_RESERVED_TYPE)
- return 1;
-
- return 0;
+ default:
+ break;
+ }
+ return is_normal_ram(md);
}
static __init void reserve_regions(void)
@@ -164,9 +178,7 @@ static __init void reserve_regions(void)
if (is_normal_ram(md))
early_init_dt_add_memory_arch(paddr, size);
- if (is_reserve_region(md) ||
- md->type == EFI_BOOT_SERVICES_CODE ||
- md->type == EFI_BOOT_SERVICES_DATA) {
+ if (is_reserve_region(md)) {
memblock_reserve(paddr, size);
if (uefi_debug)
pr_cont("*");
@@ -179,123 +191,6 @@ static __init void reserve_regions(void)
set_bit(EFI_MEMMAP, &efi.flags);
}
-
-static u64 __init free_one_region(u64 start, u64 end)
-{
- u64 size = end - start;
-
- if (uefi_debug)
- pr_info(" EFI freeing: 0x%012llx-0x%012llx\n", start, end - 1);
-
- free_bootmem_late(start, size);
- return size;
-}
-
-static u64 __init free_region(u64 start, u64 end)
-{
- u64 map_start, map_end, total = 0;
-
- if (end <= start)
- return total;
-
- map_start = (u64)memmap.phys_map;
- map_end = PAGE_ALIGN(map_start + (memmap.map_end - memmap.map));
- map_start &= PAGE_MASK;
-
- if (start < map_end && end > map_start) {
- /* region overlaps UEFI memmap */
- if (start < map_start)
- total += free_one_region(start, map_start);
-
- if (map_end < end)
- total += free_one_region(map_end, end);
- } else
- total += free_one_region(start, end);
-
- return total;
-}
-
-static void __init free_boot_services(void)
-{
- u64 total_freed = 0;
- u64 keep_end, free_start, free_end;
- efi_memory_desc_t *md;
-
- /*
- * If kernel uses larger pages than UEFI, we have to be careful
- * not to inadvertantly free memory we want to keep if there is
- * overlap at the kernel page size alignment. We do not want to
- * free is_reserve_region() memory nor the UEFI memmap itself.
- *
- * The memory map is sorted, so we keep track of the end of
- * any previous region we want to keep, remember any region
- * we want to free and defer freeing it until we encounter
- * the next region we want to keep. This way, before freeing
- * it, we can clip it as needed to avoid freeing memory we
- * want to keep for UEFI.
- */
-
- keep_end = 0;
- free_start = 0;
-
- for_each_efi_memory_desc(&memmap, md) {
- u64 paddr, npages, size;
-
- if (is_reserve_region(md)) {
- /*
- * We don't want to free any memory from this region.
- */
- if (free_start) {
- /* adjust free_end then free region */
- if (free_end > md->phys_addr)
- free_end -= PAGE_SIZE;
- total_freed += free_region(free_start, free_end);
- free_start = 0;
- }
- keep_end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
- continue;
- }
-
- if (md->type != EFI_BOOT_SERVICES_CODE &&
- md->type != EFI_BOOT_SERVICES_DATA) {
- /* no need to free this region */
- continue;
- }
-
- /*
- * We want to free memory from this region.
- */
- paddr = md->phys_addr;
- npages = md->num_pages;
- memrange_efi_to_native(&paddr, &npages);
- size = npages << PAGE_SHIFT;
-
- if (free_start) {
- if (paddr <= free_end)
- free_end = paddr + size;
- else {
- total_freed += free_region(free_start, free_end);
- free_start = paddr;
- free_end = paddr + size;
- }
- } else {
- free_start = paddr;
- free_end = paddr + size;
- }
- if (free_start < keep_end) {
- free_start += PAGE_SIZE;
- if (free_start >= free_end)
- free_start = 0;
- }
- }
- if (free_start)
- total_freed += free_region(free_start, free_end);
-
- if (total_freed)
- pr_info("Freed 0x%llx bytes of EFI boot services memory",
- total_freed);
-}
-
void __init efi_init(void)
{
struct efi_fdt_params params;
@@ -320,61 +215,14 @@ void __init efi_init(void)
reserve_regions();
}
-void __init efi_idmap_init(void)
-{
- if (!efi_enabled(EFI_BOOT))
- return;
-
- /* boot time idmap_pg_dir is incomplete, so fill in missing parts */
- efi_setup_idmap();
- early_memunmap(memmap.map, memmap.map_end - memmap.map);
-}
-
-static int __init remap_region(efi_memory_desc_t *md, void **new)
-{
- u64 paddr, vaddr, npages, size;
-
- paddr = md->phys_addr;
- npages = md->num_pages;
- memrange_efi_to_native(&paddr, &npages);
- size = npages << PAGE_SHIFT;
-
- if (is_normal_ram(md))
- vaddr = (__force u64)ioremap_cache(paddr, size);
- else
- vaddr = (__force u64)ioremap(paddr, size);
-
- if (!vaddr) {
- pr_err("Unable to remap 0x%llx pages @ %p\n",
- npages, (void *)paddr);
- return 0;
- }
-
- /* adjust for any rounding when EFI and system pagesize differs */
- md->virt_addr = vaddr + (md->phys_addr - paddr);
-
- if (uefi_debug)
- pr_info(" EFI remap 0x%012llx => %p\n",
- md->phys_addr, (void *)md->virt_addr);
-
- memcpy(*new, md, memmap.desc_size);
- *new += memmap.desc_size;
-
- return 1;
-}
-
/*
- * Switch UEFI from an identity map to a kernel virtual map
+ * Enable the UEFI Runtime Services if all prerequisites are in place, i.e.,
+ * non-early mapping of the UEFI system table and virtual mappings for all
+ * EFI_MEMORY_RUNTIME regions.
*/
-static int __init arm64_enter_virtual_mode(void)
+static int __init arm64_enable_runtime_services(void)
{
- efi_memory_desc_t *md;
- phys_addr_t virtmap_phys;
- void *virtmap, *virt_md;
- efi_status_t status;
u64 mapsize;
- int count = 0;
- unsigned long flags;
if (!efi_enabled(EFI_BOOT)) {
pr_info("EFI services will not be available.\n");
@@ -396,78 +244,116 @@ static int __init arm64_enter_virtual_mode(void)
efi.memmap = &memmap;
- /* Map the runtime regions */
- virtmap = kmalloc(mapsize, GFP_KERNEL);
- if (!virtmap) {
- pr_err("Failed to allocate EFI virtual memmap\n");
+ efi.systab = (__force void *)ioremap_cache(efi_system_table,
+ sizeof(efi_system_table_t));
+ if (!efi.systab) {
+ pr_err("Failed to remap EFI System Table\n");
return -1;
}
- virtmap_phys = virt_to_phys(virtmap);
- virt_md = virtmap;
+ set_bit(EFI_SYSTEM_TABLES, &efi.flags);
- for_each_efi_memory_desc(&memmap, md) {
- if (!(md->attribute & EFI_MEMORY_RUNTIME))
- continue;
- if (!remap_region(md, &virt_md))
- goto err_unmap;
- ++count;
+ if (!efi_enabled(EFI_VIRTMAP)) {
+ pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n");
+ return -1;
}
- efi.systab = (__force void *)efi_lookup_mapped_addr(efi_system_table);
- if (!efi.systab) {
- /*
- * If we have no virtual mapping for the System Table at this
- * point, the memory map doesn't cover the physical offset where
- * it resides. This means the System Table will be inaccessible
- * to Runtime Services themselves once the virtual mapping is
- * installed.
- */
- pr_err("Failed to remap EFI System Table -- buggy firmware?\n");
- goto err_unmap;
- }
- set_bit(EFI_SYSTEM_TABLES, &efi.flags);
+ /* Set up runtime services function pointers */
+ efi_native_runtime_setup();
+ set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
- local_irq_save(flags);
- cpu_switch_mm(idmap_pg_dir, &init_mm);
+ efi.runtime_version = efi.systab->hdr.revision;
- /* Call SetVirtualAddressMap with the physical address of the map */
- runtime = efi.systab->runtime;
- efi.set_virtual_address_map = runtime->set_virtual_address_map;
+ return 0;
+}
+early_initcall(arm64_enable_runtime_services);
- status = efi.set_virtual_address_map(count * memmap.desc_size,
- memmap.desc_size,
- memmap.desc_version,
- (efi_memory_desc_t *)virtmap_phys);
- cpu_set_reserved_ttbr0();
+static int __init arm64_dmi_init(void)
+{
+ /*
+ * On arm64, DMI depends on UEFI, and dmi_scan_machine() needs to
+ * be called early because dmi_id_init(), which is an arch_initcall
+ * itself, depends on dmi_scan_machine() having been called already.
+ */
+ dmi_scan_machine();
+ if (dmi_available)
+ dmi_set_dump_stack_arch_desc();
+ return 0;
+}
+core_initcall(arm64_dmi_init);
+
+static pgd_t efi_pgd[PTRS_PER_PGD] __page_aligned_bss;
+
+static struct mm_struct efi_mm = {
+ .mm_rb = RB_ROOT,
+ .pgd = efi_pgd,
+ .mm_users = ATOMIC_INIT(2),
+ .mm_count = ATOMIC_INIT(1),
+ .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem),
+ .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
+ .mmlist = LIST_HEAD_INIT(efi_mm.mmlist),
+ INIT_MM_CONTEXT(efi_mm)
+};
+
+static void efi_set_pgd(struct mm_struct *mm)
+{
+ cpu_switch_mm(mm->pgd, mm);
flush_tlb_all();
- local_irq_restore(flags);
+ if (icache_is_aivivt())
+ __flush_icache_all();
+}
- kfree(virtmap);
+void efi_virtmap_load(void)
+{
+ preempt_disable();
+ efi_set_pgd(&efi_mm);
+}
- free_boot_services();
+void efi_virtmap_unload(void)
+{
+ efi_set_pgd(current->active_mm);
+ preempt_enable();
+}
- if (status != EFI_SUCCESS) {
- pr_err("Failed to set EFI virtual address map! [%lx]\n",
- status);
- return -1;
- }
+void __init efi_virtmap_init(void)
+{
+ efi_memory_desc_t *md;
- /* Set up runtime services function pointers */
- runtime = efi.systab->runtime;
- efi_native_runtime_setup();
- set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+ if (!efi_enabled(EFI_BOOT))
+ return;
- efi.runtime_version = efi.systab->hdr.revision;
+ for_each_efi_memory_desc(&memmap, md) {
+ u64 paddr, npages, size;
+ pgprot_t prot;
- return 0;
+ if (!(md->attribute & EFI_MEMORY_RUNTIME))
+ continue;
+ if (WARN(md->virt_addr == 0,
+ "UEFI virtual mapping incomplete or missing -- no entry found for 0x%llx\n",
+ md->phys_addr))
+ return;
-err_unmap:
- /* unmap all mappings that succeeded: there are 'count' of those */
- for (virt_md = virtmap; count--; virt_md += memmap.desc_size) {
- md = virt_md;
- iounmap((__force void __iomem *)md->virt_addr);
+ paddr = md->phys_addr;
+ npages = md->num_pages;
+ memrange_efi_to_native(&paddr, &npages);
+ size = npages << PAGE_SHIFT;
+
+ pr_info(" EFI remap 0x%016llx => %p\n",
+ md->phys_addr, (void *)md->virt_addr);
+
+ /*
+ * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
+ * executable, everything else can be mapped with the XN bits
+ * set.
+ */
+ if (!is_normal_ram(md))
+ prot = __pgprot(PROT_DEVICE_nGnRE);
+ else if (md->type == EFI_RUNTIME_SERVICES_CODE)
+ prot = PAGE_KERNEL_EXEC;
+ else
+ prot = PAGE_KERNEL;
+
+ create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, prot);
}
- kfree(virtmap);
- return -1;
+ set_bit(EFI_VIRTMAP, &efi.flags);
+ early_memunmap(memmap.map, memmap.map_end - memmap.map);
}
-early_initcall(arm64_enter_virtual_mode);
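
The invariant that replaces the old identity-map dance: the firmware's virtual mappings live in the private efi_mm and are only installed around an individual call, with preemption disabled for the duration. A hedged restatement of the pairing contract:

/* Illustration of the contract enforced by the wrappers: no other
 * task may run while efi_mm's page tables are live. */
efi_virtmap_load();	/* preempt_disable() + switch to efi_mm */
/* ... invoke one runtime service here ... */
efi_virtmap_unload();	/* switch back to current->active_mm */
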
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index c19999ce2fb1..a4f58c8760cf 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -188,7 +188,8 @@ tsk .req x28 // current thread_info
* Interrupt handling.
*/
.macro irq_handler
- ldr x1, handle_arch_irq
+ adrp x1, handle_arch_irq
+ ldr x1, [x1, #:lo12:handle_arch_irq]
mov x0, sp
blr x1
.endm
@@ -729,6 +730,3 @@ ENTRY(sys_rt_sigreturn_wrapper)
mov x0, sp
b sys_rt_sigreturn
ENDPROC(sys_rt_sigreturn_wrapper)
-
-ENTRY(handle_arch_irq)
- .quad 0
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 0a6e4f924df8..8ce88e08c030 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -132,6 +132,8 @@ efi_head:
#endif
#ifdef CONFIG_EFI
+ .globl stext_offset
+ .set stext_offset, stext - efi_head
.align 3
pe_header:
.ascii "PE"
@@ -155,12 +157,12 @@ optional_header:
.long 0 // SizeOfInitializedData
.long 0 // SizeOfUninitializedData
.long efi_stub_entry - efi_head // AddressOfEntryPoint
- .long stext - efi_head // BaseOfCode
+ .long stext_offset // BaseOfCode
extra_header_fields:
.quad 0 // ImageBase
- .long 0x20 // SectionAlignment
- .long 0x8 // FileAlignment
+ .long 0x1000 // SectionAlignment
+ .long PECOFF_FILE_ALIGNMENT // FileAlignment
.short 0 // MajorOperatingSystemVersion
.short 0 // MinorOperatingSystemVersion
.short 0 // MajorImageVersion
@@ -172,7 +174,7 @@ extra_header_fields:
.long _end - efi_head // SizeOfImage
// Everything before the kernel image is considered part of the header
- .long stext - efi_head // SizeOfHeaders
+ .long stext_offset // SizeOfHeaders
.long 0 // CheckSum
.short 0xa // Subsystem (EFI application)
.short 0 // DllCharacteristics
@@ -217,16 +219,24 @@ section_table:
.byte 0
.byte 0 // end of 0 padding of section name
.long _end - stext // VirtualSize
- .long stext - efi_head // VirtualAddress
+ .long stext_offset // VirtualAddress
.long _edata - stext // SizeOfRawData
- .long stext - efi_head // PointerToRawData
+ .long stext_offset // PointerToRawData
.long 0 // PointerToRelocations (0 for executables)
.long 0 // PointerToLineNumbers (0 for executables)
.short 0 // NumberOfRelocations (0 for executables)
.short 0 // NumberOfLineNumbers (0 for executables)
.long 0xe0500020 // Characteristics (section flags)
- .align 5
+
+ /*
+ * EFI will load stext onwards at the 4k section alignment
+ * described in the PE/COFF header. To ensure that instruction
+ * sequences using an adrp and a :lo12: immediate will function
+ * correctly at this alignment, we must ensure that stext is
+ * placed at a 4k boundary in the Image to begin with.
+ */
+ .align 12
#endif
ENTRY(stext)
@@ -238,7 +248,13 @@ ENTRY(stext)
mov x0, x22
bl lookup_processor_type
mov x23, x0 // x23=current cpu_table
- cbz x23, __error_p // invalid processor (x23=0)?
+ /*
+ * __error_p may end up out of range for cbz if text areas are
+ * aligned up to section sizes.
+ */
+ cbnz x23, 1f // invalid processor (x23=0)?
+ b __error_p
+1:
bl __vet_fdt
bl __create_page_tables // x25=TTBR0, x26=TTBR1
/*
@@ -250,13 +266,214 @@ ENTRY(stext)
*/
ldr x27, __switch_data // address to jump to after
// MMU has been enabled
- adr lr, __enable_mmu // return (PIC) address
+ adrp lr, __enable_mmu // return (PIC) address
+ add lr, lr, #:lo12:__enable_mmu
ldr x12, [x23, #CPU_INFO_SETUP]
add x12, x12, x28 // __virt_to_phys
br x12 // initialise processor
ENDPROC(stext)
/*
+ * Determine validity of the x21 FDT pointer.
+ * The dtb must be 8-byte aligned and live in the first 512M of memory.
+ */
+__vet_fdt:
+ tst x21, #0x7
+ b.ne 1f
+ cmp x21, x24
+ b.lt 1f
+ mov x0, #(1 << 29)
+ add x0, x0, x24
+ cmp x21, x0
+ b.ge 1f
+ ret
+1:
+ mov x21, #0
+ ret
+ENDPROC(__vet_fdt)
+/*
+ * Macro to create a table entry to the next page.
+ *
+ * tbl: page table address
+ * virt: virtual address
+ * shift: #imm page table shift
+ * ptrs: #imm pointers per table page
+ *
+ * Preserves: virt
+ * Corrupts: tmp1, tmp2
+ * Returns: tbl -> next level table page address
+ */
+ .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
+ lsr \tmp1, \virt, #\shift
+ and \tmp1, \tmp1, #\ptrs - 1 // table index
+ add \tmp2, \tbl, #PAGE_SIZE
+ orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type
+ str \tmp2, [\tbl, \tmp1, lsl #3]
+ add \tbl, \tbl, #PAGE_SIZE // next level table page
+ .endm
+
+/*
+ * Macro to populate the PGD (and possibly PUD) for the corresponding
+ * block entry in the next level (tbl) for the given virtual address.
+ *
+ * Preserves: tbl, next, virt
+ * Corrupts: tmp1, tmp2
+ */
+ .macro create_pgd_entry, tbl, virt, tmp1, tmp2
+ create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
+#if SWAPPER_PGTABLE_LEVELS == 3
+ create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
+#endif
+ .endm
+
+/*
+ * Macro to populate block entries in the page table for the start..end
+ * virtual range (inclusive).
+ *
+ * Preserves: tbl, flags
+ * Corrupts: phys, start, end, pstate
+ */
+ .macro create_block_map, tbl, flags, phys, start, end
+ lsr \phys, \phys, #BLOCK_SHIFT
+ lsr \start, \start, #BLOCK_SHIFT
+ and \start, \start, #PTRS_PER_PTE - 1 // table index
+ orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry
+ lsr \end, \end, #BLOCK_SHIFT
+ and \end, \end, #PTRS_PER_PTE - 1 // table end index
+9999: str \phys, [\tbl, \start, lsl #3] // store the entry
+ add \start, \start, #1 // next entry
+ add \phys, \phys, #BLOCK_SIZE // next block
+ cmp \start, \end
+ b.ls 9999b
+ .endm
+
+/*
+ * Setup the initial page tables. We only setup the barest amount which is
+ * required to get the kernel running. The following sections are required:
+ * - identity mapping to enable the MMU (low address, TTBR0)
+ * - first few MB of the kernel linear mapping to jump to once the MMU has
+ * been enabled, including the FDT blob (TTBR1)
+ * - pgd entry for fixed mappings (TTBR1)
+ */
+__create_page_tables:
+ pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses
+ mov x27, lr
+
+ /*
+ * Invalidate the idmap and swapper page tables to avoid potential
+ * dirty cache lines being evicted.
+ */
+ mov x0, x25
+ add x1, x26, #SWAPPER_DIR_SIZE
+ bl __inval_cache_range
+
+ /*
+ * Clear the idmap and swapper page tables.
+ */
+ mov x0, x25
+ add x6, x26, #SWAPPER_DIR_SIZE
+1: stp xzr, xzr, [x0], #16
+ stp xzr, xzr, [x0], #16
+ stp xzr, xzr, [x0], #16
+ stp xzr, xzr, [x0], #16
+ cmp x0, x6
+ b.lo 1b
+
+ ldr x7, =MM_MMUFLAGS
+
+ /*
+ * Create the identity mapping.
+ */
+ mov x0, x25 // idmap_pg_dir
+ ldr x3, =KERNEL_START
+ add x3, x3, x28 // __pa(KERNEL_START)
+ create_pgd_entry x0, x3, x5, x6
+ ldr x6, =KERNEL_END
+ mov x5, x3 // __pa(KERNEL_START)
+ add x6, x6, x28 // __pa(KERNEL_END)
+ create_block_map x0, x7, x3, x5, x6
+
+ /*
+ * Map the kernel image (starting with PHYS_OFFSET).
+ */
+ mov x0, x26 // swapper_pg_dir
+ mov x5, #PAGE_OFFSET
+ create_pgd_entry x0, x5, x3, x6
+ ldr x6, =KERNEL_END
+ mov x3, x24 // phys offset
+ create_block_map x0, x7, x3, x5, x6
+
+ /*
+ * Map the FDT blob (maximum 2MB; must be within 512MB of
+ * PHYS_OFFSET).
+ */
+ mov x3, x21 // FDT phys address
+ and x3, x3, #~((1 << 21) - 1) // 2MB aligned
+ mov x6, #PAGE_OFFSET
+ sub x5, x3, x24 // subtract PHYS_OFFSET
+ tst x5, #~((1 << 29) - 1) // within 512MB?
+ csel x21, xzr, x21, ne // zero the FDT pointer
+ b.ne 1f
+ add x5, x5, x6 // __va(FDT blob)
+ add x6, x5, #1 << 21 // 2MB for the FDT blob
+ sub x6, x6, #1 // inclusive range
+ create_block_map x0, x7, x3, x5, x6
+1:
+ /*
+ * Since the page tables have been populated with non-cacheable
+ * accesses (MMU disabled), invalidate the idmap and swapper page
+ * tables again to remove any speculatively loaded cache lines.
+ */
+ mov x0, x25
+ add x1, x26, #SWAPPER_DIR_SIZE
+ bl __inval_cache_range
+
+ mov lr, x27
+ ret
+ENDPROC(__create_page_tables)
+ .ltorg
+
+ .align 3
+ .type __switch_data, %object
+__switch_data:
+ .quad __mmap_switched
+ .quad __bss_start // x6
+ .quad __bss_stop // x7
+ .quad processor_id // x4
+ .quad __fdt_pointer // x5
+ .quad memstart_addr // x6
+ .quad init_thread_union + THREAD_START_SP // sp
+
+/*
+ * The following fragment of code is executed with the MMU on in MMU mode, and
+ * uses absolute addresses; this is not position independent.
+ */
+__mmap_switched:
+ adr x3, __switch_data + 8
+
+ ldp x6, x7, [x3], #16
+1: cmp x6, x7
+ b.hs 2f
+ str xzr, [x6], #8 // Clear BSS
+ b 1b
+2:
+ ldp x4, x5, [x3], #16
+ ldr x6, [x3], #8
+ ldr x16, [x3]
+ mov sp, x16
+ str x22, [x4] // Save processor ID
+ str x21, [x5] // Save FDT pointer
+ str x24, [x6] // Save PHYS_OFFSET
+ mov x29, #0
+ b start_kernel
+ENDPROC(__mmap_switched)
+
+/*
+ * end early head section, begin head code that is also used for
+ * hotplug and needs to have the same protections as the text region
+ */
+ .section ".text","ax"
+/*
* If we're fortunate enough to boot at EL2, ensure that the world is
* sane before dropping to EL1.
*
@@ -331,7 +548,8 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems
msr vttbr_el2, xzr
/* Hypervisor stub */
- adr x0, __hyp_stub_vectors
+ adrp x0, __hyp_stub_vectors
+ add x0, x0, #:lo12:__hyp_stub_vectors
msr vbar_el2, x0
/* spsr */
@@ -492,183 +710,6 @@ ENDPROC(__calc_phys_offset)
.quad PAGE_OFFSET
/*
- * Macro to create a table entry to the next page.
- *
- * tbl: page table address
- * virt: virtual address
- * shift: #imm page table shift
- * ptrs: #imm pointers per table page
- *
- * Preserves: virt
- * Corrupts: tmp1, tmp2
- * Returns: tbl -> next level table page address
- */
- .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
- lsr \tmp1, \virt, #\shift
- and \tmp1, \tmp1, #\ptrs - 1 // table index
- add \tmp2, \tbl, #PAGE_SIZE
- orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type
- str \tmp2, [\tbl, \tmp1, lsl #3]
- add \tbl, \tbl, #PAGE_SIZE // next level table page
- .endm
-
-/*
- * Macro to populate the PGD (and possibily PUD) for the corresponding
- * block entry in the next level (tbl) for the given virtual address.
- *
- * Preserves: tbl, next, virt
- * Corrupts: tmp1, tmp2
- */
- .macro create_pgd_entry, tbl, virt, tmp1, tmp2
- create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
-#if SWAPPER_PGTABLE_LEVELS == 3
- create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
-#endif
- .endm
-
-/*
- * Macro to populate block entries in the page table for the start..end
- * virtual range (inclusive).
- *
- * Preserves: tbl, flags
- * Corrupts: phys, start, end, pstate
- */
- .macro create_block_map, tbl, flags, phys, start, end
- lsr \phys, \phys, #BLOCK_SHIFT
- lsr \start, \start, #BLOCK_SHIFT
- and \start, \start, #PTRS_PER_PTE - 1 // table index
- orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry
- lsr \end, \end, #BLOCK_SHIFT
- and \end, \end, #PTRS_PER_PTE - 1 // table end index
-9999: str \phys, [\tbl, \start, lsl #3] // store the entry
- add \start, \start, #1 // next entry
- add \phys, \phys, #BLOCK_SIZE // next block
- cmp \start, \end
- b.ls 9999b
- .endm
-
-/*
- * Setup the initial page tables. We only setup the barest amount which is
- * required to get the kernel running. The following sections are required:
- * - identity mapping to enable the MMU (low address, TTBR0)
- * - first few MB of the kernel linear mapping to jump to once the MMU has
- * been enabled, including the FDT blob (TTBR1)
- * - pgd entry for fixed mappings (TTBR1)
- */
-__create_page_tables:
- pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses
- mov x27, lr
-
- /*
- * Invalidate the idmap and swapper page tables to avoid potential
- * dirty cache lines being evicted.
- */
- mov x0, x25
- add x1, x26, #SWAPPER_DIR_SIZE
- bl __inval_cache_range
-
- /*
- * Clear the idmap and swapper page tables.
- */
- mov x0, x25
- add x6, x26, #SWAPPER_DIR_SIZE
-1: stp xzr, xzr, [x0], #16
- stp xzr, xzr, [x0], #16
- stp xzr, xzr, [x0], #16
- stp xzr, xzr, [x0], #16
- cmp x0, x6
- b.lo 1b
-
- ldr x7, =MM_MMUFLAGS
-
- /*
- * Create the identity mapping.
- */
- mov x0, x25 // idmap_pg_dir
- ldr x3, =KERNEL_START
- add x3, x3, x28 // __pa(KERNEL_START)
- create_pgd_entry x0, x3, x5, x6
- ldr x6, =KERNEL_END
- mov x5, x3 // __pa(KERNEL_START)
- add x6, x6, x28 // __pa(KERNEL_END)
- create_block_map x0, x7, x3, x5, x6
-
- /*
- * Map the kernel image (starting with PHYS_OFFSET).
- */
- mov x0, x26 // swapper_pg_dir
- mov x5, #PAGE_OFFSET
- create_pgd_entry x0, x5, x3, x6
- ldr x6, =KERNEL_END
- mov x3, x24 // phys offset
- create_block_map x0, x7, x3, x5, x6
-
- /*
- * Map the FDT blob (maximum 2MB; must be within 512MB of
- * PHYS_OFFSET).
- */
- mov x3, x21 // FDT phys address
- and x3, x3, #~((1 << 21) - 1) // 2MB aligned
- mov x6, #PAGE_OFFSET
- sub x5, x3, x24 // subtract PHYS_OFFSET
- tst x5, #~((1 << 29) - 1) // within 512MB?
- csel x21, xzr, x21, ne // zero the FDT pointer
- b.ne 1f
- add x5, x5, x6 // __va(FDT blob)
- add x6, x5, #1 << 21 // 2MB for the FDT blob
- sub x6, x6, #1 // inclusive range
- create_block_map x0, x7, x3, x5, x6
-1:
- /*
- * Since the page tables have been populated with non-cacheable
- * accesses (MMU disabled), invalidate the idmap and swapper page
- * tables again to remove any speculatively loaded cache lines.
- */
- mov x0, x25
- add x1, x26, #SWAPPER_DIR_SIZE
- bl __inval_cache_range
-
- mov lr, x27
- ret
-ENDPROC(__create_page_tables)
- .ltorg
-
- .align 3
- .type __switch_data, %object
-__switch_data:
- .quad __mmap_switched
- .quad __bss_start // x6
- .quad __bss_stop // x7
- .quad processor_id // x4
- .quad __fdt_pointer // x5
- .quad memstart_addr // x6
- .quad init_thread_union + THREAD_START_SP // sp
-
-/*
- * The following fragment of code is executed with the MMU on in MMU mode, and
- * uses absolute addresses; this is not position independent.
- */
-__mmap_switched:
- adr x3, __switch_data + 8
-
- ldp x6, x7, [x3], #16
-1: cmp x6, x7
- b.hs 2f
- str xzr, [x6], #8 // Clear BSS
- b 1b
-2:
- ldp x4, x5, [x3], #16
- ldr x6, [x3], #8
- ldr x16, [x3]
- mov sp, x16
- str x22, [x4] // Save processor ID
- str x21, [x5] // Save FDT pointer
- str x24, [x6] // Save PHYS_OFFSET
- mov x29, #0
- b start_kernel
-ENDPROC(__mmap_switched)
-
-/*
* Exception handling. Something went wrong and we can't proceed. We ought to
* tell the user, but since we don't have any guarantee that we're even
* running on the right architecture, we do virtually nothing.
@@ -715,22 +756,3 @@ __lookup_processor_type_data:
.quad .
.quad cpu_table
.size __lookup_processor_type_data, . - __lookup_processor_type_data
-
-/*
- * Determine validity of the x21 FDT pointer.
- * The dtb must be 8-byte aligned and live in the first 512M of memory.
- */
-__vet_fdt:
- tst x21, #0x7
- b.ne 1f
- cmp x21, x24
- b.lt 1f
- mov x0, #(1 << 29)
- add x0, x0, x24
- cmp x21, x0
- b.ge 1f
- ret
-1:
- mov x21, #0
- ret
-ENDPROC(__vet_fdt)
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 7e9327a0986d..27d4864577e5 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -17,14 +17,19 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/bitops.h>
+#include <linux/bug.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
+#include <linux/mm.h>
#include <linux/smp.h>
+#include <linux/spinlock.h>
#include <linux/stop_machine.h>
+#include <linux/types.h>
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/debug-monitors.h>
+#include <asm/fixmap.h>
#include <asm/insn.h>
#define AARCH64_INSN_SF_BIT BIT(31)
@@ -72,6 +77,29 @@ bool __kprobes aarch64_insn_is_nop(u32 insn)
}
}
+static DEFINE_SPINLOCK(patch_lock);
+
+static void __kprobes *patch_map(void *addr, int fixmap)
+{
+ unsigned long uintaddr = (uintptr_t) addr;
+ bool module = !core_kernel_text(uintaddr);
+ struct page *page;
+
+ if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX))
+ page = vmalloc_to_page(addr);
+ else
+ page = virt_to_page(addr);
+
+ BUG_ON(!page);
+ set_fixmap(fixmap, page_to_phys(page));
+
+ return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK));
+}
+
+static void __kprobes patch_unmap(int fixmap)
+{
+ clear_fixmap(fixmap);
+}
/*
* In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always
* little-endian.
@@ -88,10 +116,27 @@ int __kprobes aarch64_insn_read(void *addr, u32 *insnp)
return ret;
}
+static int __kprobes __aarch64_insn_write(void *addr, u32 insn)
+{
+ void *waddr = addr;
+ unsigned long flags = 0;
+ int ret;
+
+ spin_lock_irqsave(&patch_lock, flags);
+ waddr = patch_map(addr, FIX_TEXT_POKE0);
+
+ ret = probe_kernel_write(waddr, &insn, AARCH64_INSN_SIZE);
+
+ patch_unmap(FIX_TEXT_POKE0);
+ spin_unlock_irqrestore(&patch_lock, flags);
+
+ return ret;
+}
+
int __kprobes aarch64_insn_write(void *addr, u32 insn)
{
insn = cpu_to_le32(insn);
- return probe_kernel_write(addr, &insn, AARCH64_INSN_SIZE);
+ return __aarch64_insn_write(addr, insn);
}
static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn)
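
The arm64 counterpart routes instruction writes through the same fixmap-poking idea, but via probe_kernel_write() so that an unexpected fault is reported instead of oopsing. A hedged caller sketch (the helper is hypothetical):

/* Hypothetical helper: patch a NOP over the instruction at 'addr'.
 * aarch64_insn_write() handles the endianness conversion and the
 * FIX_TEXT_POKE0 alias; the caller still flushes the icache, as
 * aarch64_insn_patch_text_nosync() does. */
static int example_write_nop(void *addr)
{
	int ret = aarch64_insn_write(addr, aarch64_insn_gen_nop());

	if (!ret)
		flush_icache_range((uintptr_t)addr,
				   (uintptr_t)addr + AARCH64_INSN_SIZE);
	return ret;
}
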
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 071a6ec13bd8..240b75c0e94f 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -40,6 +40,8 @@ int arch_show_interrupts(struct seq_file *p, int prec)
return 0;
}
+void (*handle_arch_irq)(struct pt_regs *) = NULL;
+
void __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
{
if (handle_arch_irq)
diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c
index 263a166291fb..4f1fec7a46db 100644
--- a/arch/arm64/kernel/jump_label.c
+++ b/arch/arm64/kernel/jump_label.c
@@ -22,9 +22,8 @@
#ifdef HAVE_JUMP_LABEL
-static void __arch_jump_label_transform(struct jump_entry *entry,
- enum jump_label_type type,
- bool is_static)
+void arch_jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type)
{
void *addr = (void *)entry->code;
u32 insn;
@@ -37,22 +36,18 @@ static void __arch_jump_label_transform(struct jump_entry *entry,
insn = aarch64_insn_gen_nop();
}
- if (is_static)
- aarch64_insn_patch_text_nosync(addr, insn);
- else
- aarch64_insn_patch_text(&addr, &insn, 1);
-}
-
-void arch_jump_label_transform(struct jump_entry *entry,
- enum jump_label_type type)
-{
- __arch_jump_label_transform(entry, type, false);
+ aarch64_insn_patch_text(&addr, &insn, 1);
}
void arch_jump_label_transform_static(struct jump_entry *entry,
enum jump_label_type type)
{
- __arch_jump_label_transform(entry, type, true);
+ /*
+ * We use the architected A64 NOP in arch_static_branch, so there's no
+ * need to patch an identical A64 NOP over the top of it here. The core
+ * will call arch_jump_label_transform from a module notifier if the
+ * NOP needs to be replaced by a branch.
+ */
}
#endif /* HAVE_JUMP_LABEL */
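
For context, this is the consumer-side pattern the jump-label machinery serves (3.18-era static_key API; the key and function below are hypothetical):

#include <linux/jump_label.h>
#include <linux/printk.h>

static struct static_key example_key = STATIC_KEY_INIT_FALSE;

static void example_fast_path(void)
{
	/* arch_static_branch() plants an architected A64 NOP here;
	 * static_key_slow_inc(&example_key) later patches it into a
	 * branch via arch_jump_label_transform(). */
	if (static_key_false(&example_key))
		pr_info("slow path taken\n");
}
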
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index ae8be923f2cd..789ce6cf7ff6 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -380,6 +380,7 @@ void __init setup_arch(char **cmdline_p)
*cmdline_p = boot_command_line;
+ early_fixmap_init();
early_ioremap_init();
parse_early_param();
@@ -396,7 +397,7 @@ void __init setup_arch(char **cmdline_p)
paging_init();
request_standard_resources();
- efi_idmap_init();
+ efi_virtmap_init();
early_ioremap_reset();
unflatten_device_tree();
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 2f600294e8ca..5f11ebe652a4 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -8,6 +8,7 @@
#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/page.h>
+#include <asm/pgtable.h>
#include "image.h"
@@ -32,6 +33,30 @@ jiffies = jiffies_64;
*(.hyp.text) \
VMLINUX_SYMBOL(__hyp_text_end) = .;
+/*
+ * The size of the PE/COFF section that covers the kernel image, which
+ * runs from stext to _edata, must be a round multiple of the PE/COFF
+ * FileAlignment, which we set to its minimum value of 0x200. 'stext'
+ * itself is 4 KB aligned, so padding out _edata to a 0x200 aligned
+ * boundary should be sufficient.
+ */
+PECOFF_FILE_ALIGNMENT = 0x200;
+
+#ifdef CONFIG_EFI
+#define PECOFF_EDATA_PADDING \
+ .pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); }
+#else
+#define PECOFF_EDATA_PADDING
+#endif
+
+#ifdef CONFIG_DEBUG_ALIGN_RODATA
+#define ALIGN_DEBUG_RO . = ALIGN(1<<SECTION_SHIFT);
+#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO
+#else
+#define ALIGN_DEBUG_RO
+#define ALIGN_DEBUG_RO_MIN(min) . = ALIGN(min);
+#endif
+
SECTIONS
{
/*
@@ -54,6 +79,7 @@ SECTIONS
_text = .;
HEAD_TEXT
}
+ ALIGN_DEBUG_RO
.text : { /* Real text segment */
_stext = .; /* Text and read-only data */
__exception_text_start = .;
@@ -70,19 +96,22 @@ SECTIONS
*(.got) /* Global offset table */
}
+ ALIGN_DEBUG_RO
RO_DATA(PAGE_SIZE)
EXCEPTION_TABLE(8)
NOTES
+ ALIGN_DEBUG_RO
_etext = .; /* End of text and rodata section */
- . = ALIGN(PAGE_SIZE);
+ ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
__init_begin = .;
INIT_TEXT_SECTION(8)
.exit.text : {
ARM_EXIT_KEEP(EXIT_TEXT)
}
- . = ALIGN(16);
+
+ ALIGN_DEBUG_RO_MIN(16)
.init.data : {
INIT_DATA
INIT_SETUP(16)
@@ -114,6 +143,7 @@ SECTIONS
_data = .;
_sdata = .;
RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE)
+ PECOFF_EDATA_PADDING
_edata = .;
BSS_SECTION(0, 0, 0)
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index c56179ed2c09..773d37a14039 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -3,3 +3,4 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
ioremap.o mmap.o pgd.o mmu.o \
context.o proc.o pageattr.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+obj-$(CONFIG_ARM64_PTDUMP) += dump.o
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 6efbb52cb92e..6ee26f82966b 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -170,7 +170,7 @@ static void *__dma_alloc_noncoherent(struct device *dev, size_t size,
coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP,
__get_dma_pgprot(attrs,
__pgprot(PROT_NORMAL_NC), false),
- NULL);
+ __builtin_return_address(0));
if (!coherent_ptr)
goto no_map;
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
new file mode 100644
index 000000000000..bf69601be546
--- /dev/null
+++ b/arch/arm64/mm/dump.c
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 2014, The Linux Foundation. All rights reserved.
+ * Debug helper to dump the current kernel pagetables of the system
+ * so that we can see what the various memory ranges are set to.
+ *
+ * Derived from x86 and arm implementation:
+ * (C) Copyright 2008 Intel Corporation
+ *
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+
+#include <asm/fixmap.h>
+#include <asm/pgtable.h>
+
+#define LOWEST_ADDR (UL(0xffffffffffffffff) << VA_BITS)
+
+struct addr_marker {
+ unsigned long start_address;
+ const char *name;
+};
+
+enum address_markers_idx {
+ VMALLOC_START_NR = 0,
+ VMALLOC_END_NR,
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ VMEMMAP_START_NR,
+ VMEMMAP_END_NR,
+#endif
+ PCI_START_NR,
+ PCI_END_NR,
+ FIXADDR_START_NR,
+ FIXADDR_END_NR,
+ MODULES_START_NR,
+ MODULES_END_NR,
+ KERNEL_SPACE_NR,
+};
+
+static struct addr_marker address_markers[] = {
+ { VMALLOC_START, "vmalloc() Area" },
+ { VMALLOC_END, "vmalloc() End" },
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ { 0, "vmemmap start" },
+ { 0, "vmemmap end" },
+#endif
+ { (unsigned long) PCI_IOBASE, "PCI I/O start" },
+ { (unsigned long) PCI_IOBASE + SZ_16M, "PCI I/O end" },
+ { FIXADDR_START, "Fixmap start" },
+ { FIXADDR_TOP, "Fixmap end" },
+ { MODULES_VADDR, "Modules start" },
+ { MODULES_END, "Modules end" },
+ { PAGE_OFFSET, "Kernel Mapping" },
+ { -1, NULL },
+};
+
+struct pg_state {
+ struct seq_file *seq;
+ const struct addr_marker *marker;
+ unsigned long start_address;
+ unsigned level;
+ u64 current_prot;
+};
+
+struct prot_bits {
+ u64 mask;
+ u64 val;
+ const char *set;
+ const char *clear;
+};
+
+static const struct prot_bits pte_bits[] = {
+ {
+ .mask = PTE_USER,
+ .val = PTE_USER,
+ .set = "USR",
+ .clear = " ",
+ }, {
+ .mask = PTE_RDONLY,
+ .val = PTE_RDONLY,
+ .set = "ro",
+ .clear = "RW",
+ }, {
+ .mask = PTE_PXN,
+ .val = PTE_PXN,
+ .set = "NX",
+ .clear = "x ",
+ }, {
+ .mask = PTE_SHARED,
+ .val = PTE_SHARED,
+ .set = "SHD",
+ .clear = " ",
+ }, {
+ .mask = PTE_AF,
+ .val = PTE_AF,
+ .set = "AF",
+ .clear = " ",
+ }, {
+ .mask = PTE_NG,
+ .val = PTE_NG,
+ .set = "NG",
+ .clear = " ",
+ }, {
+ .mask = PTE_UXN,
+ .val = PTE_UXN,
+ .set = "UXN",
+ }, {
+ .mask = PTE_ATTRINDX_MASK,
+ .val = PTE_ATTRINDX(MT_DEVICE_nGnRnE),
+ .set = "DEVICE/nGnRnE",
+ }, {
+ .mask = PTE_ATTRINDX_MASK,
+ .val = PTE_ATTRINDX(MT_DEVICE_nGnRE),
+ .set = "DEVICE/nGnRE",
+ }, {
+ .mask = PTE_ATTRINDX_MASK,
+ .val = PTE_ATTRINDX(MT_DEVICE_GRE),
+ .set = "DEVICE/GRE",
+ }, {
+ .mask = PTE_ATTRINDX_MASK,
+ .val = PTE_ATTRINDX(MT_NORMAL_NC),
+ .set = "MEM/NORMAL-NC",
+ }, {
+ .mask = PTE_ATTRINDX_MASK,
+ .val = PTE_ATTRINDX(MT_NORMAL),
+ .set = "MEM/NORMAL",
+ }
+};
+
+struct pg_level {
+ const struct prot_bits *bits;
+ size_t num;
+ u64 mask;
+};
+
+static struct pg_level pg_level[] = {
+ {
+ }, { /* pgd */
+ .bits = pte_bits,
+ .num = ARRAY_SIZE(pte_bits),
+ }, { /* pud */
+ .bits = pte_bits,
+ .num = ARRAY_SIZE(pte_bits),
+ }, { /* pmd */
+ .bits = pte_bits,
+ .num = ARRAY_SIZE(pte_bits),
+ }, { /* pte */
+ .bits = pte_bits,
+ .num = ARRAY_SIZE(pte_bits),
+ },
+};
+
+static void dump_prot(struct pg_state *st, const struct prot_bits *bits,
+ size_t num)
+{
+ unsigned i;
+
+ for (i = 0; i < num; i++, bits++) {
+ const char *s;
+
+ if ((st->current_prot & bits->mask) == bits->val)
+ s = bits->set;
+ else
+ s = bits->clear;
+
+ if (s)
+ seq_printf(st->seq, " %s", s);
+ }
+}
+
+static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
+ u64 val)
+{
+ static const char units[] = "KMGTPE";
+ u64 prot = val & pg_level[level].mask;
+
+ if (addr < LOWEST_ADDR)
+ return;
+
+ if (!st->level) {
+ st->level = level;
+ st->current_prot = prot;
+ st->start_address = addr;
+ seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ } else if (prot != st->current_prot || level != st->level ||
+ addr >= st->marker[1].start_address) {
+ const char *unit = units;
+ unsigned long delta;
+
+ if (st->current_prot) {
+ seq_printf(st->seq, "0x%16lx-0x%16lx ",
+ st->start_address, addr);
+
+ delta = (addr - st->start_address) >> 10;
+ while (!(delta & 1023) && unit[1]) {
+ delta >>= 10;
+ unit++;
+ }
+ seq_printf(st->seq, "%9lu%c", delta, *unit);
+ if (pg_level[st->level].bits)
+ dump_prot(st, pg_level[st->level].bits,
+ pg_level[st->level].num);
+ seq_puts(st->seq, "\n");
+ }
+
+ if (addr >= st->marker[1].start_address) {
+ st->marker++;
+ seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ }
+
+ st->start_address = addr;
+ st->current_prot = prot;
+ st->level = level;
+ }
+
+ if (addr >= st->marker[1].start_address) {
+ st->marker++;
+ seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ }
+
+}
+
+static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
+{
+ pte_t *pte = pte_offset_kernel(pmd, 0);
+ unsigned long addr;
+ unsigned i;
+
+ for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+ addr = start + i * PAGE_SIZE;
+ note_page(st, addr, 4, pte_val(*pte));
+ }
+}
+
+static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
+{
+ pmd_t *pmd = pmd_offset(pud, 0);
+ unsigned long addr;
+ unsigned i;
+
+ for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
+ addr = start + i * PMD_SIZE;
+ if (pmd_none(*pmd) || pmd_sect(*pmd) || pmd_bad(*pmd))
+ note_page(st, addr, 3, pmd_val(*pmd));
+ else
+ walk_pte(st, pmd, addr);
+ }
+}
+
+static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
+{
+ pud_t *pud = pud_offset(pgd, 0);
+ unsigned long addr;
+ unsigned i;
+
+ for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+ addr = start + i * PUD_SIZE;
+ if (pud_none(*pud) || pud_sect(*pud) || pud_bad(*pud))
+ note_page(st, addr, 2, pud_val(*pud));
+ else
+ walk_pmd(st, pud, addr);
+ }
+}
+
+static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long start)
+{
+ pgd_t *pgd = pgd_offset(mm, 0);
+ unsigned i;
+ unsigned long addr;
+
+ for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
+ addr = start + i * PGDIR_SIZE;
+ if (pgd_none(*pgd) || pgd_bad(*pgd))
+ note_page(st, addr, 1, pgd_val(*pgd));
+ else
+ walk_pud(st, pgd, addr);
+ }
+}
+
+static int ptdump_show(struct seq_file *m, void *v)
+{
+ struct pg_state st = {
+ .seq = m,
+ .marker = address_markers,
+ };
+
+ walk_pgd(&st, &init_mm, LOWEST_ADDR);
+
+ note_page(&st, 0, 0, 0);
+ return 0;
+}
+
+static int ptdump_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ptdump_show, NULL);
+}
+
+static const struct file_operations ptdump_fops = {
+ .open = ptdump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int ptdump_init(void)
+{
+ struct dentry *pe;
+ unsigned i, j;
+
+ for (i = 0; i < ARRAY_SIZE(pg_level); i++)
+ if (pg_level[i].bits)
+ for (j = 0; j < pg_level[i].num; j++)
+ pg_level[i].mask |= pg_level[i].bits[j].mask;
+
+ address_markers[VMEMMAP_START_NR].start_address =
+ (unsigned long)virt_to_page(PAGE_OFFSET);
+ address_markers[VMEMMAP_END_NR].start_address =
+ (unsigned long)virt_to_page(high_memory);
+
+ pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
+ &ptdump_fops);
+ return pe ? 0 : -ENOMEM;
+}
+device_initcall(ptdump_init);
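
The size formatter in note_page() above starts from a KB figure and keeps dividing by 1024 while the value is an exact multiple and a larger unit remains in "KMGTPE". A self-contained userspace rendition of that loop (hypothetical wrapper, same arithmetic):

    #include <stdio.h>

    /* Print a byte delta the way note_page() renders range sizes. */
    static void print_size(unsigned long delta_bytes)
    {
            static const char units[] = "KMGTPE";
            const char *unit = units;
            unsigned long delta = delta_bytes >> 10;    /* bytes -> KB */

            while (!(delta & 1023) && unit[1]) {
                    delta >>= 10;
                    unit++;
            }
            printf("%9lu%c\n", delta, *unit);
    }

    int main(void)
    {
            print_size(2UL << 20);  /* "        2M" */
            print_size(4096);       /* "        4K" */
            return 0;
    }
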
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 41cb6d3d6075..6094c64b3380 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -279,6 +279,7 @@ retry:
* starvation.
*/
mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ mm_flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index b45b31397d0d..b24ed5e112ec 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -327,6 +327,7 @@ void __init mem_init(void)
void free_initmem(void)
{
+ fixup_init();
free_initmem_default(0);
free_alternatives_memory();
}
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index 4a07630a6616..cbb99c8f1e04 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -103,97 +103,10 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size)
}
EXPORT_SYMBOL(ioremap_cache);
-static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
-static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
-#endif
-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
-static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
-#endif
-
-static inline pud_t * __init early_ioremap_pud(unsigned long addr)
-{
- pgd_t *pgd;
-
- pgd = pgd_offset_k(addr);
- BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
-
- return pud_offset(pgd, addr);
-}
-
-static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
-{
- pud_t *pud = early_ioremap_pud(addr);
-
- BUG_ON(pud_none(*pud) || pud_bad(*pud));
-
- return pmd_offset(pud, addr);
-}
-
-static inline pte_t * __init early_ioremap_pte(unsigned long addr)
-{
- pmd_t *pmd = early_ioremap_pmd(addr);
-
- BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
-
- return pte_offset_kernel(pmd, addr);
-}
-
+/*
+ * Must be called after early_fixmap_init
+ */
void __init early_ioremap_init(void)
{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- unsigned long addr = fix_to_virt(FIX_BTMAP_BEGIN);
-
- pgd = pgd_offset_k(addr);
- pgd_populate(&init_mm, pgd, bm_pud);
- pud = pud_offset(pgd, addr);
- pud_populate(&init_mm, pud, bm_pmd);
- pmd = pmd_offset(pud, addr);
- pmd_populate_kernel(&init_mm, pmd, bm_pte);
-
- /*
- * The boot-ioremap range spans multiple pmds, for which
- * we are not prepared:
- */
- BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
- != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
-
- if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
- WARN_ON(1);
- pr_warn("pmd %p != %p\n",
- pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
- pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
- fix_to_virt(FIX_BTMAP_BEGIN));
- pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n",
- fix_to_virt(FIX_BTMAP_END));
-
- pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
- pr_warn("FIX_BTMAP_BEGIN: %d\n",
- FIX_BTMAP_BEGIN);
- }
-
early_ioremap_setup();
}
-
-void __init __early_set_fixmap(enum fixed_addresses idx,
- phys_addr_t phys, pgprot_t flags)
-{
- unsigned long addr = __fix_to_virt(idx);
- pte_t *pte;
-
- if (idx >= __end_of_fixed_addresses) {
- BUG();
- return;
- }
-
- pte = early_ioremap_pte(addr);
-
- if (pgprot_val(flags))
- set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
- else {
- pte_clear(&init_mm, addr, pte);
- flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
- }
-}
diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h
index d519f4f50c8c..ef47d99b5cbc 100644
--- a/arch/arm64/mm/mm.h
+++ b/arch/arm64/mm/mm.h
@@ -1,2 +1,3 @@
extern void __init bootmem_init(void);
-extern void __init arm64_swiotlb_init(void);
+
+void fixup_init(void);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index f4f8b500f74c..e83d30e544cc 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -26,8 +26,10 @@
#include <linux/memblock.h>
#include <linux/fs.h>
#include <linux/io.h>
+#include <linux/stop_machine.h>
#include <asm/cputype.h>
+#include <asm/fixmap.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sizes.h>
@@ -132,21 +134,43 @@ EXPORT_SYMBOL(phys_mem_access_prot);
static void __init *early_alloc(unsigned long sz)
{
void *ptr = __va(memblock_alloc(sz, sz));
+ BUG_ON(!ptr);
memset(ptr, 0, sz);
return ptr;
}
-static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
+/*
+ * remap a PMD into pages
+ */
+static void split_pmd(pmd_t *pmd, pte_t *pte)
+{
+ unsigned long pfn = pmd_pfn(*pmd);
+ int i = 0;
+
+ do {
+ /*
+		 * Need to have the least restrictive permissions available;
+		 * permissions will be fixed up later.
+ */
+ set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+ pfn++;
+ } while (pte++, i++, i < PTRS_PER_PTE);
+}
+
+static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
unsigned long end, unsigned long pfn,
- pgprot_t prot)
+ pgprot_t prot,
+ void *(*alloc)(unsigned long size))
{
pte_t *pte;
- if (pmd_none(*pmd)) {
- pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t));
+ if (pmd_none(*pmd) || pmd_bad(*pmd)) {
+ pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
+ if (pmd_sect(*pmd))
+ split_pmd(pmd, pte);
__pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE);
+ flush_tlb_all();
}
- BUG_ON(pmd_bad(*pmd));
pte = pte_offset_kernel(pmd, addr);
do {
@@ -155,29 +179,40 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
} while (pte++, addr += PAGE_SIZE, addr != end);
}
-static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
- unsigned long end, phys_addr_t phys,
- int map_io)
+void split_pud(pud_t *old_pud, pmd_t *pmd)
+{
+ unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT;
+ pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr);
+ int i = 0;
+
+ do {
+ set_pmd(pmd, __pmd(addr | prot));
+ addr += PMD_SIZE;
+ } while (pmd++, i++, i < PTRS_PER_PMD);
+}
+
+static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
+ unsigned long addr, unsigned long end,
+ phys_addr_t phys, pgprot_t prot,
+ void *(*alloc)(unsigned long size))
{
pmd_t *pmd;
unsigned long next;
- pmdval_t prot_sect;
- pgprot_t prot_pte;
-
- if (map_io) {
- prot_sect = PROT_SECT_DEVICE_nGnRE;
- prot_pte = __pgprot(PROT_DEVICE_nGnRE);
- } else {
- prot_sect = PROT_SECT_NORMAL_EXEC;
- prot_pte = PAGE_KERNEL_EXEC;
- }
/*
* Check for initial section mappings in the pgd/pud and remove them.
*/
if (pud_none(*pud) || pud_bad(*pud)) {
- pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t));
- pud_populate(&init_mm, pud, pmd);
+ pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t));
+ if (pud_sect(*pud)) {
+ /*
+			 * need to keep the existing 1G of mappings
+			 * present while splitting them up
+ */
+ split_pud(pud, pmd);
+ }
+ pud_populate(mm, pud, pmd);
+ flush_tlb_all();
}
pmd = pmd_offset(pud, addr);
@@ -186,7 +221,8 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
/* try section mapping first */
if (((addr | next | phys) & ~SECTION_MASK) == 0) {
pmd_t old_pmd =*pmd;
- set_pmd(pmd, __pmd(phys | prot_sect));
+ set_pmd(pmd, __pmd(phys |
+ pgprot_val(mk_sect_prot(prot))));
/*
* Check for previous table entries created during
* boot (__create_page_tables) and flush them.
@@ -195,22 +231,35 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
flush_tlb_all();
} else {
alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
- prot_pte);
+ prot, alloc);
}
phys += next - addr;
} while (pmd++, addr = next, addr != end);
}
-static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
- unsigned long end, phys_addr_t phys,
- int map_io)
+static inline bool use_1G_block(unsigned long addr, unsigned long next,
+ unsigned long phys)
+{
+ if (PAGE_SHIFT != 12)
+ return false;
+
+ if (((addr | next | phys) & ~PUD_MASK) != 0)
+ return false;
+
+ return true;
+}
+
+static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
+ unsigned long addr, unsigned long end,
+ phys_addr_t phys, pgprot_t prot,
+ void *(*alloc)(unsigned long size))
{
pud_t *pud;
unsigned long next;
if (pgd_none(*pgd)) {
- pud = early_alloc(PTRS_PER_PUD * sizeof(pud_t));
- pgd_populate(&init_mm, pgd, pud);
+ pud = alloc(PTRS_PER_PUD * sizeof(pud_t));
+ pgd_populate(mm, pgd, pud);
}
BUG_ON(pgd_bad(*pgd));
@@ -221,10 +270,10 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
/*
* For 4K granule only, attempt to put down a 1GB block
*/
- if (!map_io && (PAGE_SHIFT == 12) &&
- ((addr | next | phys) & ~PUD_MASK) == 0) {
+ if (use_1G_block(addr, next, phys)) {
pud_t old_pud = *pud;
- set_pud(pud, __pud(phys | PROT_SECT_NORMAL_EXEC));
+ set_pud(pud, __pud(phys |
+ pgprot_val(mk_sect_prot(prot))));
/*
* If we have an old value for a pud, it will
@@ -239,7 +288,7 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
flush_tlb_all();
}
} else {
- alloc_init_pmd(pud, addr, next, phys, map_io);
+ alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc);
}
phys += next - addr;
} while (pud++, addr = next, addr != end);
@@ -249,9 +298,10 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
* Create the page directory entries and any necessary page tables for the
* mapping specified by 'md'.
*/
-static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys,
- unsigned long virt, phys_addr_t size,
- int map_io)
+static void __create_mapping(struct mm_struct *mm, pgd_t *pgd,
+ phys_addr_t phys, unsigned long virt,
+ phys_addr_t size, pgprot_t prot,
+ void *(*alloc)(unsigned long size))
{
unsigned long addr, length, end, next;
@@ -261,32 +311,96 @@ static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys,
end = addr + length;
do {
next = pgd_addr_end(addr, end);
- alloc_init_pud(pgd, addr, next, phys, map_io);
+ alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc);
phys += next - addr;
} while (pgd++, addr = next, addr != end);
}
-static void __init create_mapping(phys_addr_t phys, unsigned long virt,
- phys_addr_t size)
+static void *late_alloc(unsigned long size)
+{
+ void *ptr;
+
+ BUG_ON(size > PAGE_SIZE);
+ ptr = (void *)__get_free_page(PGALLOC_GFP);
+ BUG_ON(!ptr);
+ return ptr;
+}
+
+static void __ref create_mapping(phys_addr_t phys, unsigned long virt,
+ phys_addr_t size, pgprot_t prot)
{
if (virt < VMALLOC_START) {
pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
&phys, virt);
return;
}
- __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt, size, 0);
+ __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt,
+ size, prot, early_alloc);
+}
+
+void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
+ unsigned long virt, phys_addr_t size,
+ pgprot_t prot)
+{
+ __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot,
+ early_alloc);
}
-void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io)
+static void create_mapping_late(phys_addr_t phys, unsigned long virt,
+ phys_addr_t size, pgprot_t prot)
{
- if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) {
- pr_warn("BUG: not creating id mapping for %pa\n", &addr);
+ if (virt < VMALLOC_START) {
+ pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
+ &phys, virt);
return;
}
- __create_mapping(&idmap_pg_dir[pgd_index(addr)],
- addr, addr, size, map_io);
+
+ return __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK),
+ phys, virt, size, prot, late_alloc);
}
+#ifdef CONFIG_DEBUG_RODATA
+static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
+{
+ /*
+ * Set up the executable regions using the existing section mappings
+	 * for now. This will be made more fine-grained later, once all
+	 * memory is mapped.
+ */
+ unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
+ unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
+
+ if (end < kernel_x_start) {
+ create_mapping(start, __phys_to_virt(start),
+ end - start, PAGE_KERNEL);
+ } else if (start >= kernel_x_end) {
+ create_mapping(start, __phys_to_virt(start),
+ end - start, PAGE_KERNEL);
+ } else {
+ if (start < kernel_x_start)
+ create_mapping(start, __phys_to_virt(start),
+ kernel_x_start - start,
+ PAGE_KERNEL);
+ create_mapping(kernel_x_start,
+ __phys_to_virt(kernel_x_start),
+ kernel_x_end - kernel_x_start,
+ PAGE_KERNEL_EXEC);
+ if (kernel_x_end < end)
+ create_mapping(kernel_x_end,
+ __phys_to_virt(kernel_x_end),
+ end - kernel_x_end,
+ PAGE_KERNEL);
+ }
+
+}
+#else
+static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
+{
+ create_mapping(start, __phys_to_virt(start), end - start,
+ PAGE_KERNEL_EXEC);
+}
+#endif
+
static void __init map_mem(void)
{
struct memblock_region *reg;
@@ -331,14 +445,53 @@ static void __init map_mem(void)
memblock_set_current_limit(limit);
}
#endif
-
- create_mapping(start, __phys_to_virt(start), end - start);
+ __map_memblock(start, end);
}
/* Limit no longer required. */
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
}
+void __init fixup_executable(void)
+{
+#ifdef CONFIG_DEBUG_RODATA
+	/* Now that memory is fully mapped, make the start/end more fine-grained */
+ if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) {
+ unsigned long aligned_start = round_down(__pa(_stext),
+ SECTION_SIZE);
+
+ create_mapping(aligned_start, __phys_to_virt(aligned_start),
+ __pa(_stext) - aligned_start,
+ PAGE_KERNEL);
+ }
+
+ if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) {
+ unsigned long aligned_end = round_up(__pa(__init_end),
+ SECTION_SIZE);
+ create_mapping(__pa(__init_end), (unsigned long)__init_end,
+ aligned_end - __pa(__init_end),
+ PAGE_KERNEL);
+ }
+#endif
+}
+
+#ifdef CONFIG_DEBUG_RODATA
+void mark_rodata_ro(void)
+{
+ create_mapping_late(__pa(_stext), (unsigned long)_stext,
+ (unsigned long)_etext - (unsigned long)_stext,
+ PAGE_KERNEL_EXEC | PTE_RDONLY);
+
+}
+#endif
+
+void fixup_init(void)
+{
+ create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin,
+ (unsigned long)__init_end - (unsigned long)__init_begin,
+ PAGE_KERNEL);
+}
+
/*
* paging_init() sets up the page tables, initialises the zone memory
* maps and sets up the zero page.
@@ -348,6 +501,7 @@ void __init paging_init(void)
void *zero_page;
map_mem();
+ fixup_executable();
/*
* Finally flush the caches and tlb to ensure that we're in a
@@ -463,3 +617,96 @@ void vmemmap_free(unsigned long start, unsigned long end)
{
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
+#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
+#endif
+#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
+#endif
+
+static inline pud_t * fixmap_pud(unsigned long addr)
+{
+ pgd_t *pgd = pgd_offset_k(addr);
+
+ BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
+
+ return pud_offset(pgd, addr);
+}
+
+static inline pmd_t * fixmap_pmd(unsigned long addr)
+{
+ pud_t *pud = fixmap_pud(addr);
+
+ BUG_ON(pud_none(*pud) || pud_bad(*pud));
+
+ return pmd_offset(pud, addr);
+}
+
+static inline pte_t * fixmap_pte(unsigned long addr)
+{
+ pmd_t *pmd = fixmap_pmd(addr);
+
+ BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
+
+ return pte_offset_kernel(pmd, addr);
+}
+
+void __init early_fixmap_init(void)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ unsigned long addr = FIXADDR_START;
+
+ pgd = pgd_offset_k(addr);
+ pgd_populate(&init_mm, pgd, bm_pud);
+ pud = pud_offset(pgd, addr);
+ pud_populate(&init_mm, pud, bm_pmd);
+ pmd = pmd_offset(pud, addr);
+ pmd_populate_kernel(&init_mm, pmd, bm_pte);
+
+ /*
+ * The boot-ioremap range spans multiple pmds, for which
+	 * we are not prepared:
+ */
+ BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+ != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
+
+ if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
+ || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
+ WARN_ON(1);
+ pr_warn("pmd %p != %p, %p\n",
+ pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
+ fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
+ pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+ fix_to_virt(FIX_BTMAP_BEGIN));
+ pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n",
+ fix_to_virt(FIX_BTMAP_END));
+
+ pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
+ pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN);
+ }
+}
+
+void __set_fixmap(enum fixed_addresses idx,
+ phys_addr_t phys, pgprot_t flags)
+{
+ unsigned long addr = __fix_to_virt(idx);
+ pte_t *pte;
+
+ if (idx >= __end_of_fixed_addresses) {
+ BUG();
+ return;
+ }
+
+ pte = fixmap_pte(addr);
+
+ if (pgprot_val(flags)) {
+ set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
+ } else {
+ pte_clear(&init_mm, addr, pte);
+ flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
+ }
+}
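
Under CONFIG_DEBUG_RODATA, __map_memblock() above maps each memblock region non-executable except for the window covering the kernel text, splitting a region into up to three mappings. The interval logic reduces to this standalone sketch (names and the map() stub are ours):

    #include <stdio.h>

    static void map(unsigned long s, unsigned long e, const char *prot)
    {
            printf("map %#lx-%#lx %s\n", s, e, prot);
    }

    /* Map [start, end) read-write except where it overlaps the executable
     * window [x_start, x_end), which is mapped in full, as in __map_memblock(). */
    static void map_memblock(unsigned long start, unsigned long end,
                             unsigned long x_start, unsigned long x_end)
    {
            if (end < x_start || start >= x_end) {
                    map(start, end, "RW");
                    return;
            }
            if (start < x_start)
                    map(start, x_start, "RW");
            map(x_start, x_end, "RWX");
            if (x_end < end)
                    map(x_end, end, "RW");
    }

    int main(void)
    {
            map_memblock(0x40000000UL, 0x80000000UL, 0x40080000UL, 0x40800000UL);
            return 0;
    }
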
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 87bbc9c1e681..c753f662d7da 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -631,6 +631,14 @@ config CRYPTO_SHA512_SPARC64
SHA-512 secure hash standard (DFIPS 180-2) implemented
using sparc64 crypto instructions, when available.
+config CRYPTO_SHA256_ARM
+ tristate "SHA-224/256 digest algorithm (ARM-asm and NEON)"
+ select CRYPTO_HASH
+ help
+ SHA-256 secure hash standard (DFIPS 180-2) implemented
+ using optimized ARM assembler and NEON, when available.
+
+
config CRYPTO_SHA512_ARM_NEON
tristate "SHA384 and SHA512 digest algorithm (ARM NEON)"
depends on ARM && KERNEL_MODE_NEON
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 35286fe52823..29f93bf718e6 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -93,6 +93,12 @@ static void dmi_table(u8 *buf, int len, int num,
const struct dmi_header *dm = (const struct dmi_header *)data;
/*
+ * 7.45 End-of-Table (Type 127) [SMBIOS reference spec v3.0.0]
+ */
+ if (dm->type == DMI_ENTRY_END_OF_TABLE)
+ break;
+
+ /*
* We want to know the total length (formatted area and
* strings) before decoding to make sure we won't run off the
* table in dmi_decode or dmi_string
@@ -107,7 +113,7 @@ static void dmi_table(u8 *buf, int len, int num,
}
}
-static u32 dmi_base;
+static phys_addr_t dmi_base;
static u16 dmi_len;
static u16 dmi_num;
@@ -467,7 +473,7 @@ static int __init dmi_present(const u8 *buf)
if (memcmp(buf, "_SM_", 4) == 0 &&
buf[5] < 32 && dmi_checksum(buf, buf[5])) {
- smbios_ver = (buf[6] << 8) + buf[7];
+ smbios_ver = get_unaligned_be16(buf + 6);
/* Some BIOS report weird SMBIOS version, fix that up */
switch (smbios_ver) {
@@ -493,10 +499,9 @@ static int __init dmi_present(const u8 *buf)
dmi_ver = smbios_ver;
else
dmi_ver = (buf[14] & 0xF0) << 4 | (buf[14] & 0x0F);
- dmi_num = (buf[13] << 8) | buf[12];
- dmi_len = (buf[7] << 8) | buf[6];
- dmi_base = (buf[11] << 24) | (buf[10] << 16) |
- (buf[9] << 8) | buf[8];
+ dmi_num = get_unaligned_le16(buf + 12);
+ dmi_len = get_unaligned_le16(buf + 6);
+ dmi_base = get_unaligned_le32(buf + 8);
if (dmi_walk_early(dmi_decode) == 0) {
if (smbios_ver) {
@@ -515,12 +520,72 @@ static int __init dmi_present(const u8 *buf)
return 1;
}
+/*
+ * Check for the SMBIOS 3.0 64-bit entry point signature. Unlike the legacy
+ * 32-bit entry point, there is no embedded DMI header (_DMI_) in here.
+ */
+static int __init dmi_smbios3_present(const u8 *buf)
+{
+ if (memcmp(buf, "_SM3_", 5) == 0 &&
+ buf[6] < 32 && dmi_checksum(buf, buf[6])) {
+ dmi_ver = get_unaligned_be16(buf + 7);
+ dmi_len = get_unaligned_le32(buf + 12);
+ dmi_base = get_unaligned_le64(buf + 16);
+
+ /*
+ * The 64-bit SMBIOS 3.0 entry point no longer has a field
+ * containing the number of structures present in the table.
+ * Instead, it defines the table size as a maximum size, and
+ * relies on the end-of-table structure type (#127) to be used
+ * to signal the end of the table.
+ * So let's define dmi_num as an upper bound as well: each
+ * structure has a 4 byte header, so dmi_len / 4 is an upper
+ * bound for the number of structures in the table.
+ */
+ dmi_num = dmi_len / 4;
+
+ if (dmi_walk_early(dmi_decode) == 0) {
+ pr_info("SMBIOS %d.%d present.\n",
+ dmi_ver >> 8, dmi_ver & 0xFF);
+ dmi_format_ids(dmi_ids_string, sizeof(dmi_ids_string));
+ pr_debug("DMI: %s\n", dmi_ids_string);
+ return 0;
+ }
+ }
+ return 1;
+}
+
void __init dmi_scan_machine(void)
{
char __iomem *p, *q;
char buf[32];
if (efi_enabled(EFI_CONFIG_TABLES)) {
+ /*
+ * According to the DMTF SMBIOS reference spec v3.0.0, it is
+ * allowed to define both the 64-bit entry point (smbios3) and
+ * the 32-bit entry point (smbios), in which case they should
+ * either both point to the same SMBIOS structure table, or the
+ * table pointed to by the 64-bit entry point should contain a
+ * superset of the table contents pointed to by the 32-bit entry
+		 * point (section 5.2).
+ * This implies that the 64-bit entry point should have
+ * precedence if it is defined and supported by the OS. If we
+ * have the 64-bit entry point, but fail to decode it, fall
+		 * back to the legacy one (if available).
+ */
+ if (efi.smbios3 != EFI_INVALID_TABLE_ADDR) {
+ p = dmi_early_remap(efi.smbios3, 32);
+ if (p == NULL)
+ goto error;
+ memcpy_fromio(buf, p, 32);
+ dmi_early_unmap(p, 32);
+
+ if (!dmi_smbios3_present(buf)) {
+ dmi_available = 1;
+ goto out;
+ }
+ }
if (efi.smbios == EFI_INVALID_TABLE_ADDR)
goto error;
@@ -553,7 +618,7 @@ void __init dmi_scan_machine(void)
memset(buf, 0, 16);
for (q = p; q < p + 0x10000; q += 16) {
memcpy_fromio(buf + 16, q, 16);
- if (!dmi_present(buf)) {
+ if (!dmi_smbios3_present(buf) || !dmi_present(buf)) {
dmi_available = 1;
dmi_early_unmap(p, 0x10000);
goto out;
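
dmi_smbios3_present() above reads the 64-bit entry point fields at fixed offsets (big-endian version at byte 7, little-endian table length at byte 12) and bounds the structure count by dmi_len / 4, since every structure carries a 4-byte header. A self-contained sketch of those reads (userspace C, fabricated buffer):

    #include <stdint.h>
    #include <stdio.h>

    static uint16_t get_be16(const uint8_t *p)
    {
            return (uint16_t)((p[0] << 8) | p[1]);
    }

    static uint32_t get_le32(const uint8_t *p)
    {
            return p[0] | (p[1] << 8) | ((uint32_t)p[2] << 16) |
                   ((uint32_t)p[3] << 24);
    }

    int main(void)
    {
            /* Fabricated "_SM3_" entry point: version 3.0, table length 0x500 */
            uint8_t buf[24] = { '_', 'S', 'M', '3', '_', 0, 24, 0x03, 0x00 };

            buf[12] = 0x00;
            buf[13] = 0x05;

            uint16_t ver = get_be16(buf + 7);
            uint32_t len = get_le32(buf + 12);

            printf("SMBIOS %u.%u, at most %u structures\n",
                   ver >> 8, ver & 0xff, len / 4);
            return 0;
    }
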
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 8590099ac148..b7ba9d8ec4b0 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -30,6 +30,7 @@ struct efi __read_mostly efi = {
.acpi = EFI_INVALID_TABLE_ADDR,
.acpi20 = EFI_INVALID_TABLE_ADDR,
.smbios = EFI_INVALID_TABLE_ADDR,
+ .smbios3 = EFI_INVALID_TABLE_ADDR,
.sal_systab = EFI_INVALID_TABLE_ADDR,
.boot_info = EFI_INVALID_TABLE_ADDR,
.hcdp = EFI_INVALID_TABLE_ADDR,
@@ -86,6 +87,8 @@ static ssize_t systab_show(struct kobject *kobj,
str += sprintf(str, "ACPI=0x%lx\n", efi.acpi);
if (efi.smbios != EFI_INVALID_TABLE_ADDR)
str += sprintf(str, "SMBIOS=0x%lx\n", efi.smbios);
+ if (efi.smbios3 != EFI_INVALID_TABLE_ADDR)
+ str += sprintf(str, "SMBIOS3=0x%lx\n", efi.smbios3);
if (efi.hcdp != EFI_INVALID_TABLE_ADDR)
str += sprintf(str, "HCDP=0x%lx\n", efi.hcdp);
if (efi.boot_info != EFI_INVALID_TABLE_ADDR)
@@ -260,6 +263,7 @@ static __initdata efi_config_table_type_t common_tables[] = {
{MPS_TABLE_GUID, "MPS", &efi.mps},
{SAL_SYSTEM_TABLE_GUID, "SALsystab", &efi.sal_systab},
{SMBIOS_TABLE_GUID, "SMBIOS", &efi.smbios},
+ {SMBIOS3_TABLE_GUID, "SMBIOS 3.0", &efi.smbios3},
{UGA_IO_PROTOCOL_GUID, "UGA", &efi.uga},
{NULL_GUID, NULL, NULL},
};
@@ -289,29 +293,15 @@ static __init int match_config_table(efi_guid_t *guid,
return 0;
}
-int __init efi_config_init(efi_config_table_type_t *arch_tables)
+int __init efi_config_parse_tables(void *config_tables, int count, int sz,
+ efi_config_table_type_t *arch_tables)
{
- void *config_tables, *tablep;
- int i, sz;
-
- if (efi_enabled(EFI_64BIT))
- sz = sizeof(efi_config_table_64_t);
- else
- sz = sizeof(efi_config_table_32_t);
-
- /*
- * Let's see what config tables the firmware passed to us.
- */
- config_tables = early_memremap(efi.systab->tables,
- efi.systab->nr_tables * sz);
- if (config_tables == NULL) {
- pr_err("Could not map Configuration table!\n");
- return -ENOMEM;
- }
+ void *tablep;
+ int i;
tablep = config_tables;
pr_info("");
- for (i = 0; i < efi.systab->nr_tables; i++) {
+ for (i = 0; i < count; i++) {
efi_guid_t guid;
unsigned long table;
@@ -324,8 +314,6 @@ int __init efi_config_init(efi_config_table_type_t *arch_tables)
if (table64 >> 32) {
pr_cont("\n");
pr_err("Table located above 4GB, disabling EFI.\n");
- early_memunmap(config_tables,
- efi.systab->nr_tables * sz);
return -EINVAL;
}
#endif
@@ -340,13 +328,37 @@ int __init efi_config_init(efi_config_table_type_t *arch_tables)
tablep += sz;
}
pr_cont("\n");
- early_memunmap(config_tables, efi.systab->nr_tables * sz);
-
set_bit(EFI_CONFIG_TABLES, &efi.flags);
-
return 0;
}
+int __init efi_config_init(efi_config_table_type_t *arch_tables)
+{
+ void *config_tables;
+ int sz, ret;
+
+ if (efi_enabled(EFI_64BIT))
+ sz = sizeof(efi_config_table_64_t);
+ else
+ sz = sizeof(efi_config_table_32_t);
+
+ /*
+ * Let's see what config tables the firmware passed to us.
+ */
+ config_tables = early_memremap(efi.systab->tables,
+ efi.systab->nr_tables * sz);
+ if (config_tables == NULL) {
+ pr_err("Could not map Configuration table!\n");
+ return -ENOMEM;
+ }
+
+ ret = efi_config_parse_tables(config_tables, efi.systab->nr_tables, sz,
+ arch_tables);
+
+ early_memunmap(config_tables, efi.systab->nr_tables * sz);
+ return ret;
+}
+
#ifdef CONFIG_EFI_VARS_MODULE
static int __init efi_load_efivars(void)
{
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index 75ee05964cbc..e2432b39b6df 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -247,9 +247,18 @@ unsigned long __init efi_entry(void *handle, efi_system_table_t *sys_table,
goto fail_free_cmdline;
}
}
- if (!fdt_addr)
+
+ if (fdt_addr) {
+ pr_efi(sys_table, "Using DTB from command line\n");
+ } else {
/* Look for a device tree configuration table entry. */
fdt_addr = (uintptr_t)get_fdt(sys_table);
+ if (fdt_addr)
+ pr_efi(sys_table, "Using DTB from configuration table\n");
+ }
+
+ if (!fdt_addr)
+ pr_efi(sys_table, "Generating empty DTB\n");
status = handle_cmdline_files(sys_table, image, cmdline_ptr,
"initrd=", dram_base + SZ_512M,
@@ -286,3 +295,62 @@ fail_free_image:
fail:
return EFI_ERROR;
}
+
+/*
+ * This is the base address at which to start allocating virtual memory ranges
+ * for UEFI Runtime Services. This is in the low TTBR0 range so that we can use
+ * any allocation we choose, and eliminate the risk of a conflict after kexec.
+ * The value chosen is the largest non-zero power of 2 suitable for this purpose
+ * both on 32-bit and 64-bit ARM CPUs, to maximize the likelihood that it can
+ * be mapped efficiently.
+ */
+#define EFI_RT_VIRTUAL_BASE 0x40000000
+
+/*
+ * efi_get_virtmap() - create a virtual mapping for the EFI memory map
+ *
+ * This function populates the virt_addr fields of all memory region descriptors
+ * in @memory_map whose EFI_MEMORY_RUNTIME attribute is set. Those descriptors
+ * are also copied to @runtime_map, and their total count is returned in @count.
+ */
+void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size,
+ unsigned long desc_size, efi_memory_desc_t *runtime_map,
+ int *count)
+{
+ u64 efi_virt_base = EFI_RT_VIRTUAL_BASE;
+ efi_memory_desc_t *out = runtime_map;
+ int l;
+
+ for (l = 0; l < map_size; l += desc_size) {
+ efi_memory_desc_t *in = (void *)memory_map + l;
+ u64 paddr, size;
+
+ if (!(in->attribute & EFI_MEMORY_RUNTIME))
+ continue;
+
+ /*
+ * Make the mapping compatible with 64k pages: this allows
+ * a 4k page size kernel to kexec a 64k page size kernel and
+ * vice versa.
+ */
+ paddr = round_down(in->phys_addr, SZ_64K);
+ size = round_up(in->num_pages * EFI_PAGE_SIZE +
+ in->phys_addr - paddr, SZ_64K);
+
+ /*
+ * Avoid wasting memory on PTEs by choosing a virtual base that
+ * is compatible with section mappings if this region has the
+ * appropriate size and physical alignment. (Sections are 2 MB
+ * on 4k granule kernels)
+ */
+ if (IS_ALIGNED(in->phys_addr, SZ_2M) && size >= SZ_2M)
+ efi_virt_base = round_up(efi_virt_base, SZ_2M);
+
+ in->virt_addr = efi_virt_base + in->phys_addr - paddr;
+ efi_virt_base += size;
+
+ memcpy(out, in, desc_size);
+ out = (void *)out + desc_size;
+ ++*count;
+ }
+}
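
The round_down()/round_up() pair in efi_get_virtmap() widens each runtime region to 64 KB boundaries so that 4 KB- and 64 KB-page kernels can kexec one another over the same map. A worked example of that arithmetic (standalone C, constants as assumed above):

    #include <stdint.h>
    #include <stdio.h>

    #define EFI_PAGE_SIZE   0x1000ULL   /* EFI pages are always 4 KB */
    #define SZ_64K          0x10000ULL

    static uint64_t rdown(uint64_t x, uint64_t a) { return x & ~(a - 1); }
    static uint64_t rup(uint64_t x, uint64_t a) { return (x + a - 1) & ~(a - 1); }

    int main(void)
    {
            /* A runtime region of 3 EFI pages at an unaligned address */
            uint64_t phys = 0x4000A000ULL, pages = 3;

            uint64_t paddr = rdown(phys, SZ_64K);
            uint64_t size = rup(pages * EFI_PAGE_SIZE + phys - paddr, SZ_64K);

            /* paddr = 0x40000000, size = 0x10000: one 64 KB granule */
            printf("paddr=%#llx size=%#llx\n",
                   (unsigned long long)paddr, (unsigned long long)size);
            return 0;
    }
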
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 5186eb01945a..77ef72775cbf 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -32,6 +32,15 @@
static unsigned long __chunk_size = EFI_READ_CHUNK_SIZE;
+/*
+ * Allow the platform to override the allocation granularity: this allows
+ * systems that have the capability to run with a larger page size to deal
+ * with the allocations for initrd and fdt more efficiently.
+ */
+#ifndef EFI_ALLOC_ALIGN
+#define EFI_ALLOC_ALIGN EFI_PAGE_SIZE
+#endif
+
struct file_info {
efi_file_handle_t *handle;
u64 size;
@@ -150,10 +159,10 @@ efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg,
* a specific address. We are doing page-based allocations,
* so we must be aligned to a page.
*/
- if (align < EFI_PAGE_SIZE)
- align = EFI_PAGE_SIZE;
+ if (align < EFI_ALLOC_ALIGN)
+ align = EFI_ALLOC_ALIGN;
- nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+ nr_pages = round_up(size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE;
again:
for (i = 0; i < map_size / desc_size; i++) {
efi_memory_desc_t *desc;
@@ -235,10 +244,10 @@ efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg,
* a specific address. We are doing page-based allocations,
* so we must be aligned to a page.
*/
- if (align < EFI_PAGE_SIZE)
- align = EFI_PAGE_SIZE;
+ if (align < EFI_ALLOC_ALIGN)
+ align = EFI_ALLOC_ALIGN;
- nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+ nr_pages = round_up(size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE;
for (i = 0; i < map_size / desc_size; i++) {
efi_memory_desc_t *desc;
unsigned long m = (unsigned long)map;
@@ -292,7 +301,7 @@ void efi_free(efi_system_table_t *sys_table_arg, unsigned long size,
if (!size)
return;
- nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+ nr_pages = round_up(size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE;
efi_call_early(free_pages, addr, nr_pages);
}
@@ -561,7 +570,7 @@ efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg,
* to the preferred address. If that fails, allocate as low
* as possible while respecting the required alignment.
*/
- nr_pages = round_up(alloc_size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+ nr_pages = round_up(alloc_size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE;
status = efi_call_early(allocate_pages,
EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA,
nr_pages, &efi_addr);
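
With EFI_ALLOC_ALIGN overridden to 64 KB (as a 64 KB-page-capable platform would do), the request size is rounded to the larger granule but still counted in 4 KB EFI pages, so allocations come out in multiples of 16 pages. A short sketch of the nr_pages computation (constants assumed):

    #include <stdio.h>

    #define EFI_PAGE_SIZE   4096UL
    #define EFI_ALLOC_ALIGN 65536UL /* hypothetical platform override */

    int main(void)
    {
            unsigned long size = 5000; /* bytes requested */

            /* round_up(size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE, as in the stub */
            unsigned long nr_pages =
                    ((size + EFI_ALLOC_ALIGN - 1) & ~(EFI_ALLOC_ALIGN - 1)) /
                    EFI_PAGE_SIZE;

            printf("%lu pages\n", nr_pages); /* 16: one full 64 KB granule */
            return 0;
    }
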
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 304ab295ca1a..2be10984a67a 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -39,4 +39,8 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
void *get_fdt(efi_system_table_t *sys_table);
+void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size,
+ unsigned long desc_size, efi_memory_desc_t *runtime_map,
+ int *count);
+
#endif
diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
index c846a9608cbd..91da56c4fd54 100644
--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -14,6 +14,8 @@
#include <linux/libfdt.h>
#include <asm/efi.h>
+#include "efistub.h"
+
efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
unsigned long orig_fdt_size,
void *fdt, int new_fdt_size, char *cmdline_ptr,
@@ -193,9 +195,26 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
unsigned long map_size, desc_size;
u32 desc_ver;
unsigned long mmap_key;
- efi_memory_desc_t *memory_map;
+ efi_memory_desc_t *memory_map, *runtime_map;
unsigned long new_fdt_size;
efi_status_t status;
+ int runtime_entry_count = 0;
+
+ /*
+ * Get a copy of the current memory map that we will use to prepare
+ * the input for SetVirtualAddressMap(). We don't have to worry about
+ * subsequent allocations adding entries, since they could not affect
+ * the number of EFI_MEMORY_RUNTIME regions.
+ */
+ status = efi_get_memory_map(sys_table, &runtime_map, &map_size,
+ &desc_size, &desc_ver, &mmap_key);
+ if (status != EFI_SUCCESS) {
+ pr_efi_err(sys_table, "Unable to retrieve UEFI memory map.\n");
+ return status;
+ }
+
+ pr_efi(sys_table,
+ "Exiting boot services and installing virtual address map...\n");
/*
* Estimate size of new FDT, and allocate memory for it. We
@@ -248,12 +267,48 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
}
}
+ /*
+ * Update the memory map with virtual addresses. The function will also
+ * populate @runtime_map with copies of just the EFI_MEMORY_RUNTIME
+ * entries so that we can pass it straight into SetVirtualAddressMap()
+ */
+ efi_get_virtmap(memory_map, map_size, desc_size, runtime_map,
+ &runtime_entry_count);
+
/* Now we are ready to exit_boot_services.*/
status = sys_table->boottime->exit_boot_services(handle, mmap_key);
+ if (status == EFI_SUCCESS) {
+ efi_set_virtual_address_map_t *svam;
- if (status == EFI_SUCCESS)
- return status;
+ /* Install the new virtual address map */
+ svam = sys_table->runtime->set_virtual_address_map;
+ status = svam(runtime_entry_count * desc_size, desc_size,
+ desc_ver, runtime_map);
+
+ /*
+ * We are beyond the point of no return here, so if the call to
+ * SetVirtualAddressMap() failed, we need to signal that to the
+ * incoming kernel but proceed normally otherwise.
+ */
+ if (status != EFI_SUCCESS) {
+ int l;
+
+ /*
+ * Set the virtual address field of all
+ * EFI_MEMORY_RUNTIME entries to 0. This will signal
+ * the incoming kernel that no virtual translation has
+ * been installed.
+ */
+ for (l = 0; l < map_size; l += desc_size) {
+ efi_memory_desc_t *p = (void *)memory_map + l;
+
+ if (p->attribute & EFI_MEMORY_RUNTIME)
+ p->virt_addr = 0;
+ }
+ }
+ return EFI_SUCCESS;
+ }
pr_efi_err(sys_table, "Exit boot services failed.\n");
@@ -264,6 +319,7 @@ fail_free_new_fdt:
efi_free(sys_table, new_fdt_size, *new_fdt_addr);
fail:
+ sys_table->boottime->free_pool(runtime_map);
return EFI_LOAD_ERROR;
}
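
Past exit_boot_services() there is no way back, so a failed SetVirtualAddressMap() is signalled to the kernel by zeroing virt_addr in every EFI_MEMORY_RUNTIME descriptor, as the hunk above does. A consumer could detect that as in this sketch (trimmed-down descriptor type; illustrative only, not the real efi_memory_desc_t):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define EFI_MEMORY_RUNTIME (1ULL << 63)

    struct desc {
            uint64_t attribute;
            uint64_t virt_addr;
    };

    /* Return 1 if a virtual translation was installed for all runtime
     * regions, 0 if the stub zeroed them after a mapping failure. */
    static int virtmap_installed(const struct desc *map, size_t n)
    {
            for (size_t i = 0; i < n; i++)
                    if ((map[i].attribute & EFI_MEMORY_RUNTIME) &&
                        map[i].virt_addr == 0)
                            return 0;
            return 1;
    }

    int main(void)
    {
            struct desc map[] = {
                    { EFI_MEMORY_RUNTIME, 0x40000000ULL },
                    { 0, 0 },
            };
            printf("%d\n", virtmap_installed(map, 2)); /* prints 1 */
            return 0;
    }
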
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 7a7bab8947ae..32a6a8130557 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -18,20 +18,36 @@
#include <linux/module.h>
#include <linux/device-mapper.h>
+#include <linux/reboot.h>
#include <crypto/hash.h>
#define DM_MSG_PREFIX "verity"
+#define DM_VERITY_ENV_LENGTH 42
+#define DM_VERITY_ENV_VAR_NAME "VERITY_ERR_BLOCK_NR"
+
#define DM_VERITY_IO_VEC_INLINE 16
#define DM_VERITY_MEMPOOL_SIZE 4
#define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144
#define DM_VERITY_MAX_LEVELS 63
+#define DM_VERITY_MAX_CORRUPTED_ERRS 100
static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR);
+enum verity_mode {
+ DM_VERITY_MODE_EIO = 0,
+ DM_VERITY_MODE_LOGGING = 1,
+ DM_VERITY_MODE_RESTART = 2
+};
+
+enum verity_block_type {
+ DM_VERITY_BLOCK_TYPE_DATA,
+ DM_VERITY_BLOCK_TYPE_METADATA
+};
+
struct dm_verity {
struct dm_dev *data_dev;
struct dm_dev *hash_dev;
@@ -54,6 +70,8 @@ struct dm_verity {
unsigned digest_size; /* digest size for the current hash algorithm */
unsigned shash_descsize;/* the size of temporary space for crypto */
int hash_failed; /* set to 1 if hash of any block failed */
+ enum verity_mode mode; /* mode for handling verification errors */
+ unsigned corrupted_errs;/* Number of errors for corrupted blocks */
mempool_t *vec_mempool; /* mempool of bio vector */
@@ -175,6 +193,54 @@ static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level,
}
/*
+ * Handle verification errors.
+ */
+static int verity_handle_err(struct dm_verity *v, enum verity_block_type type,
+ unsigned long long block)
+{
+ char verity_env[DM_VERITY_ENV_LENGTH];
+ char *envp[] = { verity_env, NULL };
+ const char *type_str = "";
+ struct mapped_device *md = dm_table_get_md(v->ti->table);
+
+ if (v->corrupted_errs >= DM_VERITY_MAX_CORRUPTED_ERRS)
+ goto out;
+
+ ++v->corrupted_errs;
+
+ switch (type) {
+ case DM_VERITY_BLOCK_TYPE_DATA:
+ type_str = "data";
+ break;
+ case DM_VERITY_BLOCK_TYPE_METADATA:
+ type_str = "metadata";
+ break;
+ default:
+ BUG();
+ }
+
+ DMERR_LIMIT("%s: %s block %llu is corrupted", v->data_dev->name,
+ type_str, block);
+
+ if (v->corrupted_errs == DM_VERITY_MAX_CORRUPTED_ERRS)
+ DMERR("%s: reached maximum errors", v->data_dev->name);
+
+ snprintf(verity_env, DM_VERITY_ENV_LENGTH, "%s=%d,%llu",
+ DM_VERITY_ENV_VAR_NAME, type, block);
+
+ kobject_uevent_env(&disk_to_dev(dm_disk(md))->kobj, KOBJ_CHANGE, envp);
+
+out:
+ if (v->mode == DM_VERITY_MODE_LOGGING)
+ return 0;
+
+ if (v->mode == DM_VERITY_MODE_RESTART)
+ kernel_restart("dm-verity device corrupted");
+
+ return 1;
+}
+
+/*
* Verify hash of a metadata block pertaining to the specified data block
* ("block" argument) at a specified level ("level" argument).
*
@@ -251,11 +317,13 @@ static int verity_verify_level(struct dm_verity_io *io, sector_t block,
goto release_ret_r;
}
if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) {
- DMERR_LIMIT("metadata block %llu is corrupted",
- (unsigned long long)hash_block);
v->hash_failed = 1;
- r = -EIO;
- goto release_ret_r;
+
+ if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_METADATA,
+ hash_block)) {
+ r = -EIO;
+ goto release_ret_r;
+ }
} else
aux->hash_verified = 1;
}
@@ -367,10 +435,11 @@ test_block_hash:
return r;
}
if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) {
- DMERR_LIMIT("data block %llu is corrupted",
- (unsigned long long)(io->block + b));
v->hash_failed = 1;
- return -EIO;
+
+ if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA,
+ io->block + b))
+ return -EIO;
}
}
@@ -668,14 +737,14 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto bad;
}
- if (argc != 10) {
- ti->error = "Invalid argument count: exactly 10 arguments required";
+ if (argc < 10 || argc > 11) {
+ ti->error = "Invalid argument count: 10-11 arguments required";
r = -EINVAL;
goto bad;
}
- if (sscanf(argv[0], "%u%c", &num, &dummy) != 1 ||
- num > 1) {
+ if (sscanf(argv[0], "%d%c", &num, &dummy) != 1 ||
+ num < 0 || num > 1) {
ti->error = "Invalid version";
r = -EINVAL;
goto bad;
@@ -790,6 +859,17 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
}
}
+ if (argc > 10) {
+ if (sscanf(argv[10], "%d%c", &num, &dummy) != 1 ||
+ num < DM_VERITY_MODE_EIO ||
+ num > DM_VERITY_MODE_RESTART) {
+ ti->error = "Invalid mode";
+ r = -EINVAL;
+ goto bad;
+ }
+ v->mode = num;
+ }
+
v->hash_per_block_bits =
__fls((1 << v->hash_dev_block_bits) / v->digest_size);
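
The constructor change above turns argv[10] into an optional error-handling mode: 0 returns -EIO (the old behaviour), 1 only logs, 2 restarts the system. The "%d%c" sscanf idiom it uses rejects trailing junk, because a second successful conversion means extra characters followed the integer. A standalone sketch of that parse (hypothetical wrapper, not the dm code itself):

    #include <stdio.h>

    enum verity_mode { MODE_EIO = 0, MODE_LOGGING = 1, MODE_RESTART = 2 };

    /* Parse an optional mode argument the way verity_ctr() does. */
    static int parse_mode(const char *arg, enum verity_mode *mode)
    {
            int num;
            char dummy;

            if (sscanf(arg, "%d%c", &num, &dummy) != 1 ||
                num < MODE_EIO || num > MODE_RESTART)
                    return -1;
            *mode = (enum verity_mode)num;
            return 0;
    }

    int main(void)
    {
            enum verity_mode m;

            printf("%d\n", parse_mode("2", &m));  /* 0: accepted, MODE_RESTART */
            printf("%d\n", parse_mode("2x", &m)); /* -1: trailing junk */
            return 0;
    }
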
diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c
index ca92b605f6a3..7e41ccf0c10e 100644
--- a/drivers/staging/android/lowmemorykiller.c
+++ b/drivers/staging/android/lowmemorykiller.c
@@ -41,6 +41,9 @@
#include <linux/rcupdate.h>
#include <linux/notifier.h>
+#define CREATE_TRACE_POINTS
+#include "trace/lowmemorykiller.h"
+
static uint32_t lowmem_debug_level = 1;
static short lowmem_adj[6] = {
0,
@@ -156,6 +159,10 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
p->comm, p->pid, oom_score_adj, tasksize);
}
if (selected) {
+ long cache_size = other_file * (long)(PAGE_SIZE / 1024);
+ long cache_limit = minfree * (long)(PAGE_SIZE / 1024);
+ long free = other_free * (long)(PAGE_SIZE / 1024);
+ trace_lowmemory_kill(selected, cache_size, cache_limit, free);
lowmem_print(1, "Killing '%s' (%d), adj %hd,\n" \
" to free %ldkB on behalf of '%s' (%d) because\n" \
" cache %ldkB is below limit %ldkB for oom_score_adj %hd\n" \
@@ -164,10 +171,9 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
selected_oom_score_adj,
selected_tasksize * (long)(PAGE_SIZE / 1024),
current->comm, current->pid,
- other_file * (long)(PAGE_SIZE / 1024),
- minfree * (long)(PAGE_SIZE / 1024),
+ cache_size, cache_limit,
min_score_adj,
- other_free * (long)(PAGE_SIZE / 1024));
+ free);
lowmem_deathpending_timeout = jiffies + HZ;
set_tsk_thread_flag(selected, TIF_MEMDIE);
send_sig(SIGKILL, selected, 0);
diff --git a/drivers/staging/android/trace/lowmemorykiller.h b/drivers/staging/android/trace/lowmemorykiller.h
new file mode 100644
index 000000000000..f43d3fae75ee
--- /dev/null
+++ b/drivers/staging/android/trace/lowmemorykiller.h
@@ -0,0 +1,41 @@
+#undef TRACE_SYSTEM
+#define TRACE_INCLUDE_PATH ../../drivers/staging/android/trace
+#define TRACE_SYSTEM lowmemorykiller
+
+#if !defined(_TRACE_LOWMEMORYKILLER_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_LOWMEMORYKILLER_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(lowmemory_kill,
+ TP_PROTO(struct task_struct *killed_task, long cache_size, \
+ long cache_limit, long free),
+
+ TP_ARGS(killed_task, cache_size, cache_limit, free),
+
+ TP_STRUCT__entry(
+ __array(char, comm, TASK_COMM_LEN)
+ __field(pid_t, pid)
+ __field(long, pagecache_size)
+ __field(long, pagecache_limit)
+ __field(long, free)
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, killed_task->comm, TASK_COMM_LEN);
+ __entry->pid = killed_task->pid;
+ __entry->pagecache_size = cache_size;
+ __entry->pagecache_limit = cache_limit;
+ __entry->free = free;
+ ),
+
+	TP_printk("%s (%d), page cache %ldkB (limit %ldkB), free %ldkB",
+ __entry->comm, __entry->pid, __entry->pagecache_size,
+ __entry->pagecache_limit, __entry->free)
+);
+
+
+#endif /* if !defined(_TRACE_LOWMEMORYKILLER_H) || defined(TRACE_HEADER_MULTI_READ) */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c
index 59beaf95e14f..5442febcd7af 100644
--- a/drivers/usb/gadget/udc/udc-core.c
+++ b/drivers/usb/gadget/udc/udc-core.c
@@ -401,15 +401,7 @@ static int udc_bind_to_driver(struct usb_udc *udc, struct usb_gadget_driver *dri
driver->unbind(udc->gadget);
goto err1;
}
- /*
- * HACK: The Android gadget driver disconnects the gadget
- * on bind and expects the gadget to stay disconnected until
- * it calls usb_gadget_connect when userspace is ready. Remove
- * the call to usb_gadget_connect bellow to avoid enabling the
- * pullup before userspace is ready.
- *
- * usb_gadget_connect(udc->gadget);
- */
+ usb_gadget_connect(udc->gadget);
kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE);
return 0;
diff --git a/drivers/xen/efi.c b/drivers/xen/efi.c
index 1f850c97482f..f745db270171 100644
--- a/drivers/xen/efi.c
+++ b/drivers/xen/efi.c
@@ -294,6 +294,7 @@ static const struct efi efi_xen __initconst = {
.acpi = EFI_INVALID_TABLE_ADDR,
.acpi20 = EFI_INVALID_TABLE_ADDR,
.smbios = EFI_INVALID_TABLE_ADDR,
+ .smbios3 = EFI_INVALID_TABLE_ADDR,
.sal_systab = EFI_INVALID_TABLE_ADDR,
.boot_info = EFI_INVALID_TABLE_ADDR,
.hcdp = EFI_INVALID_TABLE_ADDR,
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index b6fa8657dcbc..ffb093e72b6c 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -120,6 +120,21 @@ config SQUASHFS_ZLIB
If unsure, say Y.
+config SQUASHFS_LZ4
+ bool "Include support for LZ4 compressed file systems"
+ depends on SQUASHFS
+ select LZ4_DECOMPRESS
+ help
+ Saying Y here includes support for reading Squashfs file systems
+ compressed with LZ4 compression. LZ4 compression is mainly
+ aimed at embedded systems with slower CPUs where the overheads
+ of zlib are too high.
+
+ LZ4 is not the standard compression used in Squashfs and so most
+ file systems will be readable without selecting this option.
+
+ If unsure, say N.
+
config SQUASHFS_LZO
bool "Include support for LZO compressed file systems"
depends on SQUASHFS
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
index 4132520b4ff2..246a6f329d89 100644
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile
@@ -11,6 +11,7 @@ squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o
squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o
+squashfs-$(CONFIG_SQUASHFS_LZ4) += lz4_wrapper.o
squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o
squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o
squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index ac22fe73b0ad..e9034bf6e5ae 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -41,6 +41,12 @@ static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = {
NULL, NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0
};
+#ifndef CONFIG_SQUASHFS_LZ4
+static const struct squashfs_decompressor squashfs_lz4_comp_ops = {
+ NULL, NULL, NULL, NULL, LZ4_COMPRESSION, "lz4", 0
+};
+#endif
+
#ifndef CONFIG_SQUASHFS_LZO
static const struct squashfs_decompressor squashfs_lzo_comp_ops = {
NULL, NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0
@@ -65,6 +71,7 @@ static const struct squashfs_decompressor squashfs_unknown_comp_ops = {
static const struct squashfs_decompressor *decompressor[] = {
&squashfs_zlib_comp_ops,
+ &squashfs_lz4_comp_ops,
&squashfs_lzo_comp_ops,
&squashfs_xz_comp_ops,
&squashfs_lzma_unsupported_comp_ops,
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h
index af0985321808..a25713c031a5 100644
--- a/fs/squashfs/decompressor.h
+++ b/fs/squashfs/decompressor.h
@@ -46,6 +46,10 @@ static inline void *squashfs_comp_opts(struct squashfs_sb_info *msblk,
extern const struct squashfs_decompressor squashfs_xz_comp_ops;
#endif
+#ifdef CONFIG_SQUASHFS_LZ4
+extern const struct squashfs_decompressor squashfs_lz4_comp_ops;
+#endif
+
#ifdef CONFIG_SQUASHFS_LZO
extern const struct squashfs_decompressor squashfs_lzo_comp_ops;
#endif
diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
new file mode 100644
index 000000000000..c31e2bc9c081
--- /dev/null
+++ b/fs/squashfs/lz4_wrapper.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2013, 2014
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/buffer_head.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/lz4.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs.h"
+#include "decompressor.h"
+#include "page_actor.h"
+
+#define LZ4_LEGACY 1
+
+struct lz4_comp_opts {
+ __le32 version;
+ __le32 flags;
+};
+
+struct squashfs_lz4 {
+ void *input;
+ void *output;
+};
+
+
+static void *lz4_comp_opts(struct squashfs_sb_info *msblk,
+ void *buff, int len)
+{
+ struct lz4_comp_opts *comp_opts = buff;
+
+ /* LZ4 compressed filesystems always have compression options */
+ if (comp_opts == NULL || len < sizeof(*comp_opts))
+ return ERR_PTR(-EIO);
+
+ if (le32_to_cpu(comp_opts->version) != LZ4_LEGACY) {
+ /* LZ4 format currently used by the kernel is the 'legacy'
+ * format */
+ ERROR("Unknown LZ4 version\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ return NULL;
+}
+
+
+static void *lz4_init(struct squashfs_sb_info *msblk, void *buff)
+{
+ int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE);
+ struct squashfs_lz4 *stream;
+
+ stream = kzalloc(sizeof(*stream), GFP_KERNEL);
+ if (stream == NULL)
+ goto failed;
+ stream->input = vmalloc(block_size);
+ if (stream->input == NULL)
+ goto failed2;
+ stream->output = vmalloc(block_size);
+ if (stream->output == NULL)
+ goto failed3;
+
+ return stream;
+
+failed3:
+ vfree(stream->input);
+failed2:
+ kfree(stream);
+failed:
+ ERROR("Failed to initialise LZ4 decompressor\n");
+ return ERR_PTR(-ENOMEM);
+}
+
+
+static void lz4_free(void *strm)
+{
+ struct squashfs_lz4 *stream = strm;
+
+ if (stream) {
+ vfree(stream->input);
+ vfree(stream->output);
+ }
+ kfree(stream);
+}
+
+
+static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
+ struct buffer_head **bh, int b, int offset, int length,
+ struct squashfs_page_actor *output)
+{
+ struct squashfs_lz4 *stream = strm;
+ void *buff = stream->input, *data;
+ int avail, i, bytes = length, res;
+ size_t dest_len = output->length;
+
+ for (i = 0; i < b; i++) {
+ avail = min(bytes, msblk->devblksize - offset);
+ memcpy(buff, bh[i]->b_data + offset, avail);
+ buff += avail;
+ bytes -= avail;
+ offset = 0;
+ put_bh(bh[i]);
+ }
+
+ res = lz4_decompress_unknownoutputsize(stream->input, length,
+ stream->output, &dest_len);
+ if (res)
+ return -EIO;
+
+ bytes = dest_len;
+ data = squashfs_first_page(output);
+ buff = stream->output;
+ while (data) {
+ if (bytes <= PAGE_CACHE_SIZE) {
+ memcpy(data, buff, bytes);
+ break;
+ }
+ memcpy(data, buff, PAGE_CACHE_SIZE);
+ buff += PAGE_CACHE_SIZE;
+ bytes -= PAGE_CACHE_SIZE;
+ data = squashfs_next_page(output);
+ }
+ squashfs_finish_page(output);
+
+ return dest_len;
+}
+
+const struct squashfs_decompressor squashfs_lz4_comp_ops = {
+ .init = lz4_init,
+ .comp_opts = lz4_comp_opts,
+ .free = lz4_free,
+ .decompress = lz4_uncompress,
+ .id = LZ4_COMPRESSION,
+ .name = "lz4",
+ .supported = 1
+};
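
After lz4_decompress_unknownoutputsize() fills the linear output buffer, lz4_uncompress() above scatters it into page-sized destinations one PAGE_CACHE_SIZE chunk at a time via the page actor. Stripped of the actor callbacks, the copy loop is just this pattern (userspace stand-in, 4 KB pages assumed):

    #include <stdio.h>
    #include <string.h>

    #define PAGE_SIZE 4096

    /* Copy 'bytes' bytes from a linear buffer into fixed-size pages,
     * final page partially filled, as the squashfs copy loop does. */
    static void scatter_to_pages(const char *src, size_t bytes,
                                 char pages[][PAGE_SIZE], size_t npages)
    {
            for (size_t i = 0; i < npages && bytes; i++) {
                    size_t chunk = bytes < PAGE_SIZE ? bytes : PAGE_SIZE;

                    memcpy(pages[i], src, chunk);
                    src += chunk;
                    bytes -= chunk;
            }
    }

    int main(void)
    {
            static char out[2 * PAGE_SIZE + 100];
            static char pages[3][PAGE_SIZE];

            memset(out, 'x', sizeof(out));
            scatter_to_pages(out, sizeof(out), pages, 3);
            printf("third page holds %zu bytes of payload\n",
                   sizeof(out) - 2 * PAGE_SIZE); /* 100 */
            return 0;
    }
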
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index 4b2beda49498..506f4ba5b983 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -240,6 +240,7 @@ struct meta_index {
#define LZMA_COMPRESSION 2
#define LZO_COMPRESSION 3
#define XZ_COMPRESSION 4
+#define LZ4_COMPRESSION 5
struct squashfs_super_block {
__le32 s_magic;
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 0949f9c7e872..5ffe5115951f 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -547,6 +547,9 @@ void efi_native_runtime_setup(void);
#define SMBIOS_TABLE_GUID \
EFI_GUID( 0xeb9d2d31, 0x2d88, 0x11d3, 0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d )
+#define SMBIOS3_TABLE_GUID \
+ EFI_GUID( 0xf2fd1544, 0x9794, 0x4a2c, 0x99, 0x2e, 0xe5, 0xbb, 0xcf, 0x20, 0xe3, 0x94 )
+
#define SAL_SYSTEM_TABLE_GUID \
EFI_GUID( 0xeb9d2d32, 0x2d88, 0x11d3, 0x9a, 0x16, 0x0, 0x90, 0x27, 0x3f, 0xc1, 0x4d )
@@ -810,7 +813,8 @@ extern struct efi {
unsigned long mps; /* MPS table */
unsigned long acpi; /* ACPI table (IA64 ext 0.71) */
unsigned long acpi20; /* ACPI table (ACPI 2.0) */
- unsigned long smbios; /* SM BIOS table */
+ unsigned long smbios; /* SMBIOS table (32 bit entry point) */
+ unsigned long smbios3; /* SMBIOS table (64 bit entry point) */
unsigned long sal_systab; /* SAL system table */
unsigned long boot_info; /* boot info table */
unsigned long hcdp; /* HCDP table */
@@ -871,6 +875,8 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, unsigned lon
#endif
extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr);
extern int efi_config_init(efi_config_table_type_t *arch_tables);
+extern int efi_config_parse_tables(void *config_tables, int count, int sz,
+ efi_config_table_type_t *arch_tables);
extern u64 efi_get_iobase (void);
extern u32 efi_mem_type (unsigned long phys_addr);
extern u64 efi_mem_attributes (unsigned long phys_addr);
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index b8476f016354..bac42ed3a579 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -53,6 +53,7 @@ struct ipv6_devconf {
__s32 force_tllao;
__s32 ndisc_notify;
__s32 suppress_frag_ndisc;
+ __s32 use_oif_addrs_only;
void *sysctl;
};
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 9828539ae701..004cae9c395d 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -62,6 +62,9 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg);
int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict);
+int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
+ const struct net_device *dev, int strict,
+ u32 banned_flags);
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr);
diff --git a/include/net/route.h b/include/net/route.h
index 41288e356890..af3d4ac09926 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -140,7 +140,8 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi
flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos,
RT_SCOPE_UNIVERSE, proto,
sk ? inet_sk_flowi_flags(sk) : 0,
- daddr, saddr, dport, sport, sk ? sock_i_uid(sk) : GLOBAL_ROOT_UID);
+ daddr, saddr, dport, sport,
+ sk ? sock_i_uid(sk) : GLOBAL_ROOT_UID);
if (sk)
security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
return ip_route_output_flow(net, fl4, sk);
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 187d9f6ccb0a..3388b91f2972 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -166,6 +166,7 @@ enum {
DEVCONF_SUPPRESS_FRAG_NDISC,
DEVCONF_ACCEPT_RA_FROM_LOCAL,
DEVCONF_USE_OPTIMISTIC,
+ DEVCONF_USE_OIF_ADDRS_ONLY,
DEVCONF_MAX
};
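
Appending DEVCONF_USE_OIF_ADDRS_ONLY immediately before DEVCONF_MAX keeps the netlink ABI stable: these enum values index the array that ipv6_store_devconf() (updated later in this patch) exports to userspace, so new knobs may only ever be added at the end. The matching struct ipv6_devconf member and sysctl entry are the other two pieces of the same plumbing.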
diff --git a/ipc/msg.c b/ipc/msg.c
index c5d8e3749985..cfc8b388332d 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -137,13 +137,6 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
return retval;
}
- /* ipc_addid() locks msq upon success. */
- id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
- if (id < 0) {
- ipc_rcu_putref(msq, msg_rcu_free);
- return id;
- }
-
msq->q_stime = msq->q_rtime = 0;
msq->q_ctime = get_seconds();
msq->q_cbytes = msq->q_qnum = 0;
@@ -153,6 +146,13 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
INIT_LIST_HEAD(&msq->q_receivers);
INIT_LIST_HEAD(&msq->q_senders);
+ /* ipc_addid() locks msq upon success. */
+ id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
+ if (id < 0) {
+ ipc_rcu_putref(msq, msg_rcu_free);
+ return id;
+ }
+
ipc_unlock_object(&msq->q_perm);
rcu_read_unlock();
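
Both this hunk and the ipc/shm.c one below apply the same hardening: ipc_addid() publishes the new object in the IDR, where concurrent RCU lookups can already reach it, so every field must be valid before the call rather than after. Under the old ordering a racing reader could have observed the uninitialized q_*/shm_* fields. A minimal userspace sketch of the publish-after-init rule (types and names are illustrative):

    #include <pthread.h>
    #include <stdlib.h>

    struct obj { int ready_fields; struct obj *next; };
    static struct obj *head;
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    static struct obj *new_obj(void)
    {
        struct obj *o = calloc(1, sizeof(*o));
        if (!o)
            return NULL;
        o->ready_fields = 42;          /* initialize everything first... */

        pthread_mutex_lock(&lock);     /* ...then publish */
        o->next = head;
        head = o;
        pthread_mutex_unlock(&lock);
        return o;
    }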
diff --git a/ipc/shm.c b/ipc/shm.c
index 01454796ba3c..2511771a9a07 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -549,12 +549,6 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
if (IS_ERR(file))
goto no_file;
- id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
- if (id < 0) {
- error = id;
- goto no_id;
- }
-
shp->shm_cprid = task_tgid_vnr(current);
shp->shm_lprid = 0;
shp->shm_atim = shp->shm_dtim = 0;
@@ -563,6 +557,13 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
shp->shm_nattch = 0;
shp->shm_file = file;
shp->shm_creator = current;
+
+ id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
+ if (id < 0) {
+ error = id;
+ goto no_id;
+ }
+
list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
/*
diff --git a/ipc/util.c b/ipc/util.c
index 88adc329888c..bc72cbf929da 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -277,6 +277,10 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size)
rcu_read_lock();
spin_lock(&new->lock);
+ current_euid_egid(&euid, &egid);
+ new->cuid = new->uid = euid;
+ new->gid = new->cgid = egid;
+
id = idr_alloc(&ids->ipcs_idr, new,
(next_id < 0) ? 0 : ipcid_to_idx(next_id), 0,
GFP_NOWAIT);
@@ -289,10 +293,6 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size)
ids->in_use++;
- current_euid_egid(&euid, &egid);
- new->cuid = new->uid = euid;
- new->gid = new->cgid = egid;
-
if (next_id < 0) {
new->seq = ids->seq++;
if (ids->seq > IPCID_SEQ_MAX)
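
Same publish-after-init rule as the msg.c and shm.c hunks above, applied inside ipc_addid() itself: the euid/egid owner fields are now set before idr_alloc() makes the object discoverable, instead of after, so a concurrent permission check can never read them uninitialized.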
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index f0f5c5c3de12..6d940c72b5fc 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -47,6 +47,11 @@
#include "lz4defs.h"
+static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
+#if LZ4_ARCH64
+static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
+#endif
+
static int lz4_uncompress(const char *source, char *dest, int osize)
{
const BYTE *ip = (const BYTE *) source;
@@ -56,10 +61,6 @@ static int lz4_uncompress(const char *source, char *dest, int osize)
BYTE *cpy;
unsigned token;
size_t length;
- size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
-#if LZ4_ARCH64
- size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
-#endif
while (1) {
@@ -116,7 +117,7 @@ static int lz4_uncompress(const char *source, char *dest, int osize)
/* copy repeated sequence */
if (unlikely((op - ref) < STEPSIZE)) {
#if LZ4_ARCH64
- size_t dec64 = dec64table[op - ref];
+ int dec64 = dec64table[op - ref];
#else
const int dec64 = 0;
#endif
@@ -139,8 +140,12 @@ static int lz4_uncompress(const char *source, char *dest, int osize)
/* Error: request to write beyond destination buffer */
if (cpy > oend)
goto _output_error;
+#if LZ4_ARCH64
+ if ((ref + COPYLENGTH) > oend)
+#else
if ((ref + COPYLENGTH) > oend ||
(op + COPYLENGTH) > oend)
+#endif
goto _output_error;
LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
while (op < cpy)
@@ -177,11 +182,6 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
BYTE * const oend = op + maxoutputsize;
BYTE *cpy;
- size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
-#if LZ4_ARCH64
- size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
-#endif
-
/* Main Loop */
while (ip < iend) {
@@ -249,7 +249,7 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
/* copy repeated sequence */
if (unlikely((op - ref) < STEPSIZE)) {
#if LZ4_ARCH64
- size_t dec64 = dec64table[op - ref];
+ int dec64 = dec64table[op - ref];
#else
const int dec64 = 0;
#endif
@@ -270,7 +270,13 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
if (cpy > oend - COPYLENGTH) {
if (cpy > oend)
goto _output_error; /* write outside of buf */
-
+#if LZ4_ARCH64
+ if ((ref + COPYLENGTH) > oend)
+#else
+ if ((ref + COPYLENGTH) > oend ||
+ (op + COPYLENGTH) > oend)
+#endif
+ goto _output_error;
LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
while (op < cpy)
*op++ = *ref++;
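
Two distinct fixes are folded into this file. First, the dec32table/dec64table arrays become static const int instead of per-call size_t locals: they no longer have to be rebuilt on every invocation, and int is the type the -1 entry actually needs, since storing -1 in a size_t slot yields SIZE_MAX and only works by unsigned wraparound. A runnable demonstration of that trap:

    #include <stdio.h>
    #include <stddef.h>

    int main(void)
    {
        size_t bad[]  = {0, 0, 0, -1, 0, 1, 2, 3};   /* old dec64table */
        int    good[] = {0, 0, 0, -1, 0, 1, 2, 3};   /* new dec64table */

        printf("bad[3]  = %zu\n", bad[3]);   /* SIZE_MAX, not -1 */
        printf("good[3] = %d\n",  good[3]);  /* -1, as intended */
        return 0;
    }

Second, the overrun checks are brought in line with the stride copy that follows: ref is now validated against oend on both architectures, op additionally on 32-bit (on 64-bit op is already bounded by the preceding cpy > oend test, hence the #if split), and the unknown-output-size variant previously had no such guard before LZ4_SECURECOPY at all.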
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 89a9a1e4bbf1..ad0225c44ffa 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -202,6 +202,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.disable_ipv6 = 0,
.accept_dad = 1,
.suppress_frag_ndisc = 1,
+ .use_oif_addrs_only = 0,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -240,6 +241,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.disable_ipv6 = 0,
.accept_dad = 1,
.suppress_frag_ndisc = 1,
+ .use_oif_addrs_only = 0,
};
/* Check if a valid qdisc is available */
@@ -1345,15 +1347,96 @@ out:
return ret;
}
+static int __ipv6_dev_get_saddr(struct net *net,
+ struct ipv6_saddr_dst *dst,
+ struct inet6_dev *idev,
+ struct ipv6_saddr_score *scores,
+ int hiscore_idx)
+{
+ struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx];
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
+ int i;
+
+ /*
+ * - Tentative Address (RFC2462 section 5.4)
+ * - A tentative address is not considered
+ * "assigned to an interface" in the traditional
+ * sense, unless it is also flagged as optimistic.
+ * - Candidate Source Address (section 4)
+ * - In any case, anycast addresses, multicast
+ * addresses, and the unspecified address MUST
+ * NOT be included in a candidate set.
+ */
+ if ((score->ifa->flags & IFA_F_TENTATIVE) &&
+ (!(score->ifa->flags & IFA_F_OPTIMISTIC)))
+ continue;
+
+ score->addr_type = __ipv6_addr_type(&score->ifa->addr);
+
+ if (unlikely(score->addr_type == IPV6_ADDR_ANY ||
+ score->addr_type & IPV6_ADDR_MULTICAST)) {
+ net_dbg_ratelimited("ADDRCONF: unspecified / multicast address assigned as unicast address on %s",
+ idev->dev->name);
+ continue;
+ }
+
+ score->rule = -1;
+ bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX);
+
+ for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) {
+ int minihiscore, miniscore;
+
+ minihiscore = ipv6_get_saddr_eval(net, hiscore, dst, i);
+ miniscore = ipv6_get_saddr_eval(net, score, dst, i);
+
+ if (minihiscore > miniscore) {
+ if (i == IPV6_SADDR_RULE_SCOPE &&
+ score->scopedist > 0) {
+ /*
+ * special case:
+ * each remaining entry
+ * has too small (not enough)
+ * scope, because ifa entries
+ * are sorted by their scope
+ * values.
+ */
+ goto out;
+ }
+ break;
+ } else if (minihiscore < miniscore) {
+ if (hiscore->ifa)
+ in6_ifa_put(hiscore->ifa);
+
+ in6_ifa_hold(score->ifa);
+
+ swap(hiscore, score);
+ hiscore_idx = 1 - hiscore_idx;
+
+ /* restore our iterator */
+ score->ifa = hiscore->ifa;
+
+ break;
+ }
+ }
+ }
+out:
+ read_unlock_bh(&idev->lock);
+ return hiscore_idx;
+}
+
int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
const struct in6_addr *daddr, unsigned int prefs,
struct in6_addr *saddr)
{
- struct ipv6_saddr_score scores[2],
- *score = &scores[0], *hiscore = &scores[1];
+ struct ipv6_saddr_score scores[2], *hiscore;
struct ipv6_saddr_dst dst;
+ struct inet6_dev *idev;
struct net_device *dev;
int dst_type;
+ bool use_oif_addr = false;
+ int hiscore_idx = 0;
dst_type = __ipv6_addr_type(daddr);
dst.addr = daddr;
@@ -1362,107 +1445,50 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
dst.label = ipv6_addr_label(net, daddr, dst_type, dst.ifindex);
dst.prefs = prefs;
- hiscore->rule = -1;
- hiscore->ifa = NULL;
+ scores[hiscore_idx].rule = -1;
+ scores[hiscore_idx].ifa = NULL;
rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
- struct inet6_dev *idev;
-
- /* Candidate Source Address (section 4)
- * - multicast and link-local destination address,
- * the set of candidate source address MUST only
- * include addresses assigned to interfaces
- * belonging to the same link as the outgoing
- * interface.
- * (- For site-local destination addresses, the
- * set of candidate source addresses MUST only
- * include addresses assigned to interfaces
- * belonging to the same site as the outgoing
- * interface.)
- */
- if (((dst_type & IPV6_ADDR_MULTICAST) ||
- dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL) &&
- dst.ifindex && dev->ifindex != dst.ifindex)
- continue;
-
- idev = __in6_dev_get(dev);
- if (!idev)
- continue;
-
- read_lock_bh(&idev->lock);
- list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
- int i;
-
- /*
- * - Tentative Address (RFC2462 section 5.4)
- * - A tentative address is not considered
- * "assigned to an interface" in the traditional
- * sense, unless it is also flagged as optimistic.
- * - Candidate Source Address (section 4)
- * - In any case, anycast addresses, multicast
- * addresses, and the unspecified address MUST
- * NOT be included in a candidate set.
- */
- if ((score->ifa->flags & IFA_F_TENTATIVE) &&
- (!(score->ifa->flags & IFA_F_OPTIMISTIC)))
- continue;
-
- score->addr_type = __ipv6_addr_type(&score->ifa->addr);
+ /* Candidate Source Address (section 4)
+ * - multicast and link-local destination address,
+ * the set of candidate source address MUST only
+ * include addresses assigned to interfaces
+ * belonging to the same link as the outgoing
+ * interface.
+ * (- For site-local destination addresses, the
+ * set of candidate source addresses MUST only
+ * include addresses assigned to interfaces
+ * belonging to the same site as the outgoing
+ * interface.)
+ * - "It is RECOMMENDED that the candidate source addresses
+ * be the set of unicast addresses assigned to the
+ * interface that will be used to send to the destination
+ * (the 'outgoing' interface)." (RFC 6724)
+ */
+ if (dst_dev) {
+ idev = __in6_dev_get(dst_dev);
+ if ((dst_type & IPV6_ADDR_MULTICAST) ||
+ dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL ||
+ (idev && idev->cnf.use_oif_addrs_only)) {
+ use_oif_addr = true;
+ }
+ }
- if (unlikely(score->addr_type == IPV6_ADDR_ANY ||
- score->addr_type & IPV6_ADDR_MULTICAST)) {
- LIMIT_NETDEBUG(KERN_DEBUG
- "ADDRCONF: unspecified / multicast address "
- "assigned as unicast address on %s",
- dev->name);
+ if (use_oif_addr) {
+ if (idev)
+ hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
+ } else {
+ for_each_netdev_rcu(net, dev) {
+ idev = __in6_dev_get(dev);
+ if (!idev)
continue;
- }
-
- score->rule = -1;
- bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX);
-
- for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) {
- int minihiscore, miniscore;
-
- minihiscore = ipv6_get_saddr_eval(net, hiscore, &dst, i);
- miniscore = ipv6_get_saddr_eval(net, score, &dst, i);
-
- if (minihiscore > miniscore) {
- if (i == IPV6_SADDR_RULE_SCOPE &&
- score->scopedist > 0) {
- /*
- * special case:
- * each remaining entry
- * has too small (not enough)
- * scope, because ifa entries
- * are sorted by their scope
- * values.
- */
- goto try_nextdev;
- }
- break;
- } else if (minihiscore < miniscore) {
- if (hiscore->ifa)
- in6_ifa_put(hiscore->ifa);
-
- in6_ifa_hold(score->ifa);
-
- swap(hiscore, score);
-
- /* restore our iterator */
- score->ifa = hiscore->ifa;
-
- break;
- }
- }
+ hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
}
-try_nextdev:
- read_unlock_bh(&idev->lock);
}
rcu_read_unlock();
+ hiscore = &scores[hiscore_idx];
if (!hiscore->ifa)
return -EADDRNOTAVAIL;
@@ -1523,17 +1549,30 @@ static int ipv6_count_addresses(struct inet6_dev *idev)
int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict)
{
+ return ipv6_chk_addr_and_flags(net, addr, dev, strict, IFA_F_TENTATIVE);
+}
+EXPORT_SYMBOL(ipv6_chk_addr);
+
+int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
+ const struct net_device *dev, int strict,
+ u32 banned_flags)
+{
struct inet6_ifaddr *ifp;
unsigned int hash = inet6_addr_hash(addr);
+ u32 ifp_flags;
rcu_read_lock_bh();
hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
+ /* Decouple optimistic from tentative for evaluation here.
+ * Ban optimistic addresses explicitly, when required.
+ */
+ ifp_flags = (ifp->flags&IFA_F_OPTIMISTIC)
+ ? (ifp->flags&~IFA_F_TENTATIVE)
+ : ifp->flags;
if (ipv6_addr_equal(&ifp->addr, addr) &&
- (!(ifp->flags&IFA_F_TENTATIVE) ||
- (ipv6_use_optimistic_addr(ifp->idev) &&
- ifp->flags&IFA_F_OPTIMISTIC)) &&
+ !(ifp_flags&banned_flags) &&
(dev == NULL || ifp->idev->dev == dev ||
!(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) {
rcu_read_unlock_bh();
@@ -1544,7 +1583,7 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
rcu_read_unlock_bh();
return 0;
}
-EXPORT_SYMBOL(ipv6_chk_addr);
+EXPORT_SYMBOL(ipv6_chk_addr_and_flags);
static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
struct net_device *dev)
@@ -4405,6 +4444,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify;
array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local;
+ array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only;
}
static inline size_t inet6_ifla6_size(void)
@@ -5305,6 +5345,14 @@ static struct addrconf_sysctl_table
.proc_handler = proc_dointvec,
},
{
+ .procname = "use_oif_addrs_only",
+ .data = &ipv6_devconf.use_oif_addrs_only,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
/* sentinel */
}
},
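
The addrconf.c rework does two things. It pulls the per-device scan into __ipv6_dev_get_saddr() so that, when use_oif_addrs_only is set (or the destination is multicast/link-local, per the RFC 6724 recommendation quoted in the comment), the candidate set can be restricted to the outgoing interface instead of walking every netdev; the knob is exposed per interface as /proc/sys/net/ipv6/conf/<dev>/use_oif_addrs_only via the procname registered above. It also replaces pointer swapping between the two score slots with an index: the best candidate so far always lives in scores[hiscore_idx], and promoting a winner is an index flip that survives across the per-device calls. A small, hedged model of that idiom (the score type and values are stand-ins):

    #include <stdio.h>

    struct score { int value; };       /* stand-in for ipv6_saddr_score */

    /* Mirrors __ipv6_dev_get_saddr(): scan one device's candidates and
     * return the (possibly flipped) index of the best slot. */
    static int scan(struct score scores[2], int hiscore_idx,
                    const int *vals, int n)
    {
        for (int i = 0; i < n; i++) {
            scores[1 - hiscore_idx].value = vals[i];
            if (scores[1 - hiscore_idx].value > scores[hiscore_idx].value)
                hiscore_idx = 1 - hiscore_idx;     /* promote by index flip */
        }
        return hiscore_idx;
    }

    int main(void)
    {
        struct score scores[2];
        int a[] = {3, 1}, b[] = {7, 2};
        int hiscore_idx = 0;

        scores[hiscore_idx].value = -1;            /* like hiscore->rule = -1 */
        hiscore_idx = scan(scores, hiscore_idx, a, 2);   /* first "device" */
        hiscore_idx = scan(scores, hiscore_idx, b, 2);   /* second "device" */
        printf("best = %d\n", scores[hiscore_idx].value); /* prints 7 */
        return 0;
    }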
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index a46c50423aec..1c34d758b0c2 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -654,7 +654,9 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
int probes = atomic_read(&neigh->probes);
- if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1))
+ if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr,
+ dev, 1,
+ IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))
saddr = &ipv6_hdr(skb)->saddr;
if ((probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES)) < 0) {
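
ndisc_solicit() previously went through ipv6_chk_addr(), which (per the removed lines above) could accept an optimistic source when ipv6_use_optimistic_addr() allowed it; with the new helper it bans both IFA_F_TENTATIVE and IFA_F_OPTIMISTIC explicitly, so probes never go out from an address still undergoing DAD. A hedged demo of the mask logic in ipv6_chk_addr_and_flags() (flag values are illustrative, not the kernel's):

    #include <stdio.h>

    #define F_TENTATIVE  0x01
    #define F_OPTIMISTIC 0x02

    /* Optimistic addresses drop their TENTATIVE bit before the mask is
     * applied, so they pass a ban on TENTATIVE alone but fail a ban that
     * names OPTIMISTIC explicitly, as ndisc_solicit() now does. */
    static int usable(unsigned flags, unsigned banned)
    {
        unsigned f = (flags & F_OPTIMISTIC) ? (flags & ~F_TENTATIVE) : flags;
        return !(f & banned);
    }

    int main(void)
    {
        unsigned opt = F_TENTATIVE | F_OPTIMISTIC;
        printf("%d\n", usable(opt, F_TENTATIVE));                /* 1: allowed */
        printf("%d\n", usable(opt, F_TENTATIVE | F_OPTIMISTIC)); /* 0: banned */
        return 0;
    }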
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index cdce4947ebd5..bc76f992014d 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2947,7 +2947,8 @@ static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr)
ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_TIMES_SET))
return dentry_has_perm(cred, dentry, FILE__SETATTR);
- if (selinux_policycap_openperm && (ia_valid & ATTR_SIZE))
+ if (selinux_policycap_openperm && (ia_valid & ATTR_SIZE)
+ && !(ia_valid & ATTR_FILE))
av |= FILE__OPEN;
return dentry_has_perm(cred, dentry, av);
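
The extra !(ia_valid & ATTR_FILE) test narrows the openperm check: ATTR_FILE is set when the size change arrives through an already-open file descriptor (ftruncate() and friends), where FILE__OPEN was necessarily checked when the descriptor was created, so only path-based truncation still re-requires the open permission.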