aboutsummaryrefslogtreecommitdiff
path: root/arch/arm64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64')
-rw-r--r--arch/arm64/Kconfig143
-rw-r--r--arch/arm64/Makefile1
-rw-r--r--arch/arm64/boot/dts/Makefile4
-rw-r--r--arch/arm64/boot/dts/apm-mustang.dts4
-rw-r--r--arch/arm64/boot/dts/apm-storm.dtsi88
-rw-r--r--arch/arm64/boot/dts/clcd-panels.dtsi52
-rw-r--r--arch/arm64/boot/dts/fvp-base-gicv2-psci.dts266
-rw-r--r--arch/arm64/boot/dts/juno.dts498
-rw-r--r--arch/arm64/boot/dts/rtsm_ve-aemv8a.dts2
-rw-r--r--arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi3
-rw-r--r--arch/arm64/crypto/Kconfig53
-rw-r--r--arch/arm64/crypto/Makefile38
-rw-r--r--arch/arm64/crypto/aes-ce-ccm-core.S222
-rw-r--r--arch/arm64/crypto/aes-ce-ccm-glue.c297
-rw-r--r--arch/arm64/crypto/aes-ce-cipher.c155
-rw-r--r--arch/arm64/crypto/aes-ce.S133
-rw-r--r--arch/arm64/crypto/aes-glue.c446
-rw-r--r--arch/arm64/crypto/aes-modes.S532
-rw-r--r--arch/arm64/crypto/aes-neon.S382
-rw-r--r--arch/arm64/crypto/ghash-ce-core.S79
-rw-r--r--arch/arm64/crypto/ghash-ce-glue.c156
-rw-r--r--arch/arm64/crypto/sha1-ce-core.S153
-rw-r--r--arch/arm64/crypto/sha1-ce-glue.c174
-rw-r--r--arch/arm64/crypto/sha2-ce-core.S156
-rw-r--r--arch/arm64/crypto/sha2-ce-glue.c255
-rw-r--r--arch/arm64/include/asm/Kbuild1
-rw-r--r--arch/arm64/include/asm/bL_switcher.h54
-rw-r--r--arch/arm64/include/asm/compat.h7
-rw-r--r--arch/arm64/include/asm/cpufeature.h29
-rw-r--r--arch/arm64/include/asm/debug-monitors.h64
-rw-r--r--arch/arm64/include/asm/dma-mapping.h2
-rw-r--r--arch/arm64/include/asm/ftrace.h59
-rw-r--r--arch/arm64/include/asm/hwcap.h9
-rw-r--r--arch/arm64/include/asm/insn.h5
-rw-r--r--arch/arm64/include/asm/irqflags.h23
-rw-r--r--arch/arm64/include/asm/kgdb.h84
-rw-r--r--arch/arm64/include/asm/memory.h2
-rw-r--r--arch/arm64/include/asm/ptrace.h10
-rw-r--r--arch/arm64/include/asm/syscall.h1
-rw-r--r--arch/arm64/include/asm/thread_info.h13
-rw-r--r--arch/arm64/include/asm/topology.h70
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/include/asm/unistd32.h2
-rw-r--r--arch/arm64/include/uapi/asm/Kbuild1
-rw-r--r--arch/arm64/include/uapi/asm/perf_regs.h40
-rw-r--r--arch/arm64/kernel/Makefile12
-rw-r--r--arch/arm64/kernel/arm64ksyms.c4
-rw-r--r--arch/arm64/kernel/debug-monitors.c4
-rw-r--r--arch/arm64/kernel/entry-ftrace.S218
-rw-r--r--arch/arm64/kernel/entry.S16
-rw-r--r--arch/arm64/kernel/ftrace.c177
-rw-r--r--arch/arm64/kernel/hw_breakpoint.c2
-rw-r--r--arch/arm64/kernel/irq.c10
-rw-r--r--arch/arm64/kernel/kgdb.c336
-rw-r--r--arch/arm64/kernel/perf_event.c79
-rw-r--r--arch/arm64/kernel/perf_regs.c46
-rw-r--r--arch/arm64/kernel/process.c1
-rw-r--r--arch/arm64/kernel/ptrace.c90
-rw-r--r--arch/arm64/kernel/return_address.c55
-rw-r--r--arch/arm64/kernel/setup.c33
-rw-r--r--arch/arm64/kernel/signal.c2
-rw-r--r--arch/arm64/kernel/smp.c11
-rw-r--r--arch/arm64/kernel/stacktrace.c2
-rw-r--r--arch/arm64/kernel/topology.c558
-rw-r--r--arch/arm64/mm/flush.c3
-rw-r--r--arch/arm64/mm/hugetlbpage.c9
66 files changed, 6344 insertions, 94 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 6dc93675e701..43564fce31a8 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2,7 +2,9 @@ config ARM64
def_bool y
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select ARCH_USE_CMPXCHG_LOCKREF
+ select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
+ select ARCH_HAS_OPP
select ARCH_WANT_OPTIONAL_GPIOLIB
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
select ARCH_WANT_FRAME_POINTERS
@@ -17,6 +19,7 @@ config ARM64
select DCACHE_WORD_ACCESS
select GENERIC_CLOCKEVENTS
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+ select GENERIC_CPU_AUTOPROBE
select GENERIC_EARLY_IOREMAP
select GENERIC_IOMAP
select GENERIC_IRQ_PROBE
@@ -28,18 +31,27 @@ config ARM64
select GENERIC_TIME_VSYSCALL
select HARDIRQS_SW_RESEND
select HAVE_ARCH_JUMP_LABEL
+ select HAVE_ARCH_KGDB
select HAVE_ARCH_TRACEHOOK
+ select HAVE_C_RECORDMCOUNT
select HAVE_DEBUG_BUGVERBOSE
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_API_DEBUG
select HAVE_DMA_ATTRS
select HAVE_DMA_CONTIGUOUS
select HAVE_EFFICIENT_UNALIGNED_ACCESS
+ select HAVE_DYNAMIC_FTRACE
+ select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_FUNCTION_TRACER
+ select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_GENERIC_DMA_COHERENT
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_MEMBLOCK
select HAVE_PATA_PLATFORM
select HAVE_PERF_EVENTS
+ select HAVE_PERF_REGS
+ select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_SYSCALL_TRACEPOINTS
select IRQ_DOMAIN
select MODULES_USE_ELF_RELA
select NO_BOOTMEM
@@ -170,6 +182,134 @@ config SMP
If you don't know what to do here, say N.
+config SCHED_MC
+ bool "Multi-core scheduler support"
+ depends on SMP
+ help
+ Multi-core scheduler support improves the CPU scheduler's decision
+ making when dealing with multi-core CPU chips at a cost of slightly
+ increased overhead in some places. If unsure say N here.
+
+config SCHED_SMT
+ bool "SMT scheduler support"
+ depends on SMP
+ help
+ Improves the CPU scheduler's decision making when dealing with
+ MultiThreading at a cost of slightly increased overhead in some
+ places. If unsure say N here.
+
+config SCHED_MC
+ bool "Multi-core scheduler support"
+ depends on ARM_CPU_TOPOLOGY
+ help
+ Multi-core scheduler support improves the CPU scheduler's decision
+ making when dealing with multi-core CPU chips at a cost of slightly
+ increased overhead in some places. If unsure say N here.
+
+config SCHED_SMT
+ bool "SMT scheduler support"
+ depends on ARM_CPU_TOPOLOGY
+ help
+ Improves the CPU scheduler's decision making when dealing with
+ MultiThreading at a cost of slightly increased overhead in some
+ places. If unsure say N here.
+
+config DISABLE_CPU_SCHED_DOMAIN_BALANCE
+ bool "(EXPERIMENTAL) Disable CPU level scheduler load-balancing"
+ help
+ Disables scheduler load-balancing at CPU sched domain level.
+
+config SCHED_HMP
+ bool "(EXPERIMENTAL) Heterogenous multiprocessor scheduling"
+ depends on DISABLE_CPU_SCHED_DOMAIN_BALANCE && SCHED_MC && FAIR_GROUP_SCHED && !SCHED_AUTOGROUP
+ help
+ Experimental scheduler optimizations for heterogeneous platforms.
+ Attempts to introspectively select task affinity to optimize power
+ and performance. Basic support for multiple (>2) cpu types is in place,
+ but it has only been tested with two types of cpus.
+ There is currently no support for migration of task groups, hence
+ !SCHED_AUTOGROUP. Furthermore, normal load-balancing must be disabled
+ between cpus of different type (DISABLE_CPU_SCHED_DOMAIN_BALANCE).
+
+config SCHED_HMP_PRIO_FILTER
+ bool "(EXPERIMENTAL) Filter HMP migrations by task priority"
+ depends on SCHED_HMP
+ help
+ Enables task priority based HMP migration filter. Any task with
+ a NICE value above the threshold will always be on low-power cpus
+ with less compute capacity.
+
+config SCHED_HMP_PRIO_FILTER_VAL
+ int "NICE priority threshold"
+ default 5
+ depends on SCHED_HMP_PRIO_FILTER
+
+config HMP_FAST_CPU_MASK
+ string "HMP scheduler fast CPU mask"
+ depends on SCHED_HMP
+ help
+ Leave empty to use device tree information.
+ Specify the cpuids of the fast CPUs in the system as a list string,
+ e.g. cpuid 0+1 should be specified as 0-1.
+
+config HMP_SLOW_CPU_MASK
+ string "HMP scheduler slow CPU mask"
+ depends on SCHED_HMP
+ help
+ Leave empty to use device tree information.
+ Specify the cpuids of the slow CPUs in the system as a list string,
+ e.g. cpuid 0+1 should be specified as 0-1.
+
+config HMP_VARIABLE_SCALE
+ bool "Allows changing the load tracking scale through sysfs"
+ depends on SCHED_HMP
+ help
+ When turned on, this option exports the thresholds and load average
+ period value for the load tracking patches through sysfs.
+ The values can be modified to change the rate of load accumulation
+ and the thresholds used for HMP migration.
+ The load_avg_period_ms is the time in ms to reach a load average of
+ 0.5 for an idle task of 0 load average ratio that start a busy loop.
+ The up_threshold and down_threshold is the value to go to a faster
+ CPU or to go back to a slower cpu.
+ The {up,down}_threshold are devided by 1024 before being compared
+ to the load average.
+ For examples, with load_avg_period_ms = 128 and up_threshold = 512,
+ a running task with a load of 0 will be migrated to a bigger CPU after
+ 128ms, because after 128ms its load_avg_ratio is 0.5 and the real
+ up_threshold is 0.5.
+ This patch has the same behavior as changing the Y of the load
+ average computation to
+ (1002/1024)^(LOAD_AVG_PERIOD/load_avg_period_ms)
+ but it remove intermadiate overflows in computation.
+
+config HMP_FREQUENCY_INVARIANT_SCALE
+ bool "(EXPERIMENTAL) Frequency-Invariant Tracked Load for HMP"
+ depends on HMP_VARIABLE_SCALE && CPU_FREQ
+ help
+ Scales the current load contribution in line with the frequency
+ of the CPU that the task was executed on.
+ In this version, we use a simple linear scale derived from the
+ maximum frequency reported by CPUFreq.
+ Restricting tracked load to be scaled by the CPU's frequency
+ represents the consumption of possible compute capacity
+ (rather than consumption of actual instantaneous capacity as
+ normal) and allows the HMP migration's simple threshold
+ migration strategy to interact more predictably with CPUFreq's
+ asynchronous compute capacity changes.
+
+config SCHED_HMP_LITTLE_PACKING
+ bool "Small task packing for HMP"
+ depends on SCHED_HMP
+ default n
+ help
+ Allows the HMP Scheduler to pack small tasks into CPUs in the
+ smallest HMP domain.
+ Controlled by two sysfs files in sys/kernel/hmp.
+ packing_enable: 1 to enable, 0 to disable packing. Default 1.
+ packing_limit: runqueue load ratio where a RQ is considered
+ to be full. Default is NICE_0_LOAD * 9/8.
+
config NR_CPUS
int "Maximum number of CPUs (2-32)"
range 2 32
@@ -322,5 +462,8 @@ source "arch/arm64/Kconfig.debug"
source "security/Kconfig"
source "crypto/Kconfig"
+if CRYPTO
+source "arch/arm64/crypto/Kconfig"
+endif
source "lib/Kconfig"
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 2fceb71ac3b7..8185a913c5ed 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -45,6 +45,7 @@ export TEXT_OFFSET GZFLAGS
core-y += arch/arm64/kernel/ arch/arm64/mm/
core-$(CONFIG_KVM) += arch/arm64/kvm/
core-$(CONFIG_XEN) += arch/arm64/xen/
+core-$(CONFIG_CRYPTO) += arch/arm64/crypto/
libs-y := arch/arm64/lib/ $(libs-y)
libs-y += $(LIBGCC)
diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile
index c52bdb051f66..3391691a85f1 100644
--- a/arch/arm64/boot/dts/Makefile
+++ b/arch/arm64/boot/dts/Makefile
@@ -1,5 +1,7 @@
-dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb foundation-v8.dtb
+dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb foundation-v8.dtb \
+ fvp-base-gicv2-psci.dtb
dtb-$(CONFIG_ARCH_XGENE) += apm-mustang.dtb
+dtb-$(CONFIG_ARCH_VEXPRESS) += juno.dtb
targets += dtbs
targets += $(dtb-y)
diff --git a/arch/arm64/boot/dts/apm-mustang.dts b/arch/arm64/boot/dts/apm-mustang.dts
index 1247ca1200b1..6541962f5d70 100644
--- a/arch/arm64/boot/dts/apm-mustang.dts
+++ b/arch/arm64/boot/dts/apm-mustang.dts
@@ -24,3 +24,7 @@
reg = < 0x1 0x00000000 0x0 0x80000000 >; /* Updated by bootloader */
};
};
+
+&serial0 {
+ status = "ok";
+};
diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi
index 6d4f493aac9a..03431d25123b 100644
--- a/arch/arm64/boot/dts/apm-storm.dtsi
+++ b/arch/arm64/boot/dts/apm-storm.dtsi
@@ -218,11 +218,64 @@
enable-offset = <0x0>;
enable-mask = <0x06>;
};
+
+ sata01clk: sata01clk@1f21c000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x1f21c000 0x0 0x1000>;
+ reg-names = "csr-reg";
+ clock-output-names = "sata01clk";
+ csr-offset = <0x4>;
+ csr-mask = <0x05>;
+ enable-offset = <0x0>;
+ enable-mask = <0x39>;
+ };
+
+ sata23clk: sata23clk@1f22c000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x1f22c000 0x0 0x1000>;
+ reg-names = "csr-reg";
+ clock-output-names = "sata23clk";
+ csr-offset = <0x4>;
+ csr-mask = <0x05>;
+ enable-offset = <0x0>;
+ enable-mask = <0x39>;
+ };
+
+ sata45clk: sata45clk@1f23c000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x1f23c000 0x0 0x1000>;
+ reg-names = "csr-reg";
+ clock-output-names = "sata45clk";
+ csr-offset = <0x4>;
+ csr-mask = <0x05>;
+ enable-offset = <0x0>;
+ enable-mask = <0x39>;
+ };
+
+ rtcclk: rtcclk@17000000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x17000000 0x0 0x2000>;
+ reg-names = "csr-reg";
+ csr-offset = <0xc>;
+ csr-mask = <0x2>;
+ enable-offset = <0x10>;
+ enable-mask = <0x2>;
+ clock-output-names = "rtcclk";
+ };
};
serial0: serial@1c020000 {
+ status = "disabled";
device_type = "serial";
- compatible = "ns16550";
+ compatible = "ns16550a";
reg = <0 0x1c020000 0x0 0x1000>;
reg-shift = <2>;
clock-frequency = <10000000>; /* Updated by bootloader */
@@ -230,6 +283,39 @@
interrupts = <0x0 0x4c 0x4>;
};
+ serial1: serial@1c021000 {
+ status = "disabled";
+ device_type = "serial";
+ compatible = "ns16550a";
+ reg = <0 0x1c021000 0x0 0x1000>;
+ reg-shift = <2>;
+ clock-frequency = <10000000>; /* Updated by bootloader */
+ interrupt-parent = <&gic>;
+ interrupts = <0x0 0x4d 0x4>;
+ };
+
+ serial2: serial@1c022000 {
+ status = "disabled";
+ device_type = "serial";
+ compatible = "ns16550a";
+ reg = <0 0x1c022000 0x0 0x1000>;
+ reg-shift = <2>;
+ clock-frequency = <10000000>; /* Updated by bootloader */
+ interrupt-parent = <&gic>;
+ interrupts = <0x0 0x4e 0x4>;
+ };
+
+ serial3: serial@1c023000 {
+ status = "disabled";
+ device_type = "serial";
+ compatible = "ns16550a";
+ reg = <0 0x1c023000 0x0 0x1000>;
+ reg-shift = <2>;
+ clock-frequency = <10000000>; /* Updated by bootloader */
+ interrupt-parent = <&gic>;
+ interrupts = <0x0 0x4f 0x4>;
+ };
+
phy1: phy@1f21a000 {
compatible = "apm,xgene-phy";
reg = <0x0 0x1f21a000 0x0 0x100>;
diff --git a/arch/arm64/boot/dts/clcd-panels.dtsi b/arch/arm64/boot/dts/clcd-panels.dtsi
new file mode 100644
index 000000000000..0b0ff6ead4b2
--- /dev/null
+++ b/arch/arm64/boot/dts/clcd-panels.dtsi
@@ -0,0 +1,52 @@
+/*
+ * ARM Ltd. Versatile Express
+ *
+ */
+
+/ {
+ panels {
+ panel@0 {
+ compatible = "panel";
+ mode = "VGA";
+ refresh = <60>;
+ xres = <640>;
+ yres = <480>;
+ pixclock = <39721>;
+ left_margin = <40>;
+ right_margin = <24>;
+ upper_margin = <32>;
+ lower_margin = <11>;
+ hsync_len = <96>;
+ vsync_len = <2>;
+ sync = <0>;
+ vmode = "FB_VMODE_NONINTERLACED";
+
+ tim2 = "TIM2_BCD", "TIM2_IPC";
+ cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)";
+ caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888";
+ bpp = <16>;
+ };
+
+ panel@1 {
+ compatible = "panel";
+ mode = "XVGA";
+ refresh = <60>;
+ xres = <1024>;
+ yres = <768>;
+ pixclock = <15748>;
+ left_margin = <152>;
+ right_margin = <48>;
+ upper_margin = <23>;
+ lower_margin = <3>;
+ hsync_len = <104>;
+ vsync_len = <4>;
+ sync = <0>;
+ vmode = "FB_VMODE_NONINTERLACED";
+
+ tim2 = "TIM2_BCD", "TIM2_IPC";
+ cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)";
+ caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888";
+ bpp = <16>;
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts b/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts
new file mode 100644
index 000000000000..a46be6148b3a
--- /dev/null
+++ b/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2013, ARM Limited. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/memreserve/ 0x80000000 0x00010000;
+
+/ {
+};
+
+/ {
+ model = "FVP Base";
+ compatible = "arm,vfp-base", "arm,vexpress";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ chosen { };
+
+ aliases {
+ serial0 = &v2m_serial0;
+ serial1 = &v2m_serial1;
+ serial2 = &v2m_serial2;
+ serial3 = &v2m_serial3;
+ };
+
+ psci {
+ compatible = "arm,psci";
+ method = "smc";
+ cpu_suspend = <0xc4000001>;
+ cpu_off = <0x84000002>;
+ cpu_on = <0xc4000003>;
+ };
+
+ cpus {
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ big0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57", "arm,armv8";
+ reg = <0x0 0x0>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+ big1: cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57", "arm,armv8";
+ reg = <0x0 0x1>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+ big2: cpu@2 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57", "arm,armv8";
+ reg = <0x0 0x2>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+ big3: cpu@3 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57", "arm,armv8";
+ reg = <0x0 0x3>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+ little0: cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53", "arm,armv8";
+ reg = <0x0 0x100>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+ little1: cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53", "arm,armv8";
+ reg = <0x0 0x101>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+ little2: cpu@102 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53", "arm,armv8";
+ reg = <0x0 0x102>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+ little3: cpu@103 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53", "arm,armv8";
+ reg = <0x0 0x103>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+
+ cpu-map {
+ cluster0 {
+ core0 {
+ cpu = <&big0>;
+ };
+ core1 {
+ cpu = <&big1>;
+ };
+ core2 {
+ cpu = <&big2>;
+ };
+ core3 {
+ cpu = <&big3>;
+ };
+ };
+ cluster1 {
+ core0 {
+ cpu = <&little0>;
+ };
+ core1 {
+ cpu = <&little1>;
+ };
+ core2 {
+ cpu = <&little2>;
+ };
+ core3 {
+ cpu = <&little3>;
+ };
+ };
+ };
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x00000000 0x80000000 0 0x80000000>,
+ <0x00000008 0x80000000 0 0x80000000>;
+ };
+
+ gic: interrupt-controller@2f000000 {
+ compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0x0 0x2f000000 0 0x10000>,
+ <0x0 0x2c000000 0 0x2000>,
+ <0x0 0x2c010000 0 0x2000>,
+ <0x0 0x2c02F000 0 0x2000>;
+ interrupts = <1 9 0xf04>;
+ };
+
+ timer {
+ compatible = "arm,armv8-timer";
+ interrupts = <1 13 0xff01>,
+ <1 14 0xff01>,
+ <1 11 0xff01>,
+ <1 10 0xff01>;
+ clock-frequency = <100000000>;
+ };
+
+ timer@2a810000 {
+ compatible = "arm,armv7-timer-mem";
+ reg = <0x0 0x2a810000 0x0 0x10000>;
+ clock-frequency = <100000000>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+ frame@2a820000 {
+ frame-number = <0>;
+ interrupts = <0 25 4>;
+ reg = <0x0 0x2a820000 0x0 0x10000>;
+ };
+ };
+
+ pmu {
+ compatible = "arm,armv8-pmuv3";
+ interrupts = <0 60 4>,
+ <0 61 4>,
+ <0 62 4>,
+ <0 63 4>;
+ };
+
+ smb {
+ compatible = "simple-bus";
+
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0 0x08000000 0x04000000>,
+ <1 0 0 0x14000000 0x04000000>,
+ <2 0 0 0x18000000 0x04000000>,
+ <3 0 0 0x1c000000 0x04000000>,
+ <4 0 0 0x0c000000 0x04000000>,
+ <5 0 0 0x10000000 0x04000000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 63>;
+ interrupt-map = <0 0 0 &gic 0 0 4>,
+ <0 0 1 &gic 0 1 4>,
+ <0 0 2 &gic 0 2 4>,
+ <0 0 3 &gic 0 3 4>,
+ <0 0 4 &gic 0 4 4>,
+ <0 0 5 &gic 0 5 4>,
+ <0 0 6 &gic 0 6 4>,
+ <0 0 7 &gic 0 7 4>,
+ <0 0 8 &gic 0 8 4>,
+ <0 0 9 &gic 0 9 4>,
+ <0 0 10 &gic 0 10 4>,
+ <0 0 11 &gic 0 11 4>,
+ <0 0 12 &gic 0 12 4>,
+ <0 0 13 &gic 0 13 4>,
+ <0 0 14 &gic 0 14 4>,
+ <0 0 15 &gic 0 15 4>,
+ <0 0 16 &gic 0 16 4>,
+ <0 0 17 &gic 0 17 4>,
+ <0 0 18 &gic 0 18 4>,
+ <0 0 19 &gic 0 19 4>,
+ <0 0 20 &gic 0 20 4>,
+ <0 0 21 &gic 0 21 4>,
+ <0 0 22 &gic 0 22 4>,
+ <0 0 23 &gic 0 23 4>,
+ <0 0 24 &gic 0 24 4>,
+ <0 0 25 &gic 0 25 4>,
+ <0 0 26 &gic 0 26 4>,
+ <0 0 27 &gic 0 27 4>,
+ <0 0 28 &gic 0 28 4>,
+ <0 0 29 &gic 0 29 4>,
+ <0 0 30 &gic 0 30 4>,
+ <0 0 31 &gic 0 31 4>,
+ <0 0 32 &gic 0 32 4>,
+ <0 0 33 &gic 0 33 4>,
+ <0 0 34 &gic 0 34 4>,
+ <0 0 35 &gic 0 35 4>,
+ <0 0 36 &gic 0 36 4>,
+ <0 0 37 &gic 0 37 4>,
+ <0 0 38 &gic 0 38 4>,
+ <0 0 39 &gic 0 39 4>,
+ <0 0 40 &gic 0 40 4>,
+ <0 0 41 &gic 0 41 4>,
+ <0 0 42 &gic 0 42 4>;
+
+ /include/ "rtsm_ve-motherboard.dtsi"
+ };
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm64/boot/dts/juno.dts b/arch/arm64/boot/dts/juno.dts
new file mode 100644
index 000000000000..9785a14ca604
--- /dev/null
+++ b/arch/arm64/boot/dts/juno.dts
@@ -0,0 +1,498 @@
+/*
+ * ARM Ltd. Juno Plaform
+ *
+ * Fast Models FVP v2 support
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/ {
+ model = "Juno";
+ compatible = "arm,juno", "arm,vexpress";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ aliases {
+ serial0 = &soc_uart0;
+ };
+
+ cpus {
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x100>;
+ enable-method = "psci";
+ };
+
+ cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x101>;
+ enable-method = "psci";
+ };
+
+ cpu@102 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x102>;
+ enable-method = "psci";
+ };
+
+ cpu@103 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x103>;
+ enable-method = "psci";
+ };
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57","arm,armv8";
+ reg = <0x0 0x0>;
+ enable-method = "psci";
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57","arm,armv8";
+ reg = <0x0 0x1>;
+ enable-method = "psci";
+ };
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x00000000 0x80000000 0x0 0x80000000>,
+ <0x00000008 0x80000000 0x1 0x80000000>;
+ };
+
+ /* memory@14000000 {
+ device_type = "memory";
+ reg = <0x00000000 0x14000000 0x0 0x02000000>;
+ }; */
+
+ gic: interrupt-controller@2c001000 {
+ compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0x0 0x2c010000 0 0x1000>,
+ <0x0 0x2c02f000 0 0x1000>,
+ <0x0 0x2c04f000 0 0x2000>,
+ <0x0 0x2c06f000 0 0x2000>;
+ interrupts = <GIC_PPI 9 0xf04>;
+ };
+
+ msi0: msi@2c1c0000 {
+ compatible = "arm,gic-msi";
+ reg = <0x0 0x2c1c0000 0 0x10000
+ 0x0 0x2c1d0000 0 0x10000
+ 0x0 0x2c1e0000 0 0x10000
+ 0x0 0x2c1f0000 0 0x10000>;
+ };
+
+ timer {
+ compatible = "arm,armv8-timer";
+ interrupts = <GIC_PPI 13 0xff01>,
+ <GIC_PPI 14 0xff01>,
+ <GIC_PPI 11 0xff01>,
+ <GIC_PPI 10 0xff01>;
+ };
+
+ pmu {
+ compatible = "arm,armv8-pmuv3";
+ interrupts = <GIC_SPI 60 4>,
+ <GIC_SPI 61 4>,
+ <GIC_SPI 62 4>,
+ <GIC_SPI 63 4>;
+ };
+
+ psci {
+ compatible = "arm,psci";
+ method = "smc";
+ cpu_suspend = <0xC4000001>;
+ cpu_off = <0x84000002>;
+ cpu_on = <0xC4000003>;
+ migrate = <0xC4000005>;
+ };
+
+ pci0: pci@30000000 {
+ compatible = "arm,pcie-xr3";
+ device_type = "pci";
+ reg = <0 0x7ff30000 0 0x1000
+ 0 0x7ff20000 0 0x10000
+ 0 0x40000000 0 0x10000000>;
+ bus-range = <0 255>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ ranges = <0x01000000 0x0 0x00000000 0x00 0x5ff00000 0x0 0x00100000
+ 0x02000000 0x0 0x00000000 0x40 0x00000000 0x0 0x80000000
+ 0x42000000 0x0 0x80000000 0x40 0x80000000 0x0 0x80000000>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 0 7>;
+ interrupt-map = <0 0 0 1 &gic 0 136 4
+ 0 0 0 2 &gic 0 137 4
+ 0 0 0 3 &gic 0 138 4
+ 0 0 0 4 &gic 0 139 4>;
+ };
+
+ scpi: scpi@2b1f0000 {
+ compatible = "arm,scpi-mhu";
+ reg = <0x0 0x2b1f0000 0x0 0x10000>, /* MHU registers */
+ <0x0 0x2e000000 0x0 0x10000>; /* Payload area */
+ interrupts = <0 36 4>, /* low priority interrupt */
+ <0 35 4>, /* high priority interrupt */
+ <0 37 4>; /* secure channel interrupt */
+ #clock-cells = <1>;
+ clock-output-names = "a57", "a53", "gpu", "hdlcd0", "hdlcd1";
+ };
+
+ hdlcd0_osc: scpi_osc@3 {
+ compatible = "arm,scpi-osc";
+ #clock-cells = <0>;
+ clocks = <&scpi 3>;
+ frequency-range = <23000000 210000000>;
+ clock-output-names = "pxlclk0";
+ };
+
+ hdlcd1_osc: scpi_osc@4 {
+ compatible = "arm,scpi-osc";
+ #clock-cells = <0>;
+ clocks = <&scpi 4>;
+ frequency-range = <23000000 210000000>;
+ clock-output-names = "pxlclk1";
+ };
+
+ soc_uartclk: refclk72738khz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <7273800>;
+ clock-output-names = "juno:uartclk";
+ };
+
+ soc_refclk24mhz: clk24mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <24000000>;
+ clock-output-names = "juno:clk24mhz";
+ };
+
+ mb_eth25mhz: clk25mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <25000000>;
+ clock-output-names = "ethclk25mhz";
+ };
+
+ soc_usb48mhz: clk48mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <48000000>;
+ clock-output-names = "clk48mhz";
+ };
+
+ soc_smc50mhz: clk50mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <50000000>;
+ clock-output-names = "smc_clk";
+ };
+
+ soc_refclk100mhz: refclk100mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <100000000>;
+ clock-output-names = "apb_pclk";
+ };
+
+ soc_faxiclk: refclk533mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <533000000>;
+ clock-output-names = "faxi_clk";
+ };
+
+ soc_fixed_3v3: fixedregulator@0 {
+ compatible = "regulator-fixed";
+ regulator-name = "3V3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+
+ memory-controller@7ffd0000 {
+ compatible = "arm,pl354", "arm,primecell";
+ reg = <0 0x7ffd0000 0 0x1000>;
+ interrupts = <0 86 4>,
+ <0 87 4>;
+ clocks = <&soc_smc50mhz>;
+ clock-names = "apb_pclk";
+ chip5-memwidth = <16>;
+ };
+
+ dma0: dma@0x7ff00000 {
+ compatible = "arm,pl330", "arm,primecell";
+ reg = <0x0 0x7ff00000 0 0x1000>;
+ interrupts = <0 95 4>,
+ <0 88 4>,
+ <0 89 4>,
+ <0 90 4>,
+ <0 91 4>,
+ <0 108 4>,
+ <0 109 4>,
+ <0 110 4>,
+ <0 111 4>;
+ #dma-cells = <1>;
+ #dma-channels = <8>;
+ #dma-requests = <32>;
+ clocks = <&soc_faxiclk>;
+ clock-names = "apb_pclk";
+ };
+
+ soc_uart0: uart@7ff80000 {
+ compatible = "arm,pl011", "arm,primecell";
+ reg = <0x0 0x7ff80000 0x0 0x1000>;
+ interrupts = <0 83 4>;
+ clocks = <&soc_uartclk>, <&soc_refclk100mhz>;
+ clock-names = "uartclk", "apb_pclk";
+ dmas = <&dma0 1
+ &dma0 2>;
+ dma-names = "rx", "tx";
+ };
+
+ /* this UART is reserved for secure software.
+ soc_uart1: uart@7ff70000 {
+ compatible = "arm,pl011", "arm,primecell";
+ reg = <0x0 0x7ff70000 0x0 0x1000>;
+ interrupts = <0 84 4>;
+ clocks = <&soc_uartclk>, <&soc_refclk100mhz>;
+ clock-names = "uartclk", "apb_pclk";
+ }; */
+
+ ulpi_phy: phy@0 {
+ compatible = "phy-ulpi-generic";
+ reg = <0x0 0x94 0x0 0x4>;
+ phy-id = <0>;
+ };
+
+ ehci@7ffc0000 {
+ compatible = "snps,ehci-h20ahb";
+ /* compatible = "arm,h20ahb-ehci"; */
+ reg = <0x0 0x7ffc0000 0x0 0x10000>;
+ interrupts = <0 117 4>;
+ clocks = <&soc_usb48mhz>;
+ clock-names = "otg";
+ phys = <&ulpi_phy>;
+ };
+
+ ohci@0x7ffb0000 {
+ compatible = "generic-ohci";
+ reg = <0x0 0x7ffb0000 0x0 0x10000>;
+ interrupts = <0 116 4>;
+ clocks = <&soc_usb48mhz>;
+ clock-names = "otg";
+ };
+
+ i2c@0x7ffa0000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "snps,designware-i2c";
+ reg = <0x0 0x7ffa0000 0x0 0x1000>;
+ interrupts = <0 104 4>;
+ clock-frequency = <400000>;
+ i2c-sda-hold-time-ns = <500>;
+ clocks = <&soc_smc50mhz>;
+
+ dvi0: dvi-transmitter@70 {
+ compatible = "nxp,tda998x";
+ reg = <0x70>;
+ };
+
+ dvi1: dvi-transmitter@71 {
+ compatible = "nxp,tda998x";
+ reg = <0x71>;
+ };
+ };
+
+ /* mmci@1c050000 {
+ compatible = "arm,pl180", "arm,primecell";
+ reg = <0x0 0x1c050000 0x0 0x1000>;
+ interrupts = <0 73 4>,
+ <0 74 4>;
+ max-frequency = <12000000>;
+ vmmc-supply = <&soc_fixed_3v3>;
+ clocks = <&soc_refclk24mhz>, <&soc_refclk100mhz>;
+ clock-names = "mclk", "apb_pclk";
+ }; */
+
+ hdlcd@7ff60000 {
+ compatible = "arm,hdlcd";
+ reg = <0 0x7ff60000 0 0x1000>;
+ interrupts = <0 85 4>;
+ clocks = <&hdlcd0_osc>;
+ clock-names = "pxlclk";
+ i2c-slave = <&dvi0>;
+
+ /* display-timings {
+ native-mode = <&timing0>;
+ timing0: timing@0 {
+ /* 1024 x 768 framebufer, standard VGA timings * /
+ clock-frequency = <65000>;
+ hactive = <1024>;
+ vactive = <768>;
+ hfront-porch = <24>;
+ hback-porch = <160>;
+ hsync-len = <136>;
+ vfront-porch = <3>;
+ vback-porch = <29>;
+ vsync-len = <6>;
+ };
+ }; */
+ };
+
+ hdlcd@7ff50000 {
+ compatible = "arm,hdlcd";
+ reg = <0 0x7ff50000 0 0x1000>;
+ interrupts = <0 93 4>;
+ clocks = <&hdlcd1_osc>;
+ clock-names = "pxlclk";
+ i2c-slave = <&dvi1>;
+
+ display-timings {
+ native-mode = <&timing1>;
+ timing1: timing@1 {
+ /* 1024 x 768 framebufer, standard VGA timings */
+ clock-frequency = <65000>;
+ hactive = <1024>;
+ vactive = <768>;
+ hfront-porch = <24>;
+ hback-porch = <160>;
+ hsync-len = <136>;
+ vfront-porch = <3>;
+ vback-porch = <29>;
+ vsync-len = <6>;
+ };
+ };
+ };
+
+ smb {
+ compatible = "simple-bus";
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0 0x08000000 0x04000000>,
+ <1 0 0 0x14000000 0x04000000>,
+ <2 0 0 0x18000000 0x04000000>,
+ <3 0 0 0x1c000000 0x04000000>,
+ <4 0 0 0x0c000000 0x04000000>,
+ <5 0 0 0x10000000 0x04000000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 15>;
+ interrupt-map = <0 0 0 &gic 0 68 4>,
+ <0 0 1 &gic 0 69 4>,
+ <0 0 2 &gic 0 70 4>,
+ <0 0 3 &gic 0 160 4>,
+ <0 0 4 &gic 0 161 4>,
+ <0 0 5 &gic 0 162 4>,
+ <0 0 6 &gic 0 163 4>,
+ <0 0 7 &gic 0 164 4>,
+ <0 0 8 &gic 0 165 4>,
+ <0 0 9 &gic 0 166 4>,
+ <0 0 10 &gic 0 167 4>,
+ <0 0 11 &gic 0 168 4>,
+ <0 0 12 &gic 0 169 4>;
+
+ motherboard {
+ model = "V2M-Juno";
+ arm,hbi = <0x252>;
+ arm,vexpress,site = <0>;
+ arm,v2m-memory-map = "rs1";
+ compatible = "arm,vexpress,v2p-p1", "simple-bus";
+ #address-cells = <2>; /* SMB chipselect number and offset */
+ #size-cells = <1>;
+ #interrupt-cells = <1>;
+ ranges;
+
+ usb@5,00000000 {
+ compatible = "nxp,usb-isp1763";
+ reg = <5 0x00000000 0x20000>;
+ bus-width = <16>;
+ interrupts = <4>;
+ };
+
+ ethernet@2,00000000 {
+ compatible = "smsc,lan9118", "smsc,lan9115";
+ reg = <2 0x00000000 0x10000>;
+ interrupts = <3>;
+ phy-mode = "mii";
+ reg-io-width = <4>;
+ smsc,irq-active-high;
+ smsc,irq-push-pull;
+ clocks = <&mb_eth25mhz>;
+ vdd33a-supply = <&soc_fixed_3v3>; /* change this */
+ vddvario-supply = <&soc_fixed_3v3>; /* and this */
+ };
+
+ iofpga@3,00000000 {
+ compatible = "arm,amba-bus", "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 3 0 0x200000>;
+
+ kmi@060000 {
+ compatible = "arm,pl050", "arm,primecell";
+ reg = <0x060000 0x1000>;
+ interrupts = <8>;
+ clocks = <&soc_refclk24mhz>, <&soc_smc50mhz>;
+ clock-names = "KMIREFCLK", "apb_pclk";
+ };
+
+ kmi@070000 {
+ compatible = "arm,pl050", "arm,primecell";
+ reg = <0x070000 0x1000>;
+ interrupts = <8>;
+ clocks = <&soc_refclk24mhz>, <&soc_smc50mhz>;
+ clock-names = "KMIREFCLK", "apb_pclk";
+ };
+
+ wdt@0f0000 {
+ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0f0000 0x10000>;
+ interrupts = <7>;
+ clocks = <&soc_refclk24mhz>, <&soc_smc50mhz>;
+ clock-names = "wdogclk", "apb_pclk";
+ };
+
+ v2m_timer01: timer@110000 {
+ compatible = "arm,sp804", "arm,primecell";
+ reg = <0x110000 0x10000>;
+ interrupts = <9>;
+ clocks = <&soc_refclk24mhz>, <&soc_smc50mhz>;
+ clock-names = "timclken1", "apb_pclk";
+ };
+
+ v2m_timer23: timer@120000 {
+ compatible = "arm,sp804", "arm,primecell";
+ reg = <0x120000 0x10000>;
+ interrupts = <9>;
+ clocks = <&soc_refclk24mhz>, <&soc_smc50mhz>;
+ clock-names = "timclken1", "apb_pclk";
+ };
+
+ rtc@170000 {
+ compatible = "arm,pl031", "arm,primecell";
+ reg = <0x170000 0x10000>;
+ interrupts = <0>;
+ clocks = <&soc_smc50mhz>;
+ clock-names = "apb_pclk";
+ };
+ };
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/rtsm_ve-aemv8a.dts b/arch/arm64/boot/dts/rtsm_ve-aemv8a.dts
index 572005ea2217..f76eb9024a32 100644
--- a/arch/arm64/boot/dts/rtsm_ve-aemv8a.dts
+++ b/arch/arm64/boot/dts/rtsm_ve-aemv8a.dts
@@ -157,3 +157,5 @@
/include/ "rtsm_ve-motherboard.dtsi"
};
};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi b/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
index 2f2ecd217363..b683d4703582 100644
--- a/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
+++ b/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
@@ -182,6 +182,9 @@
interrupts = <14>;
clocks = <&v2m_oscclk1>, <&v2m_clk24mhz>;
clock-names = "clcdclk", "apb_pclk";
+ mode = "XVGA";
+ use_dma = <0>;
+ framebuffer = <0x18000000 0x00180000>;
};
virtio_block@0130000 {
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
new file mode 100644
index 000000000000..5562652c5316
--- /dev/null
+++ b/arch/arm64/crypto/Kconfig
@@ -0,0 +1,53 @@
+
+menuconfig ARM64_CRYPTO
+ bool "ARM64 Accelerated Cryptographic Algorithms"
+ depends on ARM64
+ help
+ Say Y here to choose from a selection of cryptographic algorithms
+ implemented using ARM64 specific CPU features or instructions.
+
+if ARM64_CRYPTO
+
+config CRYPTO_SHA1_ARM64_CE
+ tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_HASH
+
+config CRYPTO_SHA2_ARM64_CE
+ tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_HASH
+
+config CRYPTO_GHASH_ARM64_CE
+ tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_HASH
+
+config CRYPTO_AES_ARM64_CE
+ tristate "AES core cipher using ARMv8 Crypto Extensions"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_ALGAPI
+ select CRYPTO_AES
+
+config CRYPTO_AES_ARM64_CE_CCM
+ tristate "AES in CCM mode using ARMv8 Crypto Extensions"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_ALGAPI
+ select CRYPTO_AES
+ select CRYPTO_AEAD
+
+config CRYPTO_AES_ARM64_CE_BLK
+ tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_BLKCIPHER
+ select CRYPTO_AES
+ select CRYPTO_ABLK_HELPER
+
+config CRYPTO_AES_ARM64_NEON_BLK
+ tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_BLKCIPHER
+ select CRYPTO_AES
+ select CRYPTO_ABLK_HELPER
+
+endif
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
new file mode 100644
index 000000000000..2070a56ecc46
--- /dev/null
+++ b/arch/arm64/crypto/Makefile
@@ -0,0 +1,38 @@
+#
+# linux/arch/arm64/crypto/Makefile
+#
+# Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o
+sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o
+
+obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o
+sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o
+
+obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
+ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
+CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o
+aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_CE_BLK) += aes-ce-blk.o
+aes-ce-blk-y := aes-glue-ce.o aes-ce.o
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o
+aes-neon-blk-y := aes-glue-neon.o aes-neon.o
+
+AFLAGS_aes-ce.o := -DINTERLEAVE=2 -DINTERLEAVE_INLINE
+AFLAGS_aes-neon.o := -DINTERLEAVE=4
+
+CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS
+
+$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
+ $(call if_changed_dep,cc_o_c)
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
new file mode 100644
index 000000000000..432e4841cd81
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -0,0 +1,222 @@
+/*
+ * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
+ *
+ * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+ .text
+ .arch armv8-a+crypto
+
+ /*
+ * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
+ * u32 *macp, u8 const rk[], u32 rounds);
+ */
+ENTRY(ce_aes_ccm_auth_data)
+ ldr w8, [x3] /* leftover from prev round? */
+ ld1 {v0.2d}, [x0] /* load mac */
+ cbz w8, 1f
+ sub w8, w8, #16
+ eor v1.16b, v1.16b, v1.16b
+0: ldrb w7, [x1], #1 /* get 1 byte of input */
+ subs w2, w2, #1
+ add w8, w8, #1
+ ins v1.b[0], w7
+ ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */
+ beq 8f /* out of input? */
+ cbnz w8, 0b
+ eor v0.16b, v0.16b, v1.16b
+1: ld1 {v3.2d}, [x4] /* load first round key */
+ prfm pldl1strm, [x1]
+ cmp w5, #12 /* which key size? */
+ add x6, x4, #16
+ sub w7, w5, #2 /* modified # of rounds */
+ bmi 2f
+ bne 5f
+ mov v5.16b, v3.16b
+ b 4f
+2: mov v4.16b, v3.16b
+ ld1 {v5.2d}, [x6], #16 /* load 2nd round key */
+3: aese v0.16b, v4.16b
+ aesmc v0.16b, v0.16b
+4: ld1 {v3.2d}, [x6], #16 /* load next round key */
+ aese v0.16b, v5.16b
+ aesmc v0.16b, v0.16b
+5: ld1 {v4.2d}, [x6], #16 /* load next round key */
+ subs w7, w7, #3
+ aese v0.16b, v3.16b
+ aesmc v0.16b, v0.16b
+ ld1 {v5.2d}, [x6], #16 /* load next round key */
+ bpl 3b
+ aese v0.16b, v4.16b
+ subs w2, w2, #16 /* last data? */
+ eor v0.16b, v0.16b, v5.16b /* final round */
+ bmi 6f
+ ld1 {v1.16b}, [x1], #16 /* load next input block */
+ eor v0.16b, v0.16b, v1.16b /* xor with mac */
+ bne 1b
+6: st1 {v0.2d}, [x0] /* store mac */
+ beq 10f
+ adds w2, w2, #16
+ beq 10f
+ mov w8, w2
+7: ldrb w7, [x1], #1
+ umov w6, v0.b[0]
+ eor w6, w6, w7
+ strb w6, [x0], #1
+ subs w2, w2, #1
+ beq 10f
+ ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */
+ b 7b
+8: mov w7, w8
+ add w8, w8, #16
+9: ext v1.16b, v1.16b, v1.16b, #1
+ adds w7, w7, #1
+ bne 9b
+ eor v0.16b, v0.16b, v1.16b
+ st1 {v0.2d}, [x0]
+10: str w8, [x3]
+ ret
+ENDPROC(ce_aes_ccm_auth_data)
+
+ /*
+ * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
+ * u32 rounds);
+ */
+ENTRY(ce_aes_ccm_final)
+ ld1 {v3.2d}, [x2], #16 /* load first round key */
+ ld1 {v0.2d}, [x0] /* load mac */
+ cmp w3, #12 /* which key size? */
+ sub w3, w3, #2 /* modified # of rounds */
+ ld1 {v1.2d}, [x1] /* load 1st ctriv */
+ bmi 0f
+ bne 3f
+ mov v5.16b, v3.16b
+ b 2f
+0: mov v4.16b, v3.16b
+1: ld1 {v5.2d}, [x2], #16 /* load next round key */
+ aese v0.16b, v4.16b
+ aese v1.16b, v4.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+2: ld1 {v3.2d}, [x2], #16 /* load next round key */
+ aese v0.16b, v5.16b
+ aese v1.16b, v5.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+3: ld1 {v4.2d}, [x2], #16 /* load next round key */
+ subs w3, w3, #3
+ aese v0.16b, v3.16b
+ aese v1.16b, v3.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+ bpl 1b
+ aese v0.16b, v4.16b
+ aese v1.16b, v4.16b
+ /* final round key cancels out */
+ eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */
+ st1 {v0.2d}, [x0] /* store result */
+ ret
+ENDPROC(ce_aes_ccm_final)
+
+ .macro aes_ccm_do_crypt,enc
+ ldr x8, [x6, #8] /* load lower ctr */
+ ld1 {v0.2d}, [x5] /* load mac */
+ rev x8, x8 /* keep swabbed ctr in reg */
+0: /* outer loop */
+ ld1 {v1.1d}, [x6] /* load upper ctr */
+ prfm pldl1strm, [x1]
+ add x8, x8, #1
+ rev x9, x8
+ cmp w4, #12 /* which key size? */
+ sub w7, w4, #2 /* get modified # of rounds */
+ ins v1.d[1], x9 /* no carry in lower ctr */
+ ld1 {v3.2d}, [x3] /* load first round key */
+ add x10, x3, #16
+ bmi 1f
+ bne 4f
+ mov v5.16b, v3.16b
+ b 3f
+1: mov v4.16b, v3.16b
+ ld1 {v5.2d}, [x10], #16 /* load 2nd round key */
+2: /* inner loop: 3 rounds, 2x interleaved */
+ aese v0.16b, v4.16b
+ aese v1.16b, v4.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+3: ld1 {v3.2d}, [x10], #16 /* load next round key */
+ aese v0.16b, v5.16b
+ aese v1.16b, v5.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+4: ld1 {v4.2d}, [x10], #16 /* load next round key */
+ subs w7, w7, #3
+ aese v0.16b, v3.16b
+ aese v1.16b, v3.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+ ld1 {v5.2d}, [x10], #16 /* load next round key */
+ bpl 2b
+ aese v0.16b, v4.16b
+ aese v1.16b, v4.16b
+ subs w2, w2, #16
+ bmi 6f /* partial block? */
+ ld1 {v2.16b}, [x1], #16 /* load next input block */
+ .if \enc == 1
+ eor v2.16b, v2.16b, v5.16b /* final round enc+mac */
+ eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */
+ .else
+ eor v2.16b, v2.16b, v1.16b /* xor with crypted ctr */
+ eor v1.16b, v2.16b, v5.16b /* final round enc */
+ .endif
+ eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
+ st1 {v1.16b}, [x0], #16 /* write output block */
+ bne 0b
+ rev x8, x8
+ st1 {v0.2d}, [x5] /* store mac */
+ str x8, [x6, #8] /* store lsb end of ctr (BE) */
+5: ret
+
+6: eor v0.16b, v0.16b, v5.16b /* final round mac */
+ eor v1.16b, v1.16b, v5.16b /* final round enc */
+ st1 {v0.2d}, [x5] /* store mac */
+ add w2, w2, #16 /* process partial tail block */
+7: ldrb w9, [x1], #1 /* get 1 byte of input */
+ umov w6, v1.b[0] /* get top crypted ctr byte */
+ umov w7, v0.b[0] /* get top mac byte */
+ .if \enc == 1
+ eor w7, w7, w9
+ eor w9, w9, w6
+ .else
+ eor w9, w9, w6
+ eor w7, w7, w9
+ .endif
+ strb w9, [x0], #1 /* store out byte */
+ strb w7, [x5], #1 /* store mac byte */
+ subs w2, w2, #1
+ beq 5b
+ ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */
+ ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */
+ b 7b
+ .endm
+
+ /*
+ * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
+ * u8 const rk[], u32 rounds, u8 mac[],
+ * u8 ctr[]);
+ * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
+ * u8 const rk[], u32 rounds, u8 mac[],
+ * u8 ctr[]);
+ */
+ENTRY(ce_aes_ccm_encrypt)
+ aes_ccm_do_crypt 1
+ENDPROC(ce_aes_ccm_encrypt)
+
+ENTRY(ce_aes_ccm_decrypt)
+ aes_ccm_do_crypt 0
+ENDPROC(ce_aes_ccm_decrypt)
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
new file mode 100644
index 000000000000..9e6cdde9b43d
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -0,0 +1,297 @@
+/*
+ * aes-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions
+ *
+ * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/scatterwalk.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+static int num_rounds(struct crypto_aes_ctx *ctx)
+{
+ /*
+ * # of rounds specified by AES:
+ * 128 bit key 10 rounds
+ * 192 bit key 12 rounds
+ * 256 bit key 14 rounds
+ * => n byte key => 6 + (n/4) rounds
+ */
+ return 6 + ctx->key_length / 4;
+}
+
+asmlinkage void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
+ u32 *macp, u32 const rk[], u32 rounds);
+
+asmlinkage void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
+ u32 const rk[], u32 rounds, u8 mac[],
+ u8 ctr[]);
+
+asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
+ u32 const rk[], u32 rounds, u8 mac[],
+ u8 ctr[]);
+
+asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[],
+ u32 rounds);
+
+static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct crypto_aes_ctx *ctx = crypto_aead_ctx(tfm);
+ int ret;
+
+ ret = crypto_aes_expand_key(ctx, in_key, key_len);
+ if (!ret)
+ return 0;
+
+ tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+}
+
+static int ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+ if ((authsize & 1) || authsize < 4)
+ return -EINVAL;
+ return 0;
+}
+
+static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ __be32 *n = (__be32 *)&maciv[AES_BLOCK_SIZE - 8];
+ u32 l = req->iv[0] + 1;
+
+ /* verify that CCM dimension 'L' is set correctly in the IV */
+ if (l < 2 || l > 8)
+ return -EINVAL;
+
+ /* verify that msglen can in fact be represented in L bytes */
+ if (l < 4 && msglen >> (8 * l))
+ return -EOVERFLOW;
+
+ /*
+ * Even if the CCM spec allows L values of up to 8, the Linux cryptoapi
+ * uses a u32 type to represent msglen so the top 4 bytes are always 0.
+ */
+ n[0] = 0;
+ n[1] = cpu_to_be32(msglen);
+
+ memcpy(maciv, req->iv, AES_BLOCK_SIZE - l);
+
+ /*
+ * Meaning of byte 0 according to CCM spec (RFC 3610/NIST 800-38C)
+ * - bits 0..2 : max # of bytes required to represent msglen, minus 1
+ * (already set by caller)
+ * - bits 3..5 : size of auth tag (1 => 4 bytes, 2 => 6 bytes, etc)
+ * - bit 6 : indicates presence of authenticate-only data
+ */
+ maciv[0] |= (crypto_aead_authsize(aead) - 2) << 2;
+ if (req->assoclen)
+ maciv[0] |= 0x40;
+
+ memset(&req->iv[AES_BLOCK_SIZE - l], 0, l);
+ return 0;
+}
+
+static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
+ struct __packed { __be16 l; __be32 h; u16 len; } ltag;
+ struct scatter_walk walk;
+ u32 len = req->assoclen;
+ u32 macp = 0;
+
+ /* prepend the AAD with a length tag */
+ if (len < 0xff00) {
+ ltag.l = cpu_to_be16(len);
+ ltag.len = 2;
+ } else {
+ ltag.l = cpu_to_be16(0xfffe);
+ put_unaligned_be32(len, &ltag.h);
+ ltag.len = 6;
+ }
+
+ ce_aes_ccm_auth_data(mac, (u8 *)&ltag, ltag.len, &macp, ctx->key_enc,
+ num_rounds(ctx));
+ scatterwalk_start(&walk, req->assoc);
+
+ do {
+ u32 n = scatterwalk_clamp(&walk, len);
+ u8 *p;
+
+ if (!n) {
+ scatterwalk_start(&walk, sg_next(walk.sg));
+ n = scatterwalk_clamp(&walk, len);
+ }
+ p = scatterwalk_map(&walk);
+ ce_aes_ccm_auth_data(mac, p, n, &macp, ctx->key_enc,
+ num_rounds(ctx));
+ len -= n;
+
+ scatterwalk_unmap(p);
+ scatterwalk_advance(&walk, n);
+ scatterwalk_done(&walk, 0, len);
+ } while (len);
+}
+
+static int ccm_encrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
+ struct blkcipher_desc desc = { .info = req->iv };
+ struct blkcipher_walk walk;
+ u8 __aligned(8) mac[AES_BLOCK_SIZE];
+ u8 buf[AES_BLOCK_SIZE];
+ u32 len = req->cryptlen;
+ int err;
+
+ err = ccm_init_mac(req, mac, len);
+ if (err)
+ return err;
+
+ kernel_neon_begin_partial(6);
+
+ if (req->assoclen)
+ ccm_calculate_auth_mac(req, mac);
+
+ /* preserve the original iv for the final round */
+ memcpy(buf, req->iv, AES_BLOCK_SIZE);
+
+ blkcipher_walk_init(&walk, req->dst, req->src, len);
+ err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
+ AES_BLOCK_SIZE);
+
+ while (walk.nbytes) {
+ u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+
+ if (walk.nbytes == len)
+ tail = 0;
+
+ ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ walk.nbytes - tail, ctx->key_enc,
+ num_rounds(ctx), mac, walk.iv);
+
+ len -= walk.nbytes - tail;
+ err = blkcipher_walk_done(&desc, &walk, tail);
+ }
+ if (!err)
+ ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+
+ kernel_neon_end();
+
+ if (err)
+ return err;
+
+ /* copy authtag to end of dst */
+ scatterwalk_map_and_copy(mac, req->dst, req->cryptlen,
+ crypto_aead_authsize(aead), 1);
+
+ return 0;
+}
+
+static int ccm_decrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
+ unsigned int authsize = crypto_aead_authsize(aead);
+ struct blkcipher_desc desc = { .info = req->iv };
+ struct blkcipher_walk walk;
+ u8 __aligned(8) mac[AES_BLOCK_SIZE];
+ u8 buf[AES_BLOCK_SIZE];
+ u32 len = req->cryptlen - authsize;
+ int err;
+
+ err = ccm_init_mac(req, mac, len);
+ if (err)
+ return err;
+
+ kernel_neon_begin_partial(6);
+
+ if (req->assoclen)
+ ccm_calculate_auth_mac(req, mac);
+
+ /* preserve the original iv for the final round */
+ memcpy(buf, req->iv, AES_BLOCK_SIZE);
+
+ blkcipher_walk_init(&walk, req->dst, req->src, len);
+ err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
+ AES_BLOCK_SIZE);
+
+ while (walk.nbytes) {
+ u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+
+ if (walk.nbytes == len)
+ tail = 0;
+
+ ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ walk.nbytes - tail, ctx->key_enc,
+ num_rounds(ctx), mac, walk.iv);
+
+ len -= walk.nbytes - tail;
+ err = blkcipher_walk_done(&desc, &walk, tail);
+ }
+ if (!err)
+ ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+
+ kernel_neon_end();
+
+ if (err)
+ return err;
+
+ /* compare calculated auth tag with the stored one */
+ scatterwalk_map_and_copy(buf, req->src, req->cryptlen - authsize,
+ authsize, 0);
+
+ if (memcmp(mac, buf, authsize))
+ return -EBADMSG;
+ return 0;
+}
+
+static struct crypto_alg ccm_aes_alg = {
+ .cra_name = "ccm(aes)",
+ .cra_driver_name = "ccm-aes-ce",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_AEAD,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_aead_type,
+ .cra_module = THIS_MODULE,
+ .cra_aead = {
+ .ivsize = AES_BLOCK_SIZE,
+ .maxauthsize = AES_BLOCK_SIZE,
+ .setkey = ccm_setkey,
+ .setauthsize = ccm_setauthsize,
+ .encrypt = ccm_encrypt,
+ .decrypt = ccm_decrypt,
+ }
+};
+
+static int __init aes_mod_init(void)
+{
+ if (!(elf_hwcap & HWCAP_AES))
+ return -ENODEV;
+ return crypto_register_alg(&ccm_aes_alg);
+}
+
+static void __exit aes_mod_exit(void)
+{
+ crypto_unregister_alg(&ccm_aes_alg);
+}
+
+module_init(aes_mod_init);
+module_exit(aes_mod_exit);
+
+MODULE_DESCRIPTION("Synchronous AES in CCM mode using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("ccm(aes)");
diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c
new file mode 100644
index 000000000000..2075e1acae6b
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-cipher.c
@@ -0,0 +1,155 @@
+/*
+ * aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <crypto/aes.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+struct aes_block {
+ u8 b[AES_BLOCK_SIZE];
+};
+
+static int num_rounds(struct crypto_aes_ctx *ctx)
+{
+ /*
+ * # of rounds specified by AES:
+ * 128 bit key 10 rounds
+ * 192 bit key 12 rounds
+ * 256 bit key 14 rounds
+ * => n byte key => 6 + (n/4) rounds
+ */
+ return 6 + ctx->key_length / 4;
+}
+
+static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
+{
+ struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct aes_block *out = (struct aes_block *)dst;
+ struct aes_block const *in = (struct aes_block *)src;
+ void *dummy0;
+ int dummy1;
+
+ kernel_neon_begin_partial(4);
+
+ __asm__(" ld1 {v0.16b}, %[in] ;"
+ " ld1 {v1.2d}, [%[key]], #16 ;"
+ " cmp %w[rounds], #10 ;"
+ " bmi 0f ;"
+ " bne 3f ;"
+ " mov v3.16b, v1.16b ;"
+ " b 2f ;"
+ "0: mov v2.16b, v1.16b ;"
+ " ld1 {v3.2d}, [%[key]], #16 ;"
+ "1: aese v0.16b, v2.16b ;"
+ " aesmc v0.16b, v0.16b ;"
+ "2: ld1 {v1.2d}, [%[key]], #16 ;"
+ " aese v0.16b, v3.16b ;"
+ " aesmc v0.16b, v0.16b ;"
+ "3: ld1 {v2.2d}, [%[key]], #16 ;"
+ " subs %w[rounds], %w[rounds], #3 ;"
+ " aese v0.16b, v1.16b ;"
+ " aesmc v0.16b, v0.16b ;"
+ " ld1 {v3.2d}, [%[key]], #16 ;"
+ " bpl 1b ;"
+ " aese v0.16b, v2.16b ;"
+ " eor v0.16b, v0.16b, v3.16b ;"
+ " st1 {v0.16b}, %[out] ;"
+
+ : [out] "=Q"(*out),
+ [key] "=r"(dummy0),
+ [rounds] "=r"(dummy1)
+ : [in] "Q"(*in),
+ "1"(ctx->key_enc),
+ "2"(num_rounds(ctx) - 2)
+ : "cc");
+
+ kernel_neon_end();
+}
+
+static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
+{
+ struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct aes_block *out = (struct aes_block *)dst;
+ struct aes_block const *in = (struct aes_block *)src;
+ void *dummy0;
+ int dummy1;
+
+ kernel_neon_begin_partial(4);
+
+ __asm__(" ld1 {v0.16b}, %[in] ;"
+ " ld1 {v1.2d}, [%[key]], #16 ;"
+ " cmp %w[rounds], #10 ;"
+ " bmi 0f ;"
+ " bne 3f ;"
+ " mov v3.16b, v1.16b ;"
+ " b 2f ;"
+ "0: mov v2.16b, v1.16b ;"
+ " ld1 {v3.2d}, [%[key]], #16 ;"
+ "1: aesd v0.16b, v2.16b ;"
+ " aesimc v0.16b, v0.16b ;"
+ "2: ld1 {v1.2d}, [%[key]], #16 ;"
+ " aesd v0.16b, v3.16b ;"
+ " aesimc v0.16b, v0.16b ;"
+ "3: ld1 {v2.2d}, [%[key]], #16 ;"
+ " subs %w[rounds], %w[rounds], #3 ;"
+ " aesd v0.16b, v1.16b ;"
+ " aesimc v0.16b, v0.16b ;"
+ " ld1 {v3.2d}, [%[key]], #16 ;"
+ " bpl 1b ;"
+ " aesd v0.16b, v2.16b ;"
+ " eor v0.16b, v0.16b, v3.16b ;"
+ " st1 {v0.16b}, %[out] ;"
+
+ : [out] "=Q"(*out),
+ [key] "=r"(dummy0),
+ [rounds] "=r"(dummy1)
+ : [in] "Q"(*in),
+ "1"(ctx->key_dec),
+ "2"(num_rounds(ctx) - 2)
+ : "cc");
+
+ kernel_neon_end();
+}
+
+static struct crypto_alg aes_alg = {
+ .cra_name = "aes",
+ .cra_driver_name = "aes-ce",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_cipher = {
+ .cia_min_keysize = AES_MIN_KEY_SIZE,
+ .cia_max_keysize = AES_MAX_KEY_SIZE,
+ .cia_setkey = crypto_aes_set_key,
+ .cia_encrypt = aes_cipher_encrypt,
+ .cia_decrypt = aes_cipher_decrypt
+ }
+};
+
+static int __init aes_mod_init(void)
+{
+ return crypto_register_alg(&aes_alg);
+}
+
+static void __exit aes_mod_exit(void)
+{
+ crypto_unregister_alg(&aes_alg);
+}
+
+module_cpu_feature_match(AES, aes_mod_init);
+module_exit(aes_mod_exit);
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
new file mode 100644
index 000000000000..685a18f731eb
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce.S
@@ -0,0 +1,133 @@
+/*
+ * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with
+ * Crypto Extensions
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#define AES_ENTRY(func) ENTRY(ce_ ## func)
+#define AES_ENDPROC(func) ENDPROC(ce_ ## func)
+
+ .arch armv8-a+crypto
+
+ /* preload all round keys */
+ .macro load_round_keys, rounds, rk
+ cmp \rounds, #12
+ blo 2222f /* 128 bits */
+ beq 1111f /* 192 bits */
+ ld1 {v17.16b-v18.16b}, [\rk], #32
+1111: ld1 {v19.16b-v20.16b}, [\rk], #32
+2222: ld1 {v21.16b-v24.16b}, [\rk], #64
+ ld1 {v25.16b-v28.16b}, [\rk], #64
+ ld1 {v29.16b-v31.16b}, [\rk]
+ .endm
+
+ /* prepare for encryption with key in rk[] */
+ .macro enc_prepare, rounds, rk, ignore
+ load_round_keys \rounds, \rk
+ .endm
+
+ /* prepare for encryption (again) but with new key in rk[] */
+ .macro enc_switch_key, rounds, rk, ignore
+ load_round_keys \rounds, \rk
+ .endm
+
+ /* prepare for decryption with key in rk[] */
+ .macro dec_prepare, rounds, rk, ignore
+ load_round_keys \rounds, \rk
+ .endm
+
+ .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3
+ aes\de \i0\().16b, \k\().16b
+ .ifnb \i1
+ aes\de \i1\().16b, \k\().16b
+ .ifnb \i3
+ aes\de \i2\().16b, \k\().16b
+ aes\de \i3\().16b, \k\().16b
+ .endif
+ .endif
+ aes\mc \i0\().16b, \i0\().16b
+ .ifnb \i1
+ aes\mc \i1\().16b, \i1\().16b
+ .ifnb \i3
+ aes\mc \i2\().16b, \i2\().16b
+ aes\mc \i3\().16b, \i3\().16b
+ .endif
+ .endif
+ .endm
+
+ /* up to 4 interleaved encryption rounds with the same round key */
+ .macro round_Nx, enc, k, i0, i1, i2, i3
+ .ifc \enc, e
+ do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3
+ .else
+ do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3
+ .endif
+ .endm
+
+ /* up to 4 interleaved final rounds */
+ .macro fin_round_Nx, de, k, k2, i0, i1, i2, i3
+ aes\de \i0\().16b, \k\().16b
+ .ifnb \i1
+ aes\de \i1\().16b, \k\().16b
+ .ifnb \i3
+ aes\de \i2\().16b, \k\().16b
+ aes\de \i3\().16b, \k\().16b
+ .endif
+ .endif
+ eor \i0\().16b, \i0\().16b, \k2\().16b
+ .ifnb \i1
+ eor \i1\().16b, \i1\().16b, \k2\().16b
+ .ifnb \i3
+ eor \i2\().16b, \i2\().16b, \k2\().16b
+ eor \i3\().16b, \i3\().16b, \k2\().16b
+ .endif
+ .endif
+ .endm
+
+ /* up to 4 interleaved blocks */
+ .macro do_block_Nx, enc, rounds, i0, i1, i2, i3
+ cmp \rounds, #12
+ blo 2222f /* 128 bits */
+ beq 1111f /* 192 bits */
+ round_Nx \enc, v17, \i0, \i1, \i2, \i3
+ round_Nx \enc, v18, \i0, \i1, \i2, \i3
+1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3
+ round_Nx \enc, v20, \i0, \i1, \i2, \i3
+2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29
+ round_Nx \enc, \key, \i0, \i1, \i2, \i3
+ .endr
+ fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3
+ .endm
+
+ .macro encrypt_block, in, rounds, t0, t1, t2
+ do_block_Nx e, \rounds, \in
+ .endm
+
+ .macro encrypt_block2x, i0, i1, rounds, t0, t1, t2
+ do_block_Nx e, \rounds, \i0, \i1
+ .endm
+
+ .macro encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
+ do_block_Nx e, \rounds, \i0, \i1, \i2, \i3
+ .endm
+
+ .macro decrypt_block, in, rounds, t0, t1, t2
+ do_block_Nx d, \rounds, \in
+ .endm
+
+ .macro decrypt_block2x, i0, i1, rounds, t0, t1, t2
+ do_block_Nx d, \rounds, \i0, \i1
+ .endm
+
+ .macro decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
+ do_block_Nx d, \rounds, \i0, \i1, \i2, \i3
+ .endm
+
+#include "aes-modes.S"
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
new file mode 100644
index 000000000000..60f2f4c12256
--- /dev/null
+++ b/arch/arm64/crypto/aes-glue.c
@@ -0,0 +1,446 @@
+/*
+ * linux/arch/arm64/crypto/aes-glue.c - wrapper code for ARMv8 AES
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/hwcap.h>
+#include <crypto/aes.h>
+#include <crypto/ablk_helper.h>
+#include <crypto/algapi.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+
+#ifdef USE_V8_CRYPTO_EXTENSIONS
+#define MODE "ce"
+#define PRIO 300
+#define aes_ecb_encrypt ce_aes_ecb_encrypt
+#define aes_ecb_decrypt ce_aes_ecb_decrypt
+#define aes_cbc_encrypt ce_aes_cbc_encrypt
+#define aes_cbc_decrypt ce_aes_cbc_decrypt
+#define aes_ctr_encrypt ce_aes_ctr_encrypt
+#define aes_xts_encrypt ce_aes_xts_encrypt
+#define aes_xts_decrypt ce_aes_xts_decrypt
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
+#else
+#define MODE "neon"
+#define PRIO 200
+#define aes_ecb_encrypt neon_aes_ecb_encrypt
+#define aes_ecb_decrypt neon_aes_ecb_decrypt
+#define aes_cbc_encrypt neon_aes_cbc_encrypt
+#define aes_cbc_decrypt neon_aes_cbc_decrypt
+#define aes_ctr_encrypt neon_aes_ctr_encrypt
+#define aes_xts_encrypt neon_aes_xts_encrypt
+#define aes_xts_decrypt neon_aes_xts_decrypt
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON");
+MODULE_ALIAS("ecb(aes)");
+MODULE_ALIAS("cbc(aes)");
+MODULE_ALIAS("ctr(aes)");
+MODULE_ALIAS("xts(aes)");
+#endif
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+/* defined in aes-modes.S */
+asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, int first);
+asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, int first);
+
+asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 iv[], int first);
+asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 iv[], int first);
+
+asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 ctr[], int first);
+
+asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
+ int rounds, int blocks, u8 const rk2[], u8 iv[],
+ int first);
+asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
+ int rounds, int blocks, u8 const rk2[], u8 iv[],
+ int first);
+
+struct crypto_aes_xts_ctx {
+ struct crypto_aes_ctx key1;
+ struct crypto_aes_ctx __aligned(8) key2;
+};
+
+static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+ int ret;
+
+ ret = crypto_aes_expand_key(&ctx->key1, in_key, key_len / 2);
+ if (!ret)
+ ret = crypto_aes_expand_key(&ctx->key2, &in_key[key_len / 2],
+ key_len / 2);
+ if (!ret)
+ return 0;
+
+ tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+}
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_enc, rounds, blocks, first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+ return err;
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_dec, rounds, blocks, first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+ return err;
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_enc, rounds, blocks, walk.iv,
+ first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+ return err;
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_dec, rounds, blocks, walk.iv,
+ first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+ return err;
+}
+
+static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key_length / 4;
+ struct blkcipher_walk walk;
+ int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+
+ first = 1;
+ kernel_neon_begin();
+ while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+ aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_enc, rounds, blocks, walk.iv,
+ first);
+ first = 0;
+ nbytes -= blocks * AES_BLOCK_SIZE;
+ if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE)
+ break;
+ err = blkcipher_walk_done(desc, &walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ }
+ if (nbytes) {
+ u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
+ u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+ u8 __aligned(8) tail[AES_BLOCK_SIZE];
+
+ /*
+ * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
+ * to tell aes_ctr_encrypt() to only read half a block.
+ */
+ blocks = (nbytes <= 8) ? -1 : 1;
+
+ aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds,
+ blocks, walk.iv, first);
+ memcpy(tdst, tail, nbytes);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key1.key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key1.key_enc, rounds, blocks,
+ (u8 *)ctx->key2.key_enc, walk.iv, first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key1.key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key1.key_dec, rounds, blocks,
+ (u8 *)ctx->key2.key_enc, walk.iv, first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static struct crypto_alg aes_algs[] = { {
+ .cra_name = "__ecb-aes-" MODE,
+ .cra_driver_name = "__driver-ecb-aes-" MODE,
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = crypto_aes_set_key,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ },
+}, {
+ .cra_name = "__cbc-aes-" MODE,
+ .cra_driver_name = "__driver-cbc-aes-" MODE,
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = crypto_aes_set_key,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+}, {
+ .cra_name = "__ctr-aes-" MODE,
+ .cra_driver_name = "__driver-ctr-aes-" MODE,
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = crypto_aes_set_key,
+ .encrypt = ctr_encrypt,
+ .decrypt = ctr_encrypt,
+ },
+}, {
+ .cra_name = "__xts-aes-" MODE,
+ .cra_driver_name = "__driver-xts-aes-" MODE,
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = xts_set_key,
+ .encrypt = xts_encrypt,
+ .decrypt = xts_decrypt,
+ },
+}, {
+ .cra_name = "ecb(aes)",
+ .cra_driver_name = "ecb-aes-" MODE,
+ .cra_priority = PRIO,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+}, {
+ .cra_name = "cbc(aes)",
+ .cra_driver_name = "cbc-aes-" MODE,
+ .cra_priority = PRIO,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+}, {
+ .cra_name = "ctr(aes)",
+ .cra_driver_name = "ctr-aes-" MODE,
+ .cra_priority = PRIO,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+}, {
+ .cra_name = "xts(aes)",
+ .cra_driver_name = "xts-aes-" MODE,
+ .cra_priority = PRIO,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+} };
+
+static int __init aes_init(void)
+{
+ return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+static void __exit aes_exit(void)
+{
+ crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+#ifdef USE_V8_CRYPTO_EXTENSIONS
+module_cpu_feature_match(AES, aes_init);
+#else
+module_init(aes_init);
+#endif
+module_exit(aes_exit);
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
new file mode 100644
index 000000000000..f6e372c528eb
--- /dev/null
+++ b/arch/arm64/crypto/aes-modes.S
@@ -0,0 +1,532 @@
+/*
+ * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* included by aes-ce.S and aes-neon.S */
+
+ .text
+ .align 4
+
+/*
+ * There are several ways to instantiate this code:
+ * - no interleave, all inline
+ * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2)
+ * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE)
+ * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4)
+ * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE)
+ *
+ * Macros imported by this code:
+ * - enc_prepare - setup NEON registers for encryption
+ * - dec_prepare - setup NEON registers for decryption
+ * - enc_switch_key - change to new key after having prepared for encryption
+ * - encrypt_block - encrypt a single block
+ * - decrypt block - decrypt a single block
+ * - encrypt_block2x - encrypt 2 blocks in parallel (if INTERLEAVE == 2)
+ * - decrypt_block2x - decrypt 2 blocks in parallel (if INTERLEAVE == 2)
+ * - encrypt_block4x - encrypt 4 blocks in parallel (if INTERLEAVE == 4)
+ * - decrypt_block4x - decrypt 4 blocks in parallel (if INTERLEAVE == 4)
+ */
+
+#if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE)
+#define FRAME_PUSH stp x29, x30, [sp,#-16]! ; mov x29, sp
+#define FRAME_POP ldp x29, x30, [sp],#16
+
+#if INTERLEAVE == 2
+
+aes_encrypt_block2x:
+ encrypt_block2x v0, v1, w3, x2, x6, w7
+ ret
+ENDPROC(aes_encrypt_block2x)
+
+aes_decrypt_block2x:
+ decrypt_block2x v0, v1, w3, x2, x6, w7
+ ret
+ENDPROC(aes_decrypt_block2x)
+
+#elif INTERLEAVE == 4
+
+aes_encrypt_block4x:
+ encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
+ ret
+ENDPROC(aes_encrypt_block4x)
+
+aes_decrypt_block4x:
+ decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
+ ret
+ENDPROC(aes_decrypt_block4x)
+
+#else
+#error INTERLEAVE should equal 2 or 4
+#endif
+
+ .macro do_encrypt_block2x
+ bl aes_encrypt_block2x
+ .endm
+
+ .macro do_decrypt_block2x
+ bl aes_decrypt_block2x
+ .endm
+
+ .macro do_encrypt_block4x
+ bl aes_encrypt_block4x
+ .endm
+
+ .macro do_decrypt_block4x
+ bl aes_decrypt_block4x
+ .endm
+
+#else
+#define FRAME_PUSH
+#define FRAME_POP
+
+ .macro do_encrypt_block2x
+ encrypt_block2x v0, v1, w3, x2, x6, w7
+ .endm
+
+ .macro do_decrypt_block2x
+ decrypt_block2x v0, v1, w3, x2, x6, w7
+ .endm
+
+ .macro do_encrypt_block4x
+ encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
+ .endm
+
+ .macro do_decrypt_block4x
+ decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
+ .endm
+
+#endif
+
+ /*
+ * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, int first)
+ * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, int first)
+ */
+
+AES_ENTRY(aes_ecb_encrypt)
+ FRAME_PUSH
+ cbz w5, .LecbencloopNx
+
+ enc_prepare w3, x2, x5
+
+.LecbencloopNx:
+#if INTERLEAVE >= 2
+ subs w4, w4, #INTERLEAVE
+ bmi .Lecbenc1x
+#if INTERLEAVE == 2
+ ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
+ do_encrypt_block2x
+ st1 {v0.16b-v1.16b}, [x0], #32
+#else
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
+ do_encrypt_block4x
+ st1 {v0.16b-v3.16b}, [x0], #64
+#endif
+ b .LecbencloopNx
+.Lecbenc1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lecbencout
+#endif
+.Lecbencloop:
+ ld1 {v0.16b}, [x1], #16 /* get next pt block */
+ encrypt_block v0, w3, x2, x5, w6
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ bne .Lecbencloop
+.Lecbencout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_ecb_encrypt)
+
+
+AES_ENTRY(aes_ecb_decrypt)
+ FRAME_PUSH
+ cbz w5, .LecbdecloopNx
+
+ dec_prepare w3, x2, x5
+
+.LecbdecloopNx:
+#if INTERLEAVE >= 2
+ subs w4, w4, #INTERLEAVE
+ bmi .Lecbdec1x
+#if INTERLEAVE == 2
+ ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
+ do_decrypt_block2x
+ st1 {v0.16b-v1.16b}, [x0], #32
+#else
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
+ do_decrypt_block4x
+ st1 {v0.16b-v3.16b}, [x0], #64
+#endif
+ b .LecbdecloopNx
+.Lecbdec1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lecbdecout
+#endif
+.Lecbdecloop:
+ ld1 {v0.16b}, [x1], #16 /* get next ct block */
+ decrypt_block v0, w3, x2, x5, w6
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ bne .Lecbdecloop
+.Lecbdecout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_ecb_decrypt)
+
+
+ /*
+ * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 iv[], int first)
+ * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 iv[], int first)
+ */
+
+AES_ENTRY(aes_cbc_encrypt)
+ cbz w6, .Lcbcencloop
+
+ ld1 {v0.16b}, [x5] /* get iv */
+ enc_prepare w3, x2, x5
+
+.Lcbcencloop:
+ ld1 {v1.16b}, [x1], #16 /* get next pt block */
+ eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */
+ encrypt_block v0, w3, x2, x5, w6
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ bne .Lcbcencloop
+ ret
+AES_ENDPROC(aes_cbc_encrypt)
+
+
+AES_ENTRY(aes_cbc_decrypt)
+ FRAME_PUSH
+ cbz w6, .LcbcdecloopNx
+
+ ld1 {v7.16b}, [x5] /* get iv */
+ dec_prepare w3, x2, x5
+
+.LcbcdecloopNx:
+#if INTERLEAVE >= 2
+ subs w4, w4, #INTERLEAVE
+ bmi .Lcbcdec1x
+#if INTERLEAVE == 2
+ ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
+ mov v2.16b, v0.16b
+ mov v3.16b, v1.16b
+ do_decrypt_block2x
+ eor v0.16b, v0.16b, v7.16b
+ eor v1.16b, v1.16b, v2.16b
+ mov v7.16b, v3.16b
+ st1 {v0.16b-v1.16b}, [x0], #32
+#else
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
+ mov v4.16b, v0.16b
+ mov v5.16b, v1.16b
+ mov v6.16b, v2.16b
+ do_decrypt_block4x
+ sub x1, x1, #16
+ eor v0.16b, v0.16b, v7.16b
+ eor v1.16b, v1.16b, v4.16b
+ ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
+ eor v2.16b, v2.16b, v5.16b
+ eor v3.16b, v3.16b, v6.16b
+ st1 {v0.16b-v3.16b}, [x0], #64
+#endif
+ b .LcbcdecloopNx
+.Lcbcdec1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lcbcdecout
+#endif
+.Lcbcdecloop:
+ ld1 {v1.16b}, [x1], #16 /* get next ct block */
+ mov v0.16b, v1.16b /* ...and copy to v0 */
+ decrypt_block v0, w3, x2, x5, w6
+ eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
+ mov v7.16b, v1.16b /* ct is next iv */
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ bne .Lcbcdecloop
+.Lcbcdecout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_cbc_decrypt)
+
+
+ /*
+ * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 ctr[], int first)
+ */
+
+AES_ENTRY(aes_ctr_encrypt)
+ FRAME_PUSH
+ cbnz w6, .Lctrfirst /* 1st time around? */
+ umov x5, v4.d[1] /* keep swabbed ctr in reg */
+ rev x5, x5
+#if INTERLEAVE >= 2
+ cmn w5, w4 /* 32 bit overflow? */
+ bcs .Lctrinc
+ add x5, x5, #1 /* increment BE ctr */
+ b .LctrincNx
+#else
+ b .Lctrinc
+#endif
+.Lctrfirst:
+ enc_prepare w3, x2, x6
+ ld1 {v4.16b}, [x5]
+ umov x5, v4.d[1] /* keep swabbed ctr in reg */
+ rev x5, x5
+#if INTERLEAVE >= 2
+ cmn w5, w4 /* 32 bit overflow? */
+ bcs .Lctrloop
+.LctrloopNx:
+ subs w4, w4, #INTERLEAVE
+ bmi .Lctr1x
+#if INTERLEAVE == 2
+ mov v0.8b, v4.8b
+ mov v1.8b, v4.8b
+ rev x7, x5
+ add x5, x5, #1
+ ins v0.d[1], x7
+ rev x7, x5
+ add x5, x5, #1
+ ins v1.d[1], x7
+ ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */
+ do_encrypt_block2x
+ eor v0.16b, v0.16b, v2.16b
+ eor v1.16b, v1.16b, v3.16b
+ st1 {v0.16b-v1.16b}, [x0], #32
+#else
+ ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
+ dup v7.4s, w5
+ mov v0.16b, v4.16b
+ add v7.4s, v7.4s, v8.4s
+ mov v1.16b, v4.16b
+ rev32 v8.16b, v7.16b
+ mov v2.16b, v4.16b
+ mov v3.16b, v4.16b
+ mov v1.s[3], v8.s[0]
+ mov v2.s[3], v8.s[1]
+ mov v3.s[3], v8.s[2]
+ ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
+ do_encrypt_block4x
+ eor v0.16b, v5.16b, v0.16b
+ ld1 {v5.16b}, [x1], #16 /* get 1 input block */
+ eor v1.16b, v6.16b, v1.16b
+ eor v2.16b, v7.16b, v2.16b
+ eor v3.16b, v5.16b, v3.16b
+ st1 {v0.16b-v3.16b}, [x0], #64
+ add x5, x5, #INTERLEAVE
+#endif
+ cbz w4, .LctroutNx
+.LctrincNx:
+ rev x7, x5
+ ins v4.d[1], x7
+ b .LctrloopNx
+.LctroutNx:
+ sub x5, x5, #1
+ rev x7, x5
+ ins v4.d[1], x7
+ b .Lctrout
+.Lctr1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lctrout
+#endif
+.Lctrloop:
+ mov v0.16b, v4.16b
+ encrypt_block v0, w3, x2, x6, w7
+ subs w4, w4, #1
+ bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */
+ ld1 {v3.16b}, [x1], #16
+ eor v3.16b, v0.16b, v3.16b
+ st1 {v3.16b}, [x0], #16
+ beq .Lctrout
+.Lctrinc:
+ adds x5, x5, #1 /* increment BE ctr */
+ rev x7, x5
+ ins v4.d[1], x7
+ bcc .Lctrloop /* no overflow? */
+ umov x7, v4.d[0] /* load upper word of ctr */
+ rev x7, x7 /* ... to handle the carry */
+ add x7, x7, #1
+ rev x7, x7
+ ins v4.d[0], x7
+ b .Lctrloop
+.Lctrhalfblock:
+ ld1 {v3.8b}, [x1]
+ eor v3.8b, v0.8b, v3.8b
+ st1 {v3.8b}, [x0]
+.Lctrout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_ctr_encrypt)
+ .ltorg
+
+
+ /*
+ * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+ * int blocks, u8 const rk2[], u8 iv[], int first)
+ * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+ * int blocks, u8 const rk2[], u8 iv[], int first)
+ */
+
+ .macro next_tweak, out, in, const, tmp
+ sshr \tmp\().2d, \in\().2d, #63
+ and \tmp\().16b, \tmp\().16b, \const\().16b
+ add \out\().2d, \in\().2d, \in\().2d
+ ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
+ eor \out\().16b, \out\().16b, \tmp\().16b
+ .endm
+
+.Lxts_mul_x:
+ .word 1, 0, 0x87, 0
+
+AES_ENTRY(aes_xts_encrypt)
+ FRAME_PUSH
+ cbz w7, .LxtsencloopNx
+
+ ld1 {v4.16b}, [x6]
+ enc_prepare w3, x5, x6
+ encrypt_block v4, w3, x5, x6, w7 /* first tweak */
+ enc_switch_key w3, x2, x6
+ ldr q7, .Lxts_mul_x
+ b .LxtsencNx
+
+.LxtsencloopNx:
+ ldr q7, .Lxts_mul_x
+ next_tweak v4, v4, v7, v8
+.LxtsencNx:
+#if INTERLEAVE >= 2
+ subs w4, w4, #INTERLEAVE
+ bmi .Lxtsenc1x
+#if INTERLEAVE == 2
+ ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
+ next_tweak v5, v4, v7, v8
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ do_encrypt_block2x
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ st1 {v0.16b-v1.16b}, [x0], #32
+ cbz w4, .LxtsencoutNx
+ next_tweak v4, v5, v7, v8
+ b .LxtsencNx
+.LxtsencoutNx:
+ mov v4.16b, v5.16b
+ b .Lxtsencout
+#else
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
+ next_tweak v5, v4, v7, v8
+ eor v0.16b, v0.16b, v4.16b
+ next_tweak v6, v5, v7, v8
+ eor v1.16b, v1.16b, v5.16b
+ eor v2.16b, v2.16b, v6.16b
+ next_tweak v7, v6, v7, v8
+ eor v3.16b, v3.16b, v7.16b
+ do_encrypt_block4x
+ eor v3.16b, v3.16b, v7.16b
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ eor v2.16b, v2.16b, v6.16b
+ st1 {v0.16b-v3.16b}, [x0], #64
+ mov v4.16b, v7.16b
+ cbz w4, .Lxtsencout
+ b .LxtsencloopNx
+#endif
+.Lxtsenc1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lxtsencout
+#endif
+.Lxtsencloop:
+ ld1 {v1.16b}, [x1], #16
+ eor v0.16b, v1.16b, v4.16b
+ encrypt_block v0, w3, x2, x6, w7
+ eor v0.16b, v0.16b, v4.16b
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ beq .Lxtsencout
+ next_tweak v4, v4, v7, v8
+ b .Lxtsencloop
+.Lxtsencout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_xts_encrypt)
+
+
+AES_ENTRY(aes_xts_decrypt)
+ FRAME_PUSH
+ cbz w7, .LxtsdecloopNx
+
+ ld1 {v4.16b}, [x6]
+ enc_prepare w3, x5, x6
+ encrypt_block v4, w3, x5, x6, w7 /* first tweak */
+ dec_prepare w3, x2, x6
+ ldr q7, .Lxts_mul_x
+ b .LxtsdecNx
+
+.LxtsdecloopNx:
+ ldr q7, .Lxts_mul_x
+ next_tweak v4, v4, v7, v8
+.LxtsdecNx:
+#if INTERLEAVE >= 2
+ subs w4, w4, #INTERLEAVE
+ bmi .Lxtsdec1x
+#if INTERLEAVE == 2
+ ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
+ next_tweak v5, v4, v7, v8
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ do_decrypt_block2x
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ st1 {v0.16b-v1.16b}, [x0], #32
+ cbz w4, .LxtsdecoutNx
+ next_tweak v4, v5, v7, v8
+ b .LxtsdecNx
+.LxtsdecoutNx:
+ mov v4.16b, v5.16b
+ b .Lxtsdecout
+#else
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
+ next_tweak v5, v4, v7, v8
+ eor v0.16b, v0.16b, v4.16b
+ next_tweak v6, v5, v7, v8
+ eor v1.16b, v1.16b, v5.16b
+ eor v2.16b, v2.16b, v6.16b
+ next_tweak v7, v6, v7, v8
+ eor v3.16b, v3.16b, v7.16b
+ do_decrypt_block4x
+ eor v3.16b, v3.16b, v7.16b
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ eor v2.16b, v2.16b, v6.16b
+ st1 {v0.16b-v3.16b}, [x0], #64
+ mov v4.16b, v7.16b
+ cbz w4, .Lxtsdecout
+ b .LxtsdecloopNx
+#endif
+.Lxtsdec1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lxtsdecout
+#endif
+.Lxtsdecloop:
+ ld1 {v1.16b}, [x1], #16
+ eor v0.16b, v1.16b, v4.16b
+ decrypt_block v0, w3, x2, x6, w7
+ eor v0.16b, v0.16b, v4.16b
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ beq .Lxtsdecout
+ next_tweak v4, v4, v7, v8
+ b .Lxtsdecloop
+.Lxtsdecout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_xts_decrypt)
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
new file mode 100644
index 000000000000..b93170e1cc93
--- /dev/null
+++ b/arch/arm64/crypto/aes-neon.S
@@ -0,0 +1,382 @@
+/*
+ * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#define AES_ENTRY(func) ENTRY(neon_ ## func)
+#define AES_ENDPROC(func) ENDPROC(neon_ ## func)
+
+ /* multiply by polynomial 'x' in GF(2^8) */
+ .macro mul_by_x, out, in, temp, const
+ sshr \temp, \in, #7
+ add \out, \in, \in
+ and \temp, \temp, \const
+ eor \out, \out, \temp
+ .endm
+
+ /* preload the entire Sbox */
+ .macro prepare, sbox, shiftrows, temp
+ adr \temp, \sbox
+ movi v12.16b, #0x40
+ ldr q13, \shiftrows
+ movi v14.16b, #0x1b
+ ld1 {v16.16b-v19.16b}, [\temp], #64
+ ld1 {v20.16b-v23.16b}, [\temp], #64
+ ld1 {v24.16b-v27.16b}, [\temp], #64
+ ld1 {v28.16b-v31.16b}, [\temp]
+ .endm
+
+ /* do preload for encryption */
+ .macro enc_prepare, ignore0, ignore1, temp
+ prepare .LForward_Sbox, .LForward_ShiftRows, \temp
+ .endm
+
+ .macro enc_switch_key, ignore0, ignore1, temp
+ /* do nothing */
+ .endm
+
+ /* do preload for decryption */
+ .macro dec_prepare, ignore0, ignore1, temp
+ prepare .LReverse_Sbox, .LReverse_ShiftRows, \temp
+ .endm
+
+ /* apply SubBytes transformation using the the preloaded Sbox */
+ .macro sub_bytes, in
+ sub v9.16b, \in\().16b, v12.16b
+ tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b
+ sub v10.16b, v9.16b, v12.16b
+ tbx \in\().16b, {v20.16b-v23.16b}, v9.16b
+ sub v11.16b, v10.16b, v12.16b
+ tbx \in\().16b, {v24.16b-v27.16b}, v10.16b
+ tbx \in\().16b, {v28.16b-v31.16b}, v11.16b
+ .endm
+
+ /* apply MixColumns transformation */
+ .macro mix_columns, in
+ mul_by_x v10.16b, \in\().16b, v9.16b, v14.16b
+ rev32 v8.8h, \in\().8h
+ eor \in\().16b, v10.16b, \in\().16b
+ shl v9.4s, v8.4s, #24
+ shl v11.4s, \in\().4s, #24
+ sri v9.4s, v8.4s, #8
+ sri v11.4s, \in\().4s, #8
+ eor v9.16b, v9.16b, v8.16b
+ eor v10.16b, v10.16b, v9.16b
+ eor \in\().16b, v10.16b, v11.16b
+ .endm
+
+ /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
+ .macro inv_mix_columns, in
+ mul_by_x v11.16b, \in\().16b, v10.16b, v14.16b
+ mul_by_x v11.16b, v11.16b, v10.16b, v14.16b
+ eor \in\().16b, \in\().16b, v11.16b
+ rev32 v11.8h, v11.8h
+ eor \in\().16b, \in\().16b, v11.16b
+ mix_columns \in
+ .endm
+
+ .macro do_block, enc, in, rounds, rk, rkp, i
+ ld1 {v15.16b}, [\rk]
+ add \rkp, \rk, #16
+ mov \i, \rounds
+1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
+ tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
+ sub_bytes \in
+ ld1 {v15.16b}, [\rkp], #16
+ subs \i, \i, #1
+ beq 2222f
+ .if \enc == 1
+ mix_columns \in
+ .else
+ inv_mix_columns \in
+ .endif
+ b 1111b
+2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
+ .endm
+
+ .macro encrypt_block, in, rounds, rk, rkp, i
+ do_block 1, \in, \rounds, \rk, \rkp, \i
+ .endm
+
+ .macro decrypt_block, in, rounds, rk, rkp, i
+ do_block 0, \in, \rounds, \rk, \rkp, \i
+ .endm
+
+ /*
+ * Interleaved versions: functionally equivalent to the
+ * ones above, but applied to 2 or 4 AES states in parallel.
+ */
+
+ .macro sub_bytes_2x, in0, in1
+ sub v8.16b, \in0\().16b, v12.16b
+ sub v9.16b, \in1\().16b, v12.16b
+ tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
+ tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
+ sub v10.16b, v8.16b, v12.16b
+ sub v11.16b, v9.16b, v12.16b
+ tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
+ tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
+ sub v8.16b, v10.16b, v12.16b
+ sub v9.16b, v11.16b, v12.16b
+ tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b
+ tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b
+ tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
+ tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
+ .endm
+
+ .macro sub_bytes_4x, in0, in1, in2, in3
+ sub v8.16b, \in0\().16b, v12.16b
+ tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
+ sub v9.16b, \in1\().16b, v12.16b
+ tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
+ sub v10.16b, \in2\().16b, v12.16b
+ tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
+ sub v11.16b, \in3\().16b, v12.16b
+ tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
+ tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
+ tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
+ sub v8.16b, v8.16b, v12.16b
+ tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b
+ sub v9.16b, v9.16b, v12.16b
+ tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b
+ sub v10.16b, v10.16b, v12.16b
+ tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b
+ sub v11.16b, v11.16b, v12.16b
+ tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b
+ sub v8.16b, v8.16b, v12.16b
+ tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b
+ sub v9.16b, v9.16b, v12.16b
+ tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b
+ sub v10.16b, v10.16b, v12.16b
+ tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
+ sub v11.16b, v11.16b, v12.16b
+ tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
+ tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b
+ tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b
+ .endm
+
+ .macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
+ sshr \tmp0\().16b, \in0\().16b, #7
+ add \out0\().16b, \in0\().16b, \in0\().16b
+ sshr \tmp1\().16b, \in1\().16b, #7
+ and \tmp0\().16b, \tmp0\().16b, \const\().16b
+ add \out1\().16b, \in1\().16b, \in1\().16b
+ and \tmp1\().16b, \tmp1\().16b, \const\().16b
+ eor \out0\().16b, \out0\().16b, \tmp0\().16b
+ eor \out1\().16b, \out1\().16b, \tmp1\().16b
+ .endm
+
+ .macro mix_columns_2x, in0, in1
+ mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
+ rev32 v10.8h, \in0\().8h
+ rev32 v11.8h, \in1\().8h
+ eor \in0\().16b, v8.16b, \in0\().16b
+ eor \in1\().16b, v9.16b, \in1\().16b
+ shl v12.4s, v10.4s, #24
+ shl v13.4s, v11.4s, #24
+ eor v8.16b, v8.16b, v10.16b
+ sri v12.4s, v10.4s, #8
+ shl v10.4s, \in0\().4s, #24
+ eor v9.16b, v9.16b, v11.16b
+ sri v13.4s, v11.4s, #8
+ shl v11.4s, \in1\().4s, #24
+ sri v10.4s, \in0\().4s, #8
+ eor \in0\().16b, v8.16b, v12.16b
+ sri v11.4s, \in1\().4s, #8
+ eor \in1\().16b, v9.16b, v13.16b
+ eor \in0\().16b, v10.16b, \in0\().16b
+ eor \in1\().16b, v11.16b, \in1\().16b
+ .endm
+
+ .macro inv_mix_cols_2x, in0, in1
+ mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
+ mul_by_x_2x v8, v9, v8, v9, v10, v11, v14
+ eor \in0\().16b, \in0\().16b, v8.16b
+ eor \in1\().16b, \in1\().16b, v9.16b
+ rev32 v8.8h, v8.8h
+ rev32 v9.8h, v9.8h
+ eor \in0\().16b, \in0\().16b, v8.16b
+ eor \in1\().16b, \in1\().16b, v9.16b
+ mix_columns_2x \in0, \in1
+ .endm
+
+ .macro inv_mix_cols_4x, in0, in1, in2, in3
+ mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
+ mul_by_x_2x v10, v11, \in2, \in3, v12, v13, v14
+ mul_by_x_2x v8, v9, v8, v9, v12, v13, v14
+ mul_by_x_2x v10, v11, v10, v11, v12, v13, v14
+ eor \in0\().16b, \in0\().16b, v8.16b
+ eor \in1\().16b, \in1\().16b, v9.16b
+ eor \in2\().16b, \in2\().16b, v10.16b
+ eor \in3\().16b, \in3\().16b, v11.16b
+ rev32 v8.8h, v8.8h
+ rev32 v9.8h, v9.8h
+ rev32 v10.8h, v10.8h
+ rev32 v11.8h, v11.8h
+ eor \in0\().16b, \in0\().16b, v8.16b
+ eor \in1\().16b, \in1\().16b, v9.16b
+ eor \in2\().16b, \in2\().16b, v10.16b
+ eor \in3\().16b, \in3\().16b, v11.16b
+ mix_columns_2x \in0, \in1
+ mix_columns_2x \in2, \in3
+ .endm
+
+ .macro do_block_2x, enc, in0, in1 rounds, rk, rkp, i
+ ld1 {v15.16b}, [\rk]
+ add \rkp, \rk, #16
+ mov \i, \rounds
+1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
+ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
+ sub_bytes_2x \in0, \in1
+ tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
+ tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
+ ld1 {v15.16b}, [\rkp], #16
+ subs \i, \i, #1
+ beq 2222f
+ .if \enc == 1
+ mix_columns_2x \in0, \in1
+ ldr q13, .LForward_ShiftRows
+ .else
+ inv_mix_cols_2x \in0, \in1
+ ldr q13, .LReverse_ShiftRows
+ .endif
+ movi v12.16b, #0x40
+ b 1111b
+2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
+ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
+ .endm
+
+ .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
+ ld1 {v15.16b}, [\rk]
+ add \rkp, \rk, #16
+ mov \i, \rounds
+1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
+ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
+ eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
+ eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
+ sub_bytes_4x \in0, \in1, \in2, \in3
+ tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
+ tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
+ tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
+ tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
+ ld1 {v15.16b}, [\rkp], #16
+ subs \i, \i, #1
+ beq 2222f
+ .if \enc == 1
+ mix_columns_2x \in0, \in1
+ mix_columns_2x \in2, \in3
+ ldr q13, .LForward_ShiftRows
+ .else
+ inv_mix_cols_4x \in0, \in1, \in2, \in3
+ ldr q13, .LReverse_ShiftRows
+ .endif
+ movi v12.16b, #0x40
+ b 1111b
+2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
+ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
+ eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
+ eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
+ .endm
+
+ .macro encrypt_block2x, in0, in1, rounds, rk, rkp, i
+ do_block_2x 1, \in0, \in1, \rounds, \rk, \rkp, \i
+ .endm
+
+ .macro decrypt_block2x, in0, in1, rounds, rk, rkp, i
+ do_block_2x 0, \in0, \in1, \rounds, \rk, \rkp, \i
+ .endm
+
+ .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
+ do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
+ .endm
+
+ .macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
+ do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
+ .endm
+
+#include "aes-modes.S"
+
+ .text
+ .align 4
+.LForward_ShiftRows:
+ .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3
+ .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb
+
+.LReverse_ShiftRows:
+ .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb
+ .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3
+
+.LForward_Sbox:
+ .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
+ .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
+ .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
+ .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
+ .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
+ .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
+ .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
+ .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
+ .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
+ .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
+ .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
+ .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
+ .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
+ .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
+ .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
+ .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
+ .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
+ .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
+ .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
+ .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
+ .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
+ .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
+ .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
+ .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
+ .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
+ .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
+ .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
+ .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
+ .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
+ .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
+ .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
+ .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+
+.LReverse_Sbox:
+ .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+ .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+ .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+ .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+ .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+ .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+ .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+ .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+ .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+ .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+ .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+ .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+ .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+ .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+ .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+ .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+ .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+ .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+ .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+ .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+ .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+ .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+ .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+ .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+ .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+ .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+ .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+ .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+ .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+ .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+ .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+ .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
new file mode 100644
index 000000000000..dc457015884e
--- /dev/null
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -0,0 +1,79 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 PMULL instructions.
+ *
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ SHASH .req v0
+ SHASH2 .req v1
+ T1 .req v2
+ T2 .req v3
+ MASK .req v4
+ XL .req v5
+ XM .req v6
+ XH .req v7
+ IN1 .req v7
+
+ .text
+ .arch armv8-a+crypto
+
+ /*
+ * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+ * struct ghash_key const *k, const char *head)
+ */
+ENTRY(pmull_ghash_update)
+ ld1 {SHASH.16b}, [x3]
+ ld1 {XL.16b}, [x1]
+ movi MASK.16b, #0xe1
+ ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
+ shl MASK.2d, MASK.2d, #57
+ eor SHASH2.16b, SHASH2.16b, SHASH.16b
+
+ /* do the head block first, if supplied */
+ cbz x4, 0f
+ ld1 {T1.2d}, [x4]
+ b 1f
+
+0: ld1 {T1.2d}, [x2], #16
+ sub w0, w0, #1
+
+1: /* multiply XL by SHASH in GF(2^128) */
+CPU_LE( rev64 T1.16b, T1.16b )
+
+ ext T2.16b, XL.16b, XL.16b, #8
+ ext IN1.16b, T1.16b, T1.16b, #8
+ eor T1.16b, T1.16b, T2.16b
+ eor XL.16b, XL.16b, IN1.16b
+
+ pmull2 XH.1q, SHASH.2d, XL.2d // a1 * b1
+ eor T1.16b, T1.16b, XL.16b
+ pmull XL.1q, SHASH.1d, XL.1d // a0 * b0
+ pmull XM.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0)
+
+ ext T1.16b, XL.16b, XH.16b, #8
+ eor T2.16b, XL.16b, XH.16b
+ eor XM.16b, XM.16b, T1.16b
+ eor XM.16b, XM.16b, T2.16b
+ pmull T2.1q, XL.1d, MASK.1d
+
+ mov XH.d[0], XM.d[1]
+ mov XM.d[1], XL.d[0]
+
+ eor XL.16b, XM.16b, T2.16b
+ ext T2.16b, XL.16b, XL.16b, #8
+ pmull XL.1q, XL.1d, MASK.1d
+ eor T2.16b, T2.16b, XH.16b
+ eor XL.16b, XL.16b, T2.16b
+
+ cbnz w0, 0b
+
+ st1 {XL.16b}, [x1]
+ ret
+ENDPROC(pmull_ghash_update)
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
new file mode 100644
index 000000000000..833ec1e3f3e9
--- /dev/null
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -0,0 +1,156 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 PMULL instructions.
+ *
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+#define GHASH_BLOCK_SIZE 16
+#define GHASH_DIGEST_SIZE 16
+
+struct ghash_key {
+ u64 a;
+ u64 b;
+};
+
+struct ghash_desc_ctx {
+ u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
+ u8 buf[GHASH_BLOCK_SIZE];
+ u32 count;
+};
+
+asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+ struct ghash_key const *k, const char *head);
+
+static int ghash_init(struct shash_desc *desc)
+{
+ struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ *ctx = (struct ghash_desc_ctx){};
+ return 0;
+}
+
+static int ghash_update(struct shash_desc *desc, const u8 *src,
+ unsigned int len)
+{
+ struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
+
+ ctx->count += len;
+
+ if ((partial + len) >= GHASH_BLOCK_SIZE) {
+ struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+ int blocks;
+
+ if (partial) {
+ int p = GHASH_BLOCK_SIZE - partial;
+
+ memcpy(ctx->buf + partial, src, p);
+ src += p;
+ len -= p;
+ }
+
+ blocks = len / GHASH_BLOCK_SIZE;
+ len %= GHASH_BLOCK_SIZE;
+
+ kernel_neon_begin_partial(8);
+ pmull_ghash_update(blocks, ctx->digest, src, key,
+ partial ? ctx->buf : NULL);
+ kernel_neon_end();
+ src += blocks * GHASH_BLOCK_SIZE;
+ partial = 0;
+ }
+ if (len)
+ memcpy(ctx->buf + partial, src, len);
+ return 0;
+}
+
+static int ghash_final(struct shash_desc *desc, u8 *dst)
+{
+ struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
+
+ if (partial) {
+ struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+
+ memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
+
+ kernel_neon_begin_partial(8);
+ pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
+ kernel_neon_end();
+ }
+ put_unaligned_be64(ctx->digest[1], dst);
+ put_unaligned_be64(ctx->digest[0], dst + 8);
+
+ *ctx = (struct ghash_desc_ctx){};
+ return 0;
+}
+
+static int ghash_setkey(struct crypto_shash *tfm,
+ const u8 *inkey, unsigned int keylen)
+{
+ struct ghash_key *key = crypto_shash_ctx(tfm);
+ u64 a, b;
+
+ if (keylen != GHASH_BLOCK_SIZE) {
+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+
+ /* perform multiplication by 'x' in GF(2^128) */
+ b = get_unaligned_be64(inkey);
+ a = get_unaligned_be64(inkey + 8);
+
+ key->a = (a << 1) | (b >> 63);
+ key->b = (b << 1) | (a >> 63);
+
+ if (b >> 63)
+ key->b ^= 0xc200000000000000UL;
+
+ return 0;
+}
+
+static struct shash_alg ghash_alg = {
+ .digestsize = GHASH_DIGEST_SIZE,
+ .init = ghash_init,
+ .update = ghash_update,
+ .final = ghash_final,
+ .setkey = ghash_setkey,
+ .descsize = sizeof(struct ghash_desc_ctx),
+ .base = {
+ .cra_name = "ghash",
+ .cra_driver_name = "ghash-ce",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = GHASH_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct ghash_key),
+ .cra_module = THIS_MODULE,
+ },
+};
+
+static int __init ghash_ce_mod_init(void)
+{
+ return crypto_register_shash(&ghash_alg);
+}
+
+static void __exit ghash_ce_mod_exit(void)
+{
+ crypto_unregister_shash(&ghash_alg);
+}
+
+module_cpu_feature_match(PMULL, ghash_ce_mod_init);
+module_exit(ghash_ce_mod_exit);
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
new file mode 100644
index 000000000000..09d57d98609c
--- /dev/null
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -0,0 +1,153 @@
+/*
+ * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+ .arch armv8-a+crypto
+
+ k0 .req v0
+ k1 .req v1
+ k2 .req v2
+ k3 .req v3
+
+ t0 .req v4
+ t1 .req v5
+
+ dga .req q6
+ dgav .req v6
+ dgb .req s7
+ dgbv .req v7
+
+ dg0q .req q12
+ dg0s .req s12
+ dg0v .req v12
+ dg1s .req s13
+ dg1v .req v13
+ dg2s .req s14
+
+ .macro add_only, op, ev, rc, s0, dg1
+ .ifc \ev, ev
+ add t1.4s, v\s0\().4s, \rc\().4s
+ sha1h dg2s, dg0s
+ .ifnb \dg1
+ sha1\op dg0q, \dg1, t0.4s
+ .else
+ sha1\op dg0q, dg1s, t0.4s
+ .endif
+ .else
+ .ifnb \s0
+ add t0.4s, v\s0\().4s, \rc\().4s
+ .endif
+ sha1h dg1s, dg0s
+ sha1\op dg0q, dg2s, t1.4s
+ .endif
+ .endm
+
+ .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1
+ sha1su0 v\s0\().4s, v\s1\().4s, v\s2\().4s
+ add_only \op, \ev, \rc, \s1, \dg1
+ sha1su1 v\s0\().4s, v\s3\().4s
+ .endm
+
+ /*
+ * The SHA1 round constants
+ */
+ .align 4
+.Lsha1_rcon:
+ .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
+
+ /*
+ * void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
+ * u8 *head, long bytes)
+ */
+ENTRY(sha1_ce_transform)
+ /* load round constants */
+ adr x6, .Lsha1_rcon
+ ld1r {k0.4s}, [x6], #4
+ ld1r {k1.4s}, [x6], #4
+ ld1r {k2.4s}, [x6], #4
+ ld1r {k3.4s}, [x6]
+
+ /* load state */
+ ldr dga, [x2]
+ ldr dgb, [x2, #16]
+
+ /* load partial state (if supplied) */
+ cbz x3, 0f
+ ld1 {v8.4s-v11.4s}, [x3]
+ b 1f
+
+ /* load input */
+0: ld1 {v8.4s-v11.4s}, [x1], #64
+ sub w0, w0, #1
+
+1:
+CPU_LE( rev32 v8.16b, v8.16b )
+CPU_LE( rev32 v9.16b, v9.16b )
+CPU_LE( rev32 v10.16b, v10.16b )
+CPU_LE( rev32 v11.16b, v11.16b )
+
+2: add t0.4s, v8.4s, k0.4s
+ mov dg0v.16b, dgav.16b
+
+ add_update c, ev, k0, 8, 9, 10, 11, dgb
+ add_update c, od, k0, 9, 10, 11, 8
+ add_update c, ev, k0, 10, 11, 8, 9
+ add_update c, od, k0, 11, 8, 9, 10
+ add_update c, ev, k1, 8, 9, 10, 11
+
+ add_update p, od, k1, 9, 10, 11, 8
+ add_update p, ev, k1, 10, 11, 8, 9
+ add_update p, od, k1, 11, 8, 9, 10
+ add_update p, ev, k1, 8, 9, 10, 11
+ add_update p, od, k2, 9, 10, 11, 8
+
+ add_update m, ev, k2, 10, 11, 8, 9
+ add_update m, od, k2, 11, 8, 9, 10
+ add_update m, ev, k2, 8, 9, 10, 11
+ add_update m, od, k2, 9, 10, 11, 8
+ add_update m, ev, k3, 10, 11, 8, 9
+
+ add_update p, od, k3, 11, 8, 9, 10
+ add_only p, ev, k3, 9
+ add_only p, od, k3, 10
+ add_only p, ev, k3, 11
+ add_only p, od
+
+ /* update state */
+ add dgbv.2s, dgbv.2s, dg1v.2s
+ add dgav.4s, dgav.4s, dg0v.4s
+
+ cbnz w0, 0b
+
+ /*
+ * Final block: add padding and total bit count.
+ * Skip if we have no total byte count in x4. In that case, the input
+ * size was not a round multiple of the block size, and the padding is
+ * handled by the C code.
+ */
+ cbz x4, 3f
+ movi v9.2d, #0
+ mov x8, #0x80000000
+ movi v10.2d, #0
+ ror x7, x4, #29 // ror(lsl(x4, 3), 32)
+ fmov d8, x8
+ mov x4, #0
+ mov v11.d[0], xzr
+ mov v11.d[1], x7
+ b 2b
+
+ /* store new state */
+3: str dga, [x2]
+ str dgb, [x2, #16]
+ ret
+ENDPROC(sha1_ce_transform)
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
new file mode 100644
index 000000000000..6fe83f37a750
--- /dev/null
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -0,0 +1,174 @@
+/*
+ * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
+ u8 *head, long bytes);
+
+static int sha1_init(struct shash_desc *desc)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ *sctx = (struct sha1_state){
+ .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
+ };
+ return 0;
+}
+
+static int sha1_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
+
+ sctx->count += len;
+
+ if ((partial + len) >= SHA1_BLOCK_SIZE) {
+ int blocks;
+
+ if (partial) {
+ int p = SHA1_BLOCK_SIZE - partial;
+
+ memcpy(sctx->buffer + partial, data, p);
+ data += p;
+ len -= p;
+ }
+
+ blocks = len / SHA1_BLOCK_SIZE;
+ len %= SHA1_BLOCK_SIZE;
+
+ kernel_neon_begin_partial(16);
+ sha1_ce_transform(blocks, data, sctx->state,
+ partial ? sctx->buffer : NULL, 0);
+ kernel_neon_end();
+
+ data += blocks * SHA1_BLOCK_SIZE;
+ partial = 0;
+ }
+ if (len)
+ memcpy(sctx->buffer + partial, data, len);
+ return 0;
+}
+
+static int sha1_final(struct shash_desc *desc, u8 *out)
+{
+ static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
+
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ __be64 bits = cpu_to_be64(sctx->count << 3);
+ __be32 *dst = (__be32 *)out;
+ int i;
+
+ u32 padlen = SHA1_BLOCK_SIZE
+ - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE);
+
+ sha1_update(desc, padding, padlen);
+ sha1_update(desc, (const u8 *)&bits, sizeof(bits));
+
+ for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha1_state){};
+ return 0;
+}
+
+static int sha1_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ __be32 *dst = (__be32 *)out;
+ int blocks;
+ int i;
+
+ if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) {
+ sha1_update(desc, data, len);
+ return sha1_final(desc, out);
+ }
+
+ /*
+ * Use a fast path if the input is a multiple of 64 bytes. In
+ * this case, there is no need to copy data around, and we can
+ * perform the entire digest calculation in a single invocation
+ * of sha1_ce_transform()
+ */
+ blocks = len / SHA1_BLOCK_SIZE;
+
+ kernel_neon_begin_partial(16);
+ sha1_ce_transform(blocks, data, sctx->state, NULL, len);
+ kernel_neon_end();
+
+ for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha1_state){};
+ return 0;
+}
+
+static int sha1_export(struct shash_desc *desc, void *out)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ struct sha1_state *dst = out;
+
+ *dst = *sctx;
+ return 0;
+}
+
+static int sha1_import(struct shash_desc *desc, const void *in)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ struct sha1_state const *src = in;
+
+ *sctx = *src;
+ return 0;
+}
+
+static struct shash_alg alg = {
+ .init = sha1_init,
+ .update = sha1_update,
+ .final = sha1_final,
+ .finup = sha1_finup,
+ .export = sha1_export,
+ .import = sha1_import,
+ .descsize = sizeof(struct sha1_state),
+ .digestsize = SHA1_DIGEST_SIZE,
+ .statesize = sizeof(struct sha1_state),
+ .base = {
+ .cra_name = "sha1",
+ .cra_driver_name = "sha1-ce",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA1_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static int __init sha1_ce_mod_init(void)
+{
+ return crypto_register_shash(&alg);
+}
+
+static void __exit sha1_ce_mod_fini(void)
+{
+ crypto_unregister_shash(&alg);
+}
+
+module_cpu_feature_match(SHA1, sha1_ce_mod_init);
+module_exit(sha1_ce_mod_fini);
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
new file mode 100644
index 000000000000..7f29fc031ea8
--- /dev/null
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -0,0 +1,156 @@
+/*
+ * sha2-ce-core.S - core SHA-224/SHA-256 transform using v8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+ .arch armv8-a+crypto
+
+ dga .req q20
+ dgav .req v20
+ dgb .req q21
+ dgbv .req v21
+
+ t0 .req v22
+ t1 .req v23
+
+ dg0q .req q24
+ dg0v .req v24
+ dg1q .req q25
+ dg1v .req v25
+ dg2q .req q26
+ dg2v .req v26
+
+ .macro add_only, ev, rc, s0
+ mov dg2v.16b, dg0v.16b
+ .ifeq \ev
+ add t1.4s, v\s0\().4s, \rc\().4s
+ sha256h dg0q, dg1q, t0.4s
+ sha256h2 dg1q, dg2q, t0.4s
+ .else
+ .ifnb \s0
+ add t0.4s, v\s0\().4s, \rc\().4s
+ .endif
+ sha256h dg0q, dg1q, t1.4s
+ sha256h2 dg1q, dg2q, t1.4s
+ .endif
+ .endm
+
+ .macro add_update, ev, rc, s0, s1, s2, s3
+ sha256su0 v\s0\().4s, v\s1\().4s
+ add_only \ev, \rc, \s1
+ sha256su1 v\s0\().4s, v\s2\().4s, v\s3\().4s
+ .endm
+
+ /*
+ * The SHA-256 round constants
+ */
+ .align 4
+.Lsha2_rcon:
+ .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
+ .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
+ .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
+ .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
+ .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
+ .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
+ .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
+ .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
+ .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
+ .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
+ .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
+ .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
+ .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
+ .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
+ .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
+ .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+
+ /*
+ * void sha2_ce_transform(int blocks, u8 const *src, u32 *state,
+ * u8 *head, long bytes)
+ */
+ENTRY(sha2_ce_transform)
+ /* load round constants */
+ adr x8, .Lsha2_rcon
+ ld1 { v0.4s- v3.4s}, [x8], #64
+ ld1 { v4.4s- v7.4s}, [x8], #64
+ ld1 { v8.4s-v11.4s}, [x8], #64
+ ld1 {v12.4s-v15.4s}, [x8]
+
+ /* load state */
+ ldp dga, dgb, [x2]
+
+ /* load partial input (if supplied) */
+ cbz x3, 0f
+ ld1 {v16.4s-v19.4s}, [x3]
+ b 1f
+
+ /* load input */
+0: ld1 {v16.4s-v19.4s}, [x1], #64
+ sub w0, w0, #1
+
+1:
+CPU_LE( rev32 v16.16b, v16.16b )
+CPU_LE( rev32 v17.16b, v17.16b )
+CPU_LE( rev32 v18.16b, v18.16b )
+CPU_LE( rev32 v19.16b, v19.16b )
+
+2: add t0.4s, v16.4s, v0.4s
+ mov dg0v.16b, dgav.16b
+ mov dg1v.16b, dgbv.16b
+
+ add_update 0, v1, 16, 17, 18, 19
+ add_update 1, v2, 17, 18, 19, 16
+ add_update 0, v3, 18, 19, 16, 17
+ add_update 1, v4, 19, 16, 17, 18
+
+ add_update 0, v5, 16, 17, 18, 19
+ add_update 1, v6, 17, 18, 19, 16
+ add_update 0, v7, 18, 19, 16, 17
+ add_update 1, v8, 19, 16, 17, 18
+
+ add_update 0, v9, 16, 17, 18, 19
+ add_update 1, v10, 17, 18, 19, 16
+ add_update 0, v11, 18, 19, 16, 17
+ add_update 1, v12, 19, 16, 17, 18
+
+ add_only 0, v13, 17
+ add_only 1, v14, 18
+ add_only 0, v15, 19
+ add_only 1
+
+ /* update state */
+ add dgav.4s, dgav.4s, dg0v.4s
+ add dgbv.4s, dgbv.4s, dg1v.4s
+
+ /* handled all input blocks? */
+ cbnz w0, 0b
+
+ /*
+ * Final block: add padding and total bit count.
+ * Skip if we have no total byte count in x4. In that case, the input
+ * size was not a round multiple of the block size, and the padding is
+ * handled by the C code.
+ */
+ cbz x4, 3f
+ movi v17.2d, #0
+ mov x8, #0x80000000
+ movi v18.2d, #0
+ ror x7, x4, #29 // ror(lsl(x4, 3), 32)
+ fmov d16, x8
+ mov x4, #0
+ mov v19.d[0], xzr
+ mov v19.d[1], x7
+ b 2b
+
+ /* store new state */
+3: stp dga, dgb, [x2]
+ ret
+ENDPROC(sha2_ce_transform)
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
new file mode 100644
index 000000000000..c294e67d3925
--- /dev/null
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -0,0 +1,255 @@
+/*
+ * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state,
+ u8 *head, long bytes);
+
+static int sha224_init(struct shash_desc *desc)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ *sctx = (struct sha256_state){
+ .state = {
+ SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
+ SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
+ }
+ };
+ return 0;
+}
+
+static int sha256_init(struct shash_desc *desc)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ *sctx = (struct sha256_state){
+ .state = {
+ SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
+ SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
+ }
+ };
+ return 0;
+}
+
+static int sha2_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
+
+ sctx->count += len;
+
+ if ((partial + len) >= SHA256_BLOCK_SIZE) {
+ int blocks;
+
+ if (partial) {
+ int p = SHA256_BLOCK_SIZE - partial;
+
+ memcpy(sctx->buf + partial, data, p);
+ data += p;
+ len -= p;
+ }
+
+ blocks = len / SHA256_BLOCK_SIZE;
+ len %= SHA256_BLOCK_SIZE;
+
+ kernel_neon_begin_partial(28);
+ sha2_ce_transform(blocks, data, sctx->state,
+ partial ? sctx->buf : NULL, 0);
+ kernel_neon_end();
+
+ data += blocks * SHA256_BLOCK_SIZE;
+ partial = 0;
+ }
+ if (len)
+ memcpy(sctx->buf + partial, data, len);
+ return 0;
+}
+
+static void sha2_final(struct shash_desc *desc)
+{
+ static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
+
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ __be64 bits = cpu_to_be64(sctx->count << 3);
+ u32 padlen = SHA256_BLOCK_SIZE
+ - ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE);
+
+ sha2_update(desc, padding, padlen);
+ sha2_update(desc, (const u8 *)&bits, sizeof(bits));
+}
+
+static int sha224_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ __be32 *dst = (__be32 *)out;
+ int i;
+
+ sha2_final(desc);
+
+ for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha256_state){};
+ return 0;
+}
+
+static int sha256_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ __be32 *dst = (__be32 *)out;
+ int i;
+
+ sha2_final(desc);
+
+ for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha256_state){};
+ return 0;
+}
+
+static void sha2_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ int blocks;
+
+ if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) {
+ sha2_update(desc, data, len);
+ sha2_final(desc);
+ return;
+ }
+
+ /*
+ * Use a fast path if the input is a multiple of 64 bytes. In
+ * this case, there is no need to copy data around, and we can
+ * perform the entire digest calculation in a single invocation
+ * of sha2_ce_transform()
+ */
+ blocks = len / SHA256_BLOCK_SIZE;
+
+ kernel_neon_begin_partial(28);
+ sha2_ce_transform(blocks, data, sctx->state, NULL, len);
+ kernel_neon_end();
+ data += blocks * SHA256_BLOCK_SIZE;
+}
+
+static int sha224_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ __be32 *dst = (__be32 *)out;
+ int i;
+
+ sha2_finup(desc, data, len);
+
+ for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha256_state){};
+ return 0;
+}
+
+static int sha256_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ __be32 *dst = (__be32 *)out;
+ int i;
+
+ sha2_finup(desc, data, len);
+
+ for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha256_state){};
+ return 0;
+}
+
+static int sha2_export(struct shash_desc *desc, void *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ struct sha256_state *dst = out;
+
+ *dst = *sctx;
+ return 0;
+}
+
+static int sha2_import(struct shash_desc *desc, const void *in)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ struct sha256_state const *src = in;
+
+ *sctx = *src;
+ return 0;
+}
+
+static struct shash_alg algs[] = { {
+ .init = sha224_init,
+ .update = sha2_update,
+ .final = sha224_final,
+ .finup = sha224_finup,
+ .export = sha2_export,
+ .import = sha2_import,
+ .descsize = sizeof(struct sha256_state),
+ .digestsize = SHA224_DIGEST_SIZE,
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha224",
+ .cra_driver_name = "sha224-ce",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+}, {
+ .init = sha256_init,
+ .update = sha2_update,
+ .final = sha256_final,
+ .finup = sha256_finup,
+ .export = sha2_export,
+ .import = sha2_import,
+ .descsize = sizeof(struct sha256_state),
+ .digestsize = SHA256_DIGEST_SIZE,
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha256",
+ .cra_driver_name = "sha256-ce",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+} };
+
+static int __init sha2_ce_mod_init(void)
+{
+ return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit sha2_ce_mod_fini(void)
+{
+ crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_cpu_feature_match(SHA2, sha2_ce_mod_init);
+module_exit(sha2_ce_mod_fini);
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 27e3c6bccf62..bd36b3013f82 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -36,6 +36,7 @@ generic-y += segment.h
generic-y += sembuf.h
generic-y += serial.h
generic-y += shmbuf.h
+generic-y += simd.h
generic-y += sizes.h
generic-y += socket.h
generic-y += sockios.h
diff --git a/arch/arm64/include/asm/bL_switcher.h b/arch/arm64/include/asm/bL_switcher.h
new file mode 100644
index 000000000000..2bee500b7f54
--- /dev/null
+++ b/arch/arm64/include/asm/bL_switcher.h
@@ -0,0 +1,54 @@
+/*
+ * Based on the stubs for the ARM implementation which is:
+ *
+ * Created by: Nicolas Pitre, April 2012
+ * Copyright: (C) 2012-2013 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef ASM_BL_SWITCHER_H
+#define ASM_BL_SWITCHER_H
+
+#include <linux/notifier.h>
+#include <linux/types.h>
+
+typedef void (*bL_switch_completion_handler)(void *cookie);
+
+static inline int bL_switch_request(unsigned int cpu,
+ unsigned int new_cluster_id)
+{
+ return -ENOTSUPP;
+}
+
+/*
+ * Register here to be notified about runtime enabling/disabling of
+ * the switcher.
+ *
+ * The notifier chain is called with the switcher activation lock held:
+ * the switcher will not be enabled or disabled during callbacks.
+ * Callbacks must not call bL_switcher_{get,put}_enabled().
+ */
+#define BL_NOTIFY_PRE_ENABLE 0
+#define BL_NOTIFY_POST_ENABLE 1
+#define BL_NOTIFY_PRE_DISABLE 2
+#define BL_NOTIFY_POST_DISABLE 3
+
+static inline int bL_switcher_register_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline int bL_switcher_unregister_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline bool bL_switcher_get_enabled(void) { return false; }
+static inline void bL_switcher_put_enabled(void) { }
+static inline int bL_switcher_trace_trigger(void) { return 0; }
+static inline int bL_switcher_get_logical_index(u32 mpidr) { return -EUNATCH; }
+
+#endif
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index fda2704b3f9f..253e33bc94fb 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -228,7 +228,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
return (u32)(unsigned long)uptr;
}
-#define compat_user_stack_pointer() (current_pt_regs()->compat_sp)
+#define compat_user_stack_pointer() (user_stack_pointer(current_pt_regs()))
static inline void __user *arch_compat_alloc_user_space(long len)
{
@@ -305,11 +305,6 @@ static inline int is_compat_thread(struct thread_info *thread)
#else /* !CONFIG_COMPAT */
-static inline int is_compat_task(void)
-{
- return 0;
-}
-
static inline int is_compat_thread(struct thread_info *thread)
{
return 0;
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
new file mode 100644
index 000000000000..cd4ac0516488
--- /dev/null
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_CPUFEATURE_H
+#define __ASM_CPUFEATURE_H
+
+#include <asm/hwcap.h>
+
+/*
+ * In the arm64 world (as in the ARM world), elf_hwcap is used both internally
+ * in the kernel and for user space to keep track of which optional features
+ * are supported by the current system. So let's map feature 'x' to HWCAP_x.
+ * Note that HWCAP_x constants are bit fields so we need to take the log.
+ */
+
+#define MAX_CPU_FEATURES (8 * sizeof(elf_hwcap))
+#define cpu_feature(x) ilog2(HWCAP_ ## x)
+
+static inline bool cpu_have_feature(unsigned int num)
+{
+ return elf_hwcap & (1UL << num);
+}
+
+#endif
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 62314791570c..6e9b5b36921c 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -26,6 +26,53 @@
#define DBG_ESR_EVT_HWWP 0x2
#define DBG_ESR_EVT_BRK 0x6
+/*
+ * Break point instruction encoding
+ */
+#define BREAK_INSTR_SIZE 4
+
+/*
+ * ESR values expected for dynamic and compile time BRK instruction
+ */
+#define DBG_ESR_VAL_BRK(x) (0xf2000000 | ((x) & 0xfffff))
+
+/*
+ * #imm16 values used for BRK instruction generation
+ * Allowed values for kgbd are 0x400 - 0x7ff
+ * 0x400: for dynamic BRK instruction
+ * 0x401: for compile time BRK instruction
+ */
+#define KGDB_DYN_DGB_BRK_IMM 0x400
+#define KDBG_COMPILED_DBG_BRK_IMM 0x401
+
+/*
+ * BRK instruction encoding
+ * The #imm16 value should be placed at bits[20:5] within BRK ins
+ */
+#define AARCH64_BREAK_MON 0xd4200000
+
+/*
+ * Extract byte from BRK instruction
+ */
+#define KGDB_DYN_DGB_BRK_INS_BYTE(x) \
+ ((((AARCH64_BREAK_MON) & 0xffe0001f) >> (x * 8)) & 0xff)
+
+/*
+ * Extract byte from BRK #imm16
+ */
+#define KGBD_DYN_DGB_BRK_IMM_BYTE(x) \
+ (((((KGDB_DYN_DGB_BRK_IMM) & 0xffff) << 5) >> (x * 8)) & 0xff)
+
+#define KGDB_DYN_DGB_BRK_BYTE(x) \
+ (KGDB_DYN_DGB_BRK_INS_BYTE(x) | KGBD_DYN_DGB_BRK_IMM_BYTE(x))
+
+#define KGDB_DYN_BRK_INS_BYTE0 KGDB_DYN_DGB_BRK_BYTE(0)
+#define KGDB_DYN_BRK_INS_BYTE1 KGDB_DYN_DGB_BRK_BYTE(1)
+#define KGDB_DYN_BRK_INS_BYTE2 KGDB_DYN_DGB_BRK_BYTE(2)
+#define KGDB_DYN_BRK_INS_BYTE3 KGDB_DYN_DGB_BRK_BYTE(3)
+
+#define CACHE_FLUSH_IS_SAFE 1
+
enum debug_el {
DBG_ACTIVE_EL0 = 0,
DBG_ACTIVE_EL1,
@@ -43,23 +90,6 @@ enum debug_el {
#ifndef __ASSEMBLY__
struct task_struct;
-#define local_dbg_save(flags) \
- do { \
- typecheck(unsigned long, flags); \
- asm volatile( \
- "mrs %0, daif // local_dbg_save\n" \
- "msr daifset, #8" \
- : "=r" (flags) : : "memory"); \
- } while (0)
-
-#define local_dbg_restore(flags) \
- do { \
- typecheck(unsigned long, flags); \
- asm volatile( \
- "msr daif, %0 // local_dbg_restore\n" \
- : : "r" (flags) : "memory"); \
- } while (0)
-
#define DBG_ARCH_ID_RESERVED 0 /* In case of ptrace ABI updates. */
#define DBG_HOOK_HANDLED 0
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index 3a4572ec3273..dc82e52acdb3 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -26,8 +26,6 @@
#include <xen/xen.h>
#include <asm/xen/hypervisor.h>
-#define ARCH_HAS_DMA_GET_REQUIRED_MASK
-
#define DMA_ERROR_CODE (~(dma_addr_t)0)
extern struct dma_map_ops *dma_ops;
extern struct dma_map_ops coherent_swiotlb_dma_ops;
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
new file mode 100644
index 000000000000..c5534facf941
--- /dev/null
+++ b/arch/arm64/include/asm/ftrace.h
@@ -0,0 +1,59 @@
+/*
+ * arch/arm64/include/asm/ftrace.h
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_FTRACE_H
+#define __ASM_FTRACE_H
+
+#include <asm/insn.h>
+
+#define MCOUNT_ADDR ((unsigned long)_mcount)
+#define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE
+
+#ifndef __ASSEMBLY__
+#include <linux/compat.h>
+
+extern void _mcount(unsigned long);
+extern void *return_address(unsigned int);
+
+struct dyn_arch_ftrace {
+ /* No extra data needed for arm64 */
+};
+
+extern unsigned long ftrace_graph_call;
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+ /*
+ * addr is the address of the mcount call instruction.
+ * recordmcount does the necessary offset calculation.
+ */
+ return addr;
+}
+
+#define ftrace_return_address(n) return_address(n)
+
+/*
+ * Because AArch32 mode does not share the same syscall table with AArch64,
+ * tracing compat syscalls may result in reporting bogus syscalls or even
+ * hang-up, so just do not trace them.
+ * See kernel/trace/trace_syscalls.c
+ *
+ * x86 code says:
+ * If the user realy wants these, then they should use the
+ * raw syscall tracepoints with filtering.
+ */
+#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
+static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
+{
+ return is_compat_task();
+}
+#endif /* ifndef __ASSEMBLY__ */
+
+#endif /* __ASM_FTRACE_H */
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 6cddbb0c9f54..024c46183c3c 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -32,6 +32,12 @@
#define COMPAT_HWCAP_IDIV (COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT)
#define COMPAT_HWCAP_EVTSTRM (1 << 21)
+#define COMPAT_HWCAP2_AES (1 << 0)
+#define COMPAT_HWCAP2_PMULL (1 << 1)
+#define COMPAT_HWCAP2_SHA1 (1 << 2)
+#define COMPAT_HWCAP2_SHA2 (1 << 3)
+#define COMPAT_HWCAP2_CRC32 (1 << 4)
+
#ifndef __ASSEMBLY__
/*
* This yields a mask that user programs can use to figure out what
@@ -41,7 +47,8 @@
#ifdef CONFIG_COMPAT
#define COMPAT_ELF_HWCAP (compat_elf_hwcap)
-extern unsigned int compat_elf_hwcap;
+#define COMPAT_ELF_HWCAP2 (compat_elf_hwcap2)
+extern unsigned int compat_elf_hwcap, compat_elf_hwcap2;
#endif
extern unsigned long elf_hwcap;
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index c44ad39ed310..62e7b8bcd2dc 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -16,11 +16,14 @@
*/
#ifndef __ASM_INSN_H
#define __ASM_INSN_H
+
#include <linux/types.h>
/* A64 instructions are always 32 bits. */
#define AARCH64_INSN_SIZE 4
+#ifndef __ASSEMBLY__
+
/*
* ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a
* Section C3.1 "A64 instruction index by encoding":
@@ -105,4 +108,6 @@ int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt);
int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
+#endif /* __ASSEMBLY__ */
+
#endif /* __ASM_INSN_H */
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index b2fcfbc51ecc..11cc941bd107 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -90,5 +90,28 @@ static inline int arch_irqs_disabled_flags(unsigned long flags)
return flags & PSR_I_BIT;
}
+/*
+ * save and restore debug state
+ */
+#define local_dbg_save(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ asm volatile( \
+ "mrs %0, daif // local_dbg_save\n" \
+ "msr daifset, #8" \
+ : "=r" (flags) : : "memory"); \
+ } while (0)
+
+#define local_dbg_restore(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ asm volatile( \
+ "msr daif, %0 // local_dbg_restore\n" \
+ : : "r" (flags) : "memory"); \
+ } while (0)
+
+#define local_dbg_enable() asm("msr daifclr, #8" : : : "memory")
+#define local_dbg_disable() asm("msr daifset, #8" : : : "memory")
+
#endif
#endif
diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h
new file mode 100644
index 000000000000..3c8aafc1082f
--- /dev/null
+++ b/arch/arm64/include/asm/kgdb.h
@@ -0,0 +1,84 @@
+/*
+ * AArch64 KGDB support
+ *
+ * Based on arch/arm/include/kgdb.h
+ *
+ * Copyright (C) 2013 Cavium Inc.
+ * Author: Vijaya Kumar K <vijaya.kumar@caviumnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_KGDB_H
+#define __ARM_KGDB_H
+
+#include <linux/ptrace.h>
+#include <asm/debug-monitors.h>
+
+#ifndef __ASSEMBLY__
+
+static inline void arch_kgdb_breakpoint(void)
+{
+ asm ("brk %0" : : "I" (KDBG_COMPILED_DBG_BRK_IMM));
+}
+
+extern void kgdb_handle_bus_error(void);
+extern int kgdb_fault_expected;
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * gdb is expecting the following registers layout.
+ *
+ * General purpose regs:
+ * r0-r30: 64 bit
+ * sp,pc : 64 bit
+ * pstate : 64 bit
+ * Total: 34
+ * FPU regs:
+ * f0-f31: 128 bit
+ * Total: 32
+ * Extra regs
+ * fpsr & fpcr: 32 bit
+ * Total: 2
+ *
+ */
+
+#define _GP_REGS 34
+#define _FP_REGS 32
+#define _EXTRA_REGS 2
+/*
+ * general purpose registers size in bytes.
+ * pstate is only 4 bytes. subtract 4 bytes
+ */
+#define GP_REG_BYTES (_GP_REGS * 8)
+#define DBG_MAX_REG_NUM (_GP_REGS + _FP_REGS + _EXTRA_REGS)
+
+/*
+ * Size of I/O buffer for gdb packet.
+ * considering to hold all register contents, size is set
+ */
+
+#define BUFMAX 2048
+
+/*
+ * Number of bytes required for gdb_regs buffer.
+ * _GP_REGS: 8 bytes, _FP_REGS: 16 bytes and _EXTRA_REGS: 4 bytes each
+ * GDB fails to connect for size beyond this with error
+ * "'g' packet reply is too long"
+ */
+
+#define NUMREGBYTES ((_GP_REGS * 8) + (_FP_REGS * 16) + \
+ (_EXTRA_REGS * 4))
+
+#endif /* __ASM_KGDB_H */
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 993bce527b85..902eb708804a 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -56,6 +56,8 @@
#define TASK_SIZE_32 UL(0x100000000)
#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \
TASK_SIZE_32 : TASK_SIZE_64)
+#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \
+ TASK_SIZE_32 : TASK_SIZE_64)
#else
#define TASK_SIZE TASK_SIZE_64
#endif /* CONFIG_COMPAT */
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 0e7fa4963735..a429b5940be2 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -68,6 +68,7 @@
/* Architecturally defined mapping between AArch32 and AArch64 registers */
#define compat_usr(x) regs[(x)]
+#define compat_fp regs[11]
#define compat_sp regs[13]
#define compat_lr regs[14]
#define compat_sp_hyp regs[15]
@@ -132,7 +133,12 @@ struct pt_regs {
(!((regs)->pstate & PSR_F_BIT))
#define user_stack_pointer(regs) \
- ((regs)->sp)
+ (!compat_user_mode(regs)) ? ((regs)->sp) : ((regs)->compat_sp)
+
+static inline unsigned long regs_return_value(struct pt_regs *regs)
+{
+ return regs->regs[0];
+}
/*
* Are the current registers suitable for user mode? (used to maintain
@@ -164,7 +170,7 @@ static inline int valid_user_regs(struct user_pt_regs *regs)
return 0;
}
-#define instruction_pointer(regs) (regs)->pc
+#define instruction_pointer(regs) ((unsigned long)(regs)->pc)
#ifdef CONFIG_SMP
extern unsigned long profile_pc(struct pt_regs *regs);
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index 70ba9d4ee978..383771eb0b87 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -18,6 +18,7 @@
#include <linux/err.h>
+extern const void *sys_call_table[];
static inline int syscall_get_nr(struct task_struct *task,
struct pt_regs *regs)
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 720e70b66ffd..0a8b2a97a32e 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -91,6 +91,9 @@ static inline struct thread_info *current_thread_info(void)
/*
* thread information flags:
* TIF_SYSCALL_TRACE - syscall trace active
+ * TIF_SYSCALL_TRACEPOINT - syscall tracepoint for ftrace
+ * TIF_SYSCALL_AUDIT - syscall auditing
+ * TIF_SECOMP - syscall secure computing
* TIF_SIGPENDING - signal pending
* TIF_NEED_RESCHED - rescheduling necessary
* TIF_NOTIFY_RESUME - callback before returning to user
@@ -101,6 +104,9 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_NEED_RESCHED 1
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
#define TIF_SYSCALL_TRACE 8
+#define TIF_SYSCALL_AUDIT 9
+#define TIF_SYSCALL_TRACEPOINT 10
+#define TIF_SECCOMP 11
#define TIF_POLLING_NRFLAG 16
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
#define TIF_FREEZE 19
@@ -112,10 +118,17 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
+#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
+#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
+#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_32BIT (1 << TIF_32BIT)
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME)
+#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
+ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP)
+
#endif /* __KERNEL__ */
#endif /* __ASM_THREAD_INFO_H */
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
new file mode 100644
index 000000000000..e0171b393a14
--- /dev/null
+++ b/arch/arm64/include/asm/topology.h
@@ -0,0 +1,70 @@
+#ifndef __ASM_TOPOLOGY_H
+#define __ASM_TOPOLOGY_H
+
+#ifdef CONFIG_SMP
+
+#include <linux/cpumask.h>
+
+struct cpu_topology {
+ int thread_id;
+ int core_id;
+ int cluster_id;
+ cpumask_t thread_sibling;
+ cpumask_t core_sibling;
+};
+
+extern struct cpu_topology cpu_topology[NR_CPUS];
+
+#define topology_physical_package_id(cpu) (cpu_topology[cpu].cluster_id)
+#define topology_core_id(cpu) (cpu_topology[cpu].core_id)
+#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling)
+#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling)
+
+#define mc_capable() (cpu_topology[0].cluster_id != -1)
+#define smt_capable() (cpu_topology[0].thread_id != -1)
+
+void init_cpu_topology(void);
+void store_cpu_topology(unsigned int cpuid);
+const struct cpumask *cpu_coregroup_mask(int cpu);
+
+#ifdef CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE
+/* Common values for CPUs */
+#ifndef SD_CPU_INIT
+#define SD_CPU_INIT (struct sched_domain) { \
+ .min_interval = 1, \
+ .max_interval = 4, \
+ .busy_factor = 64, \
+ .imbalance_pct = 125, \
+ .cache_nice_tries = 1, \
+ .busy_idx = 2, \
+ .idle_idx = 1, \
+ .newidle_idx = 0, \
+ .wake_idx = 0, \
+ .forkexec_idx = 0, \
+ \
+ .flags = 0*SD_LOAD_BALANCE \
+ | 1*SD_BALANCE_NEWIDLE \
+ | 1*SD_BALANCE_EXEC \
+ | 1*SD_BALANCE_FORK \
+ | 0*SD_BALANCE_WAKE \
+ | 1*SD_WAKE_AFFINE \
+ | 0*SD_SHARE_CPUPOWER \
+ | 0*SD_SHARE_PKG_RESOURCES \
+ | 0*SD_SERIALIZE \
+ , \
+ .last_balance = jiffies, \
+ .balance_interval = 1, \
+}
+#endif
+#endif /* CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE */
+
+#else
+
+static inline void init_cpu_topology(void) { }
+static inline void store_cpu_topology(unsigned int cpuid) { }
+
+#endif
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_ARM_TOPOLOGY_H */
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 82ce217e94cf..c335479c2638 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -28,3 +28,5 @@
#endif
#define __ARCH_WANT_SYS_CLONE
#include <uapi/asm/unistd.h>
+
+#define NR_syscalls (__NR_syscalls)
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index bb8eb8a78e67..faa0e1ce59df 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -404,7 +404,7 @@ __SYSCALL(379, sys_finit_module)
__SYSCALL(380, sys_sched_setattr)
__SYSCALL(381, sys_sched_getattr)
-#define __NR_compat_syscalls 379
+#define __NR_compat_syscalls 382
/*
* Compat syscall numbers used by the AArch64 kernel.
diff --git a/arch/arm64/include/uapi/asm/Kbuild b/arch/arm64/include/uapi/asm/Kbuild
index e4b78bdca19e..942376d37d22 100644
--- a/arch/arm64/include/uapi/asm/Kbuild
+++ b/arch/arm64/include/uapi/asm/Kbuild
@@ -9,6 +9,7 @@ header-y += byteorder.h
header-y += fcntl.h
header-y += hwcap.h
header-y += kvm_para.h
+header-y += perf_regs.h
header-y += param.h
header-y += ptrace.h
header-y += setup.h
diff --git a/arch/arm64/include/uapi/asm/perf_regs.h b/arch/arm64/include/uapi/asm/perf_regs.h
new file mode 100644
index 000000000000..172b8317ee49
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/perf_regs.h
@@ -0,0 +1,40 @@
+#ifndef _ASM_ARM64_PERF_REGS_H
+#define _ASM_ARM64_PERF_REGS_H
+
+enum perf_event_arm_regs {
+ PERF_REG_ARM64_X0,
+ PERF_REG_ARM64_X1,
+ PERF_REG_ARM64_X2,
+ PERF_REG_ARM64_X3,
+ PERF_REG_ARM64_X4,
+ PERF_REG_ARM64_X5,
+ PERF_REG_ARM64_X6,
+ PERF_REG_ARM64_X7,
+ PERF_REG_ARM64_X8,
+ PERF_REG_ARM64_X9,
+ PERF_REG_ARM64_X10,
+ PERF_REG_ARM64_X11,
+ PERF_REG_ARM64_X12,
+ PERF_REG_ARM64_X13,
+ PERF_REG_ARM64_X14,
+ PERF_REG_ARM64_X15,
+ PERF_REG_ARM64_X16,
+ PERF_REG_ARM64_X17,
+ PERF_REG_ARM64_X18,
+ PERF_REG_ARM64_X19,
+ PERF_REG_ARM64_X20,
+ PERF_REG_ARM64_X21,
+ PERF_REG_ARM64_X22,
+ PERF_REG_ARM64_X23,
+ PERF_REG_ARM64_X24,
+ PERF_REG_ARM64_X25,
+ PERF_REG_ARM64_X26,
+ PERF_REG_ARM64_X27,
+ PERF_REG_ARM64_X28,
+ PERF_REG_ARM64_X29,
+ PERF_REG_ARM64_LR,
+ PERF_REG_ARM64_SP,
+ PERF_REG_ARM64_PC,
+ PERF_REG_ARM64_MAX,
+};
+#endif /* _ASM_ARM64_PERF_REGS_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 2d4554b13410..6ae85d55308b 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -5,21 +5,29 @@
CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET)
AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
+CFLAGS_REMOVE_ftrace.o = -pg
+CFLAGS_REMOVE_insn.o = -pg
+CFLAGS_REMOVE_return_address.o = -pg
+
# Object file lists.
arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \
entry-fpsimd.o process.o ptrace.o setup.o signal.o \
sys.o stacktrace.o time.o traps.o io.o vdso.o \
- hyp-stub.o psci.o cpu_ops.o insn.o
+ hyp-stub.o psci.o cpu_ops.o insn.o return_address.o
arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
sys_compat.o
+arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o
+arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o topology.o
arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o
+arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o
-arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o
+arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND) += sleep.o suspend.o
arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o
+arm64-obj-$(CONFIG_KGDB) += kgdb.o
obj-y += $(arm64-obj-y) vdso/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
index 338b568cd8ae..7f0512feaa13 100644
--- a/arch/arm64/kernel/arm64ksyms.c
+++ b/arch/arm64/kernel/arm64ksyms.c
@@ -56,3 +56,7 @@ EXPORT_SYMBOL(clear_bit);
EXPORT_SYMBOL(test_and_clear_bit);
EXPORT_SYMBOL(change_bit);
EXPORT_SYMBOL(test_and_change_bit);
+
+#ifdef CONFIG_FUNCTION_TRACER
+EXPORT_SYMBOL(_mcount);
+#endif
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 636ba8b6240b..bb9a0e684e12 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -138,6 +138,7 @@ static void clear_os_lock(void *unused)
{
asm volatile("msr oslar_el1, %0" : : "r" (0));
isb();
+ local_dbg_enable();
}
static int os_lock_notify(struct notifier_block *self,
@@ -314,9 +315,6 @@ static int brk_handler(unsigned long addr, unsigned int esr,
if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED)
return 0;
- pr_warn("unexpected brk exception at %lx, esr=0x%x\n",
- (long)instruction_pointer(regs), esr);
-
if (!user_mode(regs))
return -EFAULT;
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
new file mode 100644
index 000000000000..b051871f2965
--- /dev/null
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -0,0 +1,218 @@
+/*
+ * arch/arm64/kernel/entry-ftrace.S
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/ftrace.h>
+#include <asm/insn.h>
+
+/*
+ * Gcc with -pg will put the following code in the beginning of each function:
+ * mov x0, x30
+ * bl _mcount
+ * [function's body ...]
+ * "bl _mcount" may be replaced to "bl ftrace_caller" or NOP if dynamic
+ * ftrace is enabled.
+ *
+ * Please note that x0 as an argument will not be used here because we can
+ * get lr(x30) of instrumented function at any time by winding up call stack
+ * as long as the kernel is compiled without -fomit-frame-pointer.
+ * (or CONFIG_FRAME_POINTER, this is forced on arm64)
+ *
+ * stack layout after mcount_enter in _mcount():
+ *
+ * current sp/fp => 0:+-----+
+ * in _mcount() | x29 | -> instrumented function's fp
+ * +-----+
+ * | x30 | -> _mcount()'s lr (= instrumented function's pc)
+ * old sp => +16:+-----+
+ * when instrumented | |
+ * function calls | ... |
+ * _mcount() | |
+ * | |
+ * instrumented => +xx:+-----+
+ * function's fp | x29 | -> parent's fp
+ * +-----+
+ * | x30 | -> instrumented function's lr (= parent's pc)
+ * +-----+
+ * | ... |
+ */
+
+ .macro mcount_enter
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ .endm
+
+ .macro mcount_exit
+ ldp x29, x30, [sp], #16
+ ret
+ .endm
+
+ .macro mcount_adjust_addr rd, rn
+ sub \rd, \rn, #AARCH64_INSN_SIZE
+ .endm
+
+ /* for instrumented function's parent */
+ .macro mcount_get_parent_fp reg
+ ldr \reg, [x29]
+ ldr \reg, [\reg]
+ .endm
+
+ /* for instrumented function */
+ .macro mcount_get_pc0 reg
+ mcount_adjust_addr \reg, x30
+ .endm
+
+ .macro mcount_get_pc reg
+ ldr \reg, [x29, #8]
+ mcount_adjust_addr \reg, \reg
+ .endm
+
+ .macro mcount_get_lr reg
+ ldr \reg, [x29]
+ ldr \reg, [\reg, #8]
+ mcount_adjust_addr \reg, \reg
+ .endm
+
+ .macro mcount_get_lr_addr reg
+ ldr \reg, [x29]
+ add \reg, \reg, #8
+ .endm
+
+#ifndef CONFIG_DYNAMIC_FTRACE
+/*
+ * void _mcount(unsigned long return_address)
+ * @return_address: return address to instrumented function
+ *
+ * This function makes calls, if enabled, to:
+ * - tracer function to probe instrumented function's entry,
+ * - ftrace_graph_caller to set up an exit hook
+ */
+ENTRY(_mcount)
+#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ ldr x0, =ftrace_trace_stop
+ ldr x0, [x0] // if ftrace_trace_stop
+ ret // return;
+#endif
+ mcount_enter
+
+ ldr x0, =ftrace_trace_function
+ ldr x2, [x0]
+ adr x0, ftrace_stub
+ cmp x0, x2 // if (ftrace_trace_function
+ b.eq skip_ftrace_call // != ftrace_stub) {
+
+ mcount_get_pc x0 // function's pc
+ mcount_get_lr x1 // function's lr (= parent's pc)
+ blr x2 // (*ftrace_trace_function)(pc, lr);
+
+#ifndef CONFIG_FUNCTION_GRAPH_TRACER
+skip_ftrace_call: // return;
+ mcount_exit // }
+#else
+ mcount_exit // return;
+ // }
+skip_ftrace_call:
+ ldr x1, =ftrace_graph_return
+ ldr x2, [x1] // if ((ftrace_graph_return
+ cmp x0, x2 // != ftrace_stub)
+ b.ne ftrace_graph_caller
+
+ ldr x1, =ftrace_graph_entry // || (ftrace_graph_entry
+ ldr x2, [x1] // != ftrace_graph_entry_stub))
+ ldr x0, =ftrace_graph_entry_stub
+ cmp x0, x2
+ b.ne ftrace_graph_caller // ftrace_graph_caller();
+
+ mcount_exit
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+ENDPROC(_mcount)
+
+#else /* CONFIG_DYNAMIC_FTRACE */
+/*
+ * _mcount() is used to build the kernel with -pg option, but all the branch
+ * instructions to _mcount() are replaced to NOP initially at kernel start up,
+ * and later on, NOP to branch to ftrace_caller() when enabled or branch to
+ * NOP when disabled per-function base.
+ */
+ENTRY(_mcount)
+ ret
+ENDPROC(_mcount)
+
+/*
+ * void ftrace_caller(unsigned long return_address)
+ * @return_address: return address to instrumented function
+ *
+ * This function is a counterpart of _mcount() in 'static' ftrace, and
+ * makes calls to:
+ * - tracer function to probe instrumented function's entry,
+ * - ftrace_graph_caller to set up an exit hook
+ */
+ENTRY(ftrace_caller)
+ mcount_enter
+
+ mcount_get_pc0 x0 // function's pc
+ mcount_get_lr x1 // function's lr
+
+ .global ftrace_call
+ftrace_call: // tracer(pc, lr);
+ nop // This will be replaced with "bl xxx"
+ // where xxx can be any kind of tracer.
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ .global ftrace_graph_call
+ftrace_graph_call: // ftrace_graph_caller();
+ nop // If enabled, this will be replaced
+ // "b ftrace_graph_caller"
+#endif
+
+ mcount_exit
+ENDPROC(ftrace_caller)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+ENTRY(ftrace_stub)
+ ret
+ENDPROC(ftrace_stub)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * void ftrace_graph_caller(void)
+ *
+ * Called from _mcount() or ftrace_caller() when function_graph tracer is
+ * selected.
+ * This function w/ prepare_ftrace_return() fakes link register's value on
+ * the call stack in order to intercept instrumented function's return path
+ * and run return_to_handler() later on its exit.
+ */
+ENTRY(ftrace_graph_caller)
+ mcount_get_lr_addr x0 // pointer to function's saved lr
+ mcount_get_pc x1 // function's pc
+ mcount_get_parent_fp x2 // parent's fp
+ bl prepare_ftrace_return // prepare_ftrace_return(&lr, pc, fp)
+
+ mcount_exit
+ENDPROC(ftrace_graph_caller)
+
+/*
+ * void return_to_handler(void)
+ *
+ * Run ftrace_return_to_handler() before going back to parent.
+ * @fp is checked against the value passed by ftrace_graph_caller()
+ * only when CONFIG_FUNCTION_GRAPH_FP_TEST is enabled.
+ */
+ENTRY(return_to_handler)
+ str x0, [sp, #-16]!
+ mov x0, x29 // parent's fp
+ bl ftrace_return_to_handler// addr = ftrace_return_to_hander(fp);
+ mov x30, x0 // restore the original return address
+ ldr x0, [sp], #16
+ ret
+END(return_to_handler)
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 39ac630d83de..6d26ebc260ad 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -275,7 +275,6 @@ el1_sp_pc:
* Stack or PC alignment exception handling
*/
mrs x0, far_el1
- mov x1, x25
mov x2, sp
b do_sp_pc_abort
el1_undef:
@@ -631,8 +630,9 @@ el0_svc_naked: // compat entry point
enable_irq
get_thread_info tsk
- ldr x16, [tsk, #TI_FLAGS] // check for syscall tracing
- tbnz x16, #TIF_SYSCALL_TRACE, __sys_trace // are we tracing syscalls?
+ ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks
+ tst x16, #_TIF_SYSCALL_WORK
+ b.ne __sys_trace
adr lr, ret_fast_syscall // return address
cmp scno, sc_nr // check upper syscall limit
b.hs ni_sys
@@ -648,9 +648,8 @@ ENDPROC(el0_svc)
* switches, and waiting for our parent to respond.
*/
__sys_trace:
- mov x1, sp
- mov w0, #0 // trace entry
- bl syscall_trace
+ mov x0, sp
+ bl syscall_trace_enter
adr lr, __sys_trace_return // return address
uxtw scno, w0 // syscall number (possibly new)
mov x1, sp // pointer to regs
@@ -665,9 +664,8 @@ __sys_trace:
__sys_trace_return:
str x0, [sp] // save returned x0
- mov x1, sp
- mov w0, #1 // trace exit
- bl syscall_trace
+ mov x0, sp
+ bl syscall_trace_exit
b ret_to_user
/*
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
new file mode 100644
index 000000000000..649890a3ac4e
--- /dev/null
+++ b/arch/arm64/kernel/ftrace.c
@@ -0,0 +1,177 @@
+/*
+ * arch/arm64/kernel/ftrace.c
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/ftrace.h>
+#include <linux/swab.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/ftrace.h>
+#include <asm/insn.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ * Replace a single instruction, which may be a branch or NOP.
+ * If @validate == true, a replaced instruction is checked against 'old'.
+ */
+static int ftrace_modify_code(unsigned long pc, u32 old, u32 new,
+ bool validate)
+{
+ u32 replaced;
+
+ /*
+ * Note:
+ * Due to modules and __init, code can disappear and change,
+ * we need to protect against faulting as well as code changing.
+ * We do this by aarch64_insn_*() which use the probe_kernel_*().
+ *
+ * No lock is held here because all the modifications are run
+ * through stop_machine().
+ */
+ if (validate) {
+ if (aarch64_insn_read((void *)pc, &replaced))
+ return -EFAULT;
+
+ if (replaced != old)
+ return -EINVAL;
+ }
+ if (aarch64_insn_patch_text_nosync((void *)pc, new))
+ return -EPERM;
+
+ return 0;
+}
+
+/*
+ * Replace tracer function in ftrace_caller()
+ */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ unsigned long pc;
+ u32 new;
+
+ pc = (unsigned long)&ftrace_call;
+ new = aarch64_insn_gen_branch_imm(pc, (unsigned long)func, true);
+
+ return ftrace_modify_code(pc, 0, new, false);
+}
+
+/*
+ * Turn on the call to ftrace_caller() in instrumented function
+ */
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long pc = rec->ip;
+ u32 old, new;
+
+ old = aarch64_insn_gen_nop();
+ new = aarch64_insn_gen_branch_imm(pc, addr, true);
+
+ return ftrace_modify_code(pc, old, new, true);
+}
+
+/*
+ * Turn off the call to ftrace_caller() in instrumented function
+ */
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+ unsigned long addr)
+{
+ unsigned long pc = rec->ip;
+ u32 old, new;
+
+ old = aarch64_insn_gen_branch_imm(pc, addr, true);
+ new = aarch64_insn_gen_nop();
+
+ return ftrace_modify_code(pc, old, new, true);
+}
+
+int __init ftrace_dyn_arch_init(void *data)
+{
+ *(unsigned long *)data = 0;
+ return 0;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * function_graph tracer expects ftrace_return_to_handler() to be called
+ * on the way back to parent. For this purpose, this function is called
+ * in _mcount() or ftrace_caller() to replace return address (*parent) on
+ * the call stack to return_to_handler.
+ *
+ * Note that @frame_pointer is used only for sanity check later.
+ */
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+ unsigned long frame_pointer)
+{
+ unsigned long return_hooker = (unsigned long)&return_to_handler;
+ unsigned long old;
+ struct ftrace_graph_ent trace;
+ int err;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
+
+ /*
+ * Note:
+ * No protection against faulting at *parent, which may be seen
+ * on other archs. It's unlikely on AArch64.
+ */
+ old = *parent;
+ *parent = return_hooker;
+
+ trace.func = self_addr;
+ trace.depth = current->curr_ret_stack + 1;
+
+ /* Only trace if the calling function expects to */
+ if (!ftrace_graph_entry(&trace)) {
+ *parent = old;
+ return;
+ }
+
+ err = ftrace_push_return_trace(old, self_addr, &trace.depth,
+ frame_pointer);
+ if (err == -EBUSY) {
+ *parent = old;
+ return;
+ }
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ * Turn on/off the call to ftrace_graph_caller() in ftrace_caller()
+ * depending on @enable.
+ */
+static int ftrace_modify_graph_caller(bool enable)
+{
+ unsigned long pc = (unsigned long)&ftrace_graph_call;
+ u32 branch, nop;
+
+ branch = aarch64_insn_gen_branch_imm(pc,
+ (unsigned long)ftrace_graph_caller, false);
+ nop = aarch64_insn_gen_nop();
+
+ if (enable)
+ return ftrace_modify_code(pc, nop, branch, true);
+ else
+ return ftrace_modify_code(pc, branch, nop, true);
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+ return ftrace_modify_graph_caller(true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+ return ftrace_modify_graph_caller(false);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index f17f581116fc..a45e2db6e502 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -20,6 +20,7 @@
#define pr_fmt(fmt) "hw-breakpoint: " fmt
+#include <linux/compat.h>
#include <linux/cpu_pm.h>
#include <linux/errno.h>
#include <linux/hw_breakpoint.h>
@@ -27,7 +28,6 @@
#include <linux/ptrace.h>
#include <linux/smp.h>
-#include <asm/compat.h>
#include <asm/current.h>
#include <asm/debug-monitors.h>
#include <asm/hw_breakpoint.h>
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 473e5dbf8f39..0f08dfd69ebc 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -97,11 +97,15 @@ static bool migrate_one_irq(struct irq_desc *desc)
if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity))
return false;
- if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
- affinity = cpu_online_mask;
+ if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids)
ret = true;
- }
+ /*
+ * when using forced irq_set_affinity we must ensure that the cpu
+ * being offlined is not present in the affinity mask, it may be
+ * selected as the target CPU otherwise
+ */
+ affinity = cpu_online_mask;
c = irq_data_get_irq_chip(d);
if (!c->irq_set_affinity)
pr_debug("IRQ%u: unable to set affinity\n", d->irq);
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
new file mode 100644
index 000000000000..75c9cf1aafee
--- /dev/null
+++ b/arch/arm64/kernel/kgdb.c
@@ -0,0 +1,336 @@
+/*
+ * AArch64 KGDB support
+ *
+ * Based on arch/arm/kernel/kgdb.c
+ *
+ * Copyright (C) 2013 Cavium Inc.
+ * Author: Vijaya Kumar K <vijaya.kumar@caviumnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/irq.h>
+#include <linux/kdebug.h>
+#include <linux/kgdb.h>
+#include <asm/traps.h>
+
+struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
+ { "x0", 8, offsetof(struct pt_regs, regs[0])},
+ { "x1", 8, offsetof(struct pt_regs, regs[1])},
+ { "x2", 8, offsetof(struct pt_regs, regs[2])},
+ { "x3", 8, offsetof(struct pt_regs, regs[3])},
+ { "x4", 8, offsetof(struct pt_regs, regs[4])},
+ { "x5", 8, offsetof(struct pt_regs, regs[5])},
+ { "x6", 8, offsetof(struct pt_regs, regs[6])},
+ { "x7", 8, offsetof(struct pt_regs, regs[7])},
+ { "x8", 8, offsetof(struct pt_regs, regs[8])},
+ { "x9", 8, offsetof(struct pt_regs, regs[9])},
+ { "x10", 8, offsetof(struct pt_regs, regs[10])},
+ { "x11", 8, offsetof(struct pt_regs, regs[11])},
+ { "x12", 8, offsetof(struct pt_regs, regs[12])},
+ { "x13", 8, offsetof(struct pt_regs, regs[13])},
+ { "x14", 8, offsetof(struct pt_regs, regs[14])},
+ { "x15", 8, offsetof(struct pt_regs, regs[15])},
+ { "x16", 8, offsetof(struct pt_regs, regs[16])},
+ { "x17", 8, offsetof(struct pt_regs, regs[17])},
+ { "x18", 8, offsetof(struct pt_regs, regs[18])},
+ { "x19", 8, offsetof(struct pt_regs, regs[19])},
+ { "x20", 8, offsetof(struct pt_regs, regs[20])},
+ { "x21", 8, offsetof(struct pt_regs, regs[21])},
+ { "x22", 8, offsetof(struct pt_regs, regs[22])},
+ { "x23", 8, offsetof(struct pt_regs, regs[23])},
+ { "x24", 8, offsetof(struct pt_regs, regs[24])},
+ { "x25", 8, offsetof(struct pt_regs, regs[25])},
+ { "x26", 8, offsetof(struct pt_regs, regs[26])},
+ { "x27", 8, offsetof(struct pt_regs, regs[27])},
+ { "x28", 8, offsetof(struct pt_regs, regs[28])},
+ { "x29", 8, offsetof(struct pt_regs, regs[29])},
+ { "x30", 8, offsetof(struct pt_regs, regs[30])},
+ { "sp", 8, offsetof(struct pt_regs, sp)},
+ { "pc", 8, offsetof(struct pt_regs, pc)},
+ { "pstate", 8, offsetof(struct pt_regs, pstate)},
+ { "v0", 16, -1 },
+ { "v1", 16, -1 },
+ { "v2", 16, -1 },
+ { "v3", 16, -1 },
+ { "v4", 16, -1 },
+ { "v5", 16, -1 },
+ { "v6", 16, -1 },
+ { "v7", 16, -1 },
+ { "v8", 16, -1 },
+ { "v9", 16, -1 },
+ { "v10", 16, -1 },
+ { "v11", 16, -1 },
+ { "v12", 16, -1 },
+ { "v13", 16, -1 },
+ { "v14", 16, -1 },
+ { "v15", 16, -1 },
+ { "v16", 16, -1 },
+ { "v17", 16, -1 },
+ { "v18", 16, -1 },
+ { "v19", 16, -1 },
+ { "v20", 16, -1 },
+ { "v21", 16, -1 },
+ { "v22", 16, -1 },
+ { "v23", 16, -1 },
+ { "v24", 16, -1 },
+ { "v25", 16, -1 },
+ { "v26", 16, -1 },
+ { "v27", 16, -1 },
+ { "v28", 16, -1 },
+ { "v29", 16, -1 },
+ { "v30", 16, -1 },
+ { "v31", 16, -1 },
+ { "fpsr", 4, -1 },
+ { "fpcr", 4, -1 },
+};
+
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+ if (regno >= DBG_MAX_REG_NUM || regno < 0)
+ return NULL;
+
+ if (dbg_reg_def[regno].offset != -1)
+ memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
+ dbg_reg_def[regno].size);
+ else
+ memset(mem, 0, dbg_reg_def[regno].size);
+ return dbg_reg_def[regno].name;
+}
+
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+ if (regno >= DBG_MAX_REG_NUM || regno < 0)
+ return -EINVAL;
+
+ if (dbg_reg_def[regno].offset != -1)
+ memcpy((void *)regs + dbg_reg_def[regno].offset, mem,
+ dbg_reg_def[regno].size);
+ return 0;
+}
+
+void
+sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task)
+{
+ struct pt_regs *thread_regs;
+
+ /* Initialize to zero */
+ memset((char *)gdb_regs, 0, NUMREGBYTES);
+ thread_regs = task_pt_regs(task);
+ memcpy((void *)gdb_regs, (void *)thread_regs->regs, GP_REG_BYTES);
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+ regs->pc = pc;
+}
+
+static int compiled_break;
+
+static void kgdb_arch_update_addr(struct pt_regs *regs,
+ char *remcom_in_buffer)
+{
+ unsigned long addr;
+ char *ptr;
+
+ ptr = &remcom_in_buffer[1];
+ if (kgdb_hex2long(&ptr, &addr))
+ kgdb_arch_set_pc(regs, addr);
+ else if (compiled_break == 1)
+ kgdb_arch_set_pc(regs, regs->pc + 4);
+
+ compiled_break = 0;
+}
+
+int kgdb_arch_handle_exception(int exception_vector, int signo,
+ int err_code, char *remcom_in_buffer,
+ char *remcom_out_buffer,
+ struct pt_regs *linux_regs)
+{
+ int err;
+
+ switch (remcom_in_buffer[0]) {
+ case 'D':
+ case 'k':
+ /*
+ * Packet D (Detach), k (kill). No special handling
+ * is required here. Handle same as c packet.
+ */
+ case 'c':
+ /*
+ * Packet c (Continue) to continue executing.
+ * Set pc to required address.
+ * Try to read optional parameter and set pc.
+ * If this was a compiled breakpoint, we need to move
+ * to the next instruction else we will just breakpoint
+ * over and over again.
+ */
+ kgdb_arch_update_addr(linux_regs, remcom_in_buffer);
+ atomic_set(&kgdb_cpu_doing_single_step, -1);
+ kgdb_single_step = 0;
+
+ /*
+ * Received continue command, disable single step
+ */
+ if (kernel_active_single_step())
+ kernel_disable_single_step();
+
+ err = 0;
+ break;
+ case 's':
+ /*
+ * Update step address value with address passed
+ * with step packet.
+ * On debug exception return PC is copied to ELR
+ * So just update PC.
+ * If no step address is passed, resume from the address
+ * pointed by PC. Do not update PC
+ */
+ kgdb_arch_update_addr(linux_regs, remcom_in_buffer);
+ atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id());
+ kgdb_single_step = 1;
+
+ /*
+ * Enable single step handling
+ */
+ if (!kernel_active_single_step())
+ kernel_enable_single_step(linux_regs);
+ err = 0;
+ break;
+ default:
+ err = -1;
+ }
+ return err;
+}
+
+static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr)
+{
+ kgdb_handle_exception(1, SIGTRAP, 0, regs);
+ return 0;
+}
+
+static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr)
+{
+ compiled_break = 1;
+ kgdb_handle_exception(1, SIGTRAP, 0, regs);
+
+ return 0;
+}
+
+static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr)
+{
+ kgdb_handle_exception(1, SIGTRAP, 0, regs);
+ return 0;
+}
+
+static struct break_hook kgdb_brkpt_hook = {
+ .esr_mask = 0xffffffff,
+ .esr_val = DBG_ESR_VAL_BRK(KGDB_DYN_DGB_BRK_IMM),
+ .fn = kgdb_brk_fn
+};
+
+static struct break_hook kgdb_compiled_brkpt_hook = {
+ .esr_mask = 0xffffffff,
+ .esr_val = DBG_ESR_VAL_BRK(KDBG_COMPILED_DBG_BRK_IMM),
+ .fn = kgdb_compiled_brk_fn
+};
+
+static struct step_hook kgdb_step_hook = {
+ .fn = kgdb_step_brk_fn
+};
+
+static void kgdb_call_nmi_hook(void *ignored)
+{
+ kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
+}
+
+void kgdb_roundup_cpus(unsigned long flags)
+{
+ local_irq_enable();
+ smp_call_function(kgdb_call_nmi_hook, NULL, 0);
+ local_irq_disable();
+}
+
+static int __kgdb_notify(struct die_args *args, unsigned long cmd)
+{
+ struct pt_regs *regs = args->regs;
+
+ if (kgdb_handle_exception(1, args->signr, cmd, regs))
+ return NOTIFY_DONE;
+ return NOTIFY_STOP;
+}
+
+static int
+kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
+{
+ unsigned long flags;
+ int ret;
+
+ local_irq_save(flags);
+ ret = __kgdb_notify(ptr, cmd);
+ local_irq_restore(flags);
+
+ return ret;
+}
+
+static struct notifier_block kgdb_notifier = {
+ .notifier_call = kgdb_notify,
+ /*
+ * Want to be lowest priority
+ */
+ .priority = -INT_MAX,
+};
+
+/*
+ * kgdb_arch_init - Perform any architecture specific initalization.
+ * This function will handle the initalization of any architecture
+ * specific callbacks.
+ */
+int kgdb_arch_init(void)
+{
+ int ret = register_die_notifier(&kgdb_notifier);
+
+ if (ret != 0)
+ return ret;
+
+ register_break_hook(&kgdb_brkpt_hook);
+ register_break_hook(&kgdb_compiled_brkpt_hook);
+ register_step_hook(&kgdb_step_hook);
+ return 0;
+}
+
+/*
+ * kgdb_arch_exit - Perform any architecture specific uninitalization.
+ * This function will handle the uninitalization of any architecture
+ * specific callbacks, for dynamic registration and unregistration.
+ */
+void kgdb_arch_exit(void)
+{
+ unregister_break_hook(&kgdb_brkpt_hook);
+ unregister_break_hook(&kgdb_compiled_brkpt_hook);
+ unregister_step_hook(&kgdb_step_hook);
+ unregister_die_notifier(&kgdb_notifier);
+}
+
+/*
+ * ARM instructions are always in LE.
+ * Break instruction is encoded in LE format
+ */
+struct kgdb_arch arch_kgdb_ops = {
+ .gdb_bpt_instr = {
+ KGDB_DYN_BRK_INS_BYTE0,
+ KGDB_DYN_BRK_INS_BYTE1,
+ KGDB_DYN_BRK_INS_BYTE2,
+ KGDB_DYN_BRK_INS_BYTE3,
+ }
+};
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 5b1cd792274a..baf5afb7e6a0 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -1348,8 +1348,8 @@ early_initcall(init_hw_perf_events);
* Callchain handling code.
*/
struct frame_tail {
- struct frame_tail __user *fp;
- unsigned long lr;
+ struct frame_tail __user *fp;
+ unsigned long lr;
} __attribute__((packed));
/*
@@ -1386,22 +1386,84 @@ user_backtrace(struct frame_tail __user *tail,
return buftail.fp;
}
+#ifdef CONFIG_COMPAT
+/*
+ * The registers we're interested in are at the end of the variable
+ * length saved register structure. The fp points at the end of this
+ * structure so the address of this struct is:
+ * (struct compat_frame_tail *)(xxx->fp)-1
+ *
+ * This code has been adapted from the ARM OProfile support.
+ */
+struct compat_frame_tail {
+ compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */
+ u32 sp;
+ u32 lr;
+} __attribute__((packed));
+
+static struct compat_frame_tail __user *
+compat_user_backtrace(struct compat_frame_tail __user *tail,
+ struct perf_callchain_entry *entry)
+{
+ struct compat_frame_tail buftail;
+ unsigned long err;
+
+ /* Also check accessibility of one struct frame_tail beyond */
+ if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
+ return NULL;
+
+ pagefault_disable();
+ err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
+ pagefault_enable();
+
+ if (err)
+ return NULL;
+
+ perf_callchain_store(entry, buftail.lr);
+
+ /*
+ * Frame pointers should strictly progress back up the stack
+ * (towards higher addresses).
+ */
+ if (tail + 1 >= (struct compat_frame_tail __user *)
+ compat_ptr(buftail.fp))
+ return NULL;
+
+ return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1;
+}
+#endif /* CONFIG_COMPAT */
+
void perf_callchain_user(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
- struct frame_tail __user *tail;
-
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
/* We don't support guest os callchain now */
return;
}
perf_callchain_store(entry, regs->pc);
- tail = (struct frame_tail __user *)regs->regs[29];
- while (entry->nr < PERF_MAX_STACK_DEPTH &&
- tail && !((unsigned long)tail & 0xf))
- tail = user_backtrace(tail, entry);
+ if (!compat_user_mode(regs)) {
+ /* AARCH64 mode */
+ struct frame_tail __user *tail;
+
+ tail = (struct frame_tail __user *)regs->regs[29];
+
+ while (entry->nr < PERF_MAX_STACK_DEPTH &&
+ tail && !((unsigned long)tail & 0xf))
+ tail = user_backtrace(tail, entry);
+ } else {
+#ifdef CONFIG_COMPAT
+ /* AARCH32 compat mode */
+ struct compat_frame_tail __user *tail;
+
+ tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;
+
+ while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+ tail && !((unsigned long)tail & 0x3))
+ tail = compat_user_backtrace(tail, entry);
+#endif
+ }
}
/*
@@ -1429,6 +1491,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
frame.fp = regs->regs[29];
frame.sp = regs->sp;
frame.pc = regs->pc;
+
walk_stackframe(&frame, callchain_trace, entry);
}
diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c
new file mode 100644
index 000000000000..422ebd63b619
--- /dev/null
+++ b/arch/arm64/kernel/perf_regs.c
@@ -0,0 +1,46 @@
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/bug.h>
+
+#include <asm/compat.h>
+#include <asm/perf_regs.h>
+#include <asm/ptrace.h>
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+ if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM64_MAX))
+ return 0;
+
+ /*
+ * Compat (i.e. 32 bit) mode:
+ * - PC has been set in the pt_regs struct in kernel_entry,
+ * - Handle SP and LR here.
+ */
+ if (compat_user_mode(regs)) {
+ if ((u32)idx == PERF_REG_ARM64_SP)
+ return regs->compat_sp;
+ if ((u32)idx == PERF_REG_ARM64_LR)
+ return regs->compat_lr;
+ }
+
+ return regs->regs[idx];
+}
+
+#define REG_RESERVED (~((1ULL << PERF_REG_ARM64_MAX) - 1))
+
+int perf_reg_validate(u64 mask)
+{
+ if (!mask || mask & REG_RESERVED)
+ return -EINVAL;
+
+ return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+ if (is_compat_thread(task_thread_info(task)))
+ return PERF_SAMPLE_REGS_ABI_32;
+ else
+ return PERF_SAMPLE_REGS_ABI_64;
+}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 8e78cb238376..0e9ce28145d9 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -20,6 +20,7 @@
#include <stdarg.h>
+#include <linux/compat.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/kernel.h>
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 6a8928bba03c..7c8e809bdbde 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -19,6 +19,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/compat.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
@@ -41,6 +42,9 @@
#include <asm/traps.h>
#include <asm/system_misc.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
/*
* TODO: does not yet catch signals sent when the child dies.
* in exit.c or in signal.c.
@@ -650,11 +654,16 @@ static int compat_gpr_get(struct task_struct *target,
reg = task_pt_regs(target)->regs[idx];
}
- ret = copy_to_user(ubuf, &reg, sizeof(reg));
- if (ret)
- break;
+ if (kbuf) {
+ memcpy(kbuf, &reg, sizeof(reg));
+ kbuf += sizeof(reg);
+ } else {
+ ret = copy_to_user(ubuf, &reg, sizeof(reg));
+ if (ret)
+ break;
- ubuf += sizeof(reg);
+ ubuf += sizeof(reg);
+ }
}
return ret;
@@ -684,11 +693,16 @@ static int compat_gpr_set(struct task_struct *target,
unsigned int idx = start + i;
compat_ulong_t reg;
- ret = copy_from_user(&reg, ubuf, sizeof(reg));
- if (ret)
- return ret;
+ if (kbuf) {
+ memcpy(&reg, kbuf, sizeof(reg));
+ kbuf += sizeof(reg);
+ } else {
+ ret = copy_from_user(&reg, ubuf, sizeof(reg));
+ if (ret)
+ return ret;
- ubuf += sizeof(reg);
+ ubuf += sizeof(reg);
+ }
switch (idx) {
case 15:
@@ -821,6 +835,7 @@ static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off,
compat_ulong_t val)
{
int ret;
+ mm_segment_t old_fs = get_fs();
if (off & 3 || off >= COMPAT_USER_SZ)
return -EIO;
@@ -828,10 +843,13 @@ static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off,
if (off >= sizeof(compat_elf_gregset_t))
return 0;
+ set_fs(KERNEL_DS);
ret = copy_regset_from_user(tsk, &user_aarch32_view,
REGSET_COMPAT_GPR, off,
sizeof(compat_ulong_t),
&val);
+ set_fs(old_fs);
+
return ret;
}
@@ -1058,35 +1076,49 @@ long arch_ptrace(struct task_struct *child, long request,
return ptrace_request(child, request, addr, data);
}
-asmlinkage int syscall_trace(int dir, struct pt_regs *regs)
+enum ptrace_syscall_dir {
+ PTRACE_SYSCALL_ENTER = 0,
+ PTRACE_SYSCALL_EXIT,
+};
+
+static void tracehook_report_syscall(struct pt_regs *regs,
+ enum ptrace_syscall_dir dir)
{
+ int regno;
unsigned long saved_reg;
- if (!test_thread_flag(TIF_SYSCALL_TRACE))
- return regs->syscallno;
-
- if (is_compat_task()) {
- /* AArch32 uses ip (r12) for scratch */
- saved_reg = regs->regs[12];
- regs->regs[12] = dir;
- } else {
- /*
- * Save X7. X7 is used to denote syscall entry/exit:
- * X7 = 0 -> entry, = 1 -> exit
- */
- saved_reg = regs->regs[7];
- regs->regs[7] = dir;
- }
+ /*
+ * A scratch register (ip(r12) on AArch32, x7 on AArch64) is
+ * used to denote syscall entry/exit:
+ */
+ regno = (is_compat_task() ? 12 : 7);
+ saved_reg = regs->regs[regno];
+ regs->regs[regno] = dir;
- if (dir)
+ if (dir == PTRACE_SYSCALL_EXIT)
tracehook_report_syscall_exit(regs, 0);
else if (tracehook_report_syscall_entry(regs))
regs->syscallno = ~0UL;
- if (is_compat_task())
- regs->regs[12] = saved_reg;
- else
- regs->regs[7] = saved_reg;
+ regs->regs[regno] = saved_reg;
+}
+
+asmlinkage int syscall_trace_enter(struct pt_regs *regs)
+{
+ if (test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
+
+ if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+ trace_sys_enter(regs, regs->syscallno);
return regs->syscallno;
}
+
+asmlinkage void syscall_trace_exit(struct pt_regs *regs)
+{
+ if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+ trace_sys_exit(regs, regs_return_value(regs));
+
+ if (test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT);
+}
diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
new file mode 100644
index 000000000000..89102a6ffad5
--- /dev/null
+++ b/arch/arm64/kernel/return_address.c
@@ -0,0 +1,55 @@
+/*
+ * arch/arm64/kernel/return_address.c
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/ftrace.h>
+
+#include <asm/stacktrace.h>
+
+struct return_address_data {
+ unsigned int level;
+ void *addr;
+};
+
+static int save_return_addr(struct stackframe *frame, void *d)
+{
+ struct return_address_data *data = d;
+
+ if (!data->level) {
+ data->addr = (void *)frame->pc;
+ return 1;
+ } else {
+ --data->level;
+ return 0;
+ }
+}
+
+void *return_address(unsigned int level)
+{
+ struct return_address_data data;
+ struct stackframe frame;
+ register unsigned long current_sp asm ("sp");
+
+ data.level = level + 2;
+ data.addr = NULL;
+
+ frame.fp = (unsigned long)__builtin_frame_address(0);
+ frame.sp = current_sp;
+ frame.pc = (unsigned long)return_address; /* dummy */
+
+ walk_stackframe(&frame, save_return_addr, &data);
+
+ if (!data.level)
+ return data.addr;
+ else
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(return_address);
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 0fe5fdf666ec..46b441679d8f 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -70,6 +70,7 @@ EXPORT_SYMBOL_GPL(elf_hwcap);
COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV)
unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
+unsigned int compat_elf_hwcap2 __read_mostly;
#endif
static const char *cpu_name;
@@ -243,6 +244,38 @@ static void __init setup_processor(void)
block = (features >> 16) & 0xf;
if (block && !(block & 0x8))
elf_hwcap |= HWCAP_CRC32;
+
+#ifdef CONFIG_COMPAT
+ /*
+ * ID_ISAR5_EL1 carries similar information as above, but pertaining to
+ * the Aarch32 32-bit execution state.
+ */
+ features = read_cpuid(ID_ISAR5_EL1);
+ block = (features >> 4) & 0xf;
+ if (!(block & 0x8)) {
+ switch (block) {
+ default:
+ case 2:
+ compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL;
+ case 1:
+ compat_elf_hwcap2 |= COMPAT_HWCAP2_AES;
+ case 0:
+ break;
+ }
+ }
+
+ block = (features >> 8) & 0xf;
+ if (block && !(block & 0x8))
+ compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1;
+
+ block = (features >> 12) & 0xf;
+ if (block && !(block & 0x8))
+ compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2;
+
+ block = (features >> 16) & 0xf;
+ if (block && !(block & 0x8))
+ compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32;
+#endif
}
static void __init setup_machine_fdt(phys_addr_t dt_phys)
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 7ff2eee96c6b..e3cf09626245 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -17,6 +17,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/compat.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/personality.h>
@@ -25,7 +26,6 @@
#include <linux/tracehook.h>
#include <linux/ratelimit.h>
-#include <asm/compat.h>
#include <asm/debug-monitors.h>
#include <asm/elf.h>
#include <asm/cacheflush.h>
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 7cfb92a4ab66..9660750f34ba 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -114,6 +114,11 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
return ret;
}
+static void smp_store_cpu_info(unsigned int cpuid)
+{
+ store_cpu_topology(cpuid);
+}
+
/*
* This is the secondary CPU boot entry. We're using this CPUs
* idle thread stack, but a set of temporary page tables.
@@ -152,6 +157,8 @@ asmlinkage void secondary_start_kernel(void)
*/
notify_cpu_starting(cpu);
+ smp_store_cpu_info(cpu);
+
/*
* OK, now it's safe to let the boot CPU continue. Wait for
* the CPU migration code to notice that the CPU is online
@@ -390,6 +397,10 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
int err;
unsigned int cpu, ncores = num_possible_cpus();
+ init_cpu_topology();
+
+ smp_store_cpu_info(smp_processor_id());
+
/*
* are we trying to boot more cores than exist?
*/
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 38f0558f0c0a..55437ba1f5a4 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -35,7 +35,7 @@
* ldp x29, x30, [sp]
* add sp, sp, #0x10
*/
-int unwind_frame(struct stackframe *frame)
+int notrace unwind_frame(struct stackframe *frame)
{
unsigned long high, low;
unsigned long fp = frame->fp;
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
new file mode 100644
index 000000000000..a7328373e613
--- /dev/null
+++ b/arch/arm64/kernel/topology.c
@@ -0,0 +1,558 @@
+/*
+ * arch/arm64/kernel/topology.c
+ *
+ * Copyright (C) 2011,2013,2014 Linaro Limited.
+ *
+ * Based on the arm32 version written by Vincent Guittot in turn based on
+ * arch/sh/kernel/topology.c
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/node.h>
+#include <linux/nodemask.h>
+#include <linux/of.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <asm/topology.h>
+
+/*
+ * cpu power table
+ * This per cpu data structure describes the relative capacity of each core.
+ * On a heteregenous system, cores don't have the same computation capacity
+ * and we reflect that difference in the cpu_power field so the scheduler can
+ * take this difference into account during load balance. A per cpu structure
+ * is preferred because each CPU updates its own cpu_power field during the
+ * load balance except for idle cores. One idle core is selected to run the
+ * rebalance_domains for all idle cores and the cpu_power can be updated
+ * during this sequence.
+ */
+static DEFINE_PER_CPU(unsigned long, cpu_scale);
+
+unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
+{
+ return per_cpu(cpu_scale, cpu);
+}
+
+static void set_power_scale(unsigned int cpu, unsigned long power)
+{
+ per_cpu(cpu_scale, cpu) = power;
+}
+
+static int __init get_cpu_for_node(struct device_node *node)
+{
+ struct device_node *cpu_node;
+ int cpu;
+
+ cpu_node = of_parse_phandle(node, "cpu", 0);
+ if (!cpu_node)
+ return -1;
+
+ for_each_possible_cpu(cpu) {
+ if (of_get_cpu_node(cpu, NULL) == cpu_node) {
+ of_node_put(cpu_node);
+ return cpu;
+ }
+ }
+
+ pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name);
+
+ of_node_put(cpu_node);
+ return -1;
+}
+
+static int __init parse_core(struct device_node *core, int cluster_id,
+ int core_id)
+{
+ char name[10];
+ bool leaf = true;
+ int i = 0;
+ int cpu;
+ struct device_node *t;
+
+ do {
+ snprintf(name, sizeof(name), "thread%d", i);
+ t = of_get_child_by_name(core, name);
+ if (t) {
+ leaf = false;
+ cpu = get_cpu_for_node(t);
+ if (cpu >= 0) {
+ cpu_topology[cpu].cluster_id = cluster_id;
+ cpu_topology[cpu].core_id = core_id;
+ cpu_topology[cpu].thread_id = i;
+ } else {
+ pr_err("%s: Can't get CPU for thread\n",
+ t->full_name);
+ of_node_put(t);
+ return -EINVAL;
+ }
+ of_node_put(t);
+ }
+ i++;
+ } while (t);
+
+ cpu = get_cpu_for_node(core);
+ if (cpu >= 0) {
+ if (!leaf) {
+ pr_err("%s: Core has both threads and CPU\n",
+ core->full_name);
+ return -EINVAL;
+ }
+
+ cpu_topology[cpu].cluster_id = cluster_id;
+ cpu_topology[cpu].core_id = core_id;
+ } else if (leaf) {
+ pr_err("%s: Can't get CPU for leaf core\n", core->full_name);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int __init parse_cluster(struct device_node *cluster, int depth)
+{
+ char name[10];
+ bool leaf = true;
+ bool has_cores = false;
+ struct device_node *c;
+ static int cluster_id __initdata;
+ int core_id = 0;
+ int i, ret;
+
+ /*
+ * First check for child clusters; we currently ignore any
+ * information about the nesting of clusters and present the
+ * scheduler with a flat list of them.
+ */
+ i = 0;
+ do {
+ snprintf(name, sizeof(name), "cluster%d", i);
+ c = of_get_child_by_name(cluster, name);
+ if (c) {
+ leaf = false;
+ ret = parse_cluster(c, depth + 1);
+ of_node_put(c);
+ if (ret != 0)
+ return ret;
+ }
+ i++;
+ } while (c);
+
+ /* Now check for cores */
+ i = 0;
+ do {
+ snprintf(name, sizeof(name), "core%d", i);
+ c = of_get_child_by_name(cluster, name);
+ if (c) {
+ has_cores = true;
+
+ if (depth == 0) {
+ pr_err("%s: cpu-map children should be clusters\n",
+ c->full_name);
+ of_node_put(c);
+ return -EINVAL;
+ }
+
+ if (leaf) {
+ ret = parse_core(c, cluster_id, core_id++);
+ } else {
+ pr_err("%s: Non-leaf cluster with core %s\n",
+ cluster->full_name, name);
+ ret = -EINVAL;
+ }
+
+ of_node_put(c);
+ if (ret != 0)
+ return ret;
+ }
+ i++;
+ } while (c);
+
+ if (leaf && !has_cores)
+ pr_warn("%s: empty cluster\n", cluster->full_name);
+
+ if (leaf)
+ cluster_id++;
+
+ return 0;
+}
+
+struct cpu_efficiency {
+ const char *compatible;
+ unsigned long efficiency;
+};
+
+/*
+ * Table of relative efficiency of each processors
+ * The efficiency value must fit in 20bit and the final
+ * cpu_scale value must be in the range
+ * 0 < cpu_scale < 3*SCHED_POWER_SCALE/2
+ * in order to return at most 1 when DIV_ROUND_CLOSEST
+ * is used to compute the capacity of a CPU.
+ * Processors that are not defined in the table,
+ * use the default SCHED_POWER_SCALE value for cpu_scale.
+ */
+static const struct cpu_efficiency table_efficiency[] = {
+ { "arm,cortex-a57", 3891 },
+ { "arm,cortex-a53", 2048 },
+ { NULL, },
+};
+
+static unsigned long *__cpu_capacity;
+#define cpu_capacity(cpu) __cpu_capacity[cpu]
+
+static unsigned long middle_capacity = 1;
+
+/*
+ * Iterate all CPUs' descriptor in DT and compute the efficiency
+ * (as per table_efficiency). Also calculate a middle efficiency
+ * as close as possible to (max{eff_i} - min{eff_i}) / 2
+ * This is later used to scale the cpu_power field such that an
+ * 'average' CPU is of middle power. Also see the comments near
+ * table_efficiency[] and update_cpu_power().
+ */
+static int __init parse_dt_topology(void)
+{
+ struct device_node *cn, *map;
+ int ret = 0;
+ int cpu;
+
+ cn = of_find_node_by_path("/cpus");
+ if (!cn) {
+ pr_err("No CPU information found in DT\n");
+ return 0;
+ }
+
+ /*
+ * When topology is provided cpu-map is essentially a root
+ * cluster with restricted subnodes.
+ */
+ map = of_get_child_by_name(cn, "cpu-map");
+ if (!map)
+ goto out;
+
+ ret = parse_cluster(map, 0);
+ if (ret != 0)
+ goto out_map;
+
+ /*
+ * Check that all cores are in the topology; the SMP code will
+ * only mark cores described in the DT as possible.
+ */
+ for_each_possible_cpu(cpu) {
+ if (cpu_topology[cpu].cluster_id == -1) {
+ pr_err("CPU%d: No topology information specified\n",
+ cpu);
+ ret = -EINVAL;
+ }
+ }
+
+out_map:
+ of_node_put(map);
+out:
+ of_node_put(cn);
+ return ret;
+}
+
+static void __init parse_dt_cpu_power(void)
+{
+ const struct cpu_efficiency *cpu_eff;
+ struct device_node *cn;
+ unsigned long min_capacity = ULONG_MAX;
+ unsigned long max_capacity = 0;
+ unsigned long capacity = 0;
+ int cpu;
+
+ __cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity),
+ GFP_NOWAIT);
+
+ for_each_possible_cpu(cpu) {
+ const u32 *rate;
+ int len;
+
+ /* Too early to use cpu->of_node */
+ cn = of_get_cpu_node(cpu, NULL);
+ if (!cn) {
+ pr_err("Missing device node for CPU %d\n", cpu);
+ continue;
+ }
+
+ for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++)
+ if (of_device_is_compatible(cn, cpu_eff->compatible))
+ break;
+
+ if (cpu_eff->compatible == NULL) {
+ pr_warn("%s: Unknown CPU type\n", cn->full_name);
+ continue;
+ }
+
+ rate = of_get_property(cn, "clock-frequency", &len);
+ if (!rate || len != 4) {
+ pr_err("%s: Missing clock-frequency property\n",
+ cn->full_name);
+ continue;
+ }
+
+ capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency;
+
+ /* Save min capacity of the system */
+ if (capacity < min_capacity)
+ min_capacity = capacity;
+
+ /* Save max capacity of the system */
+ if (capacity > max_capacity)
+ max_capacity = capacity;
+
+ cpu_capacity(cpu) = capacity;
+ }
+
+ /* If min and max capacities are equal we bypass the update of the
+ * cpu_scale because all CPUs have the same capacity. Otherwise, we
+ * compute a middle_capacity factor that will ensure that the capacity
+ * of an 'average' CPU of the system will be as close as possible to
+ * SCHED_POWER_SCALE, which is the default value, but with the
+ * constraint explained near table_efficiency[].
+ */
+ if (min_capacity == max_capacity)
+ return;
+ else if (4 * max_capacity < (3 * (max_capacity + min_capacity)))
+ middle_capacity = (min_capacity + max_capacity)
+ >> (SCHED_POWER_SHIFT+1);
+ else
+ middle_capacity = ((max_capacity / 3)
+ >> (SCHED_POWER_SHIFT-1)) + 1;
+}
+
+/*
+ * Look for a customed capacity of a CPU in the cpu_topo_data table during the
+ * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the
+ * function returns directly for SMP system.
+ */
+static void update_cpu_power(unsigned int cpu)
+{
+ if (!cpu_capacity(cpu))
+ return;
+
+ set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity);
+
+ pr_info("CPU%u: update cpu_power %lu\n",
+ cpu, arch_scale_freq_power(NULL, cpu));
+}
+
+/*
+ * cpu topology table
+ */
+struct cpu_topology cpu_topology[NR_CPUS];
+EXPORT_SYMBOL_GPL(cpu_topology);
+
+const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+ return &cpu_topology[cpu].core_sibling;
+}
+
+static void update_siblings_masks(unsigned int cpuid)
+{
+ struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
+ int cpu;
+
+ if (cpuid_topo->cluster_id == -1) {
+ /*
+ * DT does not contain topology information for this cpu.
+ */
+ pr_debug("CPU%u: No topology information configured\n", cpuid);
+ return;
+ }
+
+ /* update core and thread sibling masks */
+ for_each_possible_cpu(cpu) {
+ cpu_topo = &cpu_topology[cpu];
+
+ if (cpuid_topo->cluster_id != cpu_topo->cluster_id)
+ continue;
+
+ cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
+ if (cpu != cpuid)
+ cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
+
+ if (cpuid_topo->core_id != cpu_topo->core_id)
+ continue;
+
+ cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
+ if (cpu != cpuid)
+ cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
+ }
+}
+
+void store_cpu_topology(unsigned int cpuid)
+{
+ update_siblings_masks(cpuid);
+ update_cpu_power(cpuid);
+}
+
+#ifdef CONFIG_SCHED_HMP
+
+/*
+ * Retrieve logical cpu index corresponding to a given MPIDR[23:0]
+ * - mpidr: MPIDR[23:0] to be used for the look-up
+ *
+ * Returns the cpu logical index or -EINVAL on look-up error
+ */
+static inline int get_logical_index(u32 mpidr)
+{
+ int cpu;
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+ if (cpu_logical_map(cpu) == mpidr)
+ return cpu;
+ return -EINVAL;
+}
+
+static const char * const little_cores[] = {
+ "arm,cortex-a53",
+ NULL,
+};
+
+static bool is_little_cpu(struct device_node *cn)
+{
+ const char * const *lc;
+ for (lc = little_cores; *lc; lc++)
+ if (of_device_is_compatible(cn, *lc))
+ return true;
+ return false;
+}
+
+void __init arch_get_fast_and_slow_cpus(struct cpumask *fast,
+ struct cpumask *slow)
+{
+ struct device_node *cn = NULL;
+ int cpu;
+
+ cpumask_clear(fast);
+ cpumask_clear(slow);
+
+ /*
+ * Use the config options if they are given. This helps testing
+ * HMP scheduling on systems without a big.LITTLE architecture.
+ */
+ if (strlen(CONFIG_HMP_FAST_CPU_MASK) && strlen(CONFIG_HMP_SLOW_CPU_MASK)) {
+ if (cpulist_parse(CONFIG_HMP_FAST_CPU_MASK, fast))
+ WARN(1, "Failed to parse HMP fast cpu mask!\n");
+ if (cpulist_parse(CONFIG_HMP_SLOW_CPU_MASK, slow))
+ WARN(1, "Failed to parse HMP slow cpu mask!\n");
+ return;
+ }
+
+ /*
+ * Else, parse device tree for little cores.
+ */
+ while ((cn = of_find_node_by_type(cn, "cpu"))) {
+
+ const u32 *mpidr;
+ int len;
+
+ mpidr = of_get_property(cn, "reg", &len);
+ if (!mpidr || len != 8) {
+ pr_err("%s missing reg property\n", cn->full_name);
+ continue;
+ }
+
+ cpu = get_logical_index(be32_to_cpup(mpidr+1));
+ if (cpu == -EINVAL) {
+ pr_err("couldn't get logical index for mpidr %x\n",
+ be32_to_cpup(mpidr+1));
+ break;
+ }
+
+ if (is_little_cpu(cn))
+ cpumask_set_cpu(cpu, slow);
+ else
+ cpumask_set_cpu(cpu, fast);
+ }
+
+ if (!cpumask_empty(fast) && !cpumask_empty(slow))
+ return;
+
+ /*
+ * We didn't find both big and little cores so let's call all cores
+ * fast as this will keep the system running, with all cores being
+ * treated equal.
+ */
+ cpumask_setall(fast);
+ cpumask_clear(slow);
+}
+
+struct cpumask hmp_slow_cpu_mask;
+
+void __init arch_get_hmp_domains(struct list_head *hmp_domains_list)
+{
+ struct cpumask hmp_fast_cpu_mask;
+ struct hmp_domain *domain;
+
+ arch_get_fast_and_slow_cpus(&hmp_fast_cpu_mask, &hmp_slow_cpu_mask);
+
+ /*
+ * Initialize hmp_domains
+ * Must be ordered with respect to compute capacity.
+ * Fastest domain at head of list.
+ */
+ if(!cpumask_empty(&hmp_slow_cpu_mask)) {
+ domain = (struct hmp_domain *)
+ kmalloc(sizeof(struct hmp_domain), GFP_KERNEL);
+ cpumask_copy(&domain->possible_cpus, &hmp_slow_cpu_mask);
+ cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus);
+ list_add(&domain->hmp_domains, hmp_domains_list);
+ }
+ domain = (struct hmp_domain *)
+ kmalloc(sizeof(struct hmp_domain), GFP_KERNEL);
+ cpumask_copy(&domain->possible_cpus, &hmp_fast_cpu_mask);
+ cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus);
+ list_add(&domain->hmp_domains, hmp_domains_list);
+}
+#endif /* CONFIG_SCHED_HMP */
+
+static void __init reset_cpu_topology(void)
+{
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct cpu_topology *cpu_topo = &cpu_topology[cpu];
+
+ cpu_topo->thread_id = -1;
+ cpu_topo->core_id = 0;
+ cpu_topo->cluster_id = -1;
+
+ cpumask_clear(&cpu_topo->core_sibling);
+ cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
+ cpumask_clear(&cpu_topo->thread_sibling);
+ cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
+ }
+}
+
+static void __init reset_cpu_power(void)
+{
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu)
+ set_power_scale(cpu, SCHED_POWER_SCALE);
+}
+
+void __init init_cpu_topology(void)
+{
+ reset_cpu_topology();
+
+ /*
+ * Discard anything that was parsed if we hit an error so we
+ * don't use partial information.
+ */
+ if (parse_dt_topology())
+ reset_cpu_topology();
+
+ reset_cpu_power();
+ parse_dt_cpu_power();
+}
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index e4193e3adc7f..0d64089d28b5 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -79,7 +79,8 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr)
return;
if (!test_and_set_bit(PG_dcache_clean, &page->flags)) {
- __flush_dcache_area(page_address(page), PAGE_SIZE);
+ __flush_dcache_area(page_address(page),
+ PAGE_SIZE << compound_order(page));
__flush_icache_all();
} else if (icache_is_aivivt()) {
__flush_icache_all();
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 5e9aec358306..023747bf4dd7 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -51,12 +51,11 @@ int pmd_huge(pmd_t pmd)
int pud_huge(pud_t pud)
{
+#ifndef __PAGETABLE_PMD_FOLDED
return !(pud_val(pud) & PUD_TABLE_BIT);
-}
-
-int pmd_huge_support(void)
-{
- return 1;
+#else
+ return 0;
+#endif
}
static __init int setup_hugepagesz(char *opt)