aboutsummaryrefslogtreecommitdiff
path: root/arch/arm64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64')
-rw-r--r--arch/arm64/Kconfig151
-rw-r--r--arch/arm64/Makefile1
-rw-r--r--arch/arm64/boot/dts/Makefile4
-rw-r--r--arch/arm64/boot/dts/apm-mustang.dts4
-rw-r--r--arch/arm64/boot/dts/apm-storm.dtsi160
-rw-r--r--arch/arm64/boot/dts/clcd-panels.dtsi52
-rw-r--r--arch/arm64/boot/dts/fvp-base-gicv2-psci.dts266
-rw-r--r--arch/arm64/boot/dts/juno.dts498
-rw-r--r--arch/arm64/boot/dts/rtsm_ve-aemv8a.dts2
-rw-r--r--arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi3
-rw-r--r--arch/arm64/crypto/Kconfig53
-rw-r--r--arch/arm64/crypto/Makefile38
-rw-r--r--arch/arm64/crypto/aes-ce-ccm-core.S222
-rw-r--r--arch/arm64/crypto/aes-ce-ccm-glue.c297
-rw-r--r--arch/arm64/crypto/aes-ce-cipher.c155
-rw-r--r--arch/arm64/crypto/aes-ce.S133
-rw-r--r--arch/arm64/crypto/aes-glue.c446
-rw-r--r--arch/arm64/crypto/aes-modes.S532
-rw-r--r--arch/arm64/crypto/aes-neon.S382
-rw-r--r--arch/arm64/crypto/ghash-ce-core.S79
-rw-r--r--arch/arm64/crypto/ghash-ce-glue.c156
-rw-r--r--arch/arm64/crypto/sha1-ce-core.S153
-rw-r--r--arch/arm64/crypto/sha1-ce-glue.c174
-rw-r--r--arch/arm64/crypto/sha2-ce-core.S156
-rw-r--r--arch/arm64/crypto/sha2-ce-glue.c255
-rw-r--r--arch/arm64/include/asm/Kbuild2
-rw-r--r--arch/arm64/include/asm/atomic.h2
-rw-r--r--arch/arm64/include/asm/bL_switcher.h54
-rw-r--r--arch/arm64/include/asm/barrier.h15
-rw-r--r--arch/arm64/include/asm/cacheflush.h20
-rw-r--r--arch/arm64/include/asm/cmpxchg.h7
-rw-r--r--arch/arm64/include/asm/compat.h7
-rw-r--r--arch/arm64/include/asm/cpufeature.h29
-rw-r--r--arch/arm64/include/asm/cputype.h1
-rw-r--r--arch/arm64/include/asm/debug-monitors.h81
-rw-r--r--arch/arm64/include/asm/dma-mapping.h7
-rw-r--r--arch/arm64/include/asm/fixmap.h67
-rw-r--r--arch/arm64/include/asm/fpsimd.h23
-rw-r--r--arch/arm64/include/asm/fpsimdmacros.h35
-rw-r--r--arch/arm64/include/asm/ftrace.h59
-rw-r--r--arch/arm64/include/asm/hardirq.h2
-rw-r--r--arch/arm64/include/asm/hwcap.h9
-rw-r--r--arch/arm64/include/asm/insn.h5
-rw-r--r--arch/arm64/include/asm/io.h9
-rw-r--r--arch/arm64/include/asm/irqflags.h23
-rw-r--r--arch/arm64/include/asm/kgdb.h84
-rw-r--r--arch/arm64/include/asm/kvm_arm.h36
-rw-r--r--arch/arm64/include/asm/kvm_asm.h56
-rw-r--r--arch/arm64/include/asm/kvm_coproc.h3
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h27
-rw-r--r--arch/arm64/include/asm/kvm_host.h66
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h55
-rw-r--r--arch/arm64/include/asm/kvm_psci.h6
-rw-r--r--arch/arm64/include/asm/memory.h3
-rw-r--r--arch/arm64/include/asm/mmu.h6
-rw-r--r--arch/arm64/include/asm/neon.h6
-rw-r--r--arch/arm64/include/asm/page.h9
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h5
-rw-r--r--arch/arm64/include/asm/pgtable.h173
-rw-r--r--arch/arm64/include/asm/ptrace.h10
-rw-r--r--arch/arm64/include/asm/sigcontext.h31
-rw-r--r--arch/arm64/include/asm/syscall.h1
-rw-r--r--arch/arm64/include/asm/sysreg.h60
-rw-r--r--arch/arm64/include/asm/thread_info.h20
-rw-r--r--arch/arm64/include/asm/tlbflush.h49
-rw-r--r--arch/arm64/include/asm/topology.h70
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/include/asm/virt.h4
-rw-r--r--arch/arm64/include/uapi/asm/Kbuild1
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h15
-rw-r--r--arch/arm64/include/uapi/asm/perf_regs.h40
-rw-r--r--arch/arm64/kernel/Makefile12
-rw-r--r--arch/arm64/kernel/arm64ksyms.c4
-rw-r--r--arch/arm64/kernel/asm-offsets.c26
-rw-r--r--arch/arm64/kernel/debug-monitors.c13
-rw-r--r--arch/arm64/kernel/early_printk.c8
-rw-r--r--arch/arm64/kernel/entry-fpsimd.S24
-rw-r--r--arch/arm64/kernel/entry-ftrace.S218
-rw-r--r--arch/arm64/kernel/entry.S74
-rw-r--r--arch/arm64/kernel/fpsimd.c186
-rw-r--r--arch/arm64/kernel/ftrace.c177
-rw-r--r--arch/arm64/kernel/head.S106
-rw-r--r--arch/arm64/kernel/hw_breakpoint.c2
-rw-r--r--arch/arm64/kernel/hyp-stub.S1
-rw-r--r--arch/arm64/kernel/kgdb.c336
-rw-r--r--arch/arm64/kernel/perf_event.c79
-rw-r--r--arch/arm64/kernel/perf_regs.c46
-rw-r--r--arch/arm64/kernel/process.c5
-rw-r--r--arch/arm64/kernel/ptrace.c62
-rw-r--r--arch/arm64/kernel/return_address.c55
-rw-r--r--arch/arm64/kernel/setup.c38
-rw-r--r--arch/arm64/kernel/signal.c42
-rw-r--r--arch/arm64/kernel/signal32.c9
-rw-r--r--arch/arm64/kernel/smp.c30
-rw-r--r--arch/arm64/kernel/smp_spin_table.c39
-rw-r--r--arch/arm64/kernel/stacktrace.c2
-rw-r--r--arch/arm64/kernel/topology.c558
-rw-r--r--arch/arm64/kernel/vdso.c34
-rw-r--r--arch/arm64/kernel/vdso/Makefile4
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S7
-rw-r--r--arch/arm64/kvm/Makefile4
-rw-r--r--arch/arm64/kvm/guest.c70
-rw-r--r--arch/arm64/kvm/handle_exit.c14
-rw-r--r--arch/arm64/kvm/hyp-init.S6
-rw-r--r--arch/arm64/kvm/hyp.S612
-rw-r--r--arch/arm64/kvm/sys_regs.c641
-rw-r--r--arch/arm64/kvm/sys_regs.h2
-rw-r--r--arch/arm64/kvm/sys_regs_generic_v8.c2
-rw-r--r--arch/arm64/kvm/vgic-v2-switch.S133
-rw-r--r--arch/arm64/kvm/vgic-v3-switch.S267
-rw-r--r--arch/arm64/mm/Makefile2
-rw-r--r--arch/arm64/mm/cache.S96
-rw-r--r--arch/arm64/mm/copypage.c2
-rw-r--r--arch/arm64/mm/dma-mapping.c277
-rw-r--r--arch/arm64/mm/init.c46
-rw-r--r--arch/arm64/mm/ioremap.c85
-rw-r--r--arch/arm64/mm/mmu.c134
-rw-r--r--arch/arm64/mm/proc.S14
-rw-r--r--arch/arm64/mm/tlb.S71
119 files changed, 9475 insertions, 867 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 65b788410bd9..372eb823e92f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -4,12 +4,14 @@ config ARM64
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
+ select ARCH_HAS_OPP
select ARCH_WANT_OPTIONAL_GPIOLIB
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
select ARCH_WANT_FRAME_POINTERS
select ARM_AMBA
select ARM_ARCH_TIMER
select ARM_GIC
+ select ARM_GIC_V3
select BUILDTIME_EXTABLE_SORT
select CLONE_BACKWARDS
select COMMON_CLK
@@ -17,6 +19,8 @@ config ARM64
select DCACHE_WORD_ACCESS
select GENERIC_CLOCKEVENTS
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+ select GENERIC_CPU_AUTOPROBE
+ select GENERIC_EARLY_IOREMAP
select GENERIC_IOMAP
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
@@ -27,29 +31,40 @@ config ARM64
select GENERIC_TIME_VSYSCALL
select HARDIRQS_SW_RESEND
select HAVE_ARCH_JUMP_LABEL
+ select HAVE_ARCH_KGDB
select HAVE_ARCH_TRACEHOOK
+ select HAVE_C_RECORDMCOUNT
select HAVE_DEBUG_BUGVERBOSE
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_API_DEBUG
select HAVE_DMA_ATTRS
select HAVE_DMA_CONTIGUOUS
select HAVE_EFFICIENT_UNALIGNED_ACCESS
+ select HAVE_DYNAMIC_FTRACE
+ select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_FUNCTION_TRACER
+ select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_GENERIC_DMA_COHERENT
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_MEMBLOCK
select HAVE_PATA_PLATFORM
select HAVE_PERF_EVENTS
+ select HAVE_PERF_REGS
+ select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_SYSCALL_TRACEPOINTS
select IRQ_DOMAIN
select MODULES_USE_ELF_RELA
select NO_BOOTMEM
select OF
select OF_EARLY_FLATTREE
+ select OF_RESERVED_MEM
select PERF_USE_VMALLOC
select POWER_RESET
select POWER_SUPPLY
select RTC_LIB
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
+ select HAVE_CONTEXT_TRACKING
help
ARM 64-bit (AArch64) Linux support.
@@ -86,7 +101,7 @@ config GENERIC_CSUM
config GENERIC_CALIBRATE_DELAY
def_bool y
-config ZONE_DMA32
+config ZONE_DMA
def_bool y
config ARCH_DMA_ADDR_T_64BIT
@@ -107,6 +122,9 @@ config IOMMU_HELPER
config KERNEL_MODE_NEON
def_bool y
+config FIX_EARLYCON_MEM
+ def_bool y
+
source "init/Kconfig"
source "kernel/Kconfig.freezer"
@@ -165,6 +183,134 @@ config SMP
If you don't know what to do here, say N.
+config SCHED_MC
+ bool "Multi-core scheduler support"
+ depends on SMP
+ help
+ Multi-core scheduler support improves the CPU scheduler's decision
+ making when dealing with multi-core CPU chips at a cost of slightly
+ increased overhead in some places. If unsure say N here.
+
+config SCHED_SMT
+ bool "SMT scheduler support"
+ depends on SMP
+ help
+ Improves the CPU scheduler's decision making when dealing with
+ MultiThreading at a cost of slightly increased overhead in some
+ places. If unsure say N here.
+
+config SCHED_MC
+ bool "Multi-core scheduler support"
+ depends on ARM_CPU_TOPOLOGY
+ help
+ Multi-core scheduler support improves the CPU scheduler's decision
+ making when dealing with multi-core CPU chips at a cost of slightly
+ increased overhead in some places. If unsure say N here.
+
+config SCHED_SMT
+ bool "SMT scheduler support"
+ depends on ARM_CPU_TOPOLOGY
+ help
+ Improves the CPU scheduler's decision making when dealing with
+ MultiThreading at a cost of slightly increased overhead in some
+ places. If unsure say N here.
+
+config DISABLE_CPU_SCHED_DOMAIN_BALANCE
+ bool "(EXPERIMENTAL) Disable CPU level scheduler load-balancing"
+ help
+ Disables scheduler load-balancing at CPU sched domain level.
+
+config SCHED_HMP
+ bool "(EXPERIMENTAL) Heterogenous multiprocessor scheduling"
+ depends on DISABLE_CPU_SCHED_DOMAIN_BALANCE && SCHED_MC && FAIR_GROUP_SCHED && !SCHED_AUTOGROUP
+ help
+ Experimental scheduler optimizations for heterogeneous platforms.
+ Attempts to introspectively select task affinity to optimize power
+ and performance. Basic support for multiple (>2) cpu types is in place,
+ but it has only been tested with two types of cpus.
+ There is currently no support for migration of task groups, hence
+ !SCHED_AUTOGROUP. Furthermore, normal load-balancing must be disabled
+ between cpus of different type (DISABLE_CPU_SCHED_DOMAIN_BALANCE).
+
+config SCHED_HMP_PRIO_FILTER
+ bool "(EXPERIMENTAL) Filter HMP migrations by task priority"
+ depends on SCHED_HMP
+ help
+ Enables task priority based HMP migration filter. Any task with
+ a NICE value above the threshold will always be on low-power cpus
+ with less compute capacity.
+
+config SCHED_HMP_PRIO_FILTER_VAL
+ int "NICE priority threshold"
+ default 5
+ depends on SCHED_HMP_PRIO_FILTER
+
+config HMP_FAST_CPU_MASK
+ string "HMP scheduler fast CPU mask"
+ depends on SCHED_HMP
+ help
+ Leave empty to use device tree information.
+ Specify the cpuids of the fast CPUs in the system as a list string,
+ e.g. cpuid 0+1 should be specified as 0-1.
+
+config HMP_SLOW_CPU_MASK
+ string "HMP scheduler slow CPU mask"
+ depends on SCHED_HMP
+ help
+ Leave empty to use device tree information.
+ Specify the cpuids of the slow CPUs in the system as a list string,
+ e.g. cpuid 0+1 should be specified as 0-1.
+
+config HMP_VARIABLE_SCALE
+ bool "Allows changing the load tracking scale through sysfs"
+ depends on SCHED_HMP
+ help
+ When turned on, this option exports the thresholds and load average
+ period value for the load tracking patches through sysfs.
+ The values can be modified to change the rate of load accumulation
+ and the thresholds used for HMP migration.
+ The load_avg_period_ms is the time in ms to reach a load average of
+ 0.5 for an idle task of 0 load average ratio that start a busy loop.
+ The up_threshold and down_threshold is the value to go to a faster
+ CPU or to go back to a slower cpu.
+ The {up,down}_threshold are devided by 1024 before being compared
+ to the load average.
+ For examples, with load_avg_period_ms = 128 and up_threshold = 512,
+ a running task with a load of 0 will be migrated to a bigger CPU after
+ 128ms, because after 128ms its load_avg_ratio is 0.5 and the real
+ up_threshold is 0.5.
+ This patch has the same behavior as changing the Y of the load
+ average computation to
+ (1002/1024)^(LOAD_AVG_PERIOD/load_avg_period_ms)
+ but it remove intermadiate overflows in computation.
+
+config HMP_FREQUENCY_INVARIANT_SCALE
+ bool "(EXPERIMENTAL) Frequency-Invariant Tracked Load for HMP"
+ depends on HMP_VARIABLE_SCALE && CPU_FREQ
+ help
+ Scales the current load contribution in line with the frequency
+ of the CPU that the task was executed on.
+ In this version, we use a simple linear scale derived from the
+ maximum frequency reported by CPUFreq.
+ Restricting tracked load to be scaled by the CPU's frequency
+ represents the consumption of possible compute capacity
+ (rather than consumption of actual instantaneous capacity as
+ normal) and allows the HMP migration's simple threshold
+ migration strategy to interact more predictably with CPUFreq's
+ asynchronous compute capacity changes.
+
+config SCHED_HMP_LITTLE_PACKING
+ bool "Small task packing for HMP"
+ depends on SCHED_HMP
+ default n
+ help
+ Allows the HMP Scheduler to pack small tasks into CPUs in the
+ smallest HMP domain.
+ Controlled by two sysfs files in sys/kernel/hmp.
+ packing_enable: 1 to enable, 0 to disable packing. Default 1.
+ packing_limit: runqueue load ratio where a RQ is considered
+ to be full. Default is NICE_0_LOAD * 9/8.
+
config NR_CPUS
int "Maximum number of CPUs (2-32)"
range 2 32
@@ -317,5 +463,8 @@ source "arch/arm64/Kconfig.debug"
source "security/Kconfig"
source "crypto/Kconfig"
+if CRYPTO
+source "arch/arm64/crypto/Kconfig"
+endif
source "lib/Kconfig"
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 2fceb71ac3b7..8185a913c5ed 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -45,6 +45,7 @@ export TEXT_OFFSET GZFLAGS
core-y += arch/arm64/kernel/ arch/arm64/mm/
core-$(CONFIG_KVM) += arch/arm64/kvm/
core-$(CONFIG_XEN) += arch/arm64/xen/
+core-$(CONFIG_CRYPTO) += arch/arm64/crypto/
libs-y := arch/arm64/lib/ $(libs-y)
libs-y += $(LIBGCC)
diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile
index c52bdb051f66..3391691a85f1 100644
--- a/arch/arm64/boot/dts/Makefile
+++ b/arch/arm64/boot/dts/Makefile
@@ -1,5 +1,7 @@
-dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb foundation-v8.dtb
+dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb foundation-v8.dtb \
+ fvp-base-gicv2-psci.dtb
dtb-$(CONFIG_ARCH_XGENE) += apm-mustang.dtb
+dtb-$(CONFIG_ARCH_VEXPRESS) += juno.dtb
targets += dtbs
targets += $(dtb-y)
diff --git a/arch/arm64/boot/dts/apm-mustang.dts b/arch/arm64/boot/dts/apm-mustang.dts
index 1247ca1200b1..6541962f5d70 100644
--- a/arch/arm64/boot/dts/apm-mustang.dts
+++ b/arch/arm64/boot/dts/apm-mustang.dts
@@ -24,3 +24,7 @@
reg = < 0x1 0x00000000 0x0 0x80000000 >; /* Updated by bootloader */
};
};
+
+&serial0 {
+ status = "ok";
+};
diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi
index d37d7369e260..03431d25123b 100644
--- a/arch/arm64/boot/dts/apm-storm.dtsi
+++ b/arch/arm64/boot/dts/apm-storm.dtsi
@@ -176,16 +176,174 @@
reg-names = "csr-reg";
clock-output-names = "eth8clk";
};
+
+ sataphy1clk: sataphy1clk@1f21c000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x1f21c000 0x0 0x1000>;
+ reg-names = "csr-reg";
+ clock-output-names = "sataphy1clk";
+ status = "disabled";
+ csr-offset = <0x4>;
+ csr-mask = <0x00>;
+ enable-offset = <0x0>;
+ enable-mask = <0x06>;
+ };
+
+ sataphy2clk: sataphy1clk@1f22c000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x1f22c000 0x0 0x1000>;
+ reg-names = "csr-reg";
+ clock-output-names = "sataphy2clk";
+ status = "ok";
+ csr-offset = <0x4>;
+ csr-mask = <0x3a>;
+ enable-offset = <0x0>;
+ enable-mask = <0x06>;
+ };
+
+ sataphy3clk: sataphy1clk@1f23c000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x1f23c000 0x0 0x1000>;
+ reg-names = "csr-reg";
+ clock-output-names = "sataphy3clk";
+ status = "ok";
+ csr-offset = <0x4>;
+ csr-mask = <0x3a>;
+ enable-offset = <0x0>;
+ enable-mask = <0x06>;
+ };
+
+ sata01clk: sata01clk@1f21c000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x1f21c000 0x0 0x1000>;
+ reg-names = "csr-reg";
+ clock-output-names = "sata01clk";
+ csr-offset = <0x4>;
+ csr-mask = <0x05>;
+ enable-offset = <0x0>;
+ enable-mask = <0x39>;
+ };
+
+ sata23clk: sata23clk@1f22c000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x1f22c000 0x0 0x1000>;
+ reg-names = "csr-reg";
+ clock-output-names = "sata23clk";
+ csr-offset = <0x4>;
+ csr-mask = <0x05>;
+ enable-offset = <0x0>;
+ enable-mask = <0x39>;
+ };
+
+ sata45clk: sata45clk@1f23c000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x1f23c000 0x0 0x1000>;
+ reg-names = "csr-reg";
+ clock-output-names = "sata45clk";
+ csr-offset = <0x4>;
+ csr-mask = <0x05>;
+ enable-offset = <0x0>;
+ enable-mask = <0x39>;
+ };
+
+ rtcclk: rtcclk@17000000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x17000000 0x0 0x2000>;
+ reg-names = "csr-reg";
+ csr-offset = <0xc>;
+ csr-mask = <0x2>;
+ enable-offset = <0x10>;
+ enable-mask = <0x2>;
+ clock-output-names = "rtcclk";
+ };
};
serial0: serial@1c020000 {
+ status = "disabled";
device_type = "serial";
- compatible = "ns16550";
+ compatible = "ns16550a";
reg = <0 0x1c020000 0x0 0x1000>;
reg-shift = <2>;
clock-frequency = <10000000>; /* Updated by bootloader */
interrupt-parent = <&gic>;
interrupts = <0x0 0x4c 0x4>;
};
+
+ serial1: serial@1c021000 {
+ status = "disabled";
+ device_type = "serial";
+ compatible = "ns16550a";
+ reg = <0 0x1c021000 0x0 0x1000>;
+ reg-shift = <2>;
+ clock-frequency = <10000000>; /* Updated by bootloader */
+ interrupt-parent = <&gic>;
+ interrupts = <0x0 0x4d 0x4>;
+ };
+
+ serial2: serial@1c022000 {
+ status = "disabled";
+ device_type = "serial";
+ compatible = "ns16550a";
+ reg = <0 0x1c022000 0x0 0x1000>;
+ reg-shift = <2>;
+ clock-frequency = <10000000>; /* Updated by bootloader */
+ interrupt-parent = <&gic>;
+ interrupts = <0x0 0x4e 0x4>;
+ };
+
+ serial3: serial@1c023000 {
+ status = "disabled";
+ device_type = "serial";
+ compatible = "ns16550a";
+ reg = <0 0x1c023000 0x0 0x1000>;
+ reg-shift = <2>;
+ clock-frequency = <10000000>; /* Updated by bootloader */
+ interrupt-parent = <&gic>;
+ interrupts = <0x0 0x4f 0x4>;
+ };
+
+ phy1: phy@1f21a000 {
+ compatible = "apm,xgene-phy";
+ reg = <0x0 0x1f21a000 0x0 0x100>;
+ #phy-cells = <1>;
+ clocks = <&sataphy1clk 0>;
+ status = "disabled";
+ apm,tx-boost-gain = <30 30 30 30 30 30>;
+ apm,tx-eye-tuning = <2 10 10 2 10 10>;
+ };
+
+ phy2: phy@1f22a000 {
+ compatible = "apm,xgene-phy";
+ reg = <0x0 0x1f22a000 0x0 0x100>;
+ #phy-cells = <1>;
+ clocks = <&sataphy2clk 0>;
+ status = "ok";
+ apm,tx-boost-gain = <30 30 30 30 30 30>;
+ apm,tx-eye-tuning = <1 10 10 2 10 10>;
+ };
+
+ phy3: phy@1f23a000 {
+ compatible = "apm,xgene-phy";
+ reg = <0x0 0x1f23a000 0x0 0x100>;
+ #phy-cells = <1>;
+ clocks = <&sataphy3clk 0>;
+ status = "ok";
+ apm,tx-boost-gain = <31 31 31 31 31 31>;
+ apm,tx-eye-tuning = <2 10 10 2 10 10>;
+ };
};
};
diff --git a/arch/arm64/boot/dts/clcd-panels.dtsi b/arch/arm64/boot/dts/clcd-panels.dtsi
new file mode 100644
index 000000000000..0b0ff6ead4b2
--- /dev/null
+++ b/arch/arm64/boot/dts/clcd-panels.dtsi
@@ -0,0 +1,52 @@
+/*
+ * ARM Ltd. Versatile Express
+ *
+ */
+
+/ {
+ panels {
+ panel@0 {
+ compatible = "panel";
+ mode = "VGA";
+ refresh = <60>;
+ xres = <640>;
+ yres = <480>;
+ pixclock = <39721>;
+ left_margin = <40>;
+ right_margin = <24>;
+ upper_margin = <32>;
+ lower_margin = <11>;
+ hsync_len = <96>;
+ vsync_len = <2>;
+ sync = <0>;
+ vmode = "FB_VMODE_NONINTERLACED";
+
+ tim2 = "TIM2_BCD", "TIM2_IPC";
+ cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)";
+ caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888";
+ bpp = <16>;
+ };
+
+ panel@1 {
+ compatible = "panel";
+ mode = "XVGA";
+ refresh = <60>;
+ xres = <1024>;
+ yres = <768>;
+ pixclock = <15748>;
+ left_margin = <152>;
+ right_margin = <48>;
+ upper_margin = <23>;
+ lower_margin = <3>;
+ hsync_len = <104>;
+ vsync_len = <4>;
+ sync = <0>;
+ vmode = "FB_VMODE_NONINTERLACED";
+
+ tim2 = "TIM2_BCD", "TIM2_IPC";
+ cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)";
+ caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888";
+ bpp = <16>;
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts b/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts
new file mode 100644
index 000000000000..a46be6148b3a
--- /dev/null
+++ b/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2013, ARM Limited. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/memreserve/ 0x80000000 0x00010000;
+
+/ {
+};
+
+/ {
+ model = "FVP Base";
+ compatible = "arm,vfp-base", "arm,vexpress";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ chosen { };
+
+ aliases {
+ serial0 = &v2m_serial0;
+ serial1 = &v2m_serial1;
+ serial2 = &v2m_serial2;
+ serial3 = &v2m_serial3;
+ };
+
+ psci {
+ compatible = "arm,psci";
+ method = "smc";
+ cpu_suspend = <0xc4000001>;
+ cpu_off = <0x84000002>;
+ cpu_on = <0xc4000003>;
+ };
+
+ cpus {
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ big0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57", "arm,armv8";
+ reg = <0x0 0x0>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+ big1: cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57", "arm,armv8";
+ reg = <0x0 0x1>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+ big2: cpu@2 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57", "arm,armv8";
+ reg = <0x0 0x2>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+ big3: cpu@3 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57", "arm,armv8";
+ reg = <0x0 0x3>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+ little0: cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53", "arm,armv8";
+ reg = <0x0 0x100>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+ little1: cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53", "arm,armv8";
+ reg = <0x0 0x101>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+ little2: cpu@102 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53", "arm,armv8";
+ reg = <0x0 0x102>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+ little3: cpu@103 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53", "arm,armv8";
+ reg = <0x0 0x103>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+
+ cpu-map {
+ cluster0 {
+ core0 {
+ cpu = <&big0>;
+ };
+ core1 {
+ cpu = <&big1>;
+ };
+ core2 {
+ cpu = <&big2>;
+ };
+ core3 {
+ cpu = <&big3>;
+ };
+ };
+ cluster1 {
+ core0 {
+ cpu = <&little0>;
+ };
+ core1 {
+ cpu = <&little1>;
+ };
+ core2 {
+ cpu = <&little2>;
+ };
+ core3 {
+ cpu = <&little3>;
+ };
+ };
+ };
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x00000000 0x80000000 0 0x80000000>,
+ <0x00000008 0x80000000 0 0x80000000>;
+ };
+
+ gic: interrupt-controller@2f000000 {
+ compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0x0 0x2f000000 0 0x10000>,
+ <0x0 0x2c000000 0 0x2000>,
+ <0x0 0x2c010000 0 0x2000>,
+ <0x0 0x2c02F000 0 0x2000>;
+ interrupts = <1 9 0xf04>;
+ };
+
+ timer {
+ compatible = "arm,armv8-timer";
+ interrupts = <1 13 0xff01>,
+ <1 14 0xff01>,
+ <1 11 0xff01>,
+ <1 10 0xff01>;
+ clock-frequency = <100000000>;
+ };
+
+ timer@2a810000 {
+ compatible = "arm,armv7-timer-mem";
+ reg = <0x0 0x2a810000 0x0 0x10000>;
+ clock-frequency = <100000000>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+ frame@2a820000 {
+ frame-number = <0>;
+ interrupts = <0 25 4>;
+ reg = <0x0 0x2a820000 0x0 0x10000>;
+ };
+ };
+
+ pmu {
+ compatible = "arm,armv8-pmuv3";
+ interrupts = <0 60 4>,
+ <0 61 4>,
+ <0 62 4>,
+ <0 63 4>;
+ };
+
+ smb {
+ compatible = "simple-bus";
+
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0 0x08000000 0x04000000>,
+ <1 0 0 0x14000000 0x04000000>,
+ <2 0 0 0x18000000 0x04000000>,
+ <3 0 0 0x1c000000 0x04000000>,
+ <4 0 0 0x0c000000 0x04000000>,
+ <5 0 0 0x10000000 0x04000000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 63>;
+ interrupt-map = <0 0 0 &gic 0 0 4>,
+ <0 0 1 &gic 0 1 4>,
+ <0 0 2 &gic 0 2 4>,
+ <0 0 3 &gic 0 3 4>,
+ <0 0 4 &gic 0 4 4>,
+ <0 0 5 &gic 0 5 4>,
+ <0 0 6 &gic 0 6 4>,
+ <0 0 7 &gic 0 7 4>,
+ <0 0 8 &gic 0 8 4>,
+ <0 0 9 &gic 0 9 4>,
+ <0 0 10 &gic 0 10 4>,
+ <0 0 11 &gic 0 11 4>,
+ <0 0 12 &gic 0 12 4>,
+ <0 0 13 &gic 0 13 4>,
+ <0 0 14 &gic 0 14 4>,
+ <0 0 15 &gic 0 15 4>,
+ <0 0 16 &gic 0 16 4>,
+ <0 0 17 &gic 0 17 4>,
+ <0 0 18 &gic 0 18 4>,
+ <0 0 19 &gic 0 19 4>,
+ <0 0 20 &gic 0 20 4>,
+ <0 0 21 &gic 0 21 4>,
+ <0 0 22 &gic 0 22 4>,
+ <0 0 23 &gic 0 23 4>,
+ <0 0 24 &gic 0 24 4>,
+ <0 0 25 &gic 0 25 4>,
+ <0 0 26 &gic 0 26 4>,
+ <0 0 27 &gic 0 27 4>,
+ <0 0 28 &gic 0 28 4>,
+ <0 0 29 &gic 0 29 4>,
+ <0 0 30 &gic 0 30 4>,
+ <0 0 31 &gic 0 31 4>,
+ <0 0 32 &gic 0 32 4>,
+ <0 0 33 &gic 0 33 4>,
+ <0 0 34 &gic 0 34 4>,
+ <0 0 35 &gic 0 35 4>,
+ <0 0 36 &gic 0 36 4>,
+ <0 0 37 &gic 0 37 4>,
+ <0 0 38 &gic 0 38 4>,
+ <0 0 39 &gic 0 39 4>,
+ <0 0 40 &gic 0 40 4>,
+ <0 0 41 &gic 0 41 4>,
+ <0 0 42 &gic 0 42 4>;
+
+ /include/ "rtsm_ve-motherboard.dtsi"
+ };
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm64/boot/dts/juno.dts b/arch/arm64/boot/dts/juno.dts
new file mode 100644
index 000000000000..9785a14ca604
--- /dev/null
+++ b/arch/arm64/boot/dts/juno.dts
@@ -0,0 +1,498 @@
+/*
+ * ARM Ltd. Juno Plaform
+ *
+ * Fast Models FVP v2 support
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/ {
+ model = "Juno";
+ compatible = "arm,juno", "arm,vexpress";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ aliases {
+ serial0 = &soc_uart0;
+ };
+
+ cpus {
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x100>;
+ enable-method = "psci";
+ };
+
+ cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x101>;
+ enable-method = "psci";
+ };
+
+ cpu@102 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x102>;
+ enable-method = "psci";
+ };
+
+ cpu@103 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x103>;
+ enable-method = "psci";
+ };
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57","arm,armv8";
+ reg = <0x0 0x0>;
+ enable-method = "psci";
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57","arm,armv8";
+ reg = <0x0 0x1>;
+ enable-method = "psci";
+ };
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x00000000 0x80000000 0x0 0x80000000>,
+ <0x00000008 0x80000000 0x1 0x80000000>;
+ };
+
+ /* memory@14000000 {
+ device_type = "memory";
+ reg = <0x00000000 0x14000000 0x0 0x02000000>;
+ }; */
+
+ gic: interrupt-controller@2c001000 {
+ compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0x0 0x2c010000 0 0x1000>,
+ <0x0 0x2c02f000 0 0x1000>,
+ <0x0 0x2c04f000 0 0x2000>,
+ <0x0 0x2c06f000 0 0x2000>;
+ interrupts = <GIC_PPI 9 0xf04>;
+ };
+
+ msi0: msi@2c1c0000 {
+ compatible = "arm,gic-msi";
+ reg = <0x0 0x2c1c0000 0 0x10000
+ 0x0 0x2c1d0000 0 0x10000
+ 0x0 0x2c1e0000 0 0x10000
+ 0x0 0x2c1f0000 0 0x10000>;
+ };
+
+ timer {
+ compatible = "arm,armv8-timer";
+ interrupts = <GIC_PPI 13 0xff01>,
+ <GIC_PPI 14 0xff01>,
+ <GIC_PPI 11 0xff01>,
+ <GIC_PPI 10 0xff01>;
+ };
+
+ pmu {
+ compatible = "arm,armv8-pmuv3";
+ interrupts = <GIC_SPI 60 4>,
+ <GIC_SPI 61 4>,
+ <GIC_SPI 62 4>,
+ <GIC_SPI 63 4>;
+ };
+
+ psci {
+ compatible = "arm,psci";
+ method = "smc";
+ cpu_suspend = <0xC4000001>;
+ cpu_off = <0x84000002>;
+ cpu_on = <0xC4000003>;
+ migrate = <0xC4000005>;
+ };
+
+ pci0: pci@30000000 {
+ compatible = "arm,pcie-xr3";
+ device_type = "pci";
+ reg = <0 0x7ff30000 0 0x1000
+ 0 0x7ff20000 0 0x10000
+ 0 0x40000000 0 0x10000000>;
+ bus-range = <0 255>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ ranges = <0x01000000 0x0 0x00000000 0x00 0x5ff00000 0x0 0x00100000
+ 0x02000000 0x0 0x00000000 0x40 0x00000000 0x0 0x80000000
+ 0x42000000 0x0 0x80000000 0x40 0x80000000 0x0 0x80000000>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 0 7>;
+ interrupt-map = <0 0 0 1 &gic 0 136 4
+ 0 0 0 2 &gic 0 137 4
+ 0 0 0 3 &gic 0 138 4
+ 0 0 0 4 &gic 0 139 4>;
+ };
+
+ scpi: scpi@2b1f0000 {
+ compatible = "arm,scpi-mhu";
+ reg = <0x0 0x2b1f0000 0x0 0x10000>, /* MHU registers */
+ <0x0 0x2e000000 0x0 0x10000>; /* Payload area */
+ interrupts = <0 36 4>, /* low priority interrupt */
+ <0 35 4>, /* high priority interrupt */
+ <0 37 4>; /* secure channel interrupt */
+ #clock-cells = <1>;
+ clock-output-names = "a57", "a53", "gpu", "hdlcd0", "hdlcd1";
+ };
+
+ hdlcd0_osc: scpi_osc@3 {
+ compatible = "arm,scpi-osc";
+ #clock-cells = <0>;
+ clocks = <&scpi 3>;
+ frequency-range = <23000000 210000000>;
+ clock-output-names = "pxlclk0";
+ };
+
+ hdlcd1_osc: scpi_osc@4 {
+ compatible = "arm,scpi-osc";
+ #clock-cells = <0>;
+ clocks = <&scpi 4>;
+ frequency-range = <23000000 210000000>;
+ clock-output-names = "pxlclk1";
+ };
+
+ soc_uartclk: refclk72738khz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <7273800>;
+ clock-output-names = "juno:uartclk";
+ };
+
+ soc_refclk24mhz: clk24mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <24000000>;
+ clock-output-names = "juno:clk24mhz";
+ };
+
+ mb_eth25mhz: clk25mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <25000000>;
+ clock-output-names = "ethclk25mhz";
+ };
+
+ soc_usb48mhz: clk48mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <48000000>;
+ clock-output-names = "clk48mhz";
+ };
+
+ soc_smc50mhz: clk50mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <50000000>;
+ clock-output-names = "smc_clk";
+ };
+
+ soc_refclk100mhz: refclk100mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <100000000>;
+ clock-output-names = "apb_pclk";
+ };
+
+ soc_faxiclk: refclk533mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <533000000>;
+ clock-output-names = "faxi_clk";
+ };
+
+ soc_fixed_3v3: fixedregulator@0 {
+ compatible = "regulator-fixed";
+ regulator-name = "3V3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+
+ memory-controller@7ffd0000 {
+ compatible = "arm,pl354", "arm,primecell";
+ reg = <0 0x7ffd0000 0 0x1000>;
+ interrupts = <0 86 4>,
+ <0 87 4>;
+ clocks = <&soc_smc50mhz>;
+ clock-names = "apb_pclk";
+ chip5-memwidth = <16>;
+ };
+
+ dma0: dma@0x7ff00000 {
+ compatible = "arm,pl330", "arm,primecell";
+ reg = <0x0 0x7ff00000 0 0x1000>;
+ interrupts = <0 95 4>,
+ <0 88 4>,
+ <0 89 4>,
+ <0 90 4>,
+ <0 91 4>,
+ <0 108 4>,
+ <0 109 4>,
+ <0 110 4>,
+ <0 111 4>;
+ #dma-cells = <1>;
+ #dma-channels = <8>;
+ #dma-requests = <32>;
+ clocks = <&soc_faxiclk>;
+ clock-names = "apb_pclk";
+ };
+
+ soc_uart0: uart@7ff80000 {
+ compatible = "arm,pl011", "arm,primecell";
+ reg = <0x0 0x7ff80000 0x0 0x1000>;
+ interrupts = <0 83 4>;
+ clocks = <&soc_uartclk>, <&soc_refclk100mhz>;
+ clock-names = "uartclk", "apb_pclk";
+ dmas = <&dma0 1
+ &dma0 2>;
+ dma-names = "rx", "tx";
+ };
+
+ /* this UART is reserved for secure software.
+ soc_uart1: uart@7ff70000 {
+ compatible = "arm,pl011", "arm,primecell";
+ reg = <0x0 0x7ff70000 0x0 0x1000>;
+ interrupts = <0 84 4>;
+ clocks = <&soc_uartclk>, <&soc_refclk100mhz>;
+ clock-names = "uartclk", "apb_pclk";
+ }; */
+
+ ulpi_phy: phy@0 {
+ compatible = "phy-ulpi-generic";
+ reg = <0x0 0x94 0x0 0x4>;
+ phy-id = <0>;
+ };
+
+ ehci@7ffc0000 {
+ compatible = "snps,ehci-h20ahb";
+ /* compatible = "arm,h20ahb-ehci"; */
+ reg = <0x0 0x7ffc0000 0x0 0x10000>;
+ interrupts = <0 117 4>;
+ clocks = <&soc_usb48mhz>;
+ clock-names = "otg";
+ phys = <&ulpi_phy>;
+ };
+
+ ohci@0x7ffb0000 {
+ compatible = "generic-ohci";
+ reg = <0x0 0x7ffb0000 0x0 0x10000>;
+ interrupts = <0 116 4>;
+ clocks = <&soc_usb48mhz>;
+ clock-names = "otg";
+ };
+
+ i2c@0x7ffa0000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "snps,designware-i2c";
+ reg = <0x0 0x7ffa0000 0x0 0x1000>;
+ interrupts = <0 104 4>;
+ clock-frequency = <400000>;
+ i2c-sda-hold-time-ns = <500>;
+ clocks = <&soc_smc50mhz>;
+
+ dvi0: dvi-transmitter@70 {
+ compatible = "nxp,tda998x";
+ reg = <0x70>;
+ };
+
+ dvi1: dvi-transmitter@71 {
+ compatible = "nxp,tda998x";
+ reg = <0x71>;
+ };
+ };
+
+ /* mmci@1c050000 {
+ compatible = "arm,pl180", "arm,primecell";
+ reg = <0x0 0x1c050000 0x0 0x1000>;
+ interrupts = <0 73 4>,
+ <0 74 4>;
+ max-frequency = <12000000>;
+ vmmc-supply = <&soc_fixed_3v3>;
+ clocks = <&soc_refclk24mhz>, <&soc_refclk100mhz>;
+ clock-names = "mclk", "apb_pclk";
+ }; */
+
+ hdlcd@7ff60000 {
+ compatible = "arm,hdlcd";
+ reg = <0 0x7ff60000 0 0x1000>;
+ interrupts = <0 85 4>;
+ clocks = <&hdlcd0_osc>;
+ clock-names = "pxlclk";
+ i2c-slave = <&dvi0>;
+
+ /* display-timings {
+ native-mode = <&timing0>;
+ timing0: timing@0 {
+ /* 1024 x 768 framebufer, standard VGA timings * /
+ clock-frequency = <65000>;
+ hactive = <1024>;
+ vactive = <768>;
+ hfront-porch = <24>;
+ hback-porch = <160>;
+ hsync-len = <136>;
+ vfront-porch = <3>;
+ vback-porch = <29>;
+ vsync-len = <6>;
+ };
+ }; */
+ };
+
+ hdlcd@7ff50000 {
+ compatible = "arm,hdlcd";
+ reg = <0 0x7ff50000 0 0x1000>;
+ interrupts = <0 93 4>;
+ clocks = <&hdlcd1_osc>;
+ clock-names = "pxlclk";
+ i2c-slave = <&dvi1>;
+
+ display-timings {
+ native-mode = <&timing1>;
+ timing1: timing@1 {
+ /* 1024 x 768 framebufer, standard VGA timings */
+ clock-frequency = <65000>;
+ hactive = <1024>;
+ vactive = <768>;
+ hfront-porch = <24>;
+ hback-porch = <160>;
+ hsync-len = <136>;
+ vfront-porch = <3>;
+ vback-porch = <29>;
+ vsync-len = <6>;
+ };
+ };
+ };
+
+ smb {
+ compatible = "simple-bus";
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0 0x08000000 0x04000000>,
+ <1 0 0 0x14000000 0x04000000>,
+ <2 0 0 0x18000000 0x04000000>,
+ <3 0 0 0x1c000000 0x04000000>,
+ <4 0 0 0x0c000000 0x04000000>,
+ <5 0 0 0x10000000 0x04000000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 15>;
+ interrupt-map = <0 0 0 &gic 0 68 4>,
+ <0 0 1 &gic 0 69 4>,
+ <0 0 2 &gic 0 70 4>,
+ <0 0 3 &gic 0 160 4>,
+ <0 0 4 &gic 0 161 4>,
+ <0 0 5 &gic 0 162 4>,
+ <0 0 6 &gic 0 163 4>,
+ <0 0 7 &gic 0 164 4>,
+ <0 0 8 &gic 0 165 4>,
+ <0 0 9 &gic 0 166 4>,
+ <0 0 10 &gic 0 167 4>,
+ <0 0 11 &gic 0 168 4>,
+ <0 0 12 &gic 0 169 4>;
+
+ motherboard {
+ model = "V2M-Juno";
+ arm,hbi = <0x252>;
+ arm,vexpress,site = <0>;
+ arm,v2m-memory-map = "rs1";
+ compatible = "arm,vexpress,v2p-p1", "simple-bus";
+ #address-cells = <2>; /* SMB chipselect number and offset */
+ #size-cells = <1>;
+ #interrupt-cells = <1>;
+ ranges;
+
+ usb@5,00000000 {
+ compatible = "nxp,usb-isp1763";
+ reg = <5 0x00000000 0x20000>;
+ bus-width = <16>;
+ interrupts = <4>;
+ };
+
+ ethernet@2,00000000 {
+ compatible = "smsc,lan9118", "smsc,lan9115";
+ reg = <2 0x00000000 0x10000>;
+ interrupts = <3>;
+ phy-mode = "mii";
+ reg-io-width = <4>;
+ smsc,irq-active-high;
+ smsc,irq-push-pull;
+ clocks = <&mb_eth25mhz>;
+ vdd33a-supply = <&soc_fixed_3v3>; /* change this */
+ vddvario-supply = <&soc_fixed_3v3>; /* and this */
+ };
+
+ iofpga@3,00000000 {
+ compatible = "arm,amba-bus", "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 3 0 0x200000>;
+
+ kmi@060000 {
+ compatible = "arm,pl050", "arm,primecell";
+ reg = <0x060000 0x1000>;
+ interrupts = <8>;
+ clocks = <&soc_refclk24mhz>, <&soc_smc50mhz>;
+ clock-names = "KMIREFCLK", "apb_pclk";
+ };
+
+ kmi@070000 {
+ compatible = "arm,pl050", "arm,primecell";
+ reg = <0x070000 0x1000>;
+ interrupts = <8>;
+ clocks = <&soc_refclk24mhz>, <&soc_smc50mhz>;
+ clock-names = "KMIREFCLK", "apb_pclk";
+ };
+
+ wdt@0f0000 {
+ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0f0000 0x10000>;
+ interrupts = <7>;
+ clocks = <&soc_refclk24mhz>, <&soc_smc50mhz>;
+ clock-names = "wdogclk", "apb_pclk";
+ };
+
+ v2m_timer01: timer@110000 {
+ compatible = "arm,sp804", "arm,primecell";
+ reg = <0x110000 0x10000>;
+ interrupts = <9>;
+ clocks = <&soc_refclk24mhz>, <&soc_smc50mhz>;
+ clock-names = "timclken1", "apb_pclk";
+ };
+
+ v2m_timer23: timer@120000 {
+ compatible = "arm,sp804", "arm,primecell";
+ reg = <0x120000 0x10000>;
+ interrupts = <9>;
+ clocks = <&soc_refclk24mhz>, <&soc_smc50mhz>;
+ clock-names = "timclken1", "apb_pclk";
+ };
+
+ rtc@170000 {
+ compatible = "arm,pl031", "arm,primecell";
+ reg = <0x170000 0x10000>;
+ interrupts = <0>;
+ clocks = <&soc_smc50mhz>;
+ clock-names = "apb_pclk";
+ };
+ };
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/rtsm_ve-aemv8a.dts b/arch/arm64/boot/dts/rtsm_ve-aemv8a.dts
index 572005ea2217..f76eb9024a32 100644
--- a/arch/arm64/boot/dts/rtsm_ve-aemv8a.dts
+++ b/arch/arm64/boot/dts/rtsm_ve-aemv8a.dts
@@ -157,3 +157,5 @@
/include/ "rtsm_ve-motherboard.dtsi"
};
};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi b/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
index 2f2ecd217363..b683d4703582 100644
--- a/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
+++ b/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
@@ -182,6 +182,9 @@
interrupts = <14>;
clocks = <&v2m_oscclk1>, <&v2m_clk24mhz>;
clock-names = "clcdclk", "apb_pclk";
+ mode = "XVGA";
+ use_dma = <0>;
+ framebuffer = <0x18000000 0x00180000>;
};
virtio_block@0130000 {
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
new file mode 100644
index 000000000000..5562652c5316
--- /dev/null
+++ b/arch/arm64/crypto/Kconfig
@@ -0,0 +1,53 @@
+
+menuconfig ARM64_CRYPTO
+ bool "ARM64 Accelerated Cryptographic Algorithms"
+ depends on ARM64
+ help
+ Say Y here to choose from a selection of cryptographic algorithms
+ implemented using ARM64 specific CPU features or instructions.
+
+if ARM64_CRYPTO
+
+config CRYPTO_SHA1_ARM64_CE
+ tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_HASH
+
+config CRYPTO_SHA2_ARM64_CE
+ tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_HASH
+
+config CRYPTO_GHASH_ARM64_CE
+ tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_HASH
+
+config CRYPTO_AES_ARM64_CE
+ tristate "AES core cipher using ARMv8 Crypto Extensions"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_ALGAPI
+ select CRYPTO_AES
+
+config CRYPTO_AES_ARM64_CE_CCM
+ tristate "AES in CCM mode using ARMv8 Crypto Extensions"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_ALGAPI
+ select CRYPTO_AES
+ select CRYPTO_AEAD
+
+config CRYPTO_AES_ARM64_CE_BLK
+ tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_BLKCIPHER
+ select CRYPTO_AES
+ select CRYPTO_ABLK_HELPER
+
+config CRYPTO_AES_ARM64_NEON_BLK
+ tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_BLKCIPHER
+ select CRYPTO_AES
+ select CRYPTO_ABLK_HELPER
+
+endif
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
new file mode 100644
index 000000000000..2070a56ecc46
--- /dev/null
+++ b/arch/arm64/crypto/Makefile
@@ -0,0 +1,38 @@
+#
+# linux/arch/arm64/crypto/Makefile
+#
+# Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o
+sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o
+
+obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o
+sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o
+
+obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
+ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
+CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o
+aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_CE_BLK) += aes-ce-blk.o
+aes-ce-blk-y := aes-glue-ce.o aes-ce.o
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o
+aes-neon-blk-y := aes-glue-neon.o aes-neon.o
+
+AFLAGS_aes-ce.o := -DINTERLEAVE=2 -DINTERLEAVE_INLINE
+AFLAGS_aes-neon.o := -DINTERLEAVE=4
+
+CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS
+
+$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
+ $(call if_changed_dep,cc_o_c)
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
new file mode 100644
index 000000000000..432e4841cd81
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -0,0 +1,222 @@
+/*
+ * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
+ *
+ * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+ .text
+ .arch armv8-a+crypto
+
+ /*
+ * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
+ * u32 *macp, u8 const rk[], u32 rounds);
+ */
+ENTRY(ce_aes_ccm_auth_data)
+ ldr w8, [x3] /* leftover from prev round? */
+ ld1 {v0.2d}, [x0] /* load mac */
+ cbz w8, 1f
+ sub w8, w8, #16
+ eor v1.16b, v1.16b, v1.16b
+0: ldrb w7, [x1], #1 /* get 1 byte of input */
+ subs w2, w2, #1
+ add w8, w8, #1
+ ins v1.b[0], w7
+ ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */
+ beq 8f /* out of input? */
+ cbnz w8, 0b
+ eor v0.16b, v0.16b, v1.16b
+1: ld1 {v3.2d}, [x4] /* load first round key */
+ prfm pldl1strm, [x1]
+ cmp w5, #12 /* which key size? */
+ add x6, x4, #16
+ sub w7, w5, #2 /* modified # of rounds */
+ bmi 2f
+ bne 5f
+ mov v5.16b, v3.16b
+ b 4f
+2: mov v4.16b, v3.16b
+ ld1 {v5.2d}, [x6], #16 /* load 2nd round key */
+3: aese v0.16b, v4.16b
+ aesmc v0.16b, v0.16b
+4: ld1 {v3.2d}, [x6], #16 /* load next round key */
+ aese v0.16b, v5.16b
+ aesmc v0.16b, v0.16b
+5: ld1 {v4.2d}, [x6], #16 /* load next round key */
+ subs w7, w7, #3
+ aese v0.16b, v3.16b
+ aesmc v0.16b, v0.16b
+ ld1 {v5.2d}, [x6], #16 /* load next round key */
+ bpl 3b
+ aese v0.16b, v4.16b
+ subs w2, w2, #16 /* last data? */
+ eor v0.16b, v0.16b, v5.16b /* final round */
+ bmi 6f
+ ld1 {v1.16b}, [x1], #16 /* load next input block */
+ eor v0.16b, v0.16b, v1.16b /* xor with mac */
+ bne 1b
+6: st1 {v0.2d}, [x0] /* store mac */
+ beq 10f
+ adds w2, w2, #16
+ beq 10f
+ mov w8, w2
+7: ldrb w7, [x1], #1
+ umov w6, v0.b[0]
+ eor w6, w6, w7
+ strb w6, [x0], #1
+ subs w2, w2, #1
+ beq 10f
+ ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */
+ b 7b
+8: mov w7, w8
+ add w8, w8, #16
+9: ext v1.16b, v1.16b, v1.16b, #1
+ adds w7, w7, #1
+ bne 9b
+ eor v0.16b, v0.16b, v1.16b
+ st1 {v0.2d}, [x0]
+10: str w8, [x3]
+ ret
+ENDPROC(ce_aes_ccm_auth_data)
+
+ /*
+ * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
+ * u32 rounds);
+ */
+ENTRY(ce_aes_ccm_final)
+ ld1 {v3.2d}, [x2], #16 /* load first round key */
+ ld1 {v0.2d}, [x0] /* load mac */
+ cmp w3, #12 /* which key size? */
+ sub w3, w3, #2 /* modified # of rounds */
+ ld1 {v1.2d}, [x1] /* load 1st ctriv */
+ bmi 0f
+ bne 3f
+ mov v5.16b, v3.16b
+ b 2f
+0: mov v4.16b, v3.16b
+1: ld1 {v5.2d}, [x2], #16 /* load next round key */
+ aese v0.16b, v4.16b
+ aese v1.16b, v4.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+2: ld1 {v3.2d}, [x2], #16 /* load next round key */
+ aese v0.16b, v5.16b
+ aese v1.16b, v5.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+3: ld1 {v4.2d}, [x2], #16 /* load next round key */
+ subs w3, w3, #3
+ aese v0.16b, v3.16b
+ aese v1.16b, v3.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+ bpl 1b
+ aese v0.16b, v4.16b
+ aese v1.16b, v4.16b
+ /* final round key cancels out */
+ eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */
+ st1 {v0.2d}, [x0] /* store result */
+ ret
+ENDPROC(ce_aes_ccm_final)
+
+ .macro aes_ccm_do_crypt,enc
+ ldr x8, [x6, #8] /* load lower ctr */
+ ld1 {v0.2d}, [x5] /* load mac */
+ rev x8, x8 /* keep swabbed ctr in reg */
+0: /* outer loop */
+ ld1 {v1.1d}, [x6] /* load upper ctr */
+ prfm pldl1strm, [x1]
+ add x8, x8, #1
+ rev x9, x8
+ cmp w4, #12 /* which key size? */
+ sub w7, w4, #2 /* get modified # of rounds */
+ ins v1.d[1], x9 /* no carry in lower ctr */
+ ld1 {v3.2d}, [x3] /* load first round key */
+ add x10, x3, #16
+ bmi 1f
+ bne 4f
+ mov v5.16b, v3.16b
+ b 3f
+1: mov v4.16b, v3.16b
+ ld1 {v5.2d}, [x10], #16 /* load 2nd round key */
+2: /* inner loop: 3 rounds, 2x interleaved */
+ aese v0.16b, v4.16b
+ aese v1.16b, v4.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+3: ld1 {v3.2d}, [x10], #16 /* load next round key */
+ aese v0.16b, v5.16b
+ aese v1.16b, v5.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+4: ld1 {v4.2d}, [x10], #16 /* load next round key */
+ subs w7, w7, #3
+ aese v0.16b, v3.16b
+ aese v1.16b, v3.16b
+ aesmc v0.16b, v0.16b
+ aesmc v1.16b, v1.16b
+ ld1 {v5.2d}, [x10], #16 /* load next round key */
+ bpl 2b
+ aese v0.16b, v4.16b
+ aese v1.16b, v4.16b
+ subs w2, w2, #16
+ bmi 6f /* partial block? */
+ ld1 {v2.16b}, [x1], #16 /* load next input block */
+ .if \enc == 1
+ eor v2.16b, v2.16b, v5.16b /* final round enc+mac */
+ eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */
+ .else
+ eor v2.16b, v2.16b, v1.16b /* xor with crypted ctr */
+ eor v1.16b, v2.16b, v5.16b /* final round enc */
+ .endif
+ eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
+ st1 {v1.16b}, [x0], #16 /* write output block */
+ bne 0b
+ rev x8, x8
+ st1 {v0.2d}, [x5] /* store mac */
+ str x8, [x6, #8] /* store lsb end of ctr (BE) */
+5: ret
+
+6: eor v0.16b, v0.16b, v5.16b /* final round mac */
+ eor v1.16b, v1.16b, v5.16b /* final round enc */
+ st1 {v0.2d}, [x5] /* store mac */
+ add w2, w2, #16 /* process partial tail block */
+7: ldrb w9, [x1], #1 /* get 1 byte of input */
+ umov w6, v1.b[0] /* get top crypted ctr byte */
+ umov w7, v0.b[0] /* get top mac byte */
+ .if \enc == 1
+ eor w7, w7, w9
+ eor w9, w9, w6
+ .else
+ eor w9, w9, w6
+ eor w7, w7, w9
+ .endif
+ strb w9, [x0], #1 /* store out byte */
+ strb w7, [x5], #1 /* store mac byte */
+ subs w2, w2, #1
+ beq 5b
+ ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */
+ ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */
+ b 7b
+ .endm
+
+ /*
+ * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
+ * u8 const rk[], u32 rounds, u8 mac[],
+ * u8 ctr[]);
+ * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
+ * u8 const rk[], u32 rounds, u8 mac[],
+ * u8 ctr[]);
+ */
+ENTRY(ce_aes_ccm_encrypt)
+ aes_ccm_do_crypt 1
+ENDPROC(ce_aes_ccm_encrypt)
+
+ENTRY(ce_aes_ccm_decrypt)
+ aes_ccm_do_crypt 0
+ENDPROC(ce_aes_ccm_decrypt)
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
new file mode 100644
index 000000000000..9e6cdde9b43d
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -0,0 +1,297 @@
+/*
+ * aes-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions
+ *
+ * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/scatterwalk.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+static int num_rounds(struct crypto_aes_ctx *ctx)
+{
+ /*
+ * # of rounds specified by AES:
+ * 128 bit key 10 rounds
+ * 192 bit key 12 rounds
+ * 256 bit key 14 rounds
+ * => n byte key => 6 + (n/4) rounds
+ */
+ return 6 + ctx->key_length / 4;
+}
+
+asmlinkage void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
+ u32 *macp, u32 const rk[], u32 rounds);
+
+asmlinkage void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
+ u32 const rk[], u32 rounds, u8 mac[],
+ u8 ctr[]);
+
+asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
+ u32 const rk[], u32 rounds, u8 mac[],
+ u8 ctr[]);
+
+asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[],
+ u32 rounds);
+
+static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct crypto_aes_ctx *ctx = crypto_aead_ctx(tfm);
+ int ret;
+
+ ret = crypto_aes_expand_key(ctx, in_key, key_len);
+ if (!ret)
+ return 0;
+
+ tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+}
+
+static int ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+ if ((authsize & 1) || authsize < 4)
+ return -EINVAL;
+ return 0;
+}
+
+static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ __be32 *n = (__be32 *)&maciv[AES_BLOCK_SIZE - 8];
+ u32 l = req->iv[0] + 1;
+
+ /* verify that CCM dimension 'L' is set correctly in the IV */
+ if (l < 2 || l > 8)
+ return -EINVAL;
+
+ /* verify that msglen can in fact be represented in L bytes */
+ if (l < 4 && msglen >> (8 * l))
+ return -EOVERFLOW;
+
+ /*
+ * Even if the CCM spec allows L values of up to 8, the Linux cryptoapi
+ * uses a u32 type to represent msglen so the top 4 bytes are always 0.
+ */
+ n[0] = 0;
+ n[1] = cpu_to_be32(msglen);
+
+ memcpy(maciv, req->iv, AES_BLOCK_SIZE - l);
+
+ /*
+ * Meaning of byte 0 according to CCM spec (RFC 3610/NIST 800-38C)
+ * - bits 0..2 : max # of bytes required to represent msglen, minus 1
+ * (already set by caller)
+ * - bits 3..5 : size of auth tag (1 => 4 bytes, 2 => 6 bytes, etc)
+ * - bit 6 : indicates presence of authenticate-only data
+ */
+ maciv[0] |= (crypto_aead_authsize(aead) - 2) << 2;
+ if (req->assoclen)
+ maciv[0] |= 0x40;
+
+ memset(&req->iv[AES_BLOCK_SIZE - l], 0, l);
+ return 0;
+}
+
+static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
+ struct __packed { __be16 l; __be32 h; u16 len; } ltag;
+ struct scatter_walk walk;
+ u32 len = req->assoclen;
+ u32 macp = 0;
+
+ /* prepend the AAD with a length tag */
+ if (len < 0xff00) {
+ ltag.l = cpu_to_be16(len);
+ ltag.len = 2;
+ } else {
+ ltag.l = cpu_to_be16(0xfffe);
+ put_unaligned_be32(len, &ltag.h);
+ ltag.len = 6;
+ }
+
+ ce_aes_ccm_auth_data(mac, (u8 *)&ltag, ltag.len, &macp, ctx->key_enc,
+ num_rounds(ctx));
+ scatterwalk_start(&walk, req->assoc);
+
+ do {
+ u32 n = scatterwalk_clamp(&walk, len);
+ u8 *p;
+
+ if (!n) {
+ scatterwalk_start(&walk, sg_next(walk.sg));
+ n = scatterwalk_clamp(&walk, len);
+ }
+ p = scatterwalk_map(&walk);
+ ce_aes_ccm_auth_data(mac, p, n, &macp, ctx->key_enc,
+ num_rounds(ctx));
+ len -= n;
+
+ scatterwalk_unmap(p);
+ scatterwalk_advance(&walk, n);
+ scatterwalk_done(&walk, 0, len);
+ } while (len);
+}
+
+static int ccm_encrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
+ struct blkcipher_desc desc = { .info = req->iv };
+ struct blkcipher_walk walk;
+ u8 __aligned(8) mac[AES_BLOCK_SIZE];
+ u8 buf[AES_BLOCK_SIZE];
+ u32 len = req->cryptlen;
+ int err;
+
+ err = ccm_init_mac(req, mac, len);
+ if (err)
+ return err;
+
+ kernel_neon_begin_partial(6);
+
+ if (req->assoclen)
+ ccm_calculate_auth_mac(req, mac);
+
+ /* preserve the original iv for the final round */
+ memcpy(buf, req->iv, AES_BLOCK_SIZE);
+
+ blkcipher_walk_init(&walk, req->dst, req->src, len);
+ err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
+ AES_BLOCK_SIZE);
+
+ while (walk.nbytes) {
+ u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+
+ if (walk.nbytes == len)
+ tail = 0;
+
+ ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ walk.nbytes - tail, ctx->key_enc,
+ num_rounds(ctx), mac, walk.iv);
+
+ len -= walk.nbytes - tail;
+ err = blkcipher_walk_done(&desc, &walk, tail);
+ }
+ if (!err)
+ ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+
+ kernel_neon_end();
+
+ if (err)
+ return err;
+
+ /* copy authtag to end of dst */
+ scatterwalk_map_and_copy(mac, req->dst, req->cryptlen,
+ crypto_aead_authsize(aead), 1);
+
+ return 0;
+}
+
+static int ccm_decrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
+ unsigned int authsize = crypto_aead_authsize(aead);
+ struct blkcipher_desc desc = { .info = req->iv };
+ struct blkcipher_walk walk;
+ u8 __aligned(8) mac[AES_BLOCK_SIZE];
+ u8 buf[AES_BLOCK_SIZE];
+ u32 len = req->cryptlen - authsize;
+ int err;
+
+ err = ccm_init_mac(req, mac, len);
+ if (err)
+ return err;
+
+ kernel_neon_begin_partial(6);
+
+ if (req->assoclen)
+ ccm_calculate_auth_mac(req, mac);
+
+ /* preserve the original iv for the final round */
+ memcpy(buf, req->iv, AES_BLOCK_SIZE);
+
+ blkcipher_walk_init(&walk, req->dst, req->src, len);
+ err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
+ AES_BLOCK_SIZE);
+
+ while (walk.nbytes) {
+ u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+
+ if (walk.nbytes == len)
+ tail = 0;
+
+ ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ walk.nbytes - tail, ctx->key_enc,
+ num_rounds(ctx), mac, walk.iv);
+
+ len -= walk.nbytes - tail;
+ err = blkcipher_walk_done(&desc, &walk, tail);
+ }
+ if (!err)
+ ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+
+ kernel_neon_end();
+
+ if (err)
+ return err;
+
+ /* compare calculated auth tag with the stored one */
+ scatterwalk_map_and_copy(buf, req->src, req->cryptlen - authsize,
+ authsize, 0);
+
+ if (memcmp(mac, buf, authsize))
+ return -EBADMSG;
+ return 0;
+}
+
+static struct crypto_alg ccm_aes_alg = {
+ .cra_name = "ccm(aes)",
+ .cra_driver_name = "ccm-aes-ce",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_AEAD,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_aead_type,
+ .cra_module = THIS_MODULE,
+ .cra_aead = {
+ .ivsize = AES_BLOCK_SIZE,
+ .maxauthsize = AES_BLOCK_SIZE,
+ .setkey = ccm_setkey,
+ .setauthsize = ccm_setauthsize,
+ .encrypt = ccm_encrypt,
+ .decrypt = ccm_decrypt,
+ }
+};
+
+static int __init aes_mod_init(void)
+{
+ if (!(elf_hwcap & HWCAP_AES))
+ return -ENODEV;
+ return crypto_register_alg(&ccm_aes_alg);
+}
+
+static void __exit aes_mod_exit(void)
+{
+ crypto_unregister_alg(&ccm_aes_alg);
+}
+
+module_init(aes_mod_init);
+module_exit(aes_mod_exit);
+
+MODULE_DESCRIPTION("Synchronous AES in CCM mode using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("ccm(aes)");
diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c
new file mode 100644
index 000000000000..2075e1acae6b
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-cipher.c
@@ -0,0 +1,155 @@
+/*
+ * aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <crypto/aes.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+struct aes_block {
+ u8 b[AES_BLOCK_SIZE];
+};
+
+static int num_rounds(struct crypto_aes_ctx *ctx)
+{
+ /*
+ * # of rounds specified by AES:
+ * 128 bit key 10 rounds
+ * 192 bit key 12 rounds
+ * 256 bit key 14 rounds
+ * => n byte key => 6 + (n/4) rounds
+ */
+ return 6 + ctx->key_length / 4;
+}
+
+static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
+{
+ struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct aes_block *out = (struct aes_block *)dst;
+ struct aes_block const *in = (struct aes_block *)src;
+ void *dummy0;
+ int dummy1;
+
+ kernel_neon_begin_partial(4);
+
+ __asm__(" ld1 {v0.16b}, %[in] ;"
+ " ld1 {v1.2d}, [%[key]], #16 ;"
+ " cmp %w[rounds], #10 ;"
+ " bmi 0f ;"
+ " bne 3f ;"
+ " mov v3.16b, v1.16b ;"
+ " b 2f ;"
+ "0: mov v2.16b, v1.16b ;"
+ " ld1 {v3.2d}, [%[key]], #16 ;"
+ "1: aese v0.16b, v2.16b ;"
+ " aesmc v0.16b, v0.16b ;"
+ "2: ld1 {v1.2d}, [%[key]], #16 ;"
+ " aese v0.16b, v3.16b ;"
+ " aesmc v0.16b, v0.16b ;"
+ "3: ld1 {v2.2d}, [%[key]], #16 ;"
+ " subs %w[rounds], %w[rounds], #3 ;"
+ " aese v0.16b, v1.16b ;"
+ " aesmc v0.16b, v0.16b ;"
+ " ld1 {v3.2d}, [%[key]], #16 ;"
+ " bpl 1b ;"
+ " aese v0.16b, v2.16b ;"
+ " eor v0.16b, v0.16b, v3.16b ;"
+ " st1 {v0.16b}, %[out] ;"
+
+ : [out] "=Q"(*out),
+ [key] "=r"(dummy0),
+ [rounds] "=r"(dummy1)
+ : [in] "Q"(*in),
+ "1"(ctx->key_enc),
+ "2"(num_rounds(ctx) - 2)
+ : "cc");
+
+ kernel_neon_end();
+}
+
+static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
+{
+ struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct aes_block *out = (struct aes_block *)dst;
+ struct aes_block const *in = (struct aes_block *)src;
+ void *dummy0;
+ int dummy1;
+
+ kernel_neon_begin_partial(4);
+
+ __asm__(" ld1 {v0.16b}, %[in] ;"
+ " ld1 {v1.2d}, [%[key]], #16 ;"
+ " cmp %w[rounds], #10 ;"
+ " bmi 0f ;"
+ " bne 3f ;"
+ " mov v3.16b, v1.16b ;"
+ " b 2f ;"
+ "0: mov v2.16b, v1.16b ;"
+ " ld1 {v3.2d}, [%[key]], #16 ;"
+ "1: aesd v0.16b, v2.16b ;"
+ " aesimc v0.16b, v0.16b ;"
+ "2: ld1 {v1.2d}, [%[key]], #16 ;"
+ " aesd v0.16b, v3.16b ;"
+ " aesimc v0.16b, v0.16b ;"
+ "3: ld1 {v2.2d}, [%[key]], #16 ;"
+ " subs %w[rounds], %w[rounds], #3 ;"
+ " aesd v0.16b, v1.16b ;"
+ " aesimc v0.16b, v0.16b ;"
+ " ld1 {v3.2d}, [%[key]], #16 ;"
+ " bpl 1b ;"
+ " aesd v0.16b, v2.16b ;"
+ " eor v0.16b, v0.16b, v3.16b ;"
+ " st1 {v0.16b}, %[out] ;"
+
+ : [out] "=Q"(*out),
+ [key] "=r"(dummy0),
+ [rounds] "=r"(dummy1)
+ : [in] "Q"(*in),
+ "1"(ctx->key_dec),
+ "2"(num_rounds(ctx) - 2)
+ : "cc");
+
+ kernel_neon_end();
+}
+
+static struct crypto_alg aes_alg = {
+ .cra_name = "aes",
+ .cra_driver_name = "aes-ce",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_cipher = {
+ .cia_min_keysize = AES_MIN_KEY_SIZE,
+ .cia_max_keysize = AES_MAX_KEY_SIZE,
+ .cia_setkey = crypto_aes_set_key,
+ .cia_encrypt = aes_cipher_encrypt,
+ .cia_decrypt = aes_cipher_decrypt
+ }
+};
+
+static int __init aes_mod_init(void)
+{
+ return crypto_register_alg(&aes_alg);
+}
+
+static void __exit aes_mod_exit(void)
+{
+ crypto_unregister_alg(&aes_alg);
+}
+
+module_cpu_feature_match(AES, aes_mod_init);
+module_exit(aes_mod_exit);
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
new file mode 100644
index 000000000000..685a18f731eb
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce.S
@@ -0,0 +1,133 @@
+/*
+ * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with
+ * Crypto Extensions
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#define AES_ENTRY(func) ENTRY(ce_ ## func)
+#define AES_ENDPROC(func) ENDPROC(ce_ ## func)
+
+ .arch armv8-a+crypto
+
+ /* preload all round keys */
+ .macro load_round_keys, rounds, rk
+ cmp \rounds, #12
+ blo 2222f /* 128 bits */
+ beq 1111f /* 192 bits */
+ ld1 {v17.16b-v18.16b}, [\rk], #32
+1111: ld1 {v19.16b-v20.16b}, [\rk], #32
+2222: ld1 {v21.16b-v24.16b}, [\rk], #64
+ ld1 {v25.16b-v28.16b}, [\rk], #64
+ ld1 {v29.16b-v31.16b}, [\rk]
+ .endm
+
+ /* prepare for encryption with key in rk[] */
+ .macro enc_prepare, rounds, rk, ignore
+ load_round_keys \rounds, \rk
+ .endm
+
+ /* prepare for encryption (again) but with new key in rk[] */
+ .macro enc_switch_key, rounds, rk, ignore
+ load_round_keys \rounds, \rk
+ .endm
+
+ /* prepare for decryption with key in rk[] */
+ .macro dec_prepare, rounds, rk, ignore
+ load_round_keys \rounds, \rk
+ .endm
+
+ .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3
+ aes\de \i0\().16b, \k\().16b
+ .ifnb \i1
+ aes\de \i1\().16b, \k\().16b
+ .ifnb \i3
+ aes\de \i2\().16b, \k\().16b
+ aes\de \i3\().16b, \k\().16b
+ .endif
+ .endif
+ aes\mc \i0\().16b, \i0\().16b
+ .ifnb \i1
+ aes\mc \i1\().16b, \i1\().16b
+ .ifnb \i3
+ aes\mc \i2\().16b, \i2\().16b
+ aes\mc \i3\().16b, \i3\().16b
+ .endif
+ .endif
+ .endm
+
+ /* up to 4 interleaved encryption rounds with the same round key */
+ .macro round_Nx, enc, k, i0, i1, i2, i3
+ .ifc \enc, e
+ do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3
+ .else
+ do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3
+ .endif
+ .endm
+
+ /* up to 4 interleaved final rounds */
+ .macro fin_round_Nx, de, k, k2, i0, i1, i2, i3
+ aes\de \i0\().16b, \k\().16b
+ .ifnb \i1
+ aes\de \i1\().16b, \k\().16b
+ .ifnb \i3
+ aes\de \i2\().16b, \k\().16b
+ aes\de \i3\().16b, \k\().16b
+ .endif
+ .endif
+ eor \i0\().16b, \i0\().16b, \k2\().16b
+ .ifnb \i1
+ eor \i1\().16b, \i1\().16b, \k2\().16b
+ .ifnb \i3
+ eor \i2\().16b, \i2\().16b, \k2\().16b
+ eor \i3\().16b, \i3\().16b, \k2\().16b
+ .endif
+ .endif
+ .endm
+
+ /* up to 4 interleaved blocks */
+ .macro do_block_Nx, enc, rounds, i0, i1, i2, i3
+ cmp \rounds, #12
+ blo 2222f /* 128 bits */
+ beq 1111f /* 192 bits */
+ round_Nx \enc, v17, \i0, \i1, \i2, \i3
+ round_Nx \enc, v18, \i0, \i1, \i2, \i3
+1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3
+ round_Nx \enc, v20, \i0, \i1, \i2, \i3
+2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29
+ round_Nx \enc, \key, \i0, \i1, \i2, \i3
+ .endr
+ fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3
+ .endm
+
+ .macro encrypt_block, in, rounds, t0, t1, t2
+ do_block_Nx e, \rounds, \in
+ .endm
+
+ .macro encrypt_block2x, i0, i1, rounds, t0, t1, t2
+ do_block_Nx e, \rounds, \i0, \i1
+ .endm
+
+ .macro encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
+ do_block_Nx e, \rounds, \i0, \i1, \i2, \i3
+ .endm
+
+ .macro decrypt_block, in, rounds, t0, t1, t2
+ do_block_Nx d, \rounds, \in
+ .endm
+
+ .macro decrypt_block2x, i0, i1, rounds, t0, t1, t2
+ do_block_Nx d, \rounds, \i0, \i1
+ .endm
+
+ .macro decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
+ do_block_Nx d, \rounds, \i0, \i1, \i2, \i3
+ .endm
+
+#include "aes-modes.S"
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
new file mode 100644
index 000000000000..60f2f4c12256
--- /dev/null
+++ b/arch/arm64/crypto/aes-glue.c
@@ -0,0 +1,446 @@
+/*
+ * linux/arch/arm64/crypto/aes-glue.c - wrapper code for ARMv8 AES
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/hwcap.h>
+#include <crypto/aes.h>
+#include <crypto/ablk_helper.h>
+#include <crypto/algapi.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+
+#ifdef USE_V8_CRYPTO_EXTENSIONS
+#define MODE "ce"
+#define PRIO 300
+#define aes_ecb_encrypt ce_aes_ecb_encrypt
+#define aes_ecb_decrypt ce_aes_ecb_decrypt
+#define aes_cbc_encrypt ce_aes_cbc_encrypt
+#define aes_cbc_decrypt ce_aes_cbc_decrypt
+#define aes_ctr_encrypt ce_aes_ctr_encrypt
+#define aes_xts_encrypt ce_aes_xts_encrypt
+#define aes_xts_decrypt ce_aes_xts_decrypt
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
+#else
+#define MODE "neon"
+#define PRIO 200
+#define aes_ecb_encrypt neon_aes_ecb_encrypt
+#define aes_ecb_decrypt neon_aes_ecb_decrypt
+#define aes_cbc_encrypt neon_aes_cbc_encrypt
+#define aes_cbc_decrypt neon_aes_cbc_decrypt
+#define aes_ctr_encrypt neon_aes_ctr_encrypt
+#define aes_xts_encrypt neon_aes_xts_encrypt
+#define aes_xts_decrypt neon_aes_xts_decrypt
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON");
+MODULE_ALIAS("ecb(aes)");
+MODULE_ALIAS("cbc(aes)");
+MODULE_ALIAS("ctr(aes)");
+MODULE_ALIAS("xts(aes)");
+#endif
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+/* defined in aes-modes.S */
+asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, int first);
+asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, int first);
+
+asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 iv[], int first);
+asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 iv[], int first);
+
+asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 ctr[], int first);
+
+asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
+ int rounds, int blocks, u8 const rk2[], u8 iv[],
+ int first);
+asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
+ int rounds, int blocks, u8 const rk2[], u8 iv[],
+ int first);
+
+struct crypto_aes_xts_ctx {
+ struct crypto_aes_ctx key1;
+ struct crypto_aes_ctx __aligned(8) key2;
+};
+
+static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+ int ret;
+
+ ret = crypto_aes_expand_key(&ctx->key1, in_key, key_len / 2);
+ if (!ret)
+ ret = crypto_aes_expand_key(&ctx->key2, &in_key[key_len / 2],
+ key_len / 2);
+ if (!ret)
+ return 0;
+
+ tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+}
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_enc, rounds, blocks, first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+ return err;
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_dec, rounds, blocks, first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+ return err;
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_enc, rounds, blocks, walk.iv,
+ first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+ return err;
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_dec, rounds, blocks, walk.iv,
+ first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+ return err;
+}
+
+static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key_length / 4;
+ struct blkcipher_walk walk;
+ int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+
+ first = 1;
+ kernel_neon_begin();
+ while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+ aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_enc, rounds, blocks, walk.iv,
+ first);
+ first = 0;
+ nbytes -= blocks * AES_BLOCK_SIZE;
+ if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE)
+ break;
+ err = blkcipher_walk_done(desc, &walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ }
+ if (nbytes) {
+ u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
+ u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+ u8 __aligned(8) tail[AES_BLOCK_SIZE];
+
+ /*
+ * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
+ * to tell aes_ctr_encrypt() to only read half a block.
+ */
+ blocks = (nbytes <= 8) ? -1 : 1;
+
+ aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds,
+ blocks, walk.iv, first);
+ memcpy(tdst, tail, nbytes);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key1.key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key1.key_enc, rounds, blocks,
+ (u8 *)ctx->key2.key_enc, walk.iv, first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = 6 + ctx->key1.key_length / 4;
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key1.key_dec, rounds, blocks,
+ (u8 *)ctx->key2.key_enc, walk.iv, first);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static struct crypto_alg aes_algs[] = { {
+ .cra_name = "__ecb-aes-" MODE,
+ .cra_driver_name = "__driver-ecb-aes-" MODE,
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = crypto_aes_set_key,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ },
+}, {
+ .cra_name = "__cbc-aes-" MODE,
+ .cra_driver_name = "__driver-cbc-aes-" MODE,
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = crypto_aes_set_key,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+}, {
+ .cra_name = "__ctr-aes-" MODE,
+ .cra_driver_name = "__driver-ctr-aes-" MODE,
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = crypto_aes_set_key,
+ .encrypt = ctr_encrypt,
+ .decrypt = ctr_encrypt,
+ },
+}, {
+ .cra_name = "__xts-aes-" MODE,
+ .cra_driver_name = "__driver-xts-aes-" MODE,
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = xts_set_key,
+ .encrypt = xts_encrypt,
+ .decrypt = xts_decrypt,
+ },
+}, {
+ .cra_name = "ecb(aes)",
+ .cra_driver_name = "ecb-aes-" MODE,
+ .cra_priority = PRIO,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+}, {
+ .cra_name = "cbc(aes)",
+ .cra_driver_name = "cbc-aes-" MODE,
+ .cra_priority = PRIO,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+}, {
+ .cra_name = "ctr(aes)",
+ .cra_driver_name = "ctr-aes-" MODE,
+ .cra_priority = PRIO,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+}, {
+ .cra_name = "xts(aes)",
+ .cra_driver_name = "xts-aes-" MODE,
+ .cra_priority = PRIO,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+} };
+
+static int __init aes_init(void)
+{
+ return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+static void __exit aes_exit(void)
+{
+ crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+#ifdef USE_V8_CRYPTO_EXTENSIONS
+module_cpu_feature_match(AES, aes_init);
+#else
+module_init(aes_init);
+#endif
+module_exit(aes_exit);
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
new file mode 100644
index 000000000000..f6e372c528eb
--- /dev/null
+++ b/arch/arm64/crypto/aes-modes.S
@@ -0,0 +1,532 @@
+/*
+ * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* included by aes-ce.S and aes-neon.S */
+
+ .text
+ .align 4
+
+/*
+ * There are several ways to instantiate this code:
+ * - no interleave, all inline
+ * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2)
+ * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE)
+ * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4)
+ * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE)
+ *
+ * Macros imported by this code:
+ * - enc_prepare - setup NEON registers for encryption
+ * - dec_prepare - setup NEON registers for decryption
+ * - enc_switch_key - change to new key after having prepared for encryption
+ * - encrypt_block - encrypt a single block
+ * - decrypt block - decrypt a single block
+ * - encrypt_block2x - encrypt 2 blocks in parallel (if INTERLEAVE == 2)
+ * - decrypt_block2x - decrypt 2 blocks in parallel (if INTERLEAVE == 2)
+ * - encrypt_block4x - encrypt 4 blocks in parallel (if INTERLEAVE == 4)
+ * - decrypt_block4x - decrypt 4 blocks in parallel (if INTERLEAVE == 4)
+ */
+
+#if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE)
+#define FRAME_PUSH stp x29, x30, [sp,#-16]! ; mov x29, sp
+#define FRAME_POP ldp x29, x30, [sp],#16
+
+#if INTERLEAVE == 2
+
+aes_encrypt_block2x:
+ encrypt_block2x v0, v1, w3, x2, x6, w7
+ ret
+ENDPROC(aes_encrypt_block2x)
+
+aes_decrypt_block2x:
+ decrypt_block2x v0, v1, w3, x2, x6, w7
+ ret
+ENDPROC(aes_decrypt_block2x)
+
+#elif INTERLEAVE == 4
+
+aes_encrypt_block4x:
+ encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
+ ret
+ENDPROC(aes_encrypt_block4x)
+
+aes_decrypt_block4x:
+ decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
+ ret
+ENDPROC(aes_decrypt_block4x)
+
+#else
+#error INTERLEAVE should equal 2 or 4
+#endif
+
+ .macro do_encrypt_block2x
+ bl aes_encrypt_block2x
+ .endm
+
+ .macro do_decrypt_block2x
+ bl aes_decrypt_block2x
+ .endm
+
+ .macro do_encrypt_block4x
+ bl aes_encrypt_block4x
+ .endm
+
+ .macro do_decrypt_block4x
+ bl aes_decrypt_block4x
+ .endm
+
+#else
+#define FRAME_PUSH
+#define FRAME_POP
+
+ .macro do_encrypt_block2x
+ encrypt_block2x v0, v1, w3, x2, x6, w7
+ .endm
+
+ .macro do_decrypt_block2x
+ decrypt_block2x v0, v1, w3, x2, x6, w7
+ .endm
+
+ .macro do_encrypt_block4x
+ encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
+ .endm
+
+ .macro do_decrypt_block4x
+ decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
+ .endm
+
+#endif
+
+ /*
+ * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, int first)
+ * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, int first)
+ */
+
+AES_ENTRY(aes_ecb_encrypt)
+ FRAME_PUSH
+ cbz w5, .LecbencloopNx
+
+ enc_prepare w3, x2, x5
+
+.LecbencloopNx:
+#if INTERLEAVE >= 2
+ subs w4, w4, #INTERLEAVE
+ bmi .Lecbenc1x
+#if INTERLEAVE == 2
+ ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
+ do_encrypt_block2x
+ st1 {v0.16b-v1.16b}, [x0], #32
+#else
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
+ do_encrypt_block4x
+ st1 {v0.16b-v3.16b}, [x0], #64
+#endif
+ b .LecbencloopNx
+.Lecbenc1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lecbencout
+#endif
+.Lecbencloop:
+ ld1 {v0.16b}, [x1], #16 /* get next pt block */
+ encrypt_block v0, w3, x2, x5, w6
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ bne .Lecbencloop
+.Lecbencout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_ecb_encrypt)
+
+
+AES_ENTRY(aes_ecb_decrypt)
+ FRAME_PUSH
+ cbz w5, .LecbdecloopNx
+
+ dec_prepare w3, x2, x5
+
+.LecbdecloopNx:
+#if INTERLEAVE >= 2
+ subs w4, w4, #INTERLEAVE
+ bmi .Lecbdec1x
+#if INTERLEAVE == 2
+ ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
+ do_decrypt_block2x
+ st1 {v0.16b-v1.16b}, [x0], #32
+#else
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
+ do_decrypt_block4x
+ st1 {v0.16b-v3.16b}, [x0], #64
+#endif
+ b .LecbdecloopNx
+.Lecbdec1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lecbdecout
+#endif
+.Lecbdecloop:
+ ld1 {v0.16b}, [x1], #16 /* get next ct block */
+ decrypt_block v0, w3, x2, x5, w6
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ bne .Lecbdecloop
+.Lecbdecout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_ecb_decrypt)
+
+
+ /*
+ * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 iv[], int first)
+ * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 iv[], int first)
+ */
+
+AES_ENTRY(aes_cbc_encrypt)
+ cbz w6, .Lcbcencloop
+
+ ld1 {v0.16b}, [x5] /* get iv */
+ enc_prepare w3, x2, x5
+
+.Lcbcencloop:
+ ld1 {v1.16b}, [x1], #16 /* get next pt block */
+ eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */
+ encrypt_block v0, w3, x2, x5, w6
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ bne .Lcbcencloop
+ ret
+AES_ENDPROC(aes_cbc_encrypt)
+
+
+AES_ENTRY(aes_cbc_decrypt)
+ FRAME_PUSH
+ cbz w6, .LcbcdecloopNx
+
+ ld1 {v7.16b}, [x5] /* get iv */
+ dec_prepare w3, x2, x5
+
+.LcbcdecloopNx:
+#if INTERLEAVE >= 2
+ subs w4, w4, #INTERLEAVE
+ bmi .Lcbcdec1x
+#if INTERLEAVE == 2
+ ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
+ mov v2.16b, v0.16b
+ mov v3.16b, v1.16b
+ do_decrypt_block2x
+ eor v0.16b, v0.16b, v7.16b
+ eor v1.16b, v1.16b, v2.16b
+ mov v7.16b, v3.16b
+ st1 {v0.16b-v1.16b}, [x0], #32
+#else
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
+ mov v4.16b, v0.16b
+ mov v5.16b, v1.16b
+ mov v6.16b, v2.16b
+ do_decrypt_block4x
+ sub x1, x1, #16
+ eor v0.16b, v0.16b, v7.16b
+ eor v1.16b, v1.16b, v4.16b
+ ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
+ eor v2.16b, v2.16b, v5.16b
+ eor v3.16b, v3.16b, v6.16b
+ st1 {v0.16b-v3.16b}, [x0], #64
+#endif
+ b .LcbcdecloopNx
+.Lcbcdec1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lcbcdecout
+#endif
+.Lcbcdecloop:
+ ld1 {v1.16b}, [x1], #16 /* get next ct block */
+ mov v0.16b, v1.16b /* ...and copy to v0 */
+ decrypt_block v0, w3, x2, x5, w6
+ eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
+ mov v7.16b, v1.16b /* ct is next iv */
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ bne .Lcbcdecloop
+.Lcbcdecout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_cbc_decrypt)
+
+
+ /*
+ * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 ctr[], int first)
+ */
+
+AES_ENTRY(aes_ctr_encrypt)
+ FRAME_PUSH
+ cbnz w6, .Lctrfirst /* 1st time around? */
+ umov x5, v4.d[1] /* keep swabbed ctr in reg */
+ rev x5, x5
+#if INTERLEAVE >= 2
+ cmn w5, w4 /* 32 bit overflow? */
+ bcs .Lctrinc
+ add x5, x5, #1 /* increment BE ctr */
+ b .LctrincNx
+#else
+ b .Lctrinc
+#endif
+.Lctrfirst:
+ enc_prepare w3, x2, x6
+ ld1 {v4.16b}, [x5]
+ umov x5, v4.d[1] /* keep swabbed ctr in reg */
+ rev x5, x5
+#if INTERLEAVE >= 2
+ cmn w5, w4 /* 32 bit overflow? */
+ bcs .Lctrloop
+.LctrloopNx:
+ subs w4, w4, #INTERLEAVE
+ bmi .Lctr1x
+#if INTERLEAVE == 2
+ mov v0.8b, v4.8b
+ mov v1.8b, v4.8b
+ rev x7, x5
+ add x5, x5, #1
+ ins v0.d[1], x7
+ rev x7, x5
+ add x5, x5, #1
+ ins v1.d[1], x7
+ ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */
+ do_encrypt_block2x
+ eor v0.16b, v0.16b, v2.16b
+ eor v1.16b, v1.16b, v3.16b
+ st1 {v0.16b-v1.16b}, [x0], #32
+#else
+ ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
+ dup v7.4s, w5
+ mov v0.16b, v4.16b
+ add v7.4s, v7.4s, v8.4s
+ mov v1.16b, v4.16b
+ rev32 v8.16b, v7.16b
+ mov v2.16b, v4.16b
+ mov v3.16b, v4.16b
+ mov v1.s[3], v8.s[0]
+ mov v2.s[3], v8.s[1]
+ mov v3.s[3], v8.s[2]
+ ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
+ do_encrypt_block4x
+ eor v0.16b, v5.16b, v0.16b
+ ld1 {v5.16b}, [x1], #16 /* get 1 input block */
+ eor v1.16b, v6.16b, v1.16b
+ eor v2.16b, v7.16b, v2.16b
+ eor v3.16b, v5.16b, v3.16b
+ st1 {v0.16b-v3.16b}, [x0], #64
+ add x5, x5, #INTERLEAVE
+#endif
+ cbz w4, .LctroutNx
+.LctrincNx:
+ rev x7, x5
+ ins v4.d[1], x7
+ b .LctrloopNx
+.LctroutNx:
+ sub x5, x5, #1
+ rev x7, x5
+ ins v4.d[1], x7
+ b .Lctrout
+.Lctr1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lctrout
+#endif
+.Lctrloop:
+ mov v0.16b, v4.16b
+ encrypt_block v0, w3, x2, x6, w7
+ subs w4, w4, #1
+ bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */
+ ld1 {v3.16b}, [x1], #16
+ eor v3.16b, v0.16b, v3.16b
+ st1 {v3.16b}, [x0], #16
+ beq .Lctrout
+.Lctrinc:
+ adds x5, x5, #1 /* increment BE ctr */
+ rev x7, x5
+ ins v4.d[1], x7
+ bcc .Lctrloop /* no overflow? */
+ umov x7, v4.d[0] /* load upper word of ctr */
+ rev x7, x7 /* ... to handle the carry */
+ add x7, x7, #1
+ rev x7, x7
+ ins v4.d[0], x7
+ b .Lctrloop
+.Lctrhalfblock:
+ ld1 {v3.8b}, [x1]
+ eor v3.8b, v0.8b, v3.8b
+ st1 {v3.8b}, [x0]
+.Lctrout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_ctr_encrypt)
+ .ltorg
+
+
+ /*
+ * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+ * int blocks, u8 const rk2[], u8 iv[], int first)
+ * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+ * int blocks, u8 const rk2[], u8 iv[], int first)
+ */
+
+ .macro next_tweak, out, in, const, tmp
+ sshr \tmp\().2d, \in\().2d, #63
+ and \tmp\().16b, \tmp\().16b, \const\().16b
+ add \out\().2d, \in\().2d, \in\().2d
+ ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
+ eor \out\().16b, \out\().16b, \tmp\().16b
+ .endm
+
+.Lxts_mul_x:
+ .word 1, 0, 0x87, 0
+
+AES_ENTRY(aes_xts_encrypt)
+ FRAME_PUSH
+ cbz w7, .LxtsencloopNx
+
+ ld1 {v4.16b}, [x6]
+ enc_prepare w3, x5, x6
+ encrypt_block v4, w3, x5, x6, w7 /* first tweak */
+ enc_switch_key w3, x2, x6
+ ldr q7, .Lxts_mul_x
+ b .LxtsencNx
+
+.LxtsencloopNx:
+ ldr q7, .Lxts_mul_x
+ next_tweak v4, v4, v7, v8
+.LxtsencNx:
+#if INTERLEAVE >= 2
+ subs w4, w4, #INTERLEAVE
+ bmi .Lxtsenc1x
+#if INTERLEAVE == 2
+ ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
+ next_tweak v5, v4, v7, v8
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ do_encrypt_block2x
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ st1 {v0.16b-v1.16b}, [x0], #32
+ cbz w4, .LxtsencoutNx
+ next_tweak v4, v5, v7, v8
+ b .LxtsencNx
+.LxtsencoutNx:
+ mov v4.16b, v5.16b
+ b .Lxtsencout
+#else
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
+ next_tweak v5, v4, v7, v8
+ eor v0.16b, v0.16b, v4.16b
+ next_tweak v6, v5, v7, v8
+ eor v1.16b, v1.16b, v5.16b
+ eor v2.16b, v2.16b, v6.16b
+ next_tweak v7, v6, v7, v8
+ eor v3.16b, v3.16b, v7.16b
+ do_encrypt_block4x
+ eor v3.16b, v3.16b, v7.16b
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ eor v2.16b, v2.16b, v6.16b
+ st1 {v0.16b-v3.16b}, [x0], #64
+ mov v4.16b, v7.16b
+ cbz w4, .Lxtsencout
+ b .LxtsencloopNx
+#endif
+.Lxtsenc1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lxtsencout
+#endif
+.Lxtsencloop:
+ ld1 {v1.16b}, [x1], #16
+ eor v0.16b, v1.16b, v4.16b
+ encrypt_block v0, w3, x2, x6, w7
+ eor v0.16b, v0.16b, v4.16b
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ beq .Lxtsencout
+ next_tweak v4, v4, v7, v8
+ b .Lxtsencloop
+.Lxtsencout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_xts_encrypt)
+
+
+AES_ENTRY(aes_xts_decrypt)
+ FRAME_PUSH
+ cbz w7, .LxtsdecloopNx
+
+ ld1 {v4.16b}, [x6]
+ enc_prepare w3, x5, x6
+ encrypt_block v4, w3, x5, x6, w7 /* first tweak */
+ dec_prepare w3, x2, x6
+ ldr q7, .Lxts_mul_x
+ b .LxtsdecNx
+
+.LxtsdecloopNx:
+ ldr q7, .Lxts_mul_x
+ next_tweak v4, v4, v7, v8
+.LxtsdecNx:
+#if INTERLEAVE >= 2
+ subs w4, w4, #INTERLEAVE
+ bmi .Lxtsdec1x
+#if INTERLEAVE == 2
+ ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
+ next_tweak v5, v4, v7, v8
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ do_decrypt_block2x
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ st1 {v0.16b-v1.16b}, [x0], #32
+ cbz w4, .LxtsdecoutNx
+ next_tweak v4, v5, v7, v8
+ b .LxtsdecNx
+.LxtsdecoutNx:
+ mov v4.16b, v5.16b
+ b .Lxtsdecout
+#else
+ ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
+ next_tweak v5, v4, v7, v8
+ eor v0.16b, v0.16b, v4.16b
+ next_tweak v6, v5, v7, v8
+ eor v1.16b, v1.16b, v5.16b
+ eor v2.16b, v2.16b, v6.16b
+ next_tweak v7, v6, v7, v8
+ eor v3.16b, v3.16b, v7.16b
+ do_decrypt_block4x
+ eor v3.16b, v3.16b, v7.16b
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ eor v2.16b, v2.16b, v6.16b
+ st1 {v0.16b-v3.16b}, [x0], #64
+ mov v4.16b, v7.16b
+ cbz w4, .Lxtsdecout
+ b .LxtsdecloopNx
+#endif
+.Lxtsdec1x:
+ adds w4, w4, #INTERLEAVE
+ beq .Lxtsdecout
+#endif
+.Lxtsdecloop:
+ ld1 {v1.16b}, [x1], #16
+ eor v0.16b, v1.16b, v4.16b
+ decrypt_block v0, w3, x2, x6, w7
+ eor v0.16b, v0.16b, v4.16b
+ st1 {v0.16b}, [x0], #16
+ subs w4, w4, #1
+ beq .Lxtsdecout
+ next_tweak v4, v4, v7, v8
+ b .Lxtsdecloop
+.Lxtsdecout:
+ FRAME_POP
+ ret
+AES_ENDPROC(aes_xts_decrypt)
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
new file mode 100644
index 000000000000..b93170e1cc93
--- /dev/null
+++ b/arch/arm64/crypto/aes-neon.S
@@ -0,0 +1,382 @@
+/*
+ * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+#define AES_ENTRY(func) ENTRY(neon_ ## func)
+#define AES_ENDPROC(func) ENDPROC(neon_ ## func)
+
+ /* multiply by polynomial 'x' in GF(2^8) */
+ .macro mul_by_x, out, in, temp, const
+ sshr \temp, \in, #7
+ add \out, \in, \in
+ and \temp, \temp, \const
+ eor \out, \out, \temp
+ .endm
+
+ /* preload the entire Sbox */
+ .macro prepare, sbox, shiftrows, temp
+ adr \temp, \sbox
+ movi v12.16b, #0x40
+ ldr q13, \shiftrows
+ movi v14.16b, #0x1b
+ ld1 {v16.16b-v19.16b}, [\temp], #64
+ ld1 {v20.16b-v23.16b}, [\temp], #64
+ ld1 {v24.16b-v27.16b}, [\temp], #64
+ ld1 {v28.16b-v31.16b}, [\temp]
+ .endm
+
+ /* do preload for encryption */
+ .macro enc_prepare, ignore0, ignore1, temp
+ prepare .LForward_Sbox, .LForward_ShiftRows, \temp
+ .endm
+
+ .macro enc_switch_key, ignore0, ignore1, temp
+ /* do nothing */
+ .endm
+
+ /* do preload for decryption */
+ .macro dec_prepare, ignore0, ignore1, temp
+ prepare .LReverse_Sbox, .LReverse_ShiftRows, \temp
+ .endm
+
+ /* apply SubBytes transformation using the the preloaded Sbox */
+ .macro sub_bytes, in
+ sub v9.16b, \in\().16b, v12.16b
+ tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b
+ sub v10.16b, v9.16b, v12.16b
+ tbx \in\().16b, {v20.16b-v23.16b}, v9.16b
+ sub v11.16b, v10.16b, v12.16b
+ tbx \in\().16b, {v24.16b-v27.16b}, v10.16b
+ tbx \in\().16b, {v28.16b-v31.16b}, v11.16b
+ .endm
+
+ /* apply MixColumns transformation */
+ .macro mix_columns, in
+ mul_by_x v10.16b, \in\().16b, v9.16b, v14.16b
+ rev32 v8.8h, \in\().8h
+ eor \in\().16b, v10.16b, \in\().16b
+ shl v9.4s, v8.4s, #24
+ shl v11.4s, \in\().4s, #24
+ sri v9.4s, v8.4s, #8
+ sri v11.4s, \in\().4s, #8
+ eor v9.16b, v9.16b, v8.16b
+ eor v10.16b, v10.16b, v9.16b
+ eor \in\().16b, v10.16b, v11.16b
+ .endm
+
+ /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
+ .macro inv_mix_columns, in
+ mul_by_x v11.16b, \in\().16b, v10.16b, v14.16b
+ mul_by_x v11.16b, v11.16b, v10.16b, v14.16b
+ eor \in\().16b, \in\().16b, v11.16b
+ rev32 v11.8h, v11.8h
+ eor \in\().16b, \in\().16b, v11.16b
+ mix_columns \in
+ .endm
+
+ .macro do_block, enc, in, rounds, rk, rkp, i
+ ld1 {v15.16b}, [\rk]
+ add \rkp, \rk, #16
+ mov \i, \rounds
+1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
+ tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
+ sub_bytes \in
+ ld1 {v15.16b}, [\rkp], #16
+ subs \i, \i, #1
+ beq 2222f
+ .if \enc == 1
+ mix_columns \in
+ .else
+ inv_mix_columns \in
+ .endif
+ b 1111b
+2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
+ .endm
+
+ .macro encrypt_block, in, rounds, rk, rkp, i
+ do_block 1, \in, \rounds, \rk, \rkp, \i
+ .endm
+
+ .macro decrypt_block, in, rounds, rk, rkp, i
+ do_block 0, \in, \rounds, \rk, \rkp, \i
+ .endm
+
+ /*
+ * Interleaved versions: functionally equivalent to the
+ * ones above, but applied to 2 or 4 AES states in parallel.
+ */
+
+ .macro sub_bytes_2x, in0, in1
+ sub v8.16b, \in0\().16b, v12.16b
+ sub v9.16b, \in1\().16b, v12.16b
+ tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
+ tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
+ sub v10.16b, v8.16b, v12.16b
+ sub v11.16b, v9.16b, v12.16b
+ tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
+ tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
+ sub v8.16b, v10.16b, v12.16b
+ sub v9.16b, v11.16b, v12.16b
+ tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b
+ tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b
+ tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
+ tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
+ .endm
+
+ .macro sub_bytes_4x, in0, in1, in2, in3
+ sub v8.16b, \in0\().16b, v12.16b
+ tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
+ sub v9.16b, \in1\().16b, v12.16b
+ tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
+ sub v10.16b, \in2\().16b, v12.16b
+ tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
+ sub v11.16b, \in3\().16b, v12.16b
+ tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
+ tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
+ tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
+ sub v8.16b, v8.16b, v12.16b
+ tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b
+ sub v9.16b, v9.16b, v12.16b
+ tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b
+ sub v10.16b, v10.16b, v12.16b
+ tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b
+ sub v11.16b, v11.16b, v12.16b
+ tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b
+ sub v8.16b, v8.16b, v12.16b
+ tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b
+ sub v9.16b, v9.16b, v12.16b
+ tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b
+ sub v10.16b, v10.16b, v12.16b
+ tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
+ sub v11.16b, v11.16b, v12.16b
+ tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
+ tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b
+ tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b
+ .endm
+
+ .macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
+ sshr \tmp0\().16b, \in0\().16b, #7
+ add \out0\().16b, \in0\().16b, \in0\().16b
+ sshr \tmp1\().16b, \in1\().16b, #7
+ and \tmp0\().16b, \tmp0\().16b, \const\().16b
+ add \out1\().16b, \in1\().16b, \in1\().16b
+ and \tmp1\().16b, \tmp1\().16b, \const\().16b
+ eor \out0\().16b, \out0\().16b, \tmp0\().16b
+ eor \out1\().16b, \out1\().16b, \tmp1\().16b
+ .endm
+
+ .macro mix_columns_2x, in0, in1
+ mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
+ rev32 v10.8h, \in0\().8h
+ rev32 v11.8h, \in1\().8h
+ eor \in0\().16b, v8.16b, \in0\().16b
+ eor \in1\().16b, v9.16b, \in1\().16b
+ shl v12.4s, v10.4s, #24
+ shl v13.4s, v11.4s, #24
+ eor v8.16b, v8.16b, v10.16b
+ sri v12.4s, v10.4s, #8
+ shl v10.4s, \in0\().4s, #24
+ eor v9.16b, v9.16b, v11.16b
+ sri v13.4s, v11.4s, #8
+ shl v11.4s, \in1\().4s, #24
+ sri v10.4s, \in0\().4s, #8
+ eor \in0\().16b, v8.16b, v12.16b
+ sri v11.4s, \in1\().4s, #8
+ eor \in1\().16b, v9.16b, v13.16b
+ eor \in0\().16b, v10.16b, \in0\().16b
+ eor \in1\().16b, v11.16b, \in1\().16b
+ .endm
+
+ .macro inv_mix_cols_2x, in0, in1
+ mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
+ mul_by_x_2x v8, v9, v8, v9, v10, v11, v14
+ eor \in0\().16b, \in0\().16b, v8.16b
+ eor \in1\().16b, \in1\().16b, v9.16b
+ rev32 v8.8h, v8.8h
+ rev32 v9.8h, v9.8h
+ eor \in0\().16b, \in0\().16b, v8.16b
+ eor \in1\().16b, \in1\().16b, v9.16b
+ mix_columns_2x \in0, \in1
+ .endm
+
+ .macro inv_mix_cols_4x, in0, in1, in2, in3
+ mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
+ mul_by_x_2x v10, v11, \in2, \in3, v12, v13, v14
+ mul_by_x_2x v8, v9, v8, v9, v12, v13, v14
+ mul_by_x_2x v10, v11, v10, v11, v12, v13, v14
+ eor \in0\().16b, \in0\().16b, v8.16b
+ eor \in1\().16b, \in1\().16b, v9.16b
+ eor \in2\().16b, \in2\().16b, v10.16b
+ eor \in3\().16b, \in3\().16b, v11.16b
+ rev32 v8.8h, v8.8h
+ rev32 v9.8h, v9.8h
+ rev32 v10.8h, v10.8h
+ rev32 v11.8h, v11.8h
+ eor \in0\().16b, \in0\().16b, v8.16b
+ eor \in1\().16b, \in1\().16b, v9.16b
+ eor \in2\().16b, \in2\().16b, v10.16b
+ eor \in3\().16b, \in3\().16b, v11.16b
+ mix_columns_2x \in0, \in1
+ mix_columns_2x \in2, \in3
+ .endm
+
+ .macro do_block_2x, enc, in0, in1 rounds, rk, rkp, i
+ ld1 {v15.16b}, [\rk]
+ add \rkp, \rk, #16
+ mov \i, \rounds
+1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
+ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
+ sub_bytes_2x \in0, \in1
+ tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
+ tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
+ ld1 {v15.16b}, [\rkp], #16
+ subs \i, \i, #1
+ beq 2222f
+ .if \enc == 1
+ mix_columns_2x \in0, \in1
+ ldr q13, .LForward_ShiftRows
+ .else
+ inv_mix_cols_2x \in0, \in1
+ ldr q13, .LReverse_ShiftRows
+ .endif
+ movi v12.16b, #0x40
+ b 1111b
+2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
+ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
+ .endm
+
+ .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
+ ld1 {v15.16b}, [\rk]
+ add \rkp, \rk, #16
+ mov \i, \rounds
+1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
+ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
+ eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
+ eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
+ sub_bytes_4x \in0, \in1, \in2, \in3
+ tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
+ tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
+ tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
+ tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
+ ld1 {v15.16b}, [\rkp], #16
+ subs \i, \i, #1
+ beq 2222f
+ .if \enc == 1
+ mix_columns_2x \in0, \in1
+ mix_columns_2x \in2, \in3
+ ldr q13, .LForward_ShiftRows
+ .else
+ inv_mix_cols_4x \in0, \in1, \in2, \in3
+ ldr q13, .LReverse_ShiftRows
+ .endif
+ movi v12.16b, #0x40
+ b 1111b
+2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
+ eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
+ eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
+ eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
+ .endm
+
+ .macro encrypt_block2x, in0, in1, rounds, rk, rkp, i
+ do_block_2x 1, \in0, \in1, \rounds, \rk, \rkp, \i
+ .endm
+
+ .macro decrypt_block2x, in0, in1, rounds, rk, rkp, i
+ do_block_2x 0, \in0, \in1, \rounds, \rk, \rkp, \i
+ .endm
+
+ .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
+ do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
+ .endm
+
+ .macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
+ do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
+ .endm
+
+#include "aes-modes.S"
+
+ .text
+ .align 4
+.LForward_ShiftRows:
+ .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3
+ .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb
+
+.LReverse_ShiftRows:
+ .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb
+ .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3
+
+.LForward_Sbox:
+ .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
+ .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
+ .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
+ .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
+ .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
+ .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
+ .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
+ .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
+ .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
+ .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
+ .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
+ .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
+ .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
+ .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
+ .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
+ .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
+ .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
+ .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
+ .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
+ .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
+ .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
+ .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
+ .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
+ .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
+ .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
+ .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
+ .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
+ .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
+ .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
+ .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
+ .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
+ .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+
+.LReverse_Sbox:
+ .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+ .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+ .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+ .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+ .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+ .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+ .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+ .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+ .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+ .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+ .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+ .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+ .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+ .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+ .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+ .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+ .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+ .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+ .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+ .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+ .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+ .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+ .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+ .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+ .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+ .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+ .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+ .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+ .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+ .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+ .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+ .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
new file mode 100644
index 000000000000..dc457015884e
--- /dev/null
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -0,0 +1,79 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 PMULL instructions.
+ *
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ SHASH .req v0
+ SHASH2 .req v1
+ T1 .req v2
+ T2 .req v3
+ MASK .req v4
+ XL .req v5
+ XM .req v6
+ XH .req v7
+ IN1 .req v7
+
+ .text
+ .arch armv8-a+crypto
+
+ /*
+ * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+ * struct ghash_key const *k, const char *head)
+ */
+ENTRY(pmull_ghash_update)
+ ld1 {SHASH.16b}, [x3]
+ ld1 {XL.16b}, [x1]
+ movi MASK.16b, #0xe1
+ ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
+ shl MASK.2d, MASK.2d, #57
+ eor SHASH2.16b, SHASH2.16b, SHASH.16b
+
+ /* do the head block first, if supplied */
+ cbz x4, 0f
+ ld1 {T1.2d}, [x4]
+ b 1f
+
+0: ld1 {T1.2d}, [x2], #16
+ sub w0, w0, #1
+
+1: /* multiply XL by SHASH in GF(2^128) */
+CPU_LE( rev64 T1.16b, T1.16b )
+
+ ext T2.16b, XL.16b, XL.16b, #8
+ ext IN1.16b, T1.16b, T1.16b, #8
+ eor T1.16b, T1.16b, T2.16b
+ eor XL.16b, XL.16b, IN1.16b
+
+ pmull2 XH.1q, SHASH.2d, XL.2d // a1 * b1
+ eor T1.16b, T1.16b, XL.16b
+ pmull XL.1q, SHASH.1d, XL.1d // a0 * b0
+ pmull XM.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0)
+
+ ext T1.16b, XL.16b, XH.16b, #8
+ eor T2.16b, XL.16b, XH.16b
+ eor XM.16b, XM.16b, T1.16b
+ eor XM.16b, XM.16b, T2.16b
+ pmull T2.1q, XL.1d, MASK.1d
+
+ mov XH.d[0], XM.d[1]
+ mov XM.d[1], XL.d[0]
+
+ eor XL.16b, XM.16b, T2.16b
+ ext T2.16b, XL.16b, XL.16b, #8
+ pmull XL.1q, XL.1d, MASK.1d
+ eor T2.16b, T2.16b, XH.16b
+ eor XL.16b, XL.16b, T2.16b
+
+ cbnz w0, 0b
+
+ st1 {XL.16b}, [x1]
+ ret
+ENDPROC(pmull_ghash_update)
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
new file mode 100644
index 000000000000..833ec1e3f3e9
--- /dev/null
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -0,0 +1,156 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 PMULL instructions.
+ *
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+#define GHASH_BLOCK_SIZE 16
+#define GHASH_DIGEST_SIZE 16
+
+struct ghash_key {
+ u64 a;
+ u64 b;
+};
+
+struct ghash_desc_ctx {
+ u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
+ u8 buf[GHASH_BLOCK_SIZE];
+ u32 count;
+};
+
+asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+ struct ghash_key const *k, const char *head);
+
+static int ghash_init(struct shash_desc *desc)
+{
+ struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ *ctx = (struct ghash_desc_ctx){};
+ return 0;
+}
+
+static int ghash_update(struct shash_desc *desc, const u8 *src,
+ unsigned int len)
+{
+ struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
+
+ ctx->count += len;
+
+ if ((partial + len) >= GHASH_BLOCK_SIZE) {
+ struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+ int blocks;
+
+ if (partial) {
+ int p = GHASH_BLOCK_SIZE - partial;
+
+ memcpy(ctx->buf + partial, src, p);
+ src += p;
+ len -= p;
+ }
+
+ blocks = len / GHASH_BLOCK_SIZE;
+ len %= GHASH_BLOCK_SIZE;
+
+ kernel_neon_begin_partial(8);
+ pmull_ghash_update(blocks, ctx->digest, src, key,
+ partial ? ctx->buf : NULL);
+ kernel_neon_end();
+ src += blocks * GHASH_BLOCK_SIZE;
+ partial = 0;
+ }
+ if (len)
+ memcpy(ctx->buf + partial, src, len);
+ return 0;
+}
+
+static int ghash_final(struct shash_desc *desc, u8 *dst)
+{
+ struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
+
+ if (partial) {
+ struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+
+ memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
+
+ kernel_neon_begin_partial(8);
+ pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
+ kernel_neon_end();
+ }
+ put_unaligned_be64(ctx->digest[1], dst);
+ put_unaligned_be64(ctx->digest[0], dst + 8);
+
+ *ctx = (struct ghash_desc_ctx){};
+ return 0;
+}
+
+static int ghash_setkey(struct crypto_shash *tfm,
+ const u8 *inkey, unsigned int keylen)
+{
+ struct ghash_key *key = crypto_shash_ctx(tfm);
+ u64 a, b;
+
+ if (keylen != GHASH_BLOCK_SIZE) {
+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+
+ /* perform multiplication by 'x' in GF(2^128) */
+ b = get_unaligned_be64(inkey);
+ a = get_unaligned_be64(inkey + 8);
+
+ key->a = (a << 1) | (b >> 63);
+ key->b = (b << 1) | (a >> 63);
+
+ if (b >> 63)
+ key->b ^= 0xc200000000000000UL;
+
+ return 0;
+}
+
+static struct shash_alg ghash_alg = {
+ .digestsize = GHASH_DIGEST_SIZE,
+ .init = ghash_init,
+ .update = ghash_update,
+ .final = ghash_final,
+ .setkey = ghash_setkey,
+ .descsize = sizeof(struct ghash_desc_ctx),
+ .base = {
+ .cra_name = "ghash",
+ .cra_driver_name = "ghash-ce",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = GHASH_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct ghash_key),
+ .cra_module = THIS_MODULE,
+ },
+};
+
+static int __init ghash_ce_mod_init(void)
+{
+ return crypto_register_shash(&ghash_alg);
+}
+
+static void __exit ghash_ce_mod_exit(void)
+{
+ crypto_unregister_shash(&ghash_alg);
+}
+
+module_cpu_feature_match(PMULL, ghash_ce_mod_init);
+module_exit(ghash_ce_mod_exit);
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
new file mode 100644
index 000000000000..09d57d98609c
--- /dev/null
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -0,0 +1,153 @@
+/*
+ * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+ .arch armv8-a+crypto
+
+ k0 .req v0
+ k1 .req v1
+ k2 .req v2
+ k3 .req v3
+
+ t0 .req v4
+ t1 .req v5
+
+ dga .req q6
+ dgav .req v6
+ dgb .req s7
+ dgbv .req v7
+
+ dg0q .req q12
+ dg0s .req s12
+ dg0v .req v12
+ dg1s .req s13
+ dg1v .req v13
+ dg2s .req s14
+
+ .macro add_only, op, ev, rc, s0, dg1
+ .ifc \ev, ev
+ add t1.4s, v\s0\().4s, \rc\().4s
+ sha1h dg2s, dg0s
+ .ifnb \dg1
+ sha1\op dg0q, \dg1, t0.4s
+ .else
+ sha1\op dg0q, dg1s, t0.4s
+ .endif
+ .else
+ .ifnb \s0
+ add t0.4s, v\s0\().4s, \rc\().4s
+ .endif
+ sha1h dg1s, dg0s
+ sha1\op dg0q, dg2s, t1.4s
+ .endif
+ .endm
+
+ .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1
+ sha1su0 v\s0\().4s, v\s1\().4s, v\s2\().4s
+ add_only \op, \ev, \rc, \s1, \dg1
+ sha1su1 v\s0\().4s, v\s3\().4s
+ .endm
+
+ /*
+ * The SHA1 round constants
+ */
+ .align 4
+.Lsha1_rcon:
+ .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
+
+ /*
+ * void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
+ * u8 *head, long bytes)
+ */
+ENTRY(sha1_ce_transform)
+ /* load round constants */
+ adr x6, .Lsha1_rcon
+ ld1r {k0.4s}, [x6], #4
+ ld1r {k1.4s}, [x6], #4
+ ld1r {k2.4s}, [x6], #4
+ ld1r {k3.4s}, [x6]
+
+ /* load state */
+ ldr dga, [x2]
+ ldr dgb, [x2, #16]
+
+ /* load partial state (if supplied) */
+ cbz x3, 0f
+ ld1 {v8.4s-v11.4s}, [x3]
+ b 1f
+
+ /* load input */
+0: ld1 {v8.4s-v11.4s}, [x1], #64
+ sub w0, w0, #1
+
+1:
+CPU_LE( rev32 v8.16b, v8.16b )
+CPU_LE( rev32 v9.16b, v9.16b )
+CPU_LE( rev32 v10.16b, v10.16b )
+CPU_LE( rev32 v11.16b, v11.16b )
+
+2: add t0.4s, v8.4s, k0.4s
+ mov dg0v.16b, dgav.16b
+
+ add_update c, ev, k0, 8, 9, 10, 11, dgb
+ add_update c, od, k0, 9, 10, 11, 8
+ add_update c, ev, k0, 10, 11, 8, 9
+ add_update c, od, k0, 11, 8, 9, 10
+ add_update c, ev, k1, 8, 9, 10, 11
+
+ add_update p, od, k1, 9, 10, 11, 8
+ add_update p, ev, k1, 10, 11, 8, 9
+ add_update p, od, k1, 11, 8, 9, 10
+ add_update p, ev, k1, 8, 9, 10, 11
+ add_update p, od, k2, 9, 10, 11, 8
+
+ add_update m, ev, k2, 10, 11, 8, 9
+ add_update m, od, k2, 11, 8, 9, 10
+ add_update m, ev, k2, 8, 9, 10, 11
+ add_update m, od, k2, 9, 10, 11, 8
+ add_update m, ev, k3, 10, 11, 8, 9
+
+ add_update p, od, k3, 11, 8, 9, 10
+ add_only p, ev, k3, 9
+ add_only p, od, k3, 10
+ add_only p, ev, k3, 11
+ add_only p, od
+
+ /* update state */
+ add dgbv.2s, dgbv.2s, dg1v.2s
+ add dgav.4s, dgav.4s, dg0v.4s
+
+ cbnz w0, 0b
+
+ /*
+ * Final block: add padding and total bit count.
+ * Skip if we have no total byte count in x4. In that case, the input
+ * size was not a round multiple of the block size, and the padding is
+ * handled by the C code.
+ */
+ cbz x4, 3f
+ movi v9.2d, #0
+ mov x8, #0x80000000
+ movi v10.2d, #0
+ ror x7, x4, #29 // ror(lsl(x4, 3), 32)
+ fmov d8, x8
+ mov x4, #0
+ mov v11.d[0], xzr
+ mov v11.d[1], x7
+ b 2b
+
+ /* store new state */
+3: str dga, [x2]
+ str dgb, [x2, #16]
+ ret
+ENDPROC(sha1_ce_transform)
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
new file mode 100644
index 000000000000..6fe83f37a750
--- /dev/null
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -0,0 +1,174 @@
+/*
+ * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
+ u8 *head, long bytes);
+
+static int sha1_init(struct shash_desc *desc)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ *sctx = (struct sha1_state){
+ .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
+ };
+ return 0;
+}
+
+static int sha1_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
+
+ sctx->count += len;
+
+ if ((partial + len) >= SHA1_BLOCK_SIZE) {
+ int blocks;
+
+ if (partial) {
+ int p = SHA1_BLOCK_SIZE - partial;
+
+ memcpy(sctx->buffer + partial, data, p);
+ data += p;
+ len -= p;
+ }
+
+ blocks = len / SHA1_BLOCK_SIZE;
+ len %= SHA1_BLOCK_SIZE;
+
+ kernel_neon_begin_partial(16);
+ sha1_ce_transform(blocks, data, sctx->state,
+ partial ? sctx->buffer : NULL, 0);
+ kernel_neon_end();
+
+ data += blocks * SHA1_BLOCK_SIZE;
+ partial = 0;
+ }
+ if (len)
+ memcpy(sctx->buffer + partial, data, len);
+ return 0;
+}
+
+static int sha1_final(struct shash_desc *desc, u8 *out)
+{
+ static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
+
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ __be64 bits = cpu_to_be64(sctx->count << 3);
+ __be32 *dst = (__be32 *)out;
+ int i;
+
+ u32 padlen = SHA1_BLOCK_SIZE
+ - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE);
+
+ sha1_update(desc, padding, padlen);
+ sha1_update(desc, (const u8 *)&bits, sizeof(bits));
+
+ for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha1_state){};
+ return 0;
+}
+
+static int sha1_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ __be32 *dst = (__be32 *)out;
+ int blocks;
+ int i;
+
+ if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) {
+ sha1_update(desc, data, len);
+ return sha1_final(desc, out);
+ }
+
+ /*
+ * Use a fast path if the input is a multiple of 64 bytes. In
+ * this case, there is no need to copy data around, and we can
+ * perform the entire digest calculation in a single invocation
+ * of sha1_ce_transform()
+ */
+ blocks = len / SHA1_BLOCK_SIZE;
+
+ kernel_neon_begin_partial(16);
+ sha1_ce_transform(blocks, data, sctx->state, NULL, len);
+ kernel_neon_end();
+
+ for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha1_state){};
+ return 0;
+}
+
+static int sha1_export(struct shash_desc *desc, void *out)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ struct sha1_state *dst = out;
+
+ *dst = *sctx;
+ return 0;
+}
+
+static int sha1_import(struct shash_desc *desc, const void *in)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ struct sha1_state const *src = in;
+
+ *sctx = *src;
+ return 0;
+}
+
+static struct shash_alg alg = {
+ .init = sha1_init,
+ .update = sha1_update,
+ .final = sha1_final,
+ .finup = sha1_finup,
+ .export = sha1_export,
+ .import = sha1_import,
+ .descsize = sizeof(struct sha1_state),
+ .digestsize = SHA1_DIGEST_SIZE,
+ .statesize = sizeof(struct sha1_state),
+ .base = {
+ .cra_name = "sha1",
+ .cra_driver_name = "sha1-ce",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA1_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static int __init sha1_ce_mod_init(void)
+{
+ return crypto_register_shash(&alg);
+}
+
+static void __exit sha1_ce_mod_fini(void)
+{
+ crypto_unregister_shash(&alg);
+}
+
+module_cpu_feature_match(SHA1, sha1_ce_mod_init);
+module_exit(sha1_ce_mod_fini);
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
new file mode 100644
index 000000000000..7f29fc031ea8
--- /dev/null
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -0,0 +1,156 @@
+/*
+ * sha2-ce-core.S - core SHA-224/SHA-256 transform using v8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+ .arch armv8-a+crypto
+
+ dga .req q20
+ dgav .req v20
+ dgb .req q21
+ dgbv .req v21
+
+ t0 .req v22
+ t1 .req v23
+
+ dg0q .req q24
+ dg0v .req v24
+ dg1q .req q25
+ dg1v .req v25
+ dg2q .req q26
+ dg2v .req v26
+
+ .macro add_only, ev, rc, s0
+ mov dg2v.16b, dg0v.16b
+ .ifeq \ev
+ add t1.4s, v\s0\().4s, \rc\().4s
+ sha256h dg0q, dg1q, t0.4s
+ sha256h2 dg1q, dg2q, t0.4s
+ .else
+ .ifnb \s0
+ add t0.4s, v\s0\().4s, \rc\().4s
+ .endif
+ sha256h dg0q, dg1q, t1.4s
+ sha256h2 dg1q, dg2q, t1.4s
+ .endif
+ .endm
+
+ .macro add_update, ev, rc, s0, s1, s2, s3
+ sha256su0 v\s0\().4s, v\s1\().4s
+ add_only \ev, \rc, \s1
+ sha256su1 v\s0\().4s, v\s2\().4s, v\s3\().4s
+ .endm
+
+ /*
+ * The SHA-256 round constants
+ */
+ .align 4
+.Lsha2_rcon:
+ .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
+ .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
+ .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
+ .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
+ .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
+ .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
+ .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
+ .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
+ .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
+ .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
+ .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
+ .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
+ .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
+ .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
+ .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
+ .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+
+ /*
+ * void sha2_ce_transform(int blocks, u8 const *src, u32 *state,
+ * u8 *head, long bytes)
+ */
+ENTRY(sha2_ce_transform)
+ /* load round constants */
+ adr x8, .Lsha2_rcon
+ ld1 { v0.4s- v3.4s}, [x8], #64
+ ld1 { v4.4s- v7.4s}, [x8], #64
+ ld1 { v8.4s-v11.4s}, [x8], #64
+ ld1 {v12.4s-v15.4s}, [x8]
+
+ /* load state */
+ ldp dga, dgb, [x2]
+
+ /* load partial input (if supplied) */
+ cbz x3, 0f
+ ld1 {v16.4s-v19.4s}, [x3]
+ b 1f
+
+ /* load input */
+0: ld1 {v16.4s-v19.4s}, [x1], #64
+ sub w0, w0, #1
+
+1:
+CPU_LE( rev32 v16.16b, v16.16b )
+CPU_LE( rev32 v17.16b, v17.16b )
+CPU_LE( rev32 v18.16b, v18.16b )
+CPU_LE( rev32 v19.16b, v19.16b )
+
+2: add t0.4s, v16.4s, v0.4s
+ mov dg0v.16b, dgav.16b
+ mov dg1v.16b, dgbv.16b
+
+ add_update 0, v1, 16, 17, 18, 19
+ add_update 1, v2, 17, 18, 19, 16
+ add_update 0, v3, 18, 19, 16, 17
+ add_update 1, v4, 19, 16, 17, 18
+
+ add_update 0, v5, 16, 17, 18, 19
+ add_update 1, v6, 17, 18, 19, 16
+ add_update 0, v7, 18, 19, 16, 17
+ add_update 1, v8, 19, 16, 17, 18
+
+ add_update 0, v9, 16, 17, 18, 19
+ add_update 1, v10, 17, 18, 19, 16
+ add_update 0, v11, 18, 19, 16, 17
+ add_update 1, v12, 19, 16, 17, 18
+
+ add_only 0, v13, 17
+ add_only 1, v14, 18
+ add_only 0, v15, 19
+ add_only 1
+
+ /* update state */
+ add dgav.4s, dgav.4s, dg0v.4s
+ add dgbv.4s, dgbv.4s, dg1v.4s
+
+ /* handled all input blocks? */
+ cbnz w0, 0b
+
+ /*
+ * Final block: add padding and total bit count.
+ * Skip if we have no total byte count in x4. In that case, the input
+ * size was not a round multiple of the block size, and the padding is
+ * handled by the C code.
+ */
+ cbz x4, 3f
+ movi v17.2d, #0
+ mov x8, #0x80000000
+ movi v18.2d, #0
+ ror x7, x4, #29 // ror(lsl(x4, 3), 32)
+ fmov d16, x8
+ mov x4, #0
+ mov v19.d[0], xzr
+ mov v19.d[1], x7
+ b 2b
+
+ /* store new state */
+3: stp dga, dgb, [x2]
+ ret
+ENDPROC(sha2_ce_transform)
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
new file mode 100644
index 000000000000..c294e67d3925
--- /dev/null
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -0,0 +1,255 @@
+/*
+ * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state,
+ u8 *head, long bytes);
+
+static int sha224_init(struct shash_desc *desc)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ *sctx = (struct sha256_state){
+ .state = {
+ SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
+ SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
+ }
+ };
+ return 0;
+}
+
+static int sha256_init(struct shash_desc *desc)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ *sctx = (struct sha256_state){
+ .state = {
+ SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
+ SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
+ }
+ };
+ return 0;
+}
+
+static int sha2_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
+
+ sctx->count += len;
+
+ if ((partial + len) >= SHA256_BLOCK_SIZE) {
+ int blocks;
+
+ if (partial) {
+ int p = SHA256_BLOCK_SIZE - partial;
+
+ memcpy(sctx->buf + partial, data, p);
+ data += p;
+ len -= p;
+ }
+
+ blocks = len / SHA256_BLOCK_SIZE;
+ len %= SHA256_BLOCK_SIZE;
+
+ kernel_neon_begin_partial(28);
+ sha2_ce_transform(blocks, data, sctx->state,
+ partial ? sctx->buf : NULL, 0);
+ kernel_neon_end();
+
+ data += blocks * SHA256_BLOCK_SIZE;
+ partial = 0;
+ }
+ if (len)
+ memcpy(sctx->buf + partial, data, len);
+ return 0;
+}
+
+static void sha2_final(struct shash_desc *desc)
+{
+ static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
+
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ __be64 bits = cpu_to_be64(sctx->count << 3);
+ u32 padlen = SHA256_BLOCK_SIZE
+ - ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE);
+
+ sha2_update(desc, padding, padlen);
+ sha2_update(desc, (const u8 *)&bits, sizeof(bits));
+}
+
+static int sha224_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ __be32 *dst = (__be32 *)out;
+ int i;
+
+ sha2_final(desc);
+
+ for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha256_state){};
+ return 0;
+}
+
+static int sha256_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ __be32 *dst = (__be32 *)out;
+ int i;
+
+ sha2_final(desc);
+
+ for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha256_state){};
+ return 0;
+}
+
+static void sha2_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ int blocks;
+
+ if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) {
+ sha2_update(desc, data, len);
+ sha2_final(desc);
+ return;
+ }
+
+ /*
+ * Use a fast path if the input is a multiple of 64 bytes. In
+ * this case, there is no need to copy data around, and we can
+ * perform the entire digest calculation in a single invocation
+ * of sha2_ce_transform()
+ */
+ blocks = len / SHA256_BLOCK_SIZE;
+
+ kernel_neon_begin_partial(28);
+ sha2_ce_transform(blocks, data, sctx->state, NULL, len);
+ kernel_neon_end();
+ data += blocks * SHA256_BLOCK_SIZE;
+}
+
+static int sha224_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ __be32 *dst = (__be32 *)out;
+ int i;
+
+ sha2_finup(desc, data, len);
+
+ for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha256_state){};
+ return 0;
+}
+
+static int sha256_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ __be32 *dst = (__be32 *)out;
+ int i;
+
+ sha2_finup(desc, data, len);
+
+ for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
+ put_unaligned_be32(sctx->state[i], dst++);
+
+ *sctx = (struct sha256_state){};
+ return 0;
+}
+
+static int sha2_export(struct shash_desc *desc, void *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ struct sha256_state *dst = out;
+
+ *dst = *sctx;
+ return 0;
+}
+
+static int sha2_import(struct shash_desc *desc, const void *in)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ struct sha256_state const *src = in;
+
+ *sctx = *src;
+ return 0;
+}
+
+static struct shash_alg algs[] = { {
+ .init = sha224_init,
+ .update = sha2_update,
+ .final = sha224_final,
+ .finup = sha224_finup,
+ .export = sha2_export,
+ .import = sha2_import,
+ .descsize = sizeof(struct sha256_state),
+ .digestsize = SHA224_DIGEST_SIZE,
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha224",
+ .cra_driver_name = "sha224-ce",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+}, {
+ .init = sha256_init,
+ .update = sha2_update,
+ .final = sha256_final,
+ .finup = sha256_finup,
+ .export = sha2_export,
+ .import = sha2_import,
+ .descsize = sizeof(struct sha256_state),
+ .digestsize = SHA256_DIGEST_SIZE,
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha256",
+ .cra_driver_name = "sha256-ce",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+} };
+
+static int __init sha2_ce_mod_init(void)
+{
+ return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit sha2_ce_mod_fini(void)
+{
+ crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_cpu_feature_match(SHA2, sha2_ce_mod_init);
+module_exit(sha2_ce_mod_fini);
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 71c53ecfcc3a..bd36b3013f82 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -10,6 +10,7 @@ generic-y += delay.h
generic-y += div64.h
generic-y += dma.h
generic-y += emergency-restart.h
+generic-y += early_ioremap.h
generic-y += errno.h
generic-y += ftrace.h
generic-y += hw_irq.h
@@ -35,6 +36,7 @@ generic-y += segment.h
generic-y += sembuf.h
generic-y += serial.h
generic-y += shmbuf.h
+generic-y += simd.h
generic-y += sizes.h
generic-y += socket.h
generic-y += sockios.h
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 0237f0867e37..9c48d74652cb 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -162,7 +162,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
*/
#define ATOMIC64_INIT(i) { (i) }
-#define atomic64_read(v) (*(volatile long long *)&(v)->counter)
+#define atomic64_read(v) (*(volatile long *)&(v)->counter)
#define atomic64_set(v,i) (((v)->counter) = (i))
static inline void atomic64_add(u64 i, atomic64_t *v)
diff --git a/arch/arm64/include/asm/bL_switcher.h b/arch/arm64/include/asm/bL_switcher.h
new file mode 100644
index 000000000000..2bee500b7f54
--- /dev/null
+++ b/arch/arm64/include/asm/bL_switcher.h
@@ -0,0 +1,54 @@
+/*
+ * Based on the stubs for the ARM implementation which is:
+ *
+ * Created by: Nicolas Pitre, April 2012
+ * Copyright: (C) 2012-2013 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef ASM_BL_SWITCHER_H
+#define ASM_BL_SWITCHER_H
+
+#include <linux/notifier.h>
+#include <linux/types.h>
+
+typedef void (*bL_switch_completion_handler)(void *cookie);
+
+static inline int bL_switch_request(unsigned int cpu,
+ unsigned int new_cluster_id)
+{
+ return -ENOTSUPP;
+}
+
+/*
+ * Register here to be notified about runtime enabling/disabling of
+ * the switcher.
+ *
+ * The notifier chain is called with the switcher activation lock held:
+ * the switcher will not be enabled or disabled during callbacks.
+ * Callbacks must not call bL_switcher_{get,put}_enabled().
+ */
+#define BL_NOTIFY_PRE_ENABLE 0
+#define BL_NOTIFY_POST_ENABLE 1
+#define BL_NOTIFY_PRE_DISABLE 2
+#define BL_NOTIFY_POST_DISABLE 3
+
+static inline int bL_switcher_register_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline int bL_switcher_unregister_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline bool bL_switcher_get_enabled(void) { return false; }
+static inline void bL_switcher_put_enabled(void) { }
+static inline int bL_switcher_trace_trigger(void) { return 0; }
+static inline int bL_switcher_get_logical_index(u32 mpidr) { return -EUNATCH; }
+
+#endif
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 409ca370cfe2..71a42d6599fb 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -25,11 +25,12 @@
#define wfi() asm volatile("wfi" : : : "memory")
#define isb() asm volatile("isb" : : : "memory")
-#define dsb(opt) asm volatile("dsb sy" : : : "memory")
+#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
+#define dsb(opt) asm volatile("dsb " #opt : : : "memory")
-#define mb() dsb()
-#define rmb() asm volatile("dsb ld" : : : "memory")
-#define wmb() asm volatile("dsb st" : : : "memory")
+#define mb() dsb(sy)
+#define rmb() dsb(ld)
+#define wmb() dsb(st)
#ifndef CONFIG_SMP
#define smp_mb() barrier()
@@ -53,9 +54,9 @@ do { \
#else
-#define smp_mb() asm volatile("dmb ish" : : : "memory")
-#define smp_rmb() asm volatile("dmb ishld" : : : "memory")
-#define smp_wmb() asm volatile("dmb ishst" : : : "memory")
+#define smp_mb() dmb(ish)
+#define smp_rmb() dmb(ishld)
+#define smp_wmb() dmb(ishst)
#define smp_store_release(p, v) \
do { \
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 889324981aa4..f2defe1c380c 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -85,6 +85,13 @@ static inline void flush_cache_page(struct vm_area_struct *vma,
}
/*
+ * Cache maintenance functions used by the DMA API. No to be used directly.
+ */
+extern void __dma_map_area(const void *, size_t, int);
+extern void __dma_unmap_area(const void *, size_t, int);
+extern void __dma_flush_range(const void *, const void *);
+
+/*
* Copy user data from/to a page which is mapped into a different
* processes address space. Really, we want to allow our "user
* space" model to handle this.
@@ -116,7 +123,7 @@ extern void flush_dcache_page(struct page *);
static inline void __flush_icache_all(void)
{
asm("ic ialluis");
- dsb();
+ dsb(ish);
}
#define flush_dcache_mmap_lock(mapping) \
@@ -131,19 +138,10 @@ static inline void __flush_icache_all(void)
#define flush_icache_page(vma,page) do { } while (0)
/*
- * flush_cache_vmap() is used when creating mappings (eg, via vmap,
- * vmalloc, ioremap etc) in kernel space for pages. On non-VIPT
- * caches, since the direct-mappings of these pages may contain cached
- * data, we need to do a full cache flush to ensure that writebacks
- * don't corrupt data placed into these pages via the new mappings.
+ * Not required on AArch64 (PIPT or VIPT non-aliasing D-cache).
*/
static inline void flush_cache_vmap(unsigned long start, unsigned long end)
{
- /*
- * set_pte_at() called from vmap_pte_range() does not
- * have a DSB after cleaning the cache line.
- */
- dsb();
}
static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 57c0fa7bf711..ddb9d7830558 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -72,7 +72,12 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
}
#define xchg(ptr,x) \
- ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __ret = (__typeof__(*(ptr))) \
+ __xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \
+ __ret; \
+})
static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
unsigned long new, int size)
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index e72289a97367..56de5aadede2 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -228,7 +228,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
return (u32)(unsigned long)uptr;
}
-#define compat_user_stack_pointer() (current_pt_regs()->compat_sp)
+#define compat_user_stack_pointer() (user_stack_pointer(current_pt_regs()))
static inline void __user *arch_compat_alloc_user_space(long len)
{
@@ -305,11 +305,6 @@ static inline int is_compat_thread(struct thread_info *thread)
#else /* !CONFIG_COMPAT */
-static inline int is_compat_task(void)
-{
- return 0;
-}
-
static inline int is_compat_thread(struct thread_info *thread)
{
return 0;
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
new file mode 100644
index 000000000000..cd4ac0516488
--- /dev/null
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_CPUFEATURE_H
+#define __ASM_CPUFEATURE_H
+
+#include <asm/hwcap.h>
+
+/*
+ * In the arm64 world (as in the ARM world), elf_hwcap is used both internally
+ * in the kernel and for user space to keep track of which optional features
+ * are supported by the current system. So let's map feature 'x' to HWCAP_x.
+ * Note that HWCAP_x constants are bit fields so we need to take the log.
+ */
+
+#define MAX_CPU_FEATURES (8 * sizeof(elf_hwcap))
+#define cpu_feature(x) ilog2(HWCAP_ ## x)
+
+static inline bool cpu_have_feature(unsigned int num)
+{
+ return elf_hwcap & (1UL << num);
+}
+
+#endif
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index c404fb0df3a6..27f54a7cc81b 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -41,6 +41,7 @@
#define ARM_CPU_PART_AEM_V8 0xD0F0
#define ARM_CPU_PART_FOUNDATION 0xD000
+#define ARM_CPU_PART_CORTEX_A53 0xD030
#define ARM_CPU_PART_CORTEX_A57 0xD070
#define APM_CPU_PART_POTENZA 0x0000
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 62314791570c..7fb343779498 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -18,6 +18,15 @@
#ifdef __KERNEL__
+/* Low-level stepping controls. */
+#define DBG_MDSCR_SS (1 << 0)
+#define DBG_SPSR_SS (1 << 21)
+
+/* MDSCR_EL1 enabling bits */
+#define DBG_MDSCR_KDE (1 << 13)
+#define DBG_MDSCR_MDE (1 << 15)
+#define DBG_MDSCR_MASK ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE)
+
#define DBG_ESR_EVT(x) (((x) >> 27) & 0x7)
/* AArch64 */
@@ -26,10 +35,52 @@
#define DBG_ESR_EVT_HWWP 0x2
#define DBG_ESR_EVT_BRK 0x6
-enum debug_el {
- DBG_ACTIVE_EL0 = 0,
- DBG_ACTIVE_EL1,
-};
+/*
+ * Break point instruction encoding
+ */
+#define BREAK_INSTR_SIZE 4
+
+/*
+ * ESR values expected for dynamic and compile time BRK instruction
+ */
+#define DBG_ESR_VAL_BRK(x) (0xf2000000 | ((x) & 0xfffff))
+
+/*
+ * #imm16 values used for BRK instruction generation
+ * Allowed values for kgbd are 0x400 - 0x7ff
+ * 0x400: for dynamic BRK instruction
+ * 0x401: for compile time BRK instruction
+ */
+#define KGDB_DYN_DGB_BRK_IMM 0x400
+#define KDBG_COMPILED_DBG_BRK_IMM 0x401
+
+/*
+ * BRK instruction encoding
+ * The #imm16 value should be placed at bits[20:5] within BRK ins
+ */
+#define AARCH64_BREAK_MON 0xd4200000
+
+/*
+ * Extract byte from BRK instruction
+ */
+#define KGDB_DYN_DGB_BRK_INS_BYTE(x) \
+ ((((AARCH64_BREAK_MON) & 0xffe0001f) >> (x * 8)) & 0xff)
+
+/*
+ * Extract byte from BRK #imm16
+ */
+#define KGBD_DYN_DGB_BRK_IMM_BYTE(x) \
+ (((((KGDB_DYN_DGB_BRK_IMM) & 0xffff) << 5) >> (x * 8)) & 0xff)
+
+#define KGDB_DYN_DGB_BRK_BYTE(x) \
+ (KGDB_DYN_DGB_BRK_INS_BYTE(x) | KGBD_DYN_DGB_BRK_IMM_BYTE(x))
+
+#define KGDB_DYN_BRK_INS_BYTE0 KGDB_DYN_DGB_BRK_BYTE(0)
+#define KGDB_DYN_BRK_INS_BYTE1 KGDB_DYN_DGB_BRK_BYTE(1)
+#define KGDB_DYN_BRK_INS_BYTE2 KGDB_DYN_DGB_BRK_BYTE(2)
+#define KGDB_DYN_BRK_INS_BYTE3 KGDB_DYN_DGB_BRK_BYTE(3)
+
+#define CACHE_FLUSH_IS_SAFE 1
/* AArch32 */
#define DBG_ESR_EVT_BKPT 0x4
@@ -43,23 +94,6 @@ enum debug_el {
#ifndef __ASSEMBLY__
struct task_struct;
-#define local_dbg_save(flags) \
- do { \
- typecheck(unsigned long, flags); \
- asm volatile( \
- "mrs %0, daif // local_dbg_save\n" \
- "msr daifset, #8" \
- : "=r" (flags) : : "memory"); \
- } while (0)
-
-#define local_dbg_restore(flags) \
- do { \
- typecheck(unsigned long, flags); \
- asm volatile( \
- "msr daif, %0 // local_dbg_restore\n" \
- : : "r" (flags) : "memory"); \
- } while (0)
-
#define DBG_ARCH_ID_RESERVED 0 /* In case of ptrace ABI updates. */
#define DBG_HOOK_HANDLED 0
@@ -85,6 +119,11 @@ void unregister_break_hook(struct break_hook *hook);
u8 debug_monitors_arch(void);
+enum debug_el {
+ DBG_ACTIVE_EL0 = 0,
+ DBG_ACTIVE_EL1,
+};
+
void enable_debug_monitors(enum debug_el el);
void disable_debug_monitors(enum debug_el el);
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index 064d3525f260..dc82e52acdb3 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -28,6 +28,8 @@
#define DMA_ERROR_CODE (~(dma_addr_t)0)
extern struct dma_map_ops *dma_ops;
+extern struct dma_map_ops coherent_swiotlb_dma_ops;
+extern struct dma_map_ops noncoherent_swiotlb_dma_ops;
static inline struct dma_map_ops *__generic_dma_ops(struct device *dev)
{
@@ -45,6 +47,11 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
return __generic_dma_ops(dev);
}
+static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
+{
+ dev->archdata.dma_ops = ops;
+}
+
#include <asm-generic/dma-mapping-common.h>
static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
new file mode 100644
index 000000000000..5f7bfe6df723
--- /dev/null
+++ b/arch/arm64/include/asm/fixmap.h
@@ -0,0 +1,67 @@
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ * Copyright (C) 2013 Mark Salter <msalter@redhat.com>
+ *
+ * Adapted from arch/x86_64 version.
+ *
+ */
+
+#ifndef _ASM_ARM64_FIXMAP_H
+#define _ASM_ARM64_FIXMAP_H
+
+#ifndef __ASSEMBLY__
+#include <linux/kernel.h>
+#include <asm/page.h>
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process.
+ *
+ * These 'compile-time allocated' memory buffers are
+ * page-sized. Use set_fixmap(idx,phys) to associate
+ * physical memory with fixmap indices.
+ *
+ */
+enum fixed_addresses {
+ FIX_EARLYCON_MEM_BASE,
+ __end_of_permanent_fixed_addresses,
+
+ /*
+ * Temporary boot-time mappings, used by early_ioremap(),
+ * before ioremap() is functional.
+ */
+#ifdef CONFIG_ARM64_64K_PAGES
+#define NR_FIX_BTMAPS 4
+#else
+#define NR_FIX_BTMAPS 64
+#endif
+#define FIX_BTMAPS_SLOTS 7
+#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
+
+ FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
+ FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
+ __end_of_fixed_addresses
+};
+
+#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+
+#define FIXMAP_PAGE_IO __pgprot(PROT_DEVICE_nGnRE)
+
+extern void __early_set_fixmap(enum fixed_addresses idx,
+ phys_addr_t phys, pgprot_t flags);
+
+#define __set_fixmap __early_set_fixmap
+
+#include <asm-generic/fixmap.h>
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_ARM64_FIXMAP_H */
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index c43b4ac13008..50f559f574fe 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -37,8 +37,21 @@ struct fpsimd_state {
u32 fpcr;
};
};
+ /* the id of the last cpu to have restored this state */
+ unsigned int cpu;
};
+/*
+ * Struct for stacking the bottom 'n' FP/SIMD registers.
+ */
+struct fpsimd_partial_state {
+ u32 fpsr;
+ u32 fpcr;
+ u32 num_regs;
+ __uint128_t vregs[32];
+};
+
+
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/* Masks for extracting the FPSR and FPCR from the FPSCR */
#define VFP_FPSCR_STAT_MASK 0xf800009f
@@ -58,6 +71,16 @@ extern void fpsimd_load_state(struct fpsimd_state *state);
extern void fpsimd_thread_switch(struct task_struct *next);
extern void fpsimd_flush_thread(void);
+extern void fpsimd_preserve_current_state(void);
+extern void fpsimd_restore_current_state(void);
+extern void fpsimd_update_current_state(struct fpsimd_state *state);
+
+extern void fpsimd_flush_task_state(struct task_struct *target);
+
+extern void fpsimd_save_partial_state(struct fpsimd_partial_state *state,
+ u32 num_regs);
+extern void fpsimd_load_partial_state(struct fpsimd_partial_state *state);
+
#endif
#endif
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index bbec599c96bd..768414d55e64 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -62,3 +62,38 @@
ldr w\tmpnr, [\state, #16 * 2 + 4]
msr fpcr, x\tmpnr
.endm
+
+.altmacro
+.macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2
+ mrs x\tmpnr1, fpsr
+ str w\numnr, [\state, #8]
+ mrs x\tmpnr2, fpcr
+ stp w\tmpnr1, w\tmpnr2, [\state]
+ adr x\tmpnr1, 0f
+ add \state, \state, x\numnr, lsl #4
+ sub x\tmpnr1, x\tmpnr1, x\numnr, lsl #1
+ br x\tmpnr1
+ .irp qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
+ .irp qb, %(qa + 1)
+ stp q\qa, q\qb, [\state, # -16 * \qa - 16]
+ .endr
+ .endr
+0:
+.endm
+
+.macro fpsimd_restore_partial state, tmpnr1, tmpnr2
+ ldp w\tmpnr1, w\tmpnr2, [\state]
+ msr fpsr, x\tmpnr1
+ msr fpcr, x\tmpnr2
+ adr x\tmpnr1, 0f
+ ldr w\tmpnr2, [\state, #8]
+ add \state, \state, x\tmpnr2, lsl #4
+ sub x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1
+ br x\tmpnr1
+ .irp qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0
+ .irp qb, %(qa + 1)
+ ldp q\qa, q\qb, [\state, # -16 * \qa - 16]
+ .endr
+ .endr
+0:
+.endm
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
new file mode 100644
index 000000000000..c5534facf941
--- /dev/null
+++ b/arch/arm64/include/asm/ftrace.h
@@ -0,0 +1,59 @@
+/*
+ * arch/arm64/include/asm/ftrace.h
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_FTRACE_H
+#define __ASM_FTRACE_H
+
+#include <asm/insn.h>
+
+#define MCOUNT_ADDR ((unsigned long)_mcount)
+#define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE
+
+#ifndef __ASSEMBLY__
+#include <linux/compat.h>
+
+extern void _mcount(unsigned long);
+extern void *return_address(unsigned int);
+
+struct dyn_arch_ftrace {
+ /* No extra data needed for arm64 */
+};
+
+extern unsigned long ftrace_graph_call;
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+ /*
+ * addr is the address of the mcount call instruction.
+ * recordmcount does the necessary offset calculation.
+ */
+ return addr;
+}
+
+#define ftrace_return_address(n) return_address(n)
+
+/*
+ * Because AArch32 mode does not share the same syscall table with AArch64,
+ * tracing compat syscalls may result in reporting bogus syscalls or even
+ * hang-up, so just do not trace them.
+ * See kernel/trace/trace_syscalls.c
+ *
+ * x86 code says:
+ * If the user realy wants these, then they should use the
+ * raw syscall tracepoints with filtering.
+ */
+#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
+static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
+{
+ return is_compat_task();
+}
+#endif /* ifndef __ASSEMBLY__ */
+
+#endif /* __ASM_FTRACE_H */
diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
index ae4801d77514..0be67821f9ce 100644
--- a/arch/arm64/include/asm/hardirq.h
+++ b/arch/arm64/include/asm/hardirq.h
@@ -20,7 +20,7 @@
#include <linux/threads.h>
#include <asm/irq.h>
-#define NR_IPI 5
+#define NR_IPI 6
typedef struct {
unsigned int __softirq_pending;
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 6cddbb0c9f54..024c46183c3c 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -32,6 +32,12 @@
#define COMPAT_HWCAP_IDIV (COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT)
#define COMPAT_HWCAP_EVTSTRM (1 << 21)
+#define COMPAT_HWCAP2_AES (1 << 0)
+#define COMPAT_HWCAP2_PMULL (1 << 1)
+#define COMPAT_HWCAP2_SHA1 (1 << 2)
+#define COMPAT_HWCAP2_SHA2 (1 << 3)
+#define COMPAT_HWCAP2_CRC32 (1 << 4)
+
#ifndef __ASSEMBLY__
/*
* This yields a mask that user programs can use to figure out what
@@ -41,7 +47,8 @@
#ifdef CONFIG_COMPAT
#define COMPAT_ELF_HWCAP (compat_elf_hwcap)
-extern unsigned int compat_elf_hwcap;
+#define COMPAT_ELF_HWCAP2 (compat_elf_hwcap2)
+extern unsigned int compat_elf_hwcap, compat_elf_hwcap2;
#endif
extern unsigned long elf_hwcap;
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index c44ad39ed310..62e7b8bcd2dc 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -16,11 +16,14 @@
*/
#ifndef __ASM_INSN_H
#define __ASM_INSN_H
+
#include <linux/types.h>
/* A64 instructions are always 32 bits. */
#define AARCH64_INSN_SIZE 4
+#ifndef __ASSEMBLY__
+
/*
* ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a
* Section C3.1 "A64 instruction index by encoding":
@@ -105,4 +108,6 @@ int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt);
int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
+#endif /* __ASSEMBLY__ */
+
#endif /* __ASM_INSN_H */
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 4cc813eddacb..6d5e328575d4 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -27,6 +27,7 @@
#include <asm/byteorder.h>
#include <asm/barrier.h>
#include <asm/pgtable.h>
+#include <asm/early_ioremap.h>
#include <xen/xen.h>
@@ -229,19 +230,11 @@ extern void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot
extern void __iounmap(volatile void __iomem *addr);
extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
-#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_DIRTY)
-#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
-
#define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
#define ioremap_nocache(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
#define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC))
#define iounmap __iounmap
-#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF)
-#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PTE_PXN | PTE_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
-
#define ARCH_HAS_IOREMAP_WC
#include <asm-generic/iomap.h>
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index b2fcfbc51ecc..11cc941bd107 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -90,5 +90,28 @@ static inline int arch_irqs_disabled_flags(unsigned long flags)
return flags & PSR_I_BIT;
}
+/*
+ * save and restore debug state
+ */
+#define local_dbg_save(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ asm volatile( \
+ "mrs %0, daif // local_dbg_save\n" \
+ "msr daifset, #8" \
+ : "=r" (flags) : : "memory"); \
+ } while (0)
+
+#define local_dbg_restore(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ asm volatile( \
+ "msr daif, %0 // local_dbg_restore\n" \
+ : : "r" (flags) : "memory"); \
+ } while (0)
+
+#define local_dbg_enable() asm("msr daifclr, #8" : : : "memory")
+#define local_dbg_disable() asm("msr daifset, #8" : : : "memory")
+
#endif
#endif
diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h
new file mode 100644
index 000000000000..3c8aafc1082f
--- /dev/null
+++ b/arch/arm64/include/asm/kgdb.h
@@ -0,0 +1,84 @@
+/*
+ * AArch64 KGDB support
+ *
+ * Based on arch/arm/include/kgdb.h
+ *
+ * Copyright (C) 2013 Cavium Inc.
+ * Author: Vijaya Kumar K <vijaya.kumar@caviumnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_KGDB_H
+#define __ARM_KGDB_H
+
+#include <linux/ptrace.h>
+#include <asm/debug-monitors.h>
+
+#ifndef __ASSEMBLY__
+
+static inline void arch_kgdb_breakpoint(void)
+{
+ asm ("brk %0" : : "I" (KDBG_COMPILED_DBG_BRK_IMM));
+}
+
+extern void kgdb_handle_bus_error(void);
+extern int kgdb_fault_expected;
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * gdb is expecting the following registers layout.
+ *
+ * General purpose regs:
+ * r0-r30: 64 bit
+ * sp,pc : 64 bit
+ * pstate : 64 bit
+ * Total: 34
+ * FPU regs:
+ * f0-f31: 128 bit
+ * Total: 32
+ * Extra regs
+ * fpsr & fpcr: 32 bit
+ * Total: 2
+ *
+ */
+
+#define _GP_REGS 34
+#define _FP_REGS 32
+#define _EXTRA_REGS 2
+/*
+ * general purpose registers size in bytes.
+ * pstate is only 4 bytes. subtract 4 bytes
+ */
+#define GP_REG_BYTES (_GP_REGS * 8)
+#define DBG_MAX_REG_NUM (_GP_REGS + _FP_REGS + _EXTRA_REGS)
+
+/*
+ * Size of I/O buffer for gdb packet.
+ * considering to hold all register contents, size is set
+ */
+
+#define BUFMAX 2048
+
+/*
+ * Number of bytes required for gdb_regs buffer.
+ * _GP_REGS: 8 bytes, _FP_REGS: 16 bytes and _EXTRA_REGS: 4 bytes each
+ * GDB fails to connect for size beyond this with error
+ * "'g' packet reply is too long"
+ */
+
+#define NUMREGBYTES ((_GP_REGS * 8) + (_FP_REGS * 16) + \
+ (_EXTRA_REGS * 4))
+
+#endif /* __ASM_KGDB_H */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 0eb398655378..7fd3e27e3ccc 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -62,6 +62,7 @@
* RW: 64bit by default, can be overriden for 32bit VMs
* TAC: Trap ACTLR
* TSC: Trap SMC
+ * TVM: Trap VM ops (until M+C set in SCTLR_EL1)
* TSW: Trap cache operations by set/way
* TWE: Trap WFE
* TWI: Trap WFI
@@ -74,10 +75,11 @@
* SWIO: Turn set/way invalidates into set/way clean+invalidate
*/
#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
- HCR_BSU_IS | HCR_FB | HCR_TAC | \
- HCR_AMO | HCR_IMO | HCR_FMO | \
- HCR_SWIO | HCR_TIDCP | HCR_RW)
+ HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
+ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW)
#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
+#define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO)
+
/* Hyp System Control Register (SCTLR_EL2) bits */
#define SCTLR_EL2_EE (1 << 25)
@@ -106,7 +108,6 @@
/* VTCR_EL2 Registers bits */
#define VTCR_EL2_PS_MASK (7 << 16)
-#define VTCR_EL2_PS_40B (2 << 16)
#define VTCR_EL2_TG0_MASK (1 << 14)
#define VTCR_EL2_TG0_4K (0 << 14)
#define VTCR_EL2_TG0_64K (1 << 14)
@@ -121,6 +122,17 @@
#define VTCR_EL2_T0SZ_MASK 0x3f
#define VTCR_EL2_T0SZ_40B 24
+/*
+ * We configure the Stage-2 page tables to always restrict the IPA space to be
+ * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
+ * not known to exist and will break with this configuration.
+ *
+ * Note that when using 4K pages, we concatenate two first level page tables
+ * together.
+ *
+ * The magic numbers used for VTTBR_X in this patch can be found in Tables
+ * D4-23 and D4-25 in ARM DDI 0487A.b.
+ */
#ifdef CONFIG_ARM64_64K_PAGES
/*
* Stage2 translation configuration:
@@ -129,10 +141,9 @@
* 64kB pages (TG0 = 1)
* 2 level page tables (SL = 1)
*/
-#define VTCR_EL2_FLAGS (VTCR_EL2_PS_40B | VTCR_EL2_TG0_64K | \
- VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
- VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \
- VTCR_EL2_T0SZ_40B)
+#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \
+ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
+ VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
#define VTTBR_X (38 - VTCR_EL2_T0SZ_40B)
#else
/*
@@ -142,15 +153,14 @@
* 4kB pages (TG0 = 0)
* 3 level page tables (SL = 1)
*/
-#define VTCR_EL2_FLAGS (VTCR_EL2_PS_40B | VTCR_EL2_TG0_4K | \
- VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
- VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \
- VTCR_EL2_T0SZ_40B)
+#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \
+ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
+ VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
#define VTTBR_X (37 - VTCR_EL2_T0SZ_40B)
#endif
#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK (((1LLU << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
#define VTTBR_VMID_SHIFT (48LLU)
#define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index b25763bc0ec4..483842180f8f 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -18,6 +18,8 @@
#ifndef __ARM_KVM_ASM_H__
#define __ARM_KVM_ASM_H__
+#include <asm/virt.h>
+
/*
* 0 is reserved as an invalid value.
* Order *must* be kept in sync with the hyp switch code.
@@ -43,14 +45,25 @@
#define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */
#define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */
#define PAR_EL1 21 /* Physical Address Register */
+#define MDSCR_EL1 22 /* Monitor Debug System Control Register */
+#define DBGBCR0_EL1 23 /* Debug Breakpoint Control Registers (0-15) */
+#define DBGBCR15_EL1 38
+#define DBGBVR0_EL1 39 /* Debug Breakpoint Value Registers (0-15) */
+#define DBGBVR15_EL1 54
+#define DBGWCR0_EL1 55 /* Debug Watchpoint Control Registers (0-15) */
+#define DBGWCR15_EL1 70
+#define DBGWVR0_EL1 71 /* Debug Watchpoint Value Registers (0-15) */
+#define DBGWVR15_EL1 86
+#define MDCCINT_EL1 87 /* Monitor Debug Comms Channel Interrupt Enable Reg */
+
/* 32bit specific registers. Keep them at the end of the range */
-#define DACR32_EL2 22 /* Domain Access Control Register */
-#define IFSR32_EL2 23 /* Instruction Fault Status Register */
-#define FPEXC32_EL2 24 /* Floating-Point Exception Control Register */
-#define DBGVCR32_EL2 25 /* Debug Vector Catch Register */
-#define TEECR32_EL1 26 /* ThumbEE Configuration Register */
-#define TEEHBR32_EL1 27 /* ThumbEE Handler Base Register */
-#define NR_SYS_REGS 28
+#define DACR32_EL2 88 /* Domain Access Control Register */
+#define IFSR32_EL2 89 /* Instruction Fault Status Register */
+#define FPEXC32_EL2 90 /* Floating-Point Exception Control Register */
+#define DBGVCR32_EL2 91 /* Debug Vector Catch Register */
+#define TEECR32_EL1 92 /* ThumbEE Configuration Register */
+#define TEEHBR32_EL1 93 /* ThumbEE Handler Base Register */
+#define NR_SYS_REGS 94
/* 32bit mapping */
#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */
@@ -79,13 +92,26 @@
#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */
#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */
#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */
-#define c10_AMAIR (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */
+#define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */
+#define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
-#define NR_CP15_REGS (NR_SYS_REGS * 2)
+
+#define cp14_DBGDSCRext (MDSCR_EL1 * 2)
+#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2)
+#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2)
+#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1)
+#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2)
+#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2)
+#define cp14_DBGDCCINT (MDCCINT_EL1 * 2)
+
+#define NR_COPRO_REGS (NR_SYS_REGS * 2)
#define ARM_EXCEPTION_IRQ 0
#define ARM_EXCEPTION_TRAP 1
+#define KVM_ARM64_DEBUG_DIRTY_SHIFT 0
+#define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
+
#ifndef __ASSEMBLY__
struct kvm;
struct kvm_vcpu;
@@ -95,13 +121,21 @@ extern char __kvm_hyp_init_end[];
extern char __kvm_hyp_vector[];
-extern char __kvm_hyp_code_start[];
-extern char __kvm_hyp_code_end[];
+#define __kvm_hyp_code_start __hyp_text_start
+#define __kvm_hyp_code_end __hyp_text_end
extern void __kvm_flush_vm_context(void);
extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+
+extern u64 __vgic_v3_get_ich_vtr_el2(void);
+
+extern char __save_vgic_v2_state[];
+extern char __restore_vgic_v2_state[];
+extern char __save_vgic_v3_state[];
+extern char __restore_vgic_v3_state[];
+
#endif
#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h
index 9a59301cd014..0b52377a6c11 100644
--- a/arch/arm64/include/asm/kvm_coproc.h
+++ b/arch/arm64/include/asm/kvm_coproc.h
@@ -39,7 +39,8 @@ void kvm_register_target_sys_reg_table(unsigned int target,
struct kvm_sys_reg_target_table *table);
int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run);
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index dd8ecfc3f995..5674a55b5518 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -174,6 +174,11 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu)
static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
{
+ return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
+{
return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE;
}
@@ -213,6 +218,17 @@ static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
default:
return be64_to_cpu(data);
}
+ } else {
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return le16_to_cpu(data & 0xffff);
+ case 4:
+ return le32_to_cpu(data & 0xffffffff);
+ default:
+ return le64_to_cpu(data);
+ }
}
return data; /* Leave LE untouched */
@@ -233,6 +249,17 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
default:
return cpu_to_be64(data);
}
+ } else {
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return cpu_to_le16(data & 0xffff);
+ case 4:
+ return cpu_to_le32(data & 0xffffffff);
+ default:
+ return cpu_to_le64(data);
+ }
}
return data; /* Leave LE untouched */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 0a1d69751562..bcde41905746 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -22,6 +22,8 @@
#ifndef __ARM64_KVM_HOST_H__
#define __ARM64_KVM_HOST_H__
+#include <linux/types.h>
+#include <linux/kvm_types.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmio.h>
@@ -39,10 +41,9 @@
#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>
-#define KVM_VCPU_MAX_FEATURES 2
+#define KVM_VCPU_MAX_FEATURES 3
-struct kvm_vcpu;
-int kvm_target_cpu(void);
+int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
int kvm_arch_dev_ioctl_check_extension(long ext);
@@ -86,7 +87,7 @@ struct kvm_cpu_context {
struct kvm_regs gp_regs;
union {
u64 sys_regs[NR_SYS_REGS];
- u32 cp15[NR_CP15_REGS];
+ u32 copro[NR_COPRO_REGS];
};
};
@@ -101,6 +102,9 @@ struct kvm_vcpu_arch {
/* Exception Information */
struct kvm_vcpu_fault_info fault;
+ /* Debug state */
+ u64 debug_flags;
+
/* Pointer to host CPU context */
kvm_cpu_context_t *host_cpu_context;
@@ -138,7 +142,20 @@ struct kvm_vcpu_arch {
#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs)
#define vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)])
-#define vcpu_cp15(v,r) ((v)->arch.ctxt.cp15[(r)])
+/*
+ * CP14 and CP15 live in the same array, as they are backed by the
+ * same system registers.
+ */
+#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r)])
+#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)])
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r))
+#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r) + 1)
+#else
+#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r) + 1)
+#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r))
+#endif
struct kvm_vm_stat {
u32 remote_tlb_flush;
@@ -148,18 +165,15 @@ struct kvm_vcpu_stat {
u32 halt_wakeup;
};
-struct kvm_vcpu_init;
int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_init *init);
int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
-struct kvm_one_reg;
int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
#define KVM_ARCH_WANT_MMU_NOTIFIER
-struct kvm;
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end);
@@ -177,7 +191,7 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
}
struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
-struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
+struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
u64 kvm_call_hyp(void *hypfn, ...);
@@ -200,4 +214,38 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
hyp_stack_ptr, vector_ptr);
}
+struct vgic_sr_vectors {
+ void *save_vgic;
+ void *restore_vgic;
+};
+
+static inline void vgic_arch_setup(const struct vgic_params *vgic)
+{
+ extern struct vgic_sr_vectors __vgic_sr_vectors;
+
+ switch(vgic->type)
+ {
+ case VGIC_V2:
+ __vgic_sr_vectors.save_vgic = __save_vgic_v2_state;
+ __vgic_sr_vectors.restore_vgic = __restore_vgic_v2_state;
+ break;
+
+#ifdef CONFIG_ARM_GIC_V3
+ case VGIC_V3:
+ __vgic_sr_vectors.save_vgic = __save_vgic_v3_state;
+ __vgic_sr_vectors.restore_vgic = __restore_vgic_v3_state;
+ break;
+#endif
+
+ default:
+ BUG();
+ }
+}
+
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 7f1f9408ff66..a030d163840b 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -59,10 +59,9 @@
#define KERN_TO_HYP(kva) ((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET)
/*
- * Align KVM with the kernel's view of physical memory. Should be
- * 40bit IPA, with PGD being 8kB aligned in the 4KB page configuration.
+ * We currently only support a 40bit IPA.
*/
-#define KVM_PHYS_SHIFT PHYS_MASK_SHIFT
+#define KVM_PHYS_SHIFT (40)
#define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT)
#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL)
@@ -93,20 +92,6 @@ void kvm_clear_hyp_idmap(void);
#define kvm_set_pte(ptep, pte) set_pte(ptep, pte)
#define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd)
-static inline bool kvm_is_write_fault(unsigned long esr)
-{
- unsigned long esr_ec = esr >> ESR_EL2_EC_SHIFT;
-
- if (esr_ec == ESR_EL2_EC_IABT)
- return false;
-
- if ((esr & ESR_EL2_ISV) && !(esr & ESR_EL2_WNR))
- return false;
-
- return true;
-}
-
-static inline void kvm_clean_dcache_area(void *addr, size_t size) {}
static inline void kvm_clean_pgd(pgd_t *pgd) {}
static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
static inline void kvm_clean_pte(pte_t *pte) {}
@@ -122,11 +107,40 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
pmd_val(*pmd) |= PMD_S2_RDWR;
}
+#define kvm_pgd_addr_end(addr, end) pgd_addr_end(addr, end)
+#define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end)
+#define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end)
+
+static inline bool kvm_page_empty(void *ptr)
+{
+ struct page *ptr_page = virt_to_page(ptr);
+ return page_count(ptr_page) == 1;
+}
+
+#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
+#ifndef CONFIG_ARM64_64K_PAGES
+#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#else
+#define kvm_pmd_table_empty(pmdp) (0)
+#endif
+#define kvm_pud_table_empty(pudp) (0)
+
+
struct kvm;
-static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
- unsigned long size)
+#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l))
+
+static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
+{
+ return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
+}
+
+static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
+ unsigned long size)
{
+ if (!vcpu_has_cache_enabled(vcpu))
+ kvm_flush_dcache_to_poc((void *)hva, size);
+
if (!icache_is_aliasing()) { /* PIPT */
flush_icache_range(hva, hva + size);
} else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */
@@ -135,8 +149,9 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
}
}
-#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l))
#define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x))
+void stage2_flush_vm(struct kvm *kvm);
+
#endif /* __ASSEMBLY__ */
#endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h
index e301a4816355..bc39e557c56c 100644
--- a/arch/arm64/include/asm/kvm_psci.h
+++ b/arch/arm64/include/asm/kvm_psci.h
@@ -18,6 +18,10 @@
#ifndef __ARM64_KVM_PSCI_H__
#define __ARM64_KVM_PSCI_H__
-bool kvm_psci_call(struct kvm_vcpu *vcpu);
+#define KVM_ARM_PSCI_0_1 1
+#define KVM_ARM_PSCI_0_2 2
+
+int kvm_psci_version(struct kvm_vcpu *vcpu);
+int kvm_psci_call(struct kvm_vcpu *vcpu);
#endif /* __ARM64_KVM_PSCI_H__ */
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 11ad59b856c6..902eb708804a 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -49,7 +49,7 @@
#define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1))
#define MODULES_END (PAGE_OFFSET)
#define MODULES_VADDR (MODULES_END - SZ_64M)
-#define EARLYCON_IOBASE (MODULES_VADDR - SZ_4M)
+#define FIXADDR_TOP (MODULES_VADDR - SZ_2M - PAGE_SIZE)
#define TASK_SIZE_64 (UL(1) << VA_BITS)
#ifdef CONFIG_COMPAT
@@ -140,6 +140,7 @@ static inline void *phys_to_virt(phys_addr_t x)
#define __pa(x) __virt_to_phys((unsigned long)(x))
#define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x)))
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
+#define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys(x))
/*
* virt_to_page(k) convert a _valid_ virtual address to struct page *
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 2494fc01896a..c2f006c48bdb 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -22,10 +22,16 @@ typedef struct {
void *vdso;
} mm_context_t;
+#define INIT_MM_CONTEXT(name) \
+ .context.id_lock = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock),
+
#define ASID(mm) ((mm)->context.id & 0xffff)
extern void paging_init(void);
extern void setup_mm_for_reboot(void);
extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
+extern void init_mem_pgprot(void);
+/* create an identity mapping for memory (or io if map_io is true) */
+extern void create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io);
#endif
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
index b0cc58a97780..13ce4cc18e26 100644
--- a/arch/arm64/include/asm/neon.h
+++ b/arch/arm64/include/asm/neon.h
@@ -8,7 +8,11 @@
* published by the Free Software Foundation.
*/
+#include <linux/types.h>
+
#define cpu_has_neon() (1)
-void kernel_neon_begin(void);
+#define kernel_neon_begin() kernel_neon_begin_partial(32)
+
+void kernel_neon_begin_partial(u32 num_regs);
void kernel_neon_end(void);
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 46bf66628b6a..a6331e6a92b5 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -31,6 +31,15 @@
/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */
#define __HAVE_ARCH_GATE_AREA 1
+/*
+ * The idmap and swapper page tables need some space reserved in the kernel
+ * image. The idmap only requires a pgd and a next level table to (section) map
+ * the kernel, while the swapper also maps the FDT and requires an additional
+ * table to map an early UART. See __create_page_tables for more information.
+ */
+#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE)
+#define IDMAP_DIR_SIZE (2 * PAGE_SIZE)
+
#ifndef __ASSEMBLY__
#ifdef CONFIG_ARM64_64K_PAGES
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index b1d2e26c3c88..f7af66b54cb2 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -100,9 +100,9 @@
#define PTE_HYP PTE_USER
/*
- * 40-bit physical address supported.
+ * Highest possible physical address supported.
*/
-#define PHYS_MASK_SHIFT (40)
+#define PHYS_MASK_SHIFT (48)
#define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1)
/*
@@ -122,7 +122,6 @@
#define TCR_SHARED ((UL(3) << 12) | (UL(3) << 28))
#define TCR_TG0_64K (UL(1) << 14)
#define TCR_TG1_64K (UL(1) << 30)
-#define TCR_IPS_40BIT (UL(2) << 32)
#define TCR_ASID16 (UL(1) << 36)
#define TCR_TBI0 (UL(1) << 37)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index acb17e0d5e24..33608f263ad0 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -52,66 +52,60 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
#endif
#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd))
-/*
- * The pgprot_* and protection_map entries will be fixed up at runtime to
- * include the cachable and bufferable bits based on memory policy, as well as
- * any architecture dependent bits like global/ASID and SMP shared mapping
- * bits.
- */
-#define _PAGE_DEFAULT PTE_TYPE_PAGE | PTE_AF
+#ifdef CONFIG_SMP
+#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+#else
+#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF)
+#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF)
+#endif
-extern pgprot_t pgprot_default;
+#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL))
-#define __pgprot_modify(prot,mask,bits) \
- __pgprot((pgprot_val(prot) & ~(mask)) | (bits))
+#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
+#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
-#define _MOD_PROT(p, b) __pgprot_modify(p, 0, b)
+#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
-#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_PXN | PTE_UXN)
-#define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
-#define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
-#define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_COPY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN)
-#define PAGE_READONLY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN)
-#define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
-#define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY | PTE_WRITE)
+#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
-#define PAGE_HYP _MOD_PROT(pgprot_default, PTE_HYP)
+#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP)
#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
-#define PAGE_S2 __pgprot_modify(pgprot_default, PTE_S2_MEMATTR_MASK, PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
+#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDWR | PTE_UXN)
-#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
-#define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
-#define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
-#define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define __PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
-#define __PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
-#define __PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
-
-#endif /* __ASSEMBLY__ */
-
-#define __P000 __PAGE_NONE
-#define __P001 __PAGE_READONLY
-#define __P010 __PAGE_COPY
-#define __P011 __PAGE_COPY
-#define __P100 __PAGE_READONLY_EXEC
-#define __P101 __PAGE_READONLY_EXEC
-#define __P110 __PAGE_COPY_EXEC
-#define __P111 __PAGE_COPY_EXEC
-
-#define __S000 __PAGE_NONE
-#define __S001 __PAGE_READONLY
-#define __S010 __PAGE_SHARED
-#define __S011 __PAGE_SHARED
-#define __S100 __PAGE_READONLY_EXEC
-#define __S101 __PAGE_READONLY_EXEC
-#define __S110 __PAGE_SHARED_EXEC
-#define __S111 __PAGE_SHARED_EXEC
+#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
+#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
+#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
+#define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
+#define PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
+#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
+#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
+#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_NG | PTE_PXN)
+
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY_EXEC
+#define __P101 PAGE_READONLY_EXEC
+#define __P110 PAGE_COPY_EXEC
+#define __P111 PAGE_COPY_EXEC
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_EXEC
+#define __S101 PAGE_READONLY_EXEC
+#define __S110 PAGE_SHARED_EXEC
+#define __S111 PAGE_SHARED_EXEC
-#ifndef __ASSEMBLY__
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
@@ -145,6 +139,8 @@ extern struct page *empty_zero_page;
#define pte_valid_user(pte) \
((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
+#define pte_valid_not_user(pte) \
+ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
static inline pte_t pte_wrprotect(pte_t pte)
{
@@ -191,6 +187,15 @@ static inline pte_t pte_mkspecial(pte_t pte)
static inline void set_pte(pte_t *ptep, pte_t pte)
{
*ptep = pte;
+
+ /*
+ * Only if the new pte is valid and kernel, otherwise TLB maintenance
+ * or update_mmu_cache() have the necessary barriers.
+ */
+ if (pte_valid_not_user(pte)) {
+ dsb(ishst);
+ isb();
+ }
}
extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
@@ -227,36 +232,36 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
#define __HAVE_ARCH_PTE_SPECIAL
-/*
- * Software PMD bits for THP
- */
+static inline pte_t pmd_pte(pmd_t pmd)
+{
+ return __pte(pmd_val(pmd));
+}
-#define PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55)
-#define PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 57)
+static inline pmd_t pte_pmd(pte_t pte)
+{
+ return __pmd(pte_val(pte));
+}
/*
* THP definitions.
*/
-#define pmd_young(pmd) (pmd_val(pmd) & PMD_SECT_AF)
-
-#define __HAVE_ARCH_PMD_WRITE
-#define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY))
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
-#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING)
+#define pmd_trans_splitting(pmd) pte_special(pmd_pte(pmd))
#endif
-#define PMD_BIT_FUNC(fn,op) \
-static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; }
+#define pmd_young(pmd) pte_young(pmd_pte(pmd))
+#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
+#define pmd_mksplitting(pmd) pte_pmd(pte_mkspecial(pmd_pte(pmd)))
+#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
+#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
+#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
+#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) &= ~PMD_TYPE_MASK))
-PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY);
-PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF);
-PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING);
-PMD_BIT_FUNC(mkwrite, &= ~PMD_SECT_RDONLY);
-PMD_BIT_FUNC(mkdirty, |= PMD_SECT_DIRTY);
-PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF);
-PMD_BIT_FUNC(mknotpresent, &= ~PMD_TYPE_MASK);
+#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write(pmd) pte_write(pmd_pte(pmd))
#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
@@ -266,15 +271,6 @@ PMD_BIT_FUNC(mknotpresent, &= ~PMD_TYPE_MASK);
#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
-static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
-{
- const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN |
- PMD_SECT_RDONLY | PMD_SECT_PROT_NONE |
- PMD_SECT_VALID;
- pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask);
- return pmd;
-}
-
#define set_pmd_at(mm, addr, pmdp, pmd) set_pmd(pmdp, pmd)
static inline int has_transparent_hugepage(void)
@@ -282,6 +278,9 @@ static inline int has_transparent_hugepage(void)
return 1;
}
+#define __pgprot_modify(prot,mask,bits) \
+ __pgprot((pgprot_val(prot) & ~(mask)) | (bits))
+
/*
* Mark the prot value as uncacheable and unbufferable.
*/
@@ -289,8 +288,6 @@ static inline int has_transparent_hugepage(void)
__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE) | PTE_PXN | PTE_UXN)
#define pgprot_writecombine(prot) \
__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
-#define pgprot_dmacoherent(prot) \
- __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
#define __HAVE_PHYS_MEM_ACCESS_PROT
struct file;
extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
@@ -310,7 +307,8 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
*pmdp = pmd;
- dsb();
+ dsb(ishst);
+ isb();
}
static inline void pmd_clear(pmd_t *pmdp)
@@ -340,7 +338,8 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
static inline void set_pud(pud_t *pudp, pud_t pud)
{
*pudp = pud;
- dsb();
+ dsb(ishst);
+ isb();
}
static inline void pud_clear(pud_t *pudp)
@@ -383,12 +382,14 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
return pte;
}
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+ return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
+}
+
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
-#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE)
-#define IDMAP_DIR_SIZE (2 * PAGE_SIZE)
-
/*
* Encode and decode a swap entry:
* bits 0-1: present (must be zero)
@@ -412,7 +413,7 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
/*
* Ensure that there are not more swap files than can be encoded in the kernel
- * the PTEs.
+ * PTEs.
*/
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 0e7fa4963735..a429b5940be2 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -68,6 +68,7 @@
/* Architecturally defined mapping between AArch32 and AArch64 registers */
#define compat_usr(x) regs[(x)]
+#define compat_fp regs[11]
#define compat_sp regs[13]
#define compat_lr regs[14]
#define compat_sp_hyp regs[15]
@@ -132,7 +133,12 @@ struct pt_regs {
(!((regs)->pstate & PSR_F_BIT))
#define user_stack_pointer(regs) \
- ((regs)->sp)
+ (!compat_user_mode(regs)) ? ((regs)->sp) : ((regs)->compat_sp)
+
+static inline unsigned long regs_return_value(struct pt_regs *regs)
+{
+ return regs->regs[0];
+}
/*
* Are the current registers suitable for user mode? (used to maintain
@@ -164,7 +170,7 @@ static inline int valid_user_regs(struct user_pt_regs *regs)
return 0;
}
-#define instruction_pointer(regs) (regs)->pc
+#define instruction_pointer(regs) ((unsigned long)(regs)->pc)
#ifdef CONFIG_SMP
extern unsigned long profile_pc(struct pt_regs *regs);
diff --git a/arch/arm64/include/asm/sigcontext.h b/arch/arm64/include/asm/sigcontext.h
deleted file mode 100644
index dca1094acc74..000000000000
--- a/arch/arm64/include/asm/sigcontext.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_SIGCONTEXT_H
-#define __ASM_SIGCONTEXT_H
-
-#include <uapi/asm/sigcontext.h>
-
-/*
- * Auxiliary context saved in the sigcontext.__reserved array. Not exported to
- * user space as it will change with the addition of new context. User space
- * should check the magic/size information.
- */
-struct aux_context {
- struct fpsimd_context fpsimd;
- /* additional context to be added before "end" */
- struct _aarch64_ctx end;
-};
-#endif
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index 70ba9d4ee978..383771eb0b87 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -18,6 +18,7 @@
#include <linux/err.h>
+extern const void *sys_call_table[];
static inline int syscall_get_nr(struct task_struct *task,
struct pt_regs *regs)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
new file mode 100644
index 000000000000..5c89df0acbcb
--- /dev/null
+++ b/arch/arm64/include/asm/sysreg.h
@@ -0,0 +1,60 @@
+/*
+ * Macros for accessing system registers with older binutils.
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ * Author: Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM_SYSREG_H
+#define __ASM_SYSREG_H
+
+#define sys_reg(op0, op1, crn, crm, op2) \
+ ((((op0)-2)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5))
+
+#ifdef __ASSEMBLY__
+
+ .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
+ .equ __reg_num_x\num, \num
+ .endr
+ .equ __reg_num_xzr, 31
+
+ .macro mrs_s, rt, sreg
+ .inst 0xd5300000|(\sreg)|(__reg_num_\rt)
+ .endm
+
+ .macro msr_s, sreg, rt
+ .inst 0xd5100000|(\sreg)|(__reg_num_\rt)
+ .endm
+
+#else
+
+asm(
+" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n"
+" .equ __reg_num_x\\num, \\num\n"
+" .endr\n"
+" .equ __reg_num_xzr, 31\n"
+"\n"
+" .macro mrs_s, rt, sreg\n"
+" .inst 0xd5300000|(\\sreg)|(__reg_num_\\rt)\n"
+" .endm\n"
+"\n"
+" .macro msr_s, sreg, rt\n"
+" .inst 0xd5100000|(\\sreg)|(__reg_num_\\rt)\n"
+" .endm\n"
+);
+
+#endif
+
+#endif /* __ASM_SYSREG_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 720e70b66ffd..8da65c37193d 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -91,6 +91,9 @@ static inline struct thread_info *current_thread_info(void)
/*
* thread information flags:
* TIF_SYSCALL_TRACE - syscall trace active
+ * TIF_SYSCALL_TRACEPOINT - syscall tracepoint for ftrace
+ * TIF_SYSCALL_AUDIT - syscall auditing
+ * TIF_SECOMP - syscall secure computing
* TIF_SIGPENDING - signal pending
* TIF_NEED_RESCHED - rescheduling necessary
* TIF_NOTIFY_RESUME - callback before returning to user
@@ -100,7 +103,12 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_SIGPENDING 0
#define TIF_NEED_RESCHED 1
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
+#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
+#define TIF_NOHZ 7
#define TIF_SYSCALL_TRACE 8
+#define TIF_SYSCALL_AUDIT 9
+#define TIF_SYSCALL_TRACEPOINT 10
+#define TIF_SECCOMP 11
#define TIF_POLLING_NRFLAG 16
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
#define TIF_FREEZE 19
@@ -112,10 +120,20 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
+#define _TIF_NOHZ (1 << TIF_NOHZ)
+#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
+#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
+#define _TIF_SECCOMP (1 << TIF_SECCOMP)
+#define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE)
#define _TIF_32BIT (1 << TIF_32BIT)
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
- _TIF_NOTIFY_RESUME)
+ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE)
+
+#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
+ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
+ _TIF_NOHZ)
#endif /* __KERNEL__ */
#endif /* __ASM_THREAD_INFO_H */
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 8b482035cfc2..3796ea6bb734 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -72,9 +72,9 @@ extern struct cpu_tlb_fns cpu_tlb;
*/
static inline void flush_tlb_all(void)
{
- dsb();
+ dsb(ishst);
asm("tlbi vmalle1is");
- dsb();
+ dsb(ish);
isb();
}
@@ -82,9 +82,9 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
{
unsigned long asid = (unsigned long)ASID(mm) << 48;
- dsb();
+ dsb(ishst);
asm("tlbi aside1is, %0" : : "r" (asid));
- dsb();
+ dsb(ish);
}
static inline void flush_tlb_page(struct vm_area_struct *vma,
@@ -93,16 +93,37 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
unsigned long addr = uaddr >> 12 |
((unsigned long)ASID(vma->vm_mm) << 48);
- dsb();
+ dsb(ishst);
asm("tlbi vae1is, %0" : : "r" (addr));
- dsb();
+ dsb(ish);
}
-/*
- * Convert calls to our calling convention.
- */
-#define flush_tlb_range(vma,start,end) __cpu_flush_user_tlb_range(start,end,vma)
-#define flush_tlb_kernel_range(s,e) __cpu_flush_kern_tlb_range(s,e)
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48;
+ unsigned long addr;
+ start = asid | (start >> 12);
+ end = asid | (end >> 12);
+
+ dsb(ishst);
+ for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
+ asm("tlbi vae1is, %0" : : "r"(addr));
+ dsb(ish);
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+ unsigned long addr;
+ start >>= 12;
+ end >>= 12;
+
+ dsb(ishst);
+ for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
+ asm("tlbi vaae1is, %0" : : "r"(addr));
+ dsb(ish);
+ isb();
+}
/*
* On AArch64, the cache coherency is handled via the set_pte_at() function.
@@ -111,10 +132,10 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
/*
- * set_pte() does not have a DSB, so make sure that the page table
- * write is visible.
+ * set_pte() does not have a DSB for user mappings, so make sure that
+ * the page table write is visible.
*/
- dsb();
+ dsb(ishst);
}
#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
new file mode 100644
index 000000000000..e0171b393a14
--- /dev/null
+++ b/arch/arm64/include/asm/topology.h
@@ -0,0 +1,70 @@
+#ifndef __ASM_TOPOLOGY_H
+#define __ASM_TOPOLOGY_H
+
+#ifdef CONFIG_SMP
+
+#include <linux/cpumask.h>
+
+struct cpu_topology {
+ int thread_id;
+ int core_id;
+ int cluster_id;
+ cpumask_t thread_sibling;
+ cpumask_t core_sibling;
+};
+
+extern struct cpu_topology cpu_topology[NR_CPUS];
+
+#define topology_physical_package_id(cpu) (cpu_topology[cpu].cluster_id)
+#define topology_core_id(cpu) (cpu_topology[cpu].core_id)
+#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling)
+#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling)
+
+#define mc_capable() (cpu_topology[0].cluster_id != -1)
+#define smt_capable() (cpu_topology[0].thread_id != -1)
+
+void init_cpu_topology(void);
+void store_cpu_topology(unsigned int cpuid);
+const struct cpumask *cpu_coregroup_mask(int cpu);
+
+#ifdef CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE
+/* Common values for CPUs */
+#ifndef SD_CPU_INIT
+#define SD_CPU_INIT (struct sched_domain) { \
+ .min_interval = 1, \
+ .max_interval = 4, \
+ .busy_factor = 64, \
+ .imbalance_pct = 125, \
+ .cache_nice_tries = 1, \
+ .busy_idx = 2, \
+ .idle_idx = 1, \
+ .newidle_idx = 0, \
+ .wake_idx = 0, \
+ .forkexec_idx = 0, \
+ \
+ .flags = 0*SD_LOAD_BALANCE \
+ | 1*SD_BALANCE_NEWIDLE \
+ | 1*SD_BALANCE_EXEC \
+ | 1*SD_BALANCE_FORK \
+ | 0*SD_BALANCE_WAKE \
+ | 1*SD_WAKE_AFFINE \
+ | 0*SD_SHARE_CPUPOWER \
+ | 0*SD_SHARE_PKG_RESOURCES \
+ | 0*SD_SERIALIZE \
+ , \
+ .last_balance = jiffies, \
+ .balance_interval = 1, \
+}
+#endif
+#endif /* CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE */
+
+#else
+
+static inline void init_cpu_topology(void) { }
+static inline void store_cpu_topology(unsigned int cpuid) { }
+
+#endif
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_ARM_TOPOLOGY_H */
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 82ce217e94cf..c335479c2638 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -28,3 +28,5 @@
#endif
#define __ARCH_WANT_SYS_CLONE
#include <uapi/asm/unistd.h>
+
+#define NR_syscalls (__NR_syscalls)
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 130e2be952cf..290fb6690b33 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -63,6 +63,10 @@ static inline bool is_hyp_mode_mismatched(void)
return __boot_cpu_mode[0] != __boot_cpu_mode[1];
}
+/* The section containing the hypervisor text */
+extern char __hyp_text_start[];
+extern char __hyp_text_end[];
+
#endif /* __ASSEMBLY__ */
#endif /* ! __ASM__VIRT_H */
diff --git a/arch/arm64/include/uapi/asm/Kbuild b/arch/arm64/include/uapi/asm/Kbuild
index e4b78bdca19e..942376d37d22 100644
--- a/arch/arm64/include/uapi/asm/Kbuild
+++ b/arch/arm64/include/uapi/asm/Kbuild
@@ -9,6 +9,7 @@ header-y += byteorder.h
header-y += fcntl.h
header-y += hwcap.h
header-y += kvm_para.h
+header-y += perf_regs.h
header-y += param.h
header-y += ptrace.h
header-y += setup.h
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index eaf54a30bedc..8e38878c87c6 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -31,11 +31,13 @@
#define KVM_NR_SPSR 5
#ifndef __ASSEMBLY__
+#include <linux/psci.h>
#include <asm/types.h>
#include <asm/ptrace.h>
#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_READONLY_MEM
#define KVM_REG_SIZE(id) \
(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
@@ -56,8 +58,9 @@ struct kvm_regs {
#define KVM_ARM_TARGET_FOUNDATION_V8 1
#define KVM_ARM_TARGET_CORTEX_A57 2
#define KVM_ARM_TARGET_XGENE_POTENZA 3
+#define KVM_ARM_TARGET_CORTEX_A53 4
-#define KVM_ARM_NUM_TARGETS 4
+#define KVM_ARM_NUM_TARGETS 5
/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
#define KVM_ARM_DEVICE_TYPE_SHIFT 0
@@ -77,6 +80,7 @@ struct kvm_regs {
#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */
+#define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */
struct kvm_vcpu_init {
__u32 target;
@@ -156,6 +160,7 @@ struct kvm_arch_memory_slot {
#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
/* KVM_IRQ_LINE irq field index values */
#define KVM_ARM_IRQ_TYPE_SHIFT 24
@@ -186,10 +191,10 @@ struct kvm_arch_memory_slot {
#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2)
#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3)
-#define KVM_PSCI_RET_SUCCESS 0
-#define KVM_PSCI_RET_NI ((unsigned long)-1)
-#define KVM_PSCI_RET_INVAL ((unsigned long)-2)
-#define KVM_PSCI_RET_DENIED ((unsigned long)-3)
+#define KVM_PSCI_RET_SUCCESS PSCI_RET_SUCCESS
+#define KVM_PSCI_RET_NI PSCI_RET_NOT_SUPPORTED
+#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS
+#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED
#endif
diff --git a/arch/arm64/include/uapi/asm/perf_regs.h b/arch/arm64/include/uapi/asm/perf_regs.h
new file mode 100644
index 000000000000..172b8317ee49
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/perf_regs.h
@@ -0,0 +1,40 @@
+#ifndef _ASM_ARM64_PERF_REGS_H
+#define _ASM_ARM64_PERF_REGS_H
+
+enum perf_event_arm_regs {
+ PERF_REG_ARM64_X0,
+ PERF_REG_ARM64_X1,
+ PERF_REG_ARM64_X2,
+ PERF_REG_ARM64_X3,
+ PERF_REG_ARM64_X4,
+ PERF_REG_ARM64_X5,
+ PERF_REG_ARM64_X6,
+ PERF_REG_ARM64_X7,
+ PERF_REG_ARM64_X8,
+ PERF_REG_ARM64_X9,
+ PERF_REG_ARM64_X10,
+ PERF_REG_ARM64_X11,
+ PERF_REG_ARM64_X12,
+ PERF_REG_ARM64_X13,
+ PERF_REG_ARM64_X14,
+ PERF_REG_ARM64_X15,
+ PERF_REG_ARM64_X16,
+ PERF_REG_ARM64_X17,
+ PERF_REG_ARM64_X18,
+ PERF_REG_ARM64_X19,
+ PERF_REG_ARM64_X20,
+ PERF_REG_ARM64_X21,
+ PERF_REG_ARM64_X22,
+ PERF_REG_ARM64_X23,
+ PERF_REG_ARM64_X24,
+ PERF_REG_ARM64_X25,
+ PERF_REG_ARM64_X26,
+ PERF_REG_ARM64_X27,
+ PERF_REG_ARM64_X28,
+ PERF_REG_ARM64_X29,
+ PERF_REG_ARM64_LR,
+ PERF_REG_ARM64_SP,
+ PERF_REG_ARM64_PC,
+ PERF_REG_ARM64_MAX,
+};
+#endif /* _ASM_ARM64_PERF_REGS_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 2d4554b13410..6ae85d55308b 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -5,21 +5,29 @@
CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET)
AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
+CFLAGS_REMOVE_ftrace.o = -pg
+CFLAGS_REMOVE_insn.o = -pg
+CFLAGS_REMOVE_return_address.o = -pg
+
# Object file lists.
arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \
entry-fpsimd.o process.o ptrace.o setup.o signal.o \
sys.o stacktrace.o time.o traps.o io.o vdso.o \
- hyp-stub.o psci.o cpu_ops.o insn.o
+ hyp-stub.o psci.o cpu_ops.o insn.o return_address.o
arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
sys_compat.o
+arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o
+arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o topology.o
arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o
+arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o
-arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o
+arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND) += sleep.o suspend.o
arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o
+arm64-obj-$(CONFIG_KGDB) += kgdb.o
obj-y += $(arm64-obj-y) vdso/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
index 338b568cd8ae..7f0512feaa13 100644
--- a/arch/arm64/kernel/arm64ksyms.c
+++ b/arch/arm64/kernel/arm64ksyms.c
@@ -56,3 +56,7 @@ EXPORT_SYMBOL(clear_bit);
EXPORT_SYMBOL(test_and_clear_bit);
EXPORT_SYMBOL(change_bit);
EXPORT_SYMBOL(test_and_change_bit);
+
+#ifdef CONFIG_FUNCTION_TRACER
+EXPORT_SYMBOL(_mcount);
+#endif
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 646f888387cd..9a9fce090d58 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -120,6 +120,7 @@ int main(void)
DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2));
DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2));
DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
+ DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags));
DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines));
DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context));
@@ -129,13 +130,24 @@ int main(void)
DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled));
DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu));
- DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr));
- DEFINE(VGIC_CPU_VMCR, offsetof(struct vgic_cpu, vgic_vmcr));
- DEFINE(VGIC_CPU_MISR, offsetof(struct vgic_cpu, vgic_misr));
- DEFINE(VGIC_CPU_EISR, offsetof(struct vgic_cpu, vgic_eisr));
- DEFINE(VGIC_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_elrsr));
- DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr));
- DEFINE(VGIC_CPU_LR, offsetof(struct vgic_cpu, vgic_lr));
+ DEFINE(VGIC_SAVE_FN, offsetof(struct vgic_sr_vectors, save_vgic));
+ DEFINE(VGIC_RESTORE_FN, offsetof(struct vgic_sr_vectors, restore_vgic));
+ DEFINE(VGIC_SR_VECTOR_SZ, sizeof(struct vgic_sr_vectors));
+ DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
+ DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
+ DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
+ DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
+ DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
+ DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
+ DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
+ DEFINE(VGIC_V3_CPU_HCR, offsetof(struct vgic_cpu, vgic_v3.vgic_hcr));
+ DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr));
+ DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr));
+ DEFINE(VGIC_V3_CPU_EISR, offsetof(struct vgic_cpu, vgic_v3.vgic_eisr));
+ DEFINE(VGIC_V3_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr));
+ DEFINE(VGIC_V3_CPU_AP0R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r));
+ DEFINE(VGIC_V3_CPU_AP1R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r));
+ DEFINE(VGIC_V3_CPU_LR, offsetof(struct vgic_cpu, vgic_v3.vgic_lr));
DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr));
DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr));
DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base));
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 636ba8b6240b..6c6a2e56a8e3 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -30,15 +30,6 @@
#include <asm/cputype.h>
#include <asm/system_misc.h>
-/* Low-level stepping controls. */
-#define DBG_MDSCR_SS (1 << 0)
-#define DBG_SPSR_SS (1 << 21)
-
-/* MDSCR_EL1 enabling bits */
-#define DBG_MDSCR_KDE (1 << 13)
-#define DBG_MDSCR_MDE (1 << 15)
-#define DBG_MDSCR_MASK ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE)
-
/* Determine debug architecture. */
u8 debug_monitors_arch(void)
{
@@ -138,6 +129,7 @@ static void clear_os_lock(void *unused)
{
asm volatile("msr oslar_el1, %0" : : "r" (0));
isb();
+ local_dbg_enable();
}
static int os_lock_notify(struct notifier_block *self,
@@ -314,9 +306,6 @@ static int brk_handler(unsigned long addr, unsigned int esr,
if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED)
return 0;
- pr_warn("unexpected brk exception at %lx, esr=0x%x\n",
- (long)instruction_pointer(regs), esr);
-
if (!user_mode(regs))
return -EFAULT;
diff --git a/arch/arm64/kernel/early_printk.c b/arch/arm64/kernel/early_printk.c
index fbb6e1843659..ffbbdde7aba1 100644
--- a/arch/arm64/kernel/early_printk.c
+++ b/arch/arm64/kernel/early_printk.c
@@ -26,6 +26,8 @@
#include <linux/amba/serial.h>
#include <linux/serial_reg.h>
+#include <asm/fixmap.h>
+
static void __iomem *early_base;
static void (*printch)(char ch);
@@ -141,8 +143,10 @@ static int __init setup_early_printk(char *buf)
}
/* no options parsing yet */
- if (paddr)
- early_base = early_io_map(paddr, EARLYCON_IOBASE);
+ if (paddr) {
+ set_fixmap_io(FIX_EARLYCON_MEM_BASE, paddr);
+ early_base = (void __iomem *)fix_to_virt(FIX_EARLYCON_MEM_BASE);
+ }
printch = match->printch;
early_console = &early_console_dev;
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 6a27cd6dbfa6..d358ccacfc00 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -41,3 +41,27 @@ ENTRY(fpsimd_load_state)
fpsimd_restore x0, 8
ret
ENDPROC(fpsimd_load_state)
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+/*
+ * Save the bottom n FP registers.
+ *
+ * x0 - pointer to struct fpsimd_partial_state
+ */
+ENTRY(fpsimd_save_partial_state)
+ fpsimd_save_partial x0, 1, 8, 9
+ ret
+ENDPROC(fpsimd_load_partial_state)
+
+/*
+ * Load the bottom n FP registers.
+ *
+ * x0 - pointer to struct fpsimd_partial_state
+ */
+ENTRY(fpsimd_load_partial_state)
+ fpsimd_restore_partial x0, 8, 9
+ ret
+ENDPROC(fpsimd_load_partial_state)
+
+#endif
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
new file mode 100644
index 000000000000..b051871f2965
--- /dev/null
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -0,0 +1,218 @@
+/*
+ * arch/arm64/kernel/entry-ftrace.S
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/ftrace.h>
+#include <asm/insn.h>
+
+/*
+ * Gcc with -pg will put the following code in the beginning of each function:
+ * mov x0, x30
+ * bl _mcount
+ * [function's body ...]
+ * "bl _mcount" may be replaced to "bl ftrace_caller" or NOP if dynamic
+ * ftrace is enabled.
+ *
+ * Please note that x0 as an argument will not be used here because we can
+ * get lr(x30) of instrumented function at any time by winding up call stack
+ * as long as the kernel is compiled without -fomit-frame-pointer.
+ * (or CONFIG_FRAME_POINTER, this is forced on arm64)
+ *
+ * stack layout after mcount_enter in _mcount():
+ *
+ * current sp/fp => 0:+-----+
+ * in _mcount() | x29 | -> instrumented function's fp
+ * +-----+
+ * | x30 | -> _mcount()'s lr (= instrumented function's pc)
+ * old sp => +16:+-----+
+ * when instrumented | |
+ * function calls | ... |
+ * _mcount() | |
+ * | |
+ * instrumented => +xx:+-----+
+ * function's fp | x29 | -> parent's fp
+ * +-----+
+ * | x30 | -> instrumented function's lr (= parent's pc)
+ * +-----+
+ * | ... |
+ */
+
+ .macro mcount_enter
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+ .endm
+
+ .macro mcount_exit
+ ldp x29, x30, [sp], #16
+ ret
+ .endm
+
+ .macro mcount_adjust_addr rd, rn
+ sub \rd, \rn, #AARCH64_INSN_SIZE
+ .endm
+
+ /* for instrumented function's parent */
+ .macro mcount_get_parent_fp reg
+ ldr \reg, [x29]
+ ldr \reg, [\reg]
+ .endm
+
+ /* for instrumented function */
+ .macro mcount_get_pc0 reg
+ mcount_adjust_addr \reg, x30
+ .endm
+
+ .macro mcount_get_pc reg
+ ldr \reg, [x29, #8]
+ mcount_adjust_addr \reg, \reg
+ .endm
+
+ .macro mcount_get_lr reg
+ ldr \reg, [x29]
+ ldr \reg, [\reg, #8]
+ mcount_adjust_addr \reg, \reg
+ .endm
+
+ .macro mcount_get_lr_addr reg
+ ldr \reg, [x29]
+ add \reg, \reg, #8
+ .endm
+
+#ifndef CONFIG_DYNAMIC_FTRACE
+/*
+ * void _mcount(unsigned long return_address)
+ * @return_address: return address to instrumented function
+ *
+ * This function makes calls, if enabled, to:
+ * - tracer function to probe instrumented function's entry,
+ * - ftrace_graph_caller to set up an exit hook
+ */
+ENTRY(_mcount)
+#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ ldr x0, =ftrace_trace_stop
+ ldr x0, [x0] // if ftrace_trace_stop
+ ret // return;
+#endif
+ mcount_enter
+
+ ldr x0, =ftrace_trace_function
+ ldr x2, [x0]
+ adr x0, ftrace_stub
+ cmp x0, x2 // if (ftrace_trace_function
+ b.eq skip_ftrace_call // != ftrace_stub) {
+
+ mcount_get_pc x0 // function's pc
+ mcount_get_lr x1 // function's lr (= parent's pc)
+ blr x2 // (*ftrace_trace_function)(pc, lr);
+
+#ifndef CONFIG_FUNCTION_GRAPH_TRACER
+skip_ftrace_call: // return;
+ mcount_exit // }
+#else
+ mcount_exit // return;
+ // }
+skip_ftrace_call:
+ ldr x1, =ftrace_graph_return
+ ldr x2, [x1] // if ((ftrace_graph_return
+ cmp x0, x2 // != ftrace_stub)
+ b.ne ftrace_graph_caller
+
+ ldr x1, =ftrace_graph_entry // || (ftrace_graph_entry
+ ldr x2, [x1] // != ftrace_graph_entry_stub))
+ ldr x0, =ftrace_graph_entry_stub
+ cmp x0, x2
+ b.ne ftrace_graph_caller // ftrace_graph_caller();
+
+ mcount_exit
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+ENDPROC(_mcount)
+
+#else /* CONFIG_DYNAMIC_FTRACE */
+/*
+ * _mcount() is used to build the kernel with -pg option, but all the branch
+ * instructions to _mcount() are replaced to NOP initially at kernel start up,
+ * and later on, NOP to branch to ftrace_caller() when enabled or branch to
+ * NOP when disabled per-function base.
+ */
+ENTRY(_mcount)
+ ret
+ENDPROC(_mcount)
+
+/*
+ * void ftrace_caller(unsigned long return_address)
+ * @return_address: return address to instrumented function
+ *
+ * This function is a counterpart of _mcount() in 'static' ftrace, and
+ * makes calls to:
+ * - tracer function to probe instrumented function's entry,
+ * - ftrace_graph_caller to set up an exit hook
+ */
+ENTRY(ftrace_caller)
+ mcount_enter
+
+ mcount_get_pc0 x0 // function's pc
+ mcount_get_lr x1 // function's lr
+
+ .global ftrace_call
+ftrace_call: // tracer(pc, lr);
+ nop // This will be replaced with "bl xxx"
+ // where xxx can be any kind of tracer.
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ .global ftrace_graph_call
+ftrace_graph_call: // ftrace_graph_caller();
+ nop // If enabled, this will be replaced
+ // "b ftrace_graph_caller"
+#endif
+
+ mcount_exit
+ENDPROC(ftrace_caller)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+ENTRY(ftrace_stub)
+ ret
+ENDPROC(ftrace_stub)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * void ftrace_graph_caller(void)
+ *
+ * Called from _mcount() or ftrace_caller() when function_graph tracer is
+ * selected.
+ * This function w/ prepare_ftrace_return() fakes link register's value on
+ * the call stack in order to intercept instrumented function's return path
+ * and run return_to_handler() later on its exit.
+ */
+ENTRY(ftrace_graph_caller)
+ mcount_get_lr_addr x0 // pointer to function's saved lr
+ mcount_get_pc x1 // function's pc
+ mcount_get_parent_fp x2 // parent's fp
+ bl prepare_ftrace_return // prepare_ftrace_return(&lr, pc, fp)
+
+ mcount_exit
+ENDPROC(ftrace_graph_caller)
+
+/*
+ * void return_to_handler(void)
+ *
+ * Run ftrace_return_to_handler() before going back to parent.
+ * @fp is checked against the value passed by ftrace_graph_caller()
+ * only when CONFIG_FUNCTION_GRAPH_FP_TEST is enabled.
+ */
+ENTRY(return_to_handler)
+ str x0, [sp, #-16]!
+ mov x0, x29 // parent's fp
+ bl ftrace_return_to_handler// addr = ftrace_return_to_hander(fp);
+ mov x30, x0 // restore the original return address
+ ldr x0, [sp], #16
+ ret
+END(return_to_handler)
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index a8e4bdbbb4b8..a010e3af4597 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -30,6 +30,32 @@
#include <asm/unistd32.h>
/*
+ * Context tracking subsystem. Used to instrument transitions
+ * between user and kernel mode.
+ */
+ .macro ct_user_exit, syscall = 0
+#ifdef CONFIG_CONTEXT_TRACKING
+ bl context_tracking_user_exit
+ .if \syscall == 1
+ /*
+ * Save/restore needed during syscalls. Restore syscall arguments from
+ * the values already saved on stack during kernel_entry.
+ */
+ ldp x0, x1, [sp]
+ ldp x2, x3, [sp, #S_X2]
+ ldp x4, x5, [sp, #S_X4]
+ ldp x6, x7, [sp, #S_X6]
+ .endif
+#endif
+ .endm
+
+ .macro ct_user_enter
+#ifdef CONFIG_CONTEXT_TRACKING
+ bl context_tracking_user_enter
+#endif
+ .endm
+
+/*
* Bad Abort numbers
*-----------------
*/
@@ -88,6 +114,7 @@
.macro kernel_exit, el, ret = 0
ldp x21, x22, [sp, #S_PC] // load ELR, SPSR
.if \el == 0
+ ct_user_enter
ldr x23, [sp, #S_SP] // load return stack pointer
.endif
.if \ret
@@ -348,7 +375,6 @@ el0_sync:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state
b.eq el0_svc
- adr lr, ret_from_exception
cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0
b.eq el0_da
cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0
@@ -377,7 +403,6 @@ el0_sync_compat:
lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class
cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state
b.eq el0_svc_compat
- adr lr, ret_from_exception
cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0
b.eq el0_da
cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0
@@ -420,78 +445,94 @@ el0_da:
/*
* Data abort handling
*/
- mrs x0, far_el1
- bic x0, x0, #(0xff << 56)
+ mrs x26, far_el1
disable_step x1
isb
enable_dbg
// enable interrupts before calling the main handler
enable_irq
+ ct_user_exit
+ bic x0, x26, #(0xff << 56)
mov x1, x25
mov x2, sp
+ adr lr, ret_from_exception
b do_mem_abort
el0_ia:
/*
* Instruction abort handling
*/
- mrs x0, far_el1
+ mrs x26, far_el1
disable_step x1
isb
enable_dbg
// enable interrupts before calling the main handler
enable_irq
+ ct_user_exit
+ mov x0, x26
orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts
mov x2, sp
+ adr lr, ret_from_exception
b do_mem_abort
el0_fpsimd_acc:
/*
* Floating Point or Advanced SIMD access
*/
+ ct_user_exit
mov x0, x25
mov x1, sp
+ adr lr, ret_from_exception
b do_fpsimd_acc
el0_fpsimd_exc:
/*
* Floating Point or Advanced SIMD exception
*/
+ ct_user_exit
mov x0, x25
mov x1, sp
+ adr lr, ret_from_exception
b do_fpsimd_exc
el0_sp_pc:
/*
* Stack or PC alignment exception handling
*/
- mrs x0, far_el1
+ mrs x26, far_el1
disable_step x1
isb
enable_dbg
// enable interrupts before calling the main handler
enable_irq
+ mov x0, x26
mov x1, x25
mov x2, sp
+ adr lr, ret_from_exception
b do_sp_pc_abort
el0_undef:
/*
* Undefined instruction
*/
- mov x0, sp
// enable interrupts before calling the main handler
+ ct_user_exit
enable_irq
+ mov x0, sp
+ adr lr, ret_from_exception
b do_undefinstr
el0_dbg:
/*
* Debug exception handling
*/
tbnz x24, #0, el0_inv // EL0 only
+ ct_user_exit
mrs x0, far_el1
disable_step x1
mov x1, x25
mov x2, sp
b do_debug_exception
el0_inv:
+ ct_user_exit
mov x0, sp
mov x1, #BAD_SYNC
mrs x2, esr_el1
+ adr lr, ret_from_exception
b bad_mode
ENDPROC(el0_sync)
@@ -506,6 +547,7 @@ el0_irq_naked:
bl trace_hardirqs_off
#endif
+ ct_user_exit
irq_handler
get_thread_info tsk
@@ -575,7 +617,7 @@ fast_work_pending:
str x0, [sp, #S_X0] // returned x0
work_pending:
tbnz x1, #TIF_NEED_RESCHED, work_resched
- /* TIF_SIGPENDING or TIF_NOTIFY_RESUME case */
+ /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */
ldr x2, [sp, #S_PSTATE]
mov x0, sp // 'regs'
tst x2, #PSR_MODE_MASK // user mode regs?
@@ -628,10 +670,12 @@ el0_svc_naked: // compat entry point
isb
enable_dbg
enable_irq
+ ct_user_exit 1
get_thread_info tsk
- ldr x16, [tsk, #TI_FLAGS] // check for syscall tracing
- tbnz x16, #TIF_SYSCALL_TRACE, __sys_trace // are we tracing syscalls?
+ ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks
+ tst x16, #_TIF_SYSCALL_WORK
+ b.ne __sys_trace
adr lr, ret_fast_syscall // return address
cmp scno, sc_nr // check upper syscall limit
b.hs ni_sys
@@ -647,9 +691,8 @@ ENDPROC(el0_svc)
* switches, and waiting for our parent to respond.
*/
__sys_trace:
- mov x1, sp
- mov w0, #0 // trace entry
- bl syscall_trace
+ mov x0, sp
+ bl syscall_trace_enter
adr lr, __sys_trace_return // return address
uxtw scno, w0 // syscall number (possibly new)
mov x1, sp // pointer to regs
@@ -664,9 +707,8 @@ __sys_trace:
__sys_trace_return:
str x0, [sp] // save returned x0
- mov x1, sp
- mov w0, #1 // trace exit
- bl syscall_trace
+ mov x0, sp
+ bl syscall_trace_exit
b ret_to_user
/*
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 4aef42a04bdc..ad8aebb1cdef 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -35,6 +35,60 @@
#define FPEXC_IDF (1 << 7)
/*
+ * In order to reduce the number of times the FPSIMD state is needlessly saved
+ * and restored, we need to keep track of two things:
+ * (a) for each task, we need to remember which CPU was the last one to have
+ * the task's FPSIMD state loaded into its FPSIMD registers;
+ * (b) for each CPU, we need to remember which task's userland FPSIMD state has
+ * been loaded into its FPSIMD registers most recently, or whether it has
+ * been used to perform kernel mode NEON in the meantime.
+ *
+ * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to
+ * the id of the current CPU everytime the state is loaded onto a CPU. For (b),
+ * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the
+ * address of the userland FPSIMD state of the task that was loaded onto the CPU
+ * the most recently, or NULL if kernel mode NEON has been performed after that.
+ *
+ * With this in place, we no longer have to restore the next FPSIMD state right
+ * when switching between tasks. Instead, we can defer this check to userland
+ * resume, at which time we verify whether the CPU's fpsimd_last_state and the
+ * task's fpsimd_state.cpu are still mutually in sync. If this is the case, we
+ * can omit the FPSIMD restore.
+ *
+ * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to
+ * indicate whether or not the userland FPSIMD state of the current task is
+ * present in the registers. The flag is set unless the FPSIMD registers of this
+ * CPU currently contain the most recent userland FPSIMD state of the current
+ * task.
+ *
+ * For a certain task, the sequence may look something like this:
+ * - the task gets scheduled in; if both the task's fpsimd_state.cpu field
+ * contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu
+ * variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is
+ * cleared, otherwise it is set;
+ *
+ * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's
+ * userland FPSIMD state is copied from memory to the registers, the task's
+ * fpsimd_state.cpu field is set to the id of the current CPU, the current
+ * CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the
+ * TIF_FOREIGN_FPSTATE flag is cleared;
+ *
+ * - the task executes an ordinary syscall; upon return to userland, the
+ * TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is
+ * restored;
+ *
+ * - the task executes a syscall which executes some NEON instructions; this is
+ * preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD
+ * register contents to memory, clears the fpsimd_last_state per-cpu variable
+ * and sets the TIF_FOREIGN_FPSTATE flag;
+ *
+ * - the task gets preempted after kernel_neon_end() is called; as we have not
+ * returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
+ * whatever is in the FPSIMD registers is not saved to memory, but discarded.
+ */
+static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
+
+/*
* Trapped FP/ASIMD access.
*/
void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
@@ -72,43 +126,137 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
void fpsimd_thread_switch(struct task_struct *next)
{
- /* check if not kernel threads */
- if (current->mm)
+ /*
+ * Save the current FPSIMD state to memory, but only if whatever is in
+ * the registers is in fact the most recent userland FPSIMD state of
+ * 'current'.
+ */
+ if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
fpsimd_save_state(&current->thread.fpsimd_state);
- if (next->mm)
- fpsimd_load_state(&next->thread.fpsimd_state);
+
+ if (next->mm) {
+ /*
+ * If we are switching to a task whose most recent userland
+ * FPSIMD state is already in the registers of *this* cpu,
+ * we can skip loading the state from memory. Otherwise, set
+ * the TIF_FOREIGN_FPSTATE flag so the state will be loaded
+ * upon the next return to userland.
+ */
+ struct fpsimd_state *st = &next->thread.fpsimd_state;
+
+ if (__this_cpu_read(fpsimd_last_state) == st
+ && st->cpu == smp_processor_id())
+ clear_ti_thread_flag(task_thread_info(next),
+ TIF_FOREIGN_FPSTATE);
+ else
+ set_ti_thread_flag(task_thread_info(next),
+ TIF_FOREIGN_FPSTATE);
+ }
}
void fpsimd_flush_thread(void)
{
- preempt_disable();
memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
- fpsimd_load_state(&current->thread.fpsimd_state);
+ set_thread_flag(TIF_FOREIGN_FPSTATE);
+}
+
+/*
+ * Save the userland FPSIMD state of 'current' to memory, but only if the state
+ * currently held in the registers does in fact belong to 'current'
+ */
+void fpsimd_preserve_current_state(void)
+{
+ preempt_disable();
+ if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
+ fpsimd_save_state(&current->thread.fpsimd_state);
+ preempt_enable();
+}
+
+/*
+ * Load the userland FPSIMD state of 'current' from memory, but only if the
+ * FPSIMD state already held in the registers is /not/ the most recent FPSIMD
+ * state of 'current'
+ */
+void fpsimd_restore_current_state(void)
+{
+ preempt_disable();
+ if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
+ struct fpsimd_state *st = &current->thread.fpsimd_state;
+
+ fpsimd_load_state(st);
+ this_cpu_write(fpsimd_last_state, st);
+ st->cpu = smp_processor_id();
+ }
+ preempt_enable();
+}
+
+/*
+ * Load an updated userland FPSIMD state for 'current' from memory and set the
+ * flag that indicates that the FPSIMD register contents are the most recent
+ * FPSIMD state of 'current'
+ */
+void fpsimd_update_current_state(struct fpsimd_state *state)
+{
+ preempt_disable();
+ fpsimd_load_state(state);
+ if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
+ struct fpsimd_state *st = &current->thread.fpsimd_state;
+
+ this_cpu_write(fpsimd_last_state, st);
+ st->cpu = smp_processor_id();
+ }
preempt_enable();
}
+/*
+ * Invalidate live CPU copies of task t's FPSIMD state
+ */
+void fpsimd_flush_task_state(struct task_struct *t)
+{
+ t->thread.fpsimd_state.cpu = NR_CPUS;
+}
+
#ifdef CONFIG_KERNEL_MODE_NEON
+static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate);
+static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
+
/*
* Kernel-side NEON support functions
*/
-void kernel_neon_begin(void)
+void kernel_neon_begin_partial(u32 num_regs)
{
- /* Avoid using the NEON in interrupt context */
- BUG_ON(in_interrupt());
- preempt_disable();
+ if (in_interrupt()) {
+ struct fpsimd_partial_state *s = this_cpu_ptr(
+ in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
- if (current->mm)
- fpsimd_save_state(&current->thread.fpsimd_state);
+ BUG_ON(num_regs > 32);
+ fpsimd_save_partial_state(s, roundup(num_regs, 2));
+ } else {
+ /*
+ * Save the userland FPSIMD state if we have one and if we
+ * haven't done so already. Clear fpsimd_last_state to indicate
+ * that there is no longer userland FPSIMD state in the
+ * registers.
+ */
+ preempt_disable();
+ if (current->mm &&
+ !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE))
+ fpsimd_save_state(&current->thread.fpsimd_state);
+ this_cpu_write(fpsimd_last_state, NULL);
+ }
}
-EXPORT_SYMBOL(kernel_neon_begin);
+EXPORT_SYMBOL(kernel_neon_begin_partial);
void kernel_neon_end(void)
{
- if (current->mm)
- fpsimd_load_state(&current->thread.fpsimd_state);
-
- preempt_enable();
+ if (in_interrupt()) {
+ struct fpsimd_partial_state *s = this_cpu_ptr(
+ in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
+ fpsimd_load_partial_state(s);
+ } else {
+ preempt_enable();
+ }
}
EXPORT_SYMBOL(kernel_neon_end);
@@ -120,12 +268,12 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self,
{
switch (cmd) {
case CPU_PM_ENTER:
- if (current->mm)
+ if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))
fpsimd_save_state(&current->thread.fpsimd_state);
break;
case CPU_PM_EXIT:
if (current->mm)
- fpsimd_load_state(&current->thread.fpsimd_state);
+ set_thread_flag(TIF_FOREIGN_FPSTATE);
break;
case CPU_PM_ENTER_FAILED:
default:
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
new file mode 100644
index 000000000000..649890a3ac4e
--- /dev/null
+++ b/arch/arm64/kernel/ftrace.c
@@ -0,0 +1,177 @@
+/*
+ * arch/arm64/kernel/ftrace.c
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/ftrace.h>
+#include <linux/swab.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/ftrace.h>
+#include <asm/insn.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ * Replace a single instruction, which may be a branch or NOP.
+ * If @validate == true, a replaced instruction is checked against 'old'.
+ */
+static int ftrace_modify_code(unsigned long pc, u32 old, u32 new,
+ bool validate)
+{
+ u32 replaced;
+
+ /*
+ * Note:
+ * Due to modules and __init, code can disappear and change,
+ * we need to protect against faulting as well as code changing.
+ * We do this by aarch64_insn_*() which use the probe_kernel_*().
+ *
+ * No lock is held here because all the modifications are run
+ * through stop_machine().
+ */
+ if (validate) {
+ if (aarch64_insn_read((void *)pc, &replaced))
+ return -EFAULT;
+
+ if (replaced != old)
+ return -EINVAL;
+ }
+ if (aarch64_insn_patch_text_nosync((void *)pc, new))
+ return -EPERM;
+
+ return 0;
+}
+
+/*
+ * Replace tracer function in ftrace_caller()
+ */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ unsigned long pc;
+ u32 new;
+
+ pc = (unsigned long)&ftrace_call;
+ new = aarch64_insn_gen_branch_imm(pc, (unsigned long)func, true);
+
+ return ftrace_modify_code(pc, 0, new, false);
+}
+
+/*
+ * Turn on the call to ftrace_caller() in instrumented function
+ */
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long pc = rec->ip;
+ u32 old, new;
+
+ old = aarch64_insn_gen_nop();
+ new = aarch64_insn_gen_branch_imm(pc, addr, true);
+
+ return ftrace_modify_code(pc, old, new, true);
+}
+
+/*
+ * Turn off the call to ftrace_caller() in instrumented function
+ */
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+ unsigned long addr)
+{
+ unsigned long pc = rec->ip;
+ u32 old, new;
+
+ old = aarch64_insn_gen_branch_imm(pc, addr, true);
+ new = aarch64_insn_gen_nop();
+
+ return ftrace_modify_code(pc, old, new, true);
+}
+
+int __init ftrace_dyn_arch_init(void *data)
+{
+ *(unsigned long *)data = 0;
+ return 0;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * function_graph tracer expects ftrace_return_to_handler() to be called
+ * on the way back to parent. For this purpose, this function is called
+ * in _mcount() or ftrace_caller() to replace return address (*parent) on
+ * the call stack to return_to_handler.
+ *
+ * Note that @frame_pointer is used only for sanity check later.
+ */
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+ unsigned long frame_pointer)
+{
+ unsigned long return_hooker = (unsigned long)&return_to_handler;
+ unsigned long old;
+ struct ftrace_graph_ent trace;
+ int err;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
+
+ /*
+ * Note:
+ * No protection against faulting at *parent, which may be seen
+ * on other archs. It's unlikely on AArch64.
+ */
+ old = *parent;
+ *parent = return_hooker;
+
+ trace.func = self_addr;
+ trace.depth = current->curr_ret_stack + 1;
+
+ /* Only trace if the calling function expects to */
+ if (!ftrace_graph_entry(&trace)) {
+ *parent = old;
+ return;
+ }
+
+ err = ftrace_push_return_trace(old, self_addr, &trace.depth,
+ frame_pointer);
+ if (err == -EBUSY) {
+ *parent = old;
+ return;
+ }
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ * Turn on/off the call to ftrace_graph_caller() in ftrace_caller()
+ * depending on @enable.
+ */
+static int ftrace_modify_graph_caller(bool enable)
+{
+ unsigned long pc = (unsigned long)&ftrace_graph_call;
+ u32 branch, nop;
+
+ branch = aarch64_insn_gen_branch_imm(pc,
+ (unsigned long)ftrace_graph_caller, false);
+ nop = aarch64_insn_gen_nop();
+
+ if (enable)
+ return ftrace_modify_code(pc, nop, branch, true);
+ else
+ return ftrace_modify_code(pc, branch, nop, true);
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+ return ftrace_modify_graph_caller(true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+ return ftrace_modify_graph_caller(false);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 0b281fffda51..650ec9ea42fd 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -22,10 +22,12 @@
#include <linux/linkage.h>
#include <linux/init.h>
+#include <linux/irqchip/arm-gic-v3.h>
#include <asm/assembler.h>
#include <asm/ptrace.h>
#include <asm/asm-offsets.h>
+#include <asm/cache.h>
#include <asm/cputype.h>
#include <asm/memory.h>
#include <asm/thread_info.h>
@@ -34,29 +36,17 @@
#include <asm/page.h>
#include <asm/virt.h>
-/*
- * swapper_pg_dir is the virtual address of the initial page table. We place
- * the page tables 3 * PAGE_SIZE below KERNEL_RAM_VADDR. The idmap_pg_dir has
- * 2 pages and is placed below swapper_pg_dir.
- */
#define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET)
#if (KERNEL_RAM_VADDR & 0xfffff) != 0x80000
#error KERNEL_RAM_VADDR must start at 0xXXX80000
#endif
-#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE)
-#define IDMAP_DIR_SIZE (2 * PAGE_SIZE)
-
- .globl swapper_pg_dir
- .equ swapper_pg_dir, KERNEL_RAM_VADDR - SWAPPER_DIR_SIZE
-
- .globl idmap_pg_dir
- .equ idmap_pg_dir, swapper_pg_dir - IDMAP_DIR_SIZE
-
- .macro pgtbl, ttb0, ttb1, phys
- add \ttb1, \phys, #TEXT_OFFSET - SWAPPER_DIR_SIZE
- sub \ttb0, \ttb1, #IDMAP_DIR_SIZE
+ .macro pgtbl, ttb0, ttb1, virt_to_phys
+ ldr \ttb1, =swapper_pg_dir
+ ldr \ttb0, =idmap_pg_dir
+ add \ttb1, \ttb1, \virt_to_phys
+ add \ttb0, \ttb0, \virt_to_phys
.endm
#ifdef CONFIG_ARM64_64K_PAGES
@@ -183,6 +173,23 @@ CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1
msr cnthctl_el2, x0
msr cntvoff_el2, xzr // Clear virtual offset
+#ifdef CONFIG_ARM_GIC_V3
+ /* GICv3 system register access */
+ mrs x0, id_aa64pfr0_el1
+ ubfx x0, x0, #24, #4
+ cmp x0, #1
+ b.ne 3f
+
+ mrs_s x0, ICC_SRE_EL2
+ orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1
+ orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1
+ msr_s ICC_SRE_EL2, x0
+ isb // Make sure SRE is now set
+ msr_s ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults
+
+3:
+#endif
+
/* Populate ID registers. */
mrs x0, midr_el1
mrs x1, mpidr_el1
@@ -240,8 +247,9 @@ ENDPROC(set_cpu_boot_mode_flag)
* This is not in .bss, because we set it sufficiently early that the boot-time
* zeroing of .bss would clobber it.
*/
- .pushsection .data
+ .pushsection .data..cacheline_aligned
ENTRY(__boot_cpu_mode)
+ .align L1_CACHE_SHIFT
.long BOOT_CPU_MODE_EL2
.long 0
.popsection
@@ -298,7 +306,7 @@ ENTRY(secondary_startup)
mov x23, x0 // x23=current cpu_table
cbz x23, __error_p // invalid processor (x23=0)?
- pgtbl x25, x26, x24 // x25=TTBR0, x26=TTBR1
+ pgtbl x25, x26, x28 // x25=TTBR0, x26=TTBR1
ldr x12, [x23, #CPU_INFO_SETUP]
add x12, x12, x28 // __virt_to_phys
blr x12 // initialise processor
@@ -340,8 +348,13 @@ ENDPROC(__enable_mmu)
* x27 = *virtual* address to jump to upon completion
*
* other registers depend on the function called upon completion
+ *
+ * We align the entire function to the smallest power of two larger than it to
+ * ensure it fits within a single block map entry. Otherwise were PHYS_OFFSET
+ * close to the end of a 512MB or 1GB block we might require an additional
+ * table to map the entire function.
*/
- .align 6
+ .align 4
__turn_mmu_on:
msr sctlr_el1, x0
isb
@@ -384,26 +397,18 @@ ENDPROC(__calc_phys_offset)
* Preserves: tbl, flags
* Corrupts: phys, start, end, pstate
*/
- .macro create_block_map, tbl, flags, phys, start, end, idmap=0
+ .macro create_block_map, tbl, flags, phys, start, end
lsr \phys, \phys, #BLOCK_SHIFT
- .if \idmap
- and \start, \phys, #PTRS_PER_PTE - 1 // table index
- .else
lsr \start, \start, #BLOCK_SHIFT
and \start, \start, #PTRS_PER_PTE - 1 // table index
- .endif
orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry
- .ifnc \start,\end
lsr \end, \end, #BLOCK_SHIFT
and \end, \end, #PTRS_PER_PTE - 1 // table end index
- .endif
9999: str \phys, [\tbl, \start, lsl #3] // store the entry
- .ifnc \start,\end
add \start, \start, #1 // next entry
add \phys, \phys, #BLOCK_SIZE // next block
cmp \start, \end
b.ls 9999b
- .endif
.endm
/*
@@ -412,10 +417,19 @@ ENDPROC(__calc_phys_offset)
* - identity mapping to enable the MMU (low address, TTBR0)
* - first few MB of the kernel linear mapping to jump to once the MMU has
* been enabled, including the FDT blob (TTBR1)
- * - UART mapping if CONFIG_EARLY_PRINTK is enabled (TTBR1)
+ * - pgd entry for fixed mappings (TTBR1)
*/
__create_page_tables:
- pgtbl x25, x26, x24 // idmap_pg_dir and swapper_pg_dir addresses
+ pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses
+ mov x27, lr
+
+ /*
+ * Invalidate the idmap and swapper page tables to avoid potential
+ * dirty cache lines being evicted.
+ */
+ mov x0, x25
+ add x1, x26, #SWAPPER_DIR_SIZE
+ bl __inval_cache_range
/*
* Clear the idmap and swapper page tables.
@@ -435,9 +449,13 @@ __create_page_tables:
* Create the identity mapping.
*/
add x0, x25, #PAGE_SIZE // section table address
- adr x3, __turn_mmu_on // virtual/physical address
+ ldr x3, =KERNEL_START
+ add x3, x3, x28 // __pa(KERNEL_START)
create_pgd_entry x25, x0, x3, x5, x6
- create_block_map x0, x7, x3, x5, x5, idmap=1
+ ldr x6, =KERNEL_END
+ mov x5, x3 // __pa(KERNEL_START)
+ add x6, x6, x28 // __pa(KERNEL_END)
+ create_block_map x0, x7, x3, x5, x6
/*
* Map the kernel image (starting with PHYS_OFFSET).
@@ -445,7 +463,7 @@ __create_page_tables:
add x0, x26, #PAGE_SIZE // section table address
mov x5, #PAGE_OFFSET
create_pgd_entry x26, x0, x5, x3, x6
- ldr x6, =KERNEL_END - 1
+ ldr x6, =KERNEL_END
mov x3, x24 // phys offset
create_block_map x0, x7, x3, x5, x6
@@ -465,15 +483,23 @@ __create_page_tables:
sub x6, x6, #1 // inclusive range
create_block_map x0, x7, x3, x5, x6
1:
-#ifdef CONFIG_EARLY_PRINTK
/*
- * Create the pgd entry for the UART mapping. The full mapping is done
- * later based earlyprintk kernel parameter.
+ * Create the pgd entry for the fixed mappings.
*/
- ldr x5, =EARLYCON_IOBASE // UART virtual address
+ ldr x5, =FIXADDR_TOP // Fixed mapping virtual address
add x0, x26, #2 * PAGE_SIZE // section table address
create_pgd_entry x26, x0, x5, x6, x7
-#endif
+
+ /*
+ * Since the page tables have been populated with non-cacheable
+ * accesses (MMU disabled), invalidate the idmap and swapper page
+ * tables again to remove any speculatively loaded cache lines.
+ */
+ mov x0, x25
+ add x1, x26, #SWAPPER_DIR_SIZE
+ bl __inval_cache_range
+
+ mov lr, x27
ret
ENDPROC(__create_page_tables)
.ltorg
@@ -483,7 +509,7 @@ ENDPROC(__create_page_tables)
__switch_data:
.quad __mmap_switched
.quad __bss_start // x6
- .quad _end // x7
+ .quad __bss_stop // x7
.quad processor_id // x4
.quad __fdt_pointer // x5
.quad memstart_addr // x6
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index f17f581116fc..a45e2db6e502 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -20,6 +20,7 @@
#define pr_fmt(fmt) "hw-breakpoint: " fmt
+#include <linux/compat.h>
#include <linux/cpu_pm.h>
#include <linux/errno.h>
#include <linux/hw_breakpoint.h>
@@ -27,7 +28,6 @@
#include <linux/ptrace.h>
#include <linux/smp.h>
-#include <asm/compat.h>
#include <asm/current.h>
#include <asm/debug-monitors.h>
#include <asm/hw_breakpoint.h>
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 0959611d9ff1..a272f335c289 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -19,6 +19,7 @@
#include <linux/init.h>
#include <linux/linkage.h>
+#include <linux/irqchip/arm-gic-v3.h>
#include <asm/assembler.h>
#include <asm/ptrace.h>
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
new file mode 100644
index 000000000000..75c9cf1aafee
--- /dev/null
+++ b/arch/arm64/kernel/kgdb.c
@@ -0,0 +1,336 @@
+/*
+ * AArch64 KGDB support
+ *
+ * Based on arch/arm/kernel/kgdb.c
+ *
+ * Copyright (C) 2013 Cavium Inc.
+ * Author: Vijaya Kumar K <vijaya.kumar@caviumnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/irq.h>
+#include <linux/kdebug.h>
+#include <linux/kgdb.h>
+#include <asm/traps.h>
+
+struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
+ { "x0", 8, offsetof(struct pt_regs, regs[0])},
+ { "x1", 8, offsetof(struct pt_regs, regs[1])},
+ { "x2", 8, offsetof(struct pt_regs, regs[2])},
+ { "x3", 8, offsetof(struct pt_regs, regs[3])},
+ { "x4", 8, offsetof(struct pt_regs, regs[4])},
+ { "x5", 8, offsetof(struct pt_regs, regs[5])},
+ { "x6", 8, offsetof(struct pt_regs, regs[6])},
+ { "x7", 8, offsetof(struct pt_regs, regs[7])},
+ { "x8", 8, offsetof(struct pt_regs, regs[8])},
+ { "x9", 8, offsetof(struct pt_regs, regs[9])},
+ { "x10", 8, offsetof(struct pt_regs, regs[10])},
+ { "x11", 8, offsetof(struct pt_regs, regs[11])},
+ { "x12", 8, offsetof(struct pt_regs, regs[12])},
+ { "x13", 8, offsetof(struct pt_regs, regs[13])},
+ { "x14", 8, offsetof(struct pt_regs, regs[14])},
+ { "x15", 8, offsetof(struct pt_regs, regs[15])},
+ { "x16", 8, offsetof(struct pt_regs, regs[16])},
+ { "x17", 8, offsetof(struct pt_regs, regs[17])},
+ { "x18", 8, offsetof(struct pt_regs, regs[18])},
+ { "x19", 8, offsetof(struct pt_regs, regs[19])},
+ { "x20", 8, offsetof(struct pt_regs, regs[20])},
+ { "x21", 8, offsetof(struct pt_regs, regs[21])},
+ { "x22", 8, offsetof(struct pt_regs, regs[22])},
+ { "x23", 8, offsetof(struct pt_regs, regs[23])},
+ { "x24", 8, offsetof(struct pt_regs, regs[24])},
+ { "x25", 8, offsetof(struct pt_regs, regs[25])},
+ { "x26", 8, offsetof(struct pt_regs, regs[26])},
+ { "x27", 8, offsetof(struct pt_regs, regs[27])},
+ { "x28", 8, offsetof(struct pt_regs, regs[28])},
+ { "x29", 8, offsetof(struct pt_regs, regs[29])},
+ { "x30", 8, offsetof(struct pt_regs, regs[30])},
+ { "sp", 8, offsetof(struct pt_regs, sp)},
+ { "pc", 8, offsetof(struct pt_regs, pc)},
+ { "pstate", 8, offsetof(struct pt_regs, pstate)},
+ { "v0", 16, -1 },
+ { "v1", 16, -1 },
+ { "v2", 16, -1 },
+ { "v3", 16, -1 },
+ { "v4", 16, -1 },
+ { "v5", 16, -1 },
+ { "v6", 16, -1 },
+ { "v7", 16, -1 },
+ { "v8", 16, -1 },
+ { "v9", 16, -1 },
+ { "v10", 16, -1 },
+ { "v11", 16, -1 },
+ { "v12", 16, -1 },
+ { "v13", 16, -1 },
+ { "v14", 16, -1 },
+ { "v15", 16, -1 },
+ { "v16", 16, -1 },
+ { "v17", 16, -1 },
+ { "v18", 16, -1 },
+ { "v19", 16, -1 },
+ { "v20", 16, -1 },
+ { "v21", 16, -1 },
+ { "v22", 16, -1 },
+ { "v23", 16, -1 },
+ { "v24", 16, -1 },
+ { "v25", 16, -1 },
+ { "v26", 16, -1 },
+ { "v27", 16, -1 },
+ { "v28", 16, -1 },
+ { "v29", 16, -1 },
+ { "v30", 16, -1 },
+ { "v31", 16, -1 },
+ { "fpsr", 4, -1 },
+ { "fpcr", 4, -1 },
+};
+
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+ if (regno >= DBG_MAX_REG_NUM || regno < 0)
+ return NULL;
+
+ if (dbg_reg_def[regno].offset != -1)
+ memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
+ dbg_reg_def[regno].size);
+ else
+ memset(mem, 0, dbg_reg_def[regno].size);
+ return dbg_reg_def[regno].name;
+}
+
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+ if (regno >= DBG_MAX_REG_NUM || regno < 0)
+ return -EINVAL;
+
+ if (dbg_reg_def[regno].offset != -1)
+ memcpy((void *)regs + dbg_reg_def[regno].offset, mem,
+ dbg_reg_def[regno].size);
+ return 0;
+}
+
+void
+sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task)
+{
+ struct pt_regs *thread_regs;
+
+ /* Initialize to zero */
+ memset((char *)gdb_regs, 0, NUMREGBYTES);
+ thread_regs = task_pt_regs(task);
+ memcpy((void *)gdb_regs, (void *)thread_regs->regs, GP_REG_BYTES);
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+ regs->pc = pc;
+}
+
+static int compiled_break;
+
+static void kgdb_arch_update_addr(struct pt_regs *regs,
+ char *remcom_in_buffer)
+{
+ unsigned long addr;
+ char *ptr;
+
+ ptr = &remcom_in_buffer[1];
+ if (kgdb_hex2long(&ptr, &addr))
+ kgdb_arch_set_pc(regs, addr);
+ else if (compiled_break == 1)
+ kgdb_arch_set_pc(regs, regs->pc + 4);
+
+ compiled_break = 0;
+}
+
+int kgdb_arch_handle_exception(int exception_vector, int signo,
+ int err_code, char *remcom_in_buffer,
+ char *remcom_out_buffer,
+ struct pt_regs *linux_regs)
+{
+ int err;
+
+ switch (remcom_in_buffer[0]) {
+ case 'D':
+ case 'k':
+ /*
+ * Packet D (Detach), k (kill). No special handling
+ * is required here. Handle same as c packet.
+ */
+ case 'c':
+ /*
+ * Packet c (Continue) to continue executing.
+ * Set pc to required address.
+ * Try to read optional parameter and set pc.
+ * If this was a compiled breakpoint, we need to move
+ * to the next instruction else we will just breakpoint
+ * over and over again.
+ */
+ kgdb_arch_update_addr(linux_regs, remcom_in_buffer);
+ atomic_set(&kgdb_cpu_doing_single_step, -1);
+ kgdb_single_step = 0;
+
+ /*
+ * Received continue command, disable single step
+ */
+ if (kernel_active_single_step())
+ kernel_disable_single_step();
+
+ err = 0;
+ break;
+ case 's':
+ /*
+ * Update step address value with address passed
+ * with step packet.
+ * On debug exception return PC is copied to ELR
+ * So just update PC.
+ * If no step address is passed, resume from the address
+ * pointed by PC. Do not update PC
+ */
+ kgdb_arch_update_addr(linux_regs, remcom_in_buffer);
+ atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id());
+ kgdb_single_step = 1;
+
+ /*
+ * Enable single step handling
+ */
+ if (!kernel_active_single_step())
+ kernel_enable_single_step(linux_regs);
+ err = 0;
+ break;
+ default:
+ err = -1;
+ }
+ return err;
+}
+
+static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr)
+{
+ kgdb_handle_exception(1, SIGTRAP, 0, regs);
+ return 0;
+}
+
+static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr)
+{
+ compiled_break = 1;
+ kgdb_handle_exception(1, SIGTRAP, 0, regs);
+
+ return 0;
+}
+
+static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr)
+{
+ kgdb_handle_exception(1, SIGTRAP, 0, regs);
+ return 0;
+}
+
+static struct break_hook kgdb_brkpt_hook = {
+ .esr_mask = 0xffffffff,
+ .esr_val = DBG_ESR_VAL_BRK(KGDB_DYN_DGB_BRK_IMM),
+ .fn = kgdb_brk_fn
+};
+
+static struct break_hook kgdb_compiled_brkpt_hook = {
+ .esr_mask = 0xffffffff,
+ .esr_val = DBG_ESR_VAL_BRK(KDBG_COMPILED_DBG_BRK_IMM),
+ .fn = kgdb_compiled_brk_fn
+};
+
+static struct step_hook kgdb_step_hook = {
+ .fn = kgdb_step_brk_fn
+};
+
+static void kgdb_call_nmi_hook(void *ignored)
+{
+ kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
+}
+
+void kgdb_roundup_cpus(unsigned long flags)
+{
+ local_irq_enable();
+ smp_call_function(kgdb_call_nmi_hook, NULL, 0);
+ local_irq_disable();
+}
+
+static int __kgdb_notify(struct die_args *args, unsigned long cmd)
+{
+ struct pt_regs *regs = args->regs;
+
+ if (kgdb_handle_exception(1, args->signr, cmd, regs))
+ return NOTIFY_DONE;
+ return NOTIFY_STOP;
+}
+
+static int
+kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
+{
+ unsigned long flags;
+ int ret;
+
+ local_irq_save(flags);
+ ret = __kgdb_notify(ptr, cmd);
+ local_irq_restore(flags);
+
+ return ret;
+}
+
+static struct notifier_block kgdb_notifier = {
+ .notifier_call = kgdb_notify,
+ /*
+ * Want to be lowest priority
+ */
+ .priority = -INT_MAX,
+};
+
+/*
+ * kgdb_arch_init - Perform any architecture specific initalization.
+ * This function will handle the initalization of any architecture
+ * specific callbacks.
+ */
+int kgdb_arch_init(void)
+{
+ int ret = register_die_notifier(&kgdb_notifier);
+
+ if (ret != 0)
+ return ret;
+
+ register_break_hook(&kgdb_brkpt_hook);
+ register_break_hook(&kgdb_compiled_brkpt_hook);
+ register_step_hook(&kgdb_step_hook);
+ return 0;
+}
+
+/*
+ * kgdb_arch_exit - Perform any architecture specific uninitalization.
+ * This function will handle the uninitalization of any architecture
+ * specific callbacks, for dynamic registration and unregistration.
+ */
+void kgdb_arch_exit(void)
+{
+ unregister_break_hook(&kgdb_brkpt_hook);
+ unregister_break_hook(&kgdb_compiled_brkpt_hook);
+ unregister_step_hook(&kgdb_step_hook);
+ unregister_die_notifier(&kgdb_notifier);
+}
+
+/*
+ * ARM instructions are always in LE.
+ * Break instruction is encoded in LE format
+ */
+struct kgdb_arch arch_kgdb_ops = {
+ .gdb_bpt_instr = {
+ KGDB_DYN_BRK_INS_BYTE0,
+ KGDB_DYN_BRK_INS_BYTE1,
+ KGDB_DYN_BRK_INS_BYTE2,
+ KGDB_DYN_BRK_INS_BYTE3,
+ }
+};
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 5b1cd792274a..baf5afb7e6a0 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -1348,8 +1348,8 @@ early_initcall(init_hw_perf_events);
* Callchain handling code.
*/
struct frame_tail {
- struct frame_tail __user *fp;
- unsigned long lr;
+ struct frame_tail __user *fp;
+ unsigned long lr;
} __attribute__((packed));
/*
@@ -1386,22 +1386,84 @@ user_backtrace(struct frame_tail __user *tail,
return buftail.fp;
}
+#ifdef CONFIG_COMPAT
+/*
+ * The registers we're interested in are at the end of the variable
+ * length saved register structure. The fp points at the end of this
+ * structure so the address of this struct is:
+ * (struct compat_frame_tail *)(xxx->fp)-1
+ *
+ * This code has been adapted from the ARM OProfile support.
+ */
+struct compat_frame_tail {
+ compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */
+ u32 sp;
+ u32 lr;
+} __attribute__((packed));
+
+static struct compat_frame_tail __user *
+compat_user_backtrace(struct compat_frame_tail __user *tail,
+ struct perf_callchain_entry *entry)
+{
+ struct compat_frame_tail buftail;
+ unsigned long err;
+
+ /* Also check accessibility of one struct frame_tail beyond */
+ if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
+ return NULL;
+
+ pagefault_disable();
+ err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
+ pagefault_enable();
+
+ if (err)
+ return NULL;
+
+ perf_callchain_store(entry, buftail.lr);
+
+ /*
+ * Frame pointers should strictly progress back up the stack
+ * (towards higher addresses).
+ */
+ if (tail + 1 >= (struct compat_frame_tail __user *)
+ compat_ptr(buftail.fp))
+ return NULL;
+
+ return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1;
+}
+#endif /* CONFIG_COMPAT */
+
void perf_callchain_user(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
- struct frame_tail __user *tail;
-
if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
/* We don't support guest os callchain now */
return;
}
perf_callchain_store(entry, regs->pc);
- tail = (struct frame_tail __user *)regs->regs[29];
- while (entry->nr < PERF_MAX_STACK_DEPTH &&
- tail && !((unsigned long)tail & 0xf))
- tail = user_backtrace(tail, entry);
+ if (!compat_user_mode(regs)) {
+ /* AARCH64 mode */
+ struct frame_tail __user *tail;
+
+ tail = (struct frame_tail __user *)regs->regs[29];
+
+ while (entry->nr < PERF_MAX_STACK_DEPTH &&
+ tail && !((unsigned long)tail & 0xf))
+ tail = user_backtrace(tail, entry);
+ } else {
+#ifdef CONFIG_COMPAT
+ /* AARCH32 compat mode */
+ struct compat_frame_tail __user *tail;
+
+ tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;
+
+ while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+ tail && !((unsigned long)tail & 0x3))
+ tail = compat_user_backtrace(tail, entry);
+#endif
+ }
}
/*
@@ -1429,6 +1491,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
frame.fp = regs->regs[29];
frame.sp = regs->sp;
frame.pc = regs->pc;
+
walk_stackframe(&frame, callchain_trace, entry);
}
diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c
new file mode 100644
index 000000000000..422ebd63b619
--- /dev/null
+++ b/arch/arm64/kernel/perf_regs.c
@@ -0,0 +1,46 @@
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/bug.h>
+
+#include <asm/compat.h>
+#include <asm/perf_regs.h>
+#include <asm/ptrace.h>
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+ if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM64_MAX))
+ return 0;
+
+ /*
+ * Compat (i.e. 32 bit) mode:
+ * - PC has been set in the pt_regs struct in kernel_entry,
+ * - Handle SP and LR here.
+ */
+ if (compat_user_mode(regs)) {
+ if ((u32)idx == PERF_REG_ARM64_SP)
+ return regs->compat_sp;
+ if ((u32)idx == PERF_REG_ARM64_LR)
+ return regs->compat_lr;
+ }
+
+ return regs->regs[idx];
+}
+
+#define REG_RESERVED (~((1ULL << PERF_REG_ARM64_MAX) - 1))
+
+int perf_reg_validate(u64 mask)
+{
+ if (!mask || mask & REG_RESERVED)
+ return -EINVAL;
+
+ return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+ if (is_compat_thread(task_thread_info(task)))
+ return PERF_SAMPLE_REGS_ABI_32;
+ else
+ return PERF_SAMPLE_REGS_ABI_64;
+}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 6e7e579c629d..6601cda23170 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -20,6 +20,7 @@
#include <stdarg.h>
+#include <linux/compat.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/kernel.h>
@@ -217,7 +218,7 @@ void release_thread(struct task_struct *dead_task)
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
- fpsimd_save_state(&current->thread.fpsimd_state);
+ fpsimd_preserve_current_state();
*dst = *src;
return 0;
}
@@ -312,7 +313,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
* Complete any pending TLB or cache maintenance on this CPU in case
* the thread migrates to a different CPU.
*/
- dsb();
+ dsb(ish);
/* the actual thread switch */
last = cpu_switch_to(prev, next);
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index b1269dac1289..167c5edecad4 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -19,6 +19,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/compat.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
@@ -41,6 +42,9 @@
#include <asm/traps.h>
#include <asm/system_misc.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
/*
* TODO: does not yet catch signals sent when the child dies.
* in exit.c or in signal.c.
@@ -518,6 +522,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
return ret;
target->thread.fpsimd_state.user_fpsimd = newstate;
+ fpsimd_flush_task_state(target);
return ret;
}
@@ -775,6 +780,7 @@ static int compat_vfp_set(struct task_struct *target,
uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
}
+ fpsimd_flush_task_state(target);
return ret;
}
@@ -1073,35 +1079,49 @@ long arch_ptrace(struct task_struct *child, long request,
return ptrace_request(child, request, addr, data);
}
-asmlinkage int syscall_trace(int dir, struct pt_regs *regs)
+enum ptrace_syscall_dir {
+ PTRACE_SYSCALL_ENTER = 0,
+ PTRACE_SYSCALL_EXIT,
+};
+
+static void tracehook_report_syscall(struct pt_regs *regs,
+ enum ptrace_syscall_dir dir)
{
+ int regno;
unsigned long saved_reg;
- if (!test_thread_flag(TIF_SYSCALL_TRACE))
- return regs->syscallno;
-
- if (is_compat_task()) {
- /* AArch32 uses ip (r12) for scratch */
- saved_reg = regs->regs[12];
- regs->regs[12] = dir;
- } else {
- /*
- * Save X7. X7 is used to denote syscall entry/exit:
- * X7 = 0 -> entry, = 1 -> exit
- */
- saved_reg = regs->regs[7];
- regs->regs[7] = dir;
- }
+ /*
+ * A scratch register (ip(r12) on AArch32, x7 on AArch64) is
+ * used to denote syscall entry/exit:
+ */
+ regno = (is_compat_task() ? 12 : 7);
+ saved_reg = regs->regs[regno];
+ regs->regs[regno] = dir;
- if (dir)
+ if (dir == PTRACE_SYSCALL_EXIT)
tracehook_report_syscall_exit(regs, 0);
else if (tracehook_report_syscall_entry(regs))
regs->syscallno = ~0UL;
- if (is_compat_task())
- regs->regs[12] = saved_reg;
- else
- regs->regs[7] = saved_reg;
+ regs->regs[regno] = saved_reg;
+}
+
+asmlinkage int syscall_trace_enter(struct pt_regs *regs)
+{
+ if (test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
+
+ if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+ trace_sys_enter(regs, regs->syscallno);
return regs->syscallno;
}
+
+asmlinkage void syscall_trace_exit(struct pt_regs *regs)
+{
+ if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+ trace_sys_exit(regs, regs_return_value(regs));
+
+ if (test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT);
+}
diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
new file mode 100644
index 000000000000..89102a6ffad5
--- /dev/null
+++ b/arch/arm64/kernel/return_address.c
@@ -0,0 +1,55 @@
+/*
+ * arch/arm64/kernel/return_address.c
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/ftrace.h>
+
+#include <asm/stacktrace.h>
+
+struct return_address_data {
+ unsigned int level;
+ void *addr;
+};
+
+static int save_return_addr(struct stackframe *frame, void *d)
+{
+ struct return_address_data *data = d;
+
+ if (!data->level) {
+ data->addr = (void *)frame->pc;
+ return 1;
+ } else {
+ --data->level;
+ return 0;
+ }
+}
+
+void *return_address(unsigned int level)
+{
+ struct return_address_data data;
+ struct stackframe frame;
+ register unsigned long current_sp asm ("sp");
+
+ data.level = level + 2;
+ data.addr = NULL;
+
+ frame.fp = (unsigned long)__builtin_frame_address(0);
+ frame.sp = current_sp;
+ frame.pc = (unsigned long)return_address; /* dummy */
+
+ walk_stackframe(&frame, save_return_addr, &data);
+
+ if (!data.level)
+ return data.addr;
+ else
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(return_address);
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index c8e9effe52e1..46b441679d8f 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -42,6 +42,7 @@
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
+#include <asm/fixmap.h>
#include <asm/cputype.h>
#include <asm/elf.h>
#include <asm/cputable.h>
@@ -69,6 +70,7 @@ EXPORT_SYMBOL_GPL(elf_hwcap);
COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV)
unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
+unsigned int compat_elf_hwcap2 __read_mostly;
#endif
static const char *cpu_name;
@@ -242,6 +244,38 @@ static void __init setup_processor(void)
block = (features >> 16) & 0xf;
if (block && !(block & 0x8))
elf_hwcap |= HWCAP_CRC32;
+
+#ifdef CONFIG_COMPAT
+ /*
+ * ID_ISAR5_EL1 carries similar information as above, but pertaining to
+ * the Aarch32 32-bit execution state.
+ */
+ features = read_cpuid(ID_ISAR5_EL1);
+ block = (features >> 4) & 0xf;
+ if (!(block & 0x8)) {
+ switch (block) {
+ default:
+ case 2:
+ compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL;
+ case 1:
+ compat_elf_hwcap2 |= COMPAT_HWCAP2_AES;
+ case 0:
+ break;
+ }
+ }
+
+ block = (features >> 8) & 0xf;
+ if (block && !(block & 0x8))
+ compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1;
+
+ block = (features >> 12) & 0xf;
+ if (block && !(block & 0x8))
+ compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2;
+
+ block = (features >> 16) & 0xf;
+ if (block && !(block & 0x8))
+ compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32;
+#endif
}
static void __init setup_machine_fdt(phys_addr_t dt_phys)
@@ -327,6 +361,8 @@ void __init setup_arch(char **cmdline_p)
*cmdline_p = boot_command_line;
+ early_ioremap_init();
+
parse_early_param();
arm64_memblock_init();
@@ -360,7 +396,7 @@ static int __init arm64_device_init(void)
of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
return 0;
}
-arch_initcall(arm64_device_init);
+arch_initcall_sync(arm64_device_init);
static DEFINE_PER_CPU(struct cpu, cpu_data);
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 890a591f75dd..bbc1aad21ce6 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -17,6 +17,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/compat.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/personality.h>
@@ -25,7 +26,6 @@
#include <linux/tracehook.h>
#include <linux/ratelimit.h>
-#include <asm/compat.h>
#include <asm/debug-monitors.h>
#include <asm/elf.h>
#include <asm/cacheflush.h>
@@ -51,7 +51,7 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
int err;
/* dump the hardware registers to the fpsimd_state structure */
- fpsimd_save_state(fpsimd);
+ fpsimd_preserve_current_state();
/* copy the FP and status/control registers */
err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs));
@@ -86,11 +86,8 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
__get_user_error(fpsimd.fpcr, &ctx->fpcr, err);
/* load the hardware registers from the fpsimd_state structure */
- if (!err) {
- preempt_disable();
- fpsimd_load_state(&fpsimd);
- preempt_enable();
- }
+ if (!err)
+ fpsimd_update_current_state(&fpsimd);
return err ? -EFAULT : 0;
}
@@ -100,8 +97,7 @@ static int restore_sigframe(struct pt_regs *regs,
{
sigset_t set;
int i, err;
- struct aux_context __user *aux =
- (struct aux_context __user *)sf->uc.uc_mcontext.__reserved;
+ void *aux = sf->uc.uc_mcontext.__reserved;
err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));
if (err == 0)
@@ -121,8 +117,11 @@ static int restore_sigframe(struct pt_regs *regs,
err |= !valid_user_regs(&regs->user_regs);
- if (err == 0)
- err |= restore_fpsimd_context(&aux->fpsimd);
+ if (err == 0) {
+ struct fpsimd_context *fpsimd_ctx =
+ container_of(aux, struct fpsimd_context, head);
+ err |= restore_fpsimd_context(fpsimd_ctx);
+ }
return err;
}
@@ -167,8 +166,8 @@ static int setup_sigframe(struct rt_sigframe __user *sf,
struct pt_regs *regs, sigset_t *set)
{
int i, err = 0;
- struct aux_context __user *aux =
- (struct aux_context __user *)sf->uc.uc_mcontext.__reserved;
+ void *aux = sf->uc.uc_mcontext.__reserved;
+ struct _aarch64_ctx *end;
/* set up the stack frame for unwinding */
__put_user_error(regs->regs[29], &sf->fp, err);
@@ -185,12 +184,17 @@ static int setup_sigframe(struct rt_sigframe __user *sf,
err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set));
- if (err == 0)
- err |= preserve_fpsimd_context(&aux->fpsimd);
+ if (err == 0) {
+ struct fpsimd_context *fpsimd_ctx =
+ container_of(aux, struct fpsimd_context, head);
+ err |= preserve_fpsimd_context(fpsimd_ctx);
+ aux += sizeof(*fpsimd_ctx);
+ }
/* set the "end" magic */
- __put_user_error(0, &aux->end.magic, err);
- __put_user_error(0, &aux->end.size, err);
+ end = aux;
+ __put_user_error(0, &end->magic, err);
+ __put_user_error(0, &end->size, err);
return err;
}
@@ -416,4 +420,8 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
}
+
+ if (thread_flags & _TIF_FOREIGN_FPSTATE)
+ fpsimd_restore_current_state();
+
}
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index b3fc9f5ec6d3..ac7e237d0bda 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -219,7 +219,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
* Note that this also saves V16-31, which aren't visible
* in AArch32.
*/
- fpsimd_save_state(fpsimd);
+ fpsimd_preserve_current_state();
/* Place structure header on the stack */
__put_user_error(magic, &frame->magic, err);
@@ -282,11 +282,8 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)
* We don't need to touch the exception register, so
* reload the hardware state.
*/
- if (!err) {
- preempt_disable();
- fpsimd_load_state(&fpsimd);
- preempt_enable();
- }
+ if (!err)
+ fpsimd_update_current_state(&fpsimd);
return err ? -EFAULT : 0;
}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 7cfb92a4ab66..6819fab9a80b 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -35,6 +35,7 @@
#include <linux/clockchips.h>
#include <linux/completion.h>
#include <linux/of.h>
+#include <linux/irq_work.h>
#include <asm/atomic.h>
#include <asm/cacheflush.h>
@@ -62,6 +63,7 @@ enum ipi_msg_type {
IPI_CALL_FUNC_SINGLE,
IPI_CPU_STOP,
IPI_TIMER,
+ IPI_IRQ_WORK,
};
/*
@@ -114,6 +116,11 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
return ret;
}
+static void smp_store_cpu_info(unsigned int cpuid)
+{
+ store_cpu_topology(cpuid);
+}
+
/*
* This is the secondary CPU boot entry. We're using this CPUs
* idle thread stack, but a set of temporary page tables.
@@ -152,6 +159,8 @@ asmlinkage void secondary_start_kernel(void)
*/
notify_cpu_starting(cpu);
+ smp_store_cpu_info(cpu);
+
/*
* OK, now it's safe to let the boot CPU continue. Wait for
* the CPU migration code to notice that the CPU is online
@@ -390,6 +399,10 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
int err;
unsigned int cpu, ncores = num_possible_cpus();
+ init_cpu_topology();
+
+ smp_store_cpu_info(smp_processor_id());
+
/*
* are we trying to boot more cores than exist?
*/
@@ -443,6 +456,14 @@ void arch_send_call_function_single_ipi(int cpu)
smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
}
+#ifdef CONFIG_IRQ_WORK
+void arch_irq_work_raise(void)
+{
+ if (smp_cross_call)
+ smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK);
+}
+#endif
+
static const char *ipi_types[NR_IPI] = {
#define S(x,s) [x - IPI_RESCHEDULE] = s
S(IPI_RESCHEDULE, "Rescheduling interrupts"),
@@ -450,6 +471,7 @@ static const char *ipi_types[NR_IPI] = {
S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"),
S(IPI_CPU_STOP, "CPU stop interrupts"),
S(IPI_TIMER, "Timer broadcast interrupts"),
+ S(IPI_IRQ_WORK, "IRQ work interrupts"),
};
void show_ipi_list(struct seq_file *p, int prec)
@@ -542,6 +564,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
break;
#endif
+#ifdef CONFIG_IRQ_WORK
+ case IPI_IRQ_WORK:
+ irq_enter();
+ irq_work_run();
+ irq_exit();
+ break;
+#endif
+
default:
pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
break;
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index 44c22805d2e2..0347d38eea29 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -30,7 +30,6 @@ extern void secondary_holding_pen(void);
volatile unsigned long secondary_holding_pen_release = INVALID_HWID;
static phys_addr_t cpu_release_addr[NR_CPUS];
-static DEFINE_RAW_SPINLOCK(boot_lock);
/*
* Write secondary_holding_pen_release in a way that is guaranteed to be
@@ -94,14 +93,6 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu)
static int smp_spin_table_cpu_boot(unsigned int cpu)
{
- unsigned long timeout;
-
- /*
- * Set synchronisation state between this boot processor
- * and the secondary one
- */
- raw_spin_lock(&boot_lock);
-
/*
* Update the pen release flag.
*/
@@ -112,34 +103,7 @@ static int smp_spin_table_cpu_boot(unsigned int cpu)
*/
sev();
- timeout = jiffies + (1 * HZ);
- while (time_before(jiffies, timeout)) {
- if (secondary_holding_pen_release == INVALID_HWID)
- break;
- udelay(10);
- }
-
- /*
- * Now the secondary core is starting up let it run its
- * calibrations, then wait for it to finish
- */
- raw_spin_unlock(&boot_lock);
-
- return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0;
-}
-
-void smp_spin_table_cpu_postboot(void)
-{
- /*
- * Let the primary processor know we're out of the pen.
- */
- write_pen_release(INVALID_HWID);
-
- /*
- * Synchronise with the boot thread.
- */
- raw_spin_lock(&boot_lock);
- raw_spin_unlock(&boot_lock);
+ return 0;
}
const struct cpu_operations smp_spin_table_ops = {
@@ -147,5 +111,4 @@ const struct cpu_operations smp_spin_table_ops = {
.cpu_init = smp_spin_table_cpu_init,
.cpu_prepare = smp_spin_table_cpu_prepare,
.cpu_boot = smp_spin_table_cpu_boot,
- .cpu_postboot = smp_spin_table_cpu_postboot,
};
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 38f0558f0c0a..55437ba1f5a4 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -35,7 +35,7 @@
* ldp x29, x30, [sp]
* add sp, sp, #0x10
*/
-int unwind_frame(struct stackframe *frame)
+int notrace unwind_frame(struct stackframe *frame)
{
unsigned long high, low;
unsigned long fp = frame->fp;
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
new file mode 100644
index 000000000000..a7328373e613
--- /dev/null
+++ b/arch/arm64/kernel/topology.c
@@ -0,0 +1,558 @@
+/*
+ * arch/arm64/kernel/topology.c
+ *
+ * Copyright (C) 2011,2013,2014 Linaro Limited.
+ *
+ * Based on the arm32 version written by Vincent Guittot in turn based on
+ * arch/sh/kernel/topology.c
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/node.h>
+#include <linux/nodemask.h>
+#include <linux/of.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <asm/topology.h>
+
+/*
+ * cpu power table
+ * This per cpu data structure describes the relative capacity of each core.
+ * On a heteregenous system, cores don't have the same computation capacity
+ * and we reflect that difference in the cpu_power field so the scheduler can
+ * take this difference into account during load balance. A per cpu structure
+ * is preferred because each CPU updates its own cpu_power field during the
+ * load balance except for idle cores. One idle core is selected to run the
+ * rebalance_domains for all idle cores and the cpu_power can be updated
+ * during this sequence.
+ */
+static DEFINE_PER_CPU(unsigned long, cpu_scale);
+
+unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
+{
+ return per_cpu(cpu_scale, cpu);
+}
+
+static void set_power_scale(unsigned int cpu, unsigned long power)
+{
+ per_cpu(cpu_scale, cpu) = power;
+}
+
+static int __init get_cpu_for_node(struct device_node *node)
+{
+ struct device_node *cpu_node;
+ int cpu;
+
+ cpu_node = of_parse_phandle(node, "cpu", 0);
+ if (!cpu_node)
+ return -1;
+
+ for_each_possible_cpu(cpu) {
+ if (of_get_cpu_node(cpu, NULL) == cpu_node) {
+ of_node_put(cpu_node);
+ return cpu;
+ }
+ }
+
+ pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name);
+
+ of_node_put(cpu_node);
+ return -1;
+}
+
+static int __init parse_core(struct device_node *core, int cluster_id,
+ int core_id)
+{
+ char name[10];
+ bool leaf = true;
+ int i = 0;
+ int cpu;
+ struct device_node *t;
+
+ do {
+ snprintf(name, sizeof(name), "thread%d", i);
+ t = of_get_child_by_name(core, name);
+ if (t) {
+ leaf = false;
+ cpu = get_cpu_for_node(t);
+ if (cpu >= 0) {
+ cpu_topology[cpu].cluster_id = cluster_id;
+ cpu_topology[cpu].core_id = core_id;
+ cpu_topology[cpu].thread_id = i;
+ } else {
+ pr_err("%s: Can't get CPU for thread\n",
+ t->full_name);
+ of_node_put(t);
+ return -EINVAL;
+ }
+ of_node_put(t);
+ }
+ i++;
+ } while (t);
+
+ cpu = get_cpu_for_node(core);
+ if (cpu >= 0) {
+ if (!leaf) {
+ pr_err("%s: Core has both threads and CPU\n",
+ core->full_name);
+ return -EINVAL;
+ }
+
+ cpu_topology[cpu].cluster_id = cluster_id;
+ cpu_topology[cpu].core_id = core_id;
+ } else if (leaf) {
+ pr_err("%s: Can't get CPU for leaf core\n", core->full_name);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int __init parse_cluster(struct device_node *cluster, int depth)
+{
+ char name[10];
+ bool leaf = true;
+ bool has_cores = false;
+ struct device_node *c;
+ static int cluster_id __initdata;
+ int core_id = 0;
+ int i, ret;
+
+ /*
+ * First check for child clusters; we currently ignore any
+ * information about the nesting of clusters and present the
+ * scheduler with a flat list of them.
+ */
+ i = 0;
+ do {
+ snprintf(name, sizeof(name), "cluster%d", i);
+ c = of_get_child_by_name(cluster, name);
+ if (c) {
+ leaf = false;
+ ret = parse_cluster(c, depth + 1);
+ of_node_put(c);
+ if (ret != 0)
+ return ret;
+ }
+ i++;
+ } while (c);
+
+ /* Now check for cores */
+ i = 0;
+ do {
+ snprintf(name, sizeof(name), "core%d", i);
+ c = of_get_child_by_name(cluster, name);
+ if (c) {
+ has_cores = true;
+
+ if (depth == 0) {
+ pr_err("%s: cpu-map children should be clusters\n",
+ c->full_name);
+ of_node_put(c);
+ return -EINVAL;
+ }
+
+ if (leaf) {
+ ret = parse_core(c, cluster_id, core_id++);
+ } else {
+ pr_err("%s: Non-leaf cluster with core %s\n",
+ cluster->full_name, name);
+ ret = -EINVAL;
+ }
+
+ of_node_put(c);
+ if (ret != 0)
+ return ret;
+ }
+ i++;
+ } while (c);
+
+ if (leaf && !has_cores)
+ pr_warn("%s: empty cluster\n", cluster->full_name);
+
+ if (leaf)
+ cluster_id++;
+
+ return 0;
+}
+
+struct cpu_efficiency {
+ const char *compatible;
+ unsigned long efficiency;
+};
+
+/*
+ * Table of relative efficiency of each processors
+ * The efficiency value must fit in 20bit and the final
+ * cpu_scale value must be in the range
+ * 0 < cpu_scale < 3*SCHED_POWER_SCALE/2
+ * in order to return at most 1 when DIV_ROUND_CLOSEST
+ * is used to compute the capacity of a CPU.
+ * Processors that are not defined in the table,
+ * use the default SCHED_POWER_SCALE value for cpu_scale.
+ */
+static const struct cpu_efficiency table_efficiency[] = {
+ { "arm,cortex-a57", 3891 },
+ { "arm,cortex-a53", 2048 },
+ { NULL, },
+};
+
+static unsigned long *__cpu_capacity;
+#define cpu_capacity(cpu) __cpu_capacity[cpu]
+
+static unsigned long middle_capacity = 1;
+
+/*
+ * Iterate all CPUs' descriptor in DT and compute the efficiency
+ * (as per table_efficiency). Also calculate a middle efficiency
+ * as close as possible to (max{eff_i} - min{eff_i}) / 2
+ * This is later used to scale the cpu_power field such that an
+ * 'average' CPU is of middle power. Also see the comments near
+ * table_efficiency[] and update_cpu_power().
+ */
+static int __init parse_dt_topology(void)
+{
+ struct device_node *cn, *map;
+ int ret = 0;
+ int cpu;
+
+ cn = of_find_node_by_path("/cpus");
+ if (!cn) {
+ pr_err("No CPU information found in DT\n");
+ return 0;
+ }
+
+ /*
+ * When topology is provided cpu-map is essentially a root
+ * cluster with restricted subnodes.
+ */
+ map = of_get_child_by_name(cn, "cpu-map");
+ if (!map)
+ goto out;
+
+ ret = parse_cluster(map, 0);
+ if (ret != 0)
+ goto out_map;
+
+ /*
+ * Check that all cores are in the topology; the SMP code will
+ * only mark cores described in the DT as possible.
+ */
+ for_each_possible_cpu(cpu) {
+ if (cpu_topology[cpu].cluster_id == -1) {
+ pr_err("CPU%d: No topology information specified\n",
+ cpu);
+ ret = -EINVAL;
+ }
+ }
+
+out_map:
+ of_node_put(map);
+out:
+ of_node_put(cn);
+ return ret;
+}
+
+static void __init parse_dt_cpu_power(void)
+{
+ const struct cpu_efficiency *cpu_eff;
+ struct device_node *cn;
+ unsigned long min_capacity = ULONG_MAX;
+ unsigned long max_capacity = 0;
+ unsigned long capacity = 0;
+ int cpu;
+
+ __cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity),
+ GFP_NOWAIT);
+
+ for_each_possible_cpu(cpu) {
+ const u32 *rate;
+ int len;
+
+ /* Too early to use cpu->of_node */
+ cn = of_get_cpu_node(cpu, NULL);
+ if (!cn) {
+ pr_err("Missing device node for CPU %d\n", cpu);
+ continue;
+ }
+
+ for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++)
+ if (of_device_is_compatible(cn, cpu_eff->compatible))
+ break;
+
+ if (cpu_eff->compatible == NULL) {
+ pr_warn("%s: Unknown CPU type\n", cn->full_name);
+ continue;
+ }
+
+ rate = of_get_property(cn, "clock-frequency", &len);
+ if (!rate || len != 4) {
+ pr_err("%s: Missing clock-frequency property\n",
+ cn->full_name);
+ continue;
+ }
+
+ capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency;
+
+ /* Save min capacity of the system */
+ if (capacity < min_capacity)
+ min_capacity = capacity;
+
+ /* Save max capacity of the system */
+ if (capacity > max_capacity)
+ max_capacity = capacity;
+
+ cpu_capacity(cpu) = capacity;
+ }
+
+ /* If min and max capacities are equal we bypass the update of the
+ * cpu_scale because all CPUs have the same capacity. Otherwise, we
+ * compute a middle_capacity factor that will ensure that the capacity
+ * of an 'average' CPU of the system will be as close as possible to
+ * SCHED_POWER_SCALE, which is the default value, but with the
+ * constraint explained near table_efficiency[].
+ */
+ if (min_capacity == max_capacity)
+ return;
+ else if (4 * max_capacity < (3 * (max_capacity + min_capacity)))
+ middle_capacity = (min_capacity + max_capacity)
+ >> (SCHED_POWER_SHIFT+1);
+ else
+ middle_capacity = ((max_capacity / 3)
+ >> (SCHED_POWER_SHIFT-1)) + 1;
+}
+
+/*
+ * Look for a customed capacity of a CPU in the cpu_topo_data table during the
+ * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the
+ * function returns directly for SMP system.
+ */
+static void update_cpu_power(unsigned int cpu)
+{
+ if (!cpu_capacity(cpu))
+ return;
+
+ set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity);
+
+ pr_info("CPU%u: update cpu_power %lu\n",
+ cpu, arch_scale_freq_power(NULL, cpu));
+}
+
+/*
+ * cpu topology table
+ */
+struct cpu_topology cpu_topology[NR_CPUS];
+EXPORT_SYMBOL_GPL(cpu_topology);
+
+const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+ return &cpu_topology[cpu].core_sibling;
+}
+
+static void update_siblings_masks(unsigned int cpuid)
+{
+ struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
+ int cpu;
+
+ if (cpuid_topo->cluster_id == -1) {
+ /*
+ * DT does not contain topology information for this cpu.
+ */
+ pr_debug("CPU%u: No topology information configured\n", cpuid);
+ return;
+ }
+
+ /* update core and thread sibling masks */
+ for_each_possible_cpu(cpu) {
+ cpu_topo = &cpu_topology[cpu];
+
+ if (cpuid_topo->cluster_id != cpu_topo->cluster_id)
+ continue;
+
+ cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
+ if (cpu != cpuid)
+ cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
+
+ if (cpuid_topo->core_id != cpu_topo->core_id)
+ continue;
+
+ cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
+ if (cpu != cpuid)
+ cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
+ }
+}
+
+void store_cpu_topology(unsigned int cpuid)
+{
+ update_siblings_masks(cpuid);
+ update_cpu_power(cpuid);
+}
+
+#ifdef CONFIG_SCHED_HMP
+
+/*
+ * Retrieve logical cpu index corresponding to a given MPIDR[23:0]
+ * - mpidr: MPIDR[23:0] to be used for the look-up
+ *
+ * Returns the cpu logical index or -EINVAL on look-up error
+ */
+static inline int get_logical_index(u32 mpidr)
+{
+ int cpu;
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+ if (cpu_logical_map(cpu) == mpidr)
+ return cpu;
+ return -EINVAL;
+}
+
+static const char * const little_cores[] = {
+ "arm,cortex-a53",
+ NULL,
+};
+
+static bool is_little_cpu(struct device_node *cn)
+{
+ const char * const *lc;
+ for (lc = little_cores; *lc; lc++)
+ if (of_device_is_compatible(cn, *lc))
+ return true;
+ return false;
+}
+
+void __init arch_get_fast_and_slow_cpus(struct cpumask *fast,
+ struct cpumask *slow)
+{
+ struct device_node *cn = NULL;
+ int cpu;
+
+ cpumask_clear(fast);
+ cpumask_clear(slow);
+
+ /*
+ * Use the config options if they are given. This helps testing
+ * HMP scheduling on systems without a big.LITTLE architecture.
+ */
+ if (strlen(CONFIG_HMP_FAST_CPU_MASK) && strlen(CONFIG_HMP_SLOW_CPU_MASK)) {
+ if (cpulist_parse(CONFIG_HMP_FAST_CPU_MASK, fast))
+ WARN(1, "Failed to parse HMP fast cpu mask!\n");
+ if (cpulist_parse(CONFIG_HMP_SLOW_CPU_MASK, slow))
+ WARN(1, "Failed to parse HMP slow cpu mask!\n");
+ return;
+ }
+
+ /*
+ * Else, parse device tree for little cores.
+ */
+ while ((cn = of_find_node_by_type(cn, "cpu"))) {
+
+ const u32 *mpidr;
+ int len;
+
+ mpidr = of_get_property(cn, "reg", &len);
+ if (!mpidr || len != 8) {
+ pr_err("%s missing reg property\n", cn->full_name);
+ continue;
+ }
+
+ cpu = get_logical_index(be32_to_cpup(mpidr+1));
+ if (cpu == -EINVAL) {
+ pr_err("couldn't get logical index for mpidr %x\n",
+ be32_to_cpup(mpidr+1));
+ break;
+ }
+
+ if (is_little_cpu(cn))
+ cpumask_set_cpu(cpu, slow);
+ else
+ cpumask_set_cpu(cpu, fast);
+ }
+
+ if (!cpumask_empty(fast) && !cpumask_empty(slow))
+ return;
+
+ /*
+ * We didn't find both big and little cores so let's call all cores
+ * fast as this will keep the system running, with all cores being
+ * treated equal.
+ */
+ cpumask_setall(fast);
+ cpumask_clear(slow);
+}
+
+struct cpumask hmp_slow_cpu_mask;
+
+void __init arch_get_hmp_domains(struct list_head *hmp_domains_list)
+{
+ struct cpumask hmp_fast_cpu_mask;
+ struct hmp_domain *domain;
+
+ arch_get_fast_and_slow_cpus(&hmp_fast_cpu_mask, &hmp_slow_cpu_mask);
+
+ /*
+ * Initialize hmp_domains
+ * Must be ordered with respect to compute capacity.
+ * Fastest domain at head of list.
+ */
+ if(!cpumask_empty(&hmp_slow_cpu_mask)) {
+ domain = (struct hmp_domain *)
+ kmalloc(sizeof(struct hmp_domain), GFP_KERNEL);
+ cpumask_copy(&domain->possible_cpus, &hmp_slow_cpu_mask);
+ cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus);
+ list_add(&domain->hmp_domains, hmp_domains_list);
+ }
+ domain = (struct hmp_domain *)
+ kmalloc(sizeof(struct hmp_domain), GFP_KERNEL);
+ cpumask_copy(&domain->possible_cpus, &hmp_fast_cpu_mask);
+ cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus);
+ list_add(&domain->hmp_domains, hmp_domains_list);
+}
+#endif /* CONFIG_SCHED_HMP */
+
+static void __init reset_cpu_topology(void)
+{
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct cpu_topology *cpu_topo = &cpu_topology[cpu];
+
+ cpu_topo->thread_id = -1;
+ cpu_topo->core_id = 0;
+ cpu_topo->cluster_id = -1;
+
+ cpumask_clear(&cpu_topo->core_sibling);
+ cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
+ cpumask_clear(&cpu_topo->thread_sibling);
+ cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
+ }
+}
+
+static void __init reset_cpu_power(void)
+{
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu)
+ set_power_scale(cpu, SCHED_POWER_SCALE);
+}
+
+void __init init_cpu_topology(void)
+{
+ reset_cpu_topology();
+
+ /*
+ * Discard anything that was parsed if we hit an error so we
+ * don't use partial information.
+ */
+ if (parse_dt_topology())
+ reset_cpu_topology();
+
+ reset_cpu_power();
+ parse_dt_cpu_power();
+}
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index a7149cae1615..7931d6916683 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -156,11 +156,12 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp)
{
struct mm_struct *mm = current->mm;
- unsigned long vdso_base, vdso_mapping_len;
+ unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
int ret;
+ vdso_text_len = vdso_pages << PAGE_SHIFT;
/* Be sure to map the data page */
- vdso_mapping_len = (vdso_pages + 1) << PAGE_SHIFT;
+ vdso_mapping_len = vdso_text_len + PAGE_SIZE;
down_write(&mm->mmap_sem);
vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
@@ -170,35 +171,52 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
}
mm->context.vdso = (void *)vdso_base;
- ret = install_special_mapping(mm, vdso_base, vdso_mapping_len,
+ ret = install_special_mapping(mm, vdso_base, vdso_text_len,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
vdso_pagelist);
- if (ret) {
- mm->context.vdso = NULL;
+ if (ret)
+ goto up_fail;
+
+ vdso_base += vdso_text_len;
+ ret = install_special_mapping(mm, vdso_base, PAGE_SIZE,
+ VM_READ|VM_MAYREAD,
+ vdso_pagelist + vdso_pages);
+ if (ret)
goto up_fail;
- }
-up_fail:
up_write(&mm->mmap_sem);
+ return 0;
+up_fail:
+ mm->context.vdso = NULL;
+ up_write(&mm->mmap_sem);
return ret;
}
const char *arch_vma_name(struct vm_area_struct *vma)
{
+ unsigned long vdso_text;
+
+ if (!vma->vm_mm)
+ return NULL;
+
+ vdso_text = (unsigned long)vma->vm_mm->context.vdso;
+
/*
* We can re-use the vdso pointer in mm_context_t for identifying
* the vectors page for compat applications. The vDSO will always
* sit above TASK_UNMAPPED_BASE and so we don't need to worry about
* it conflicting with the vectors base.
*/
- if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) {
+ if (vma->vm_start == vdso_text) {
#ifdef CONFIG_COMPAT
if (vma->vm_start == AARCH32_VECTORS_BASE)
return "[vectors]";
#endif
return "[vdso]";
+ } else if (vma->vm_start == (vdso_text + (vdso_pages << PAGE_SHIFT))) {
+ return "[vvar]";
}
return NULL;
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index 6d20b7d162d8..84b942612051 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -47,9 +47,9 @@ $(obj-vdso): %.o: %.S
$(call if_changed_dep,vdsoas)
# Actual build commands
-quiet_cmd_vdsold = VDSOL $@
+quiet_cmd_vdsold = VDSOL $@
cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@
-quiet_cmd_vdsoas = VDSOA $@
+quiet_cmd_vdsoas = VDSOA $@
cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $<
# Install commands for the unstripped file
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 4ba7a55b49c7..51258bc4cb08 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -104,6 +104,13 @@ SECTIONS
_edata = .;
BSS_SECTION(0, 0, 0)
+
+ . = ALIGN(PAGE_SIZE);
+ idmap_pg_dir = .;
+ . += IDMAP_DIR_SIZE;
+ swapper_pg_dir = .;
+ . += SWAPPER_DIR_SIZE;
+
_end = .;
STABS_DEBUG
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 72a9fd583ad3..32a096174b94 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -20,4 +20,8 @@ kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o
kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 08745578d54d..76794692c20b 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -136,13 +136,67 @@ static unsigned long num_core_regs(void)
}
/**
+ * ARM64 versions of the TIMER registers, always available on arm64
+ */
+
+#define NUM_TIMER_REGS 3
+
+static bool is_timer_reg(u64 index)
+{
+ switch (index) {
+ case KVM_REG_ARM_TIMER_CTL:
+ case KVM_REG_ARM_TIMER_CNT:
+ case KVM_REG_ARM_TIMER_CVAL:
+ return true;
+ }
+ return false;
+}
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+ if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
+ return -EFAULT;
+ uindices++;
+ if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
+ return -EFAULT;
+ uindices++;
+ if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ void __user *uaddr = (void __user *)(long)reg->addr;
+ u64 val;
+ int ret;
+
+ ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
+ if (ret != 0)
+ return -EFAULT;
+
+ return kvm_arm_timer_set_reg(vcpu, reg->id, val);
+}
+
+static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ void __user *uaddr = (void __user *)(long)reg->addr;
+ u64 val;
+
+ val = kvm_arm_timer_get_reg(vcpu, reg->id);
+ return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+}
+
+/**
* kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG
*
* This is for all registers.
*/
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
{
- return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu);
+ return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu)
+ + NUM_TIMER_REGS;
}
/**
@@ -154,6 +208,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
{
unsigned int i;
const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE;
+ int ret;
for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) {
if (put_user(core_reg | i, uindices))
@@ -161,6 +216,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
uindices++;
}
+ ret = copy_timer_indices(vcpu, uindices);
+ if (ret)
+ return ret;
+ uindices += NUM_TIMER_REGS;
+
return kvm_arm_copy_sys_reg_indices(vcpu, uindices);
}
@@ -174,6 +234,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
return get_core_reg(vcpu, reg);
+ if (is_timer_reg(reg->id))
+ return get_timer_reg(vcpu, reg);
+
return kvm_arm_sys_reg_get_reg(vcpu, reg);
}
@@ -187,6 +250,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
return set_core_reg(vcpu, reg);
+ if (is_timer_reg(reg->id))
+ return set_timer_reg(vcpu, reg);
+
return kvm_arm_sys_reg_set_reg(vcpu, reg);
}
@@ -214,6 +280,8 @@ int __attribute_const__ kvm_target_cpu(void)
return KVM_ARM_TARGET_AEM_V8;
case ARM_CPU_PART_FOUNDATION:
return KVM_ARM_TARGET_FOUNDATION_V8;
+ case ARM_CPU_PART_CORTEX_A53:
+ return KVM_ARM_TARGET_CORTEX_A53;
case ARM_CPU_PART_CORTEX_A57:
return KVM_ARM_TARGET_CORTEX_A57;
};
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index fd9aeba99683..34b8bd0711e9 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -30,11 +30,15 @@ typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- if (kvm_psci_call(vcpu))
+ int ret;
+
+ ret = kvm_psci_call(vcpu);
+ if (ret < 0) {
+ kvm_inject_undefined(vcpu);
return 1;
+ }
- kvm_inject_undefined(vcpu);
- return 1;
+ return ret;
}
static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
@@ -71,9 +75,9 @@ static exit_handle_fn arm_exit_handlers[] = {
[ESR_EL2_EC_WFI] = kvm_handle_wfx,
[ESR_EL2_EC_CP15_32] = kvm_handle_cp15_32,
[ESR_EL2_EC_CP15_64] = kvm_handle_cp15_64,
- [ESR_EL2_EC_CP14_MR] = kvm_handle_cp14_access,
+ [ESR_EL2_EC_CP14_MR] = kvm_handle_cp14_32,
[ESR_EL2_EC_CP14_LS] = kvm_handle_cp14_load_store,
- [ESR_EL2_EC_CP14_64] = kvm_handle_cp14_access,
+ [ESR_EL2_EC_CP14_64] = kvm_handle_cp14_64,
[ESR_EL2_EC_HVC32] = handle_hvc,
[ESR_EL2_EC_SMC32] = handle_smc,
[ESR_EL2_EC_HVC64] = handle_hvc,
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 12e26f358c31..c3191168a994 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -68,6 +68,12 @@ __do_hyp_init:
msr tcr_el2, x4
ldr x4, =VTCR_EL2_FLAGS
+ /*
+ * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in
+ * VTCR_EL2.
+ */
+ mrs x5, ID_AA64MMFR0_EL1
+ bfi x4, x5, #16, #3
msr vtcr_el2, x4
mrs x4, mair_el1
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 2c56012cb2d2..b72aa9f9215c 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -16,11 +16,11 @@
*/
#include <linux/linkage.h>
-#include <linux/irqchip/arm-gic.h>
#include <asm/assembler.h>
#include <asm/memory.h>
#include <asm/asm-offsets.h>
+#include <asm/debug-monitors.h>
#include <asm/fpsimdmacros.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
@@ -36,9 +36,6 @@
.pushsection .hyp.text, "ax"
.align PAGE_SHIFT
-__kvm_hyp_code_start:
- .globl __kvm_hyp_code_start
-
.macro save_common_regs
// x2: base address for cpu context
// x3: tmp register
@@ -215,6 +212,7 @@ __kvm_hyp_code_start:
mrs x22, amair_el1
mrs x23, cntkctl_el1
mrs x24, par_el1
+ mrs x25, mdscr_el1
stp x4, x5, [x3]
stp x6, x7, [x3, #16]
@@ -226,7 +224,202 @@ __kvm_hyp_code_start:
stp x18, x19, [x3, #112]
stp x20, x21, [x3, #128]
stp x22, x23, [x3, #144]
- str x24, [x3, #160]
+ stp x24, x25, [x3, #160]
+.endm
+
+.macro save_debug
+ // x2: base address for cpu context
+ // x3: tmp register
+
+ mrs x26, id_aa64dfr0_el1
+ ubfx x24, x26, #12, #4 // Extract BRPs
+ ubfx x25, x26, #20, #4 // Extract WRPs
+ mov w26, #15
+ sub w24, w26, w24 // How many BPs to skip
+ sub w25, w26, w25 // How many WPs to skip
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+1:
+ mrs x20, dbgbcr15_el1
+ mrs x19, dbgbcr14_el1
+ mrs x18, dbgbcr13_el1
+ mrs x17, dbgbcr12_el1
+ mrs x16, dbgbcr11_el1
+ mrs x15, dbgbcr10_el1
+ mrs x14, dbgbcr9_el1
+ mrs x13, dbgbcr8_el1
+ mrs x12, dbgbcr7_el1
+ mrs x11, dbgbcr6_el1
+ mrs x10, dbgbcr5_el1
+ mrs x9, dbgbcr4_el1
+ mrs x8, dbgbcr3_el1
+ mrs x7, dbgbcr2_el1
+ mrs x6, dbgbcr1_el1
+ mrs x5, dbgbcr0_el1
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+
+1:
+ str x20, [x3, #(15 * 8)]
+ str x19, [x3, #(14 * 8)]
+ str x18, [x3, #(13 * 8)]
+ str x17, [x3, #(12 * 8)]
+ str x16, [x3, #(11 * 8)]
+ str x15, [x3, #(10 * 8)]
+ str x14, [x3, #(9 * 8)]
+ str x13, [x3, #(8 * 8)]
+ str x12, [x3, #(7 * 8)]
+ str x11, [x3, #(6 * 8)]
+ str x10, [x3, #(5 * 8)]
+ str x9, [x3, #(4 * 8)]
+ str x8, [x3, #(3 * 8)]
+ str x7, [x3, #(2 * 8)]
+ str x6, [x3, #(1 * 8)]
+ str x5, [x3, #(0 * 8)]
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+1:
+ mrs x20, dbgbvr15_el1
+ mrs x19, dbgbvr14_el1
+ mrs x18, dbgbvr13_el1
+ mrs x17, dbgbvr12_el1
+ mrs x16, dbgbvr11_el1
+ mrs x15, dbgbvr10_el1
+ mrs x14, dbgbvr9_el1
+ mrs x13, dbgbvr8_el1
+ mrs x12, dbgbvr7_el1
+ mrs x11, dbgbvr6_el1
+ mrs x10, dbgbvr5_el1
+ mrs x9, dbgbvr4_el1
+ mrs x8, dbgbvr3_el1
+ mrs x7, dbgbvr2_el1
+ mrs x6, dbgbvr1_el1
+ mrs x5, dbgbvr0_el1
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+
+1:
+ str x20, [x3, #(15 * 8)]
+ str x19, [x3, #(14 * 8)]
+ str x18, [x3, #(13 * 8)]
+ str x17, [x3, #(12 * 8)]
+ str x16, [x3, #(11 * 8)]
+ str x15, [x3, #(10 * 8)]
+ str x14, [x3, #(9 * 8)]
+ str x13, [x3, #(8 * 8)]
+ str x12, [x3, #(7 * 8)]
+ str x11, [x3, #(6 * 8)]
+ str x10, [x3, #(5 * 8)]
+ str x9, [x3, #(4 * 8)]
+ str x8, [x3, #(3 * 8)]
+ str x7, [x3, #(2 * 8)]
+ str x6, [x3, #(1 * 8)]
+ str x5, [x3, #(0 * 8)]
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+1:
+ mrs x20, dbgwcr15_el1
+ mrs x19, dbgwcr14_el1
+ mrs x18, dbgwcr13_el1
+ mrs x17, dbgwcr12_el1
+ mrs x16, dbgwcr11_el1
+ mrs x15, dbgwcr10_el1
+ mrs x14, dbgwcr9_el1
+ mrs x13, dbgwcr8_el1
+ mrs x12, dbgwcr7_el1
+ mrs x11, dbgwcr6_el1
+ mrs x10, dbgwcr5_el1
+ mrs x9, dbgwcr4_el1
+ mrs x8, dbgwcr3_el1
+ mrs x7, dbgwcr2_el1
+ mrs x6, dbgwcr1_el1
+ mrs x5, dbgwcr0_el1
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+
+1:
+ str x20, [x3, #(15 * 8)]
+ str x19, [x3, #(14 * 8)]
+ str x18, [x3, #(13 * 8)]
+ str x17, [x3, #(12 * 8)]
+ str x16, [x3, #(11 * 8)]
+ str x15, [x3, #(10 * 8)]
+ str x14, [x3, #(9 * 8)]
+ str x13, [x3, #(8 * 8)]
+ str x12, [x3, #(7 * 8)]
+ str x11, [x3, #(6 * 8)]
+ str x10, [x3, #(5 * 8)]
+ str x9, [x3, #(4 * 8)]
+ str x8, [x3, #(3 * 8)]
+ str x7, [x3, #(2 * 8)]
+ str x6, [x3, #(1 * 8)]
+ str x5, [x3, #(0 * 8)]
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+1:
+ mrs x20, dbgwvr15_el1
+ mrs x19, dbgwvr14_el1
+ mrs x18, dbgwvr13_el1
+ mrs x17, dbgwvr12_el1
+ mrs x16, dbgwvr11_el1
+ mrs x15, dbgwvr10_el1
+ mrs x14, dbgwvr9_el1
+ mrs x13, dbgwvr8_el1
+ mrs x12, dbgwvr7_el1
+ mrs x11, dbgwvr6_el1
+ mrs x10, dbgwvr5_el1
+ mrs x9, dbgwvr4_el1
+ mrs x8, dbgwvr3_el1
+ mrs x7, dbgwvr2_el1
+ mrs x6, dbgwvr1_el1
+ mrs x5, dbgwvr0_el1
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+
+1:
+ str x20, [x3, #(15 * 8)]
+ str x19, [x3, #(14 * 8)]
+ str x18, [x3, #(13 * 8)]
+ str x17, [x3, #(12 * 8)]
+ str x16, [x3, #(11 * 8)]
+ str x15, [x3, #(10 * 8)]
+ str x14, [x3, #(9 * 8)]
+ str x13, [x3, #(8 * 8)]
+ str x12, [x3, #(7 * 8)]
+ str x11, [x3, #(6 * 8)]
+ str x10, [x3, #(5 * 8)]
+ str x9, [x3, #(4 * 8)]
+ str x8, [x3, #(3 * 8)]
+ str x7, [x3, #(2 * 8)]
+ str x6, [x3, #(1 * 8)]
+ str x5, [x3, #(0 * 8)]
+
+ mrs x21, mdccint_el1
+ str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
.endm
.macro restore_sysregs
@@ -245,7 +438,7 @@ __kvm_hyp_code_start:
ldp x18, x19, [x3, #112]
ldp x20, x21, [x3, #128]
ldp x22, x23, [x3, #144]
- ldr x24, [x3, #160]
+ ldp x24, x25, [x3, #160]
msr vmpidr_el2, x4
msr csselr_el1, x5
@@ -268,6 +461,198 @@ __kvm_hyp_code_start:
msr amair_el1, x22
msr cntkctl_el1, x23
msr par_el1, x24
+ msr mdscr_el1, x25
+.endm
+
+.macro restore_debug
+ // x2: base address for cpu context
+ // x3: tmp register
+
+ mrs x26, id_aa64dfr0_el1
+ ubfx x24, x26, #12, #4 // Extract BRPs
+ ubfx x25, x26, #20, #4 // Extract WRPs
+ mov w26, #15
+ sub w24, w26, w24 // How many BPs to skip
+ sub w25, w26, w25 // How many WPs to skip
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+1:
+ ldr x20, [x3, #(15 * 8)]
+ ldr x19, [x3, #(14 * 8)]
+ ldr x18, [x3, #(13 * 8)]
+ ldr x17, [x3, #(12 * 8)]
+ ldr x16, [x3, #(11 * 8)]
+ ldr x15, [x3, #(10 * 8)]
+ ldr x14, [x3, #(9 * 8)]
+ ldr x13, [x3, #(8 * 8)]
+ ldr x12, [x3, #(7 * 8)]
+ ldr x11, [x3, #(6 * 8)]
+ ldr x10, [x3, #(5 * 8)]
+ ldr x9, [x3, #(4 * 8)]
+ ldr x8, [x3, #(3 * 8)]
+ ldr x7, [x3, #(2 * 8)]
+ ldr x6, [x3, #(1 * 8)]
+ ldr x5, [x3, #(0 * 8)]
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+1:
+ msr dbgbcr15_el1, x20
+ msr dbgbcr14_el1, x19
+ msr dbgbcr13_el1, x18
+ msr dbgbcr12_el1, x17
+ msr dbgbcr11_el1, x16
+ msr dbgbcr10_el1, x15
+ msr dbgbcr9_el1, x14
+ msr dbgbcr8_el1, x13
+ msr dbgbcr7_el1, x12
+ msr dbgbcr6_el1, x11
+ msr dbgbcr5_el1, x10
+ msr dbgbcr4_el1, x9
+ msr dbgbcr3_el1, x8
+ msr dbgbcr2_el1, x7
+ msr dbgbcr1_el1, x6
+ msr dbgbcr0_el1, x5
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+1:
+ ldr x20, [x3, #(15 * 8)]
+ ldr x19, [x3, #(14 * 8)]
+ ldr x18, [x3, #(13 * 8)]
+ ldr x17, [x3, #(12 * 8)]
+ ldr x16, [x3, #(11 * 8)]
+ ldr x15, [x3, #(10 * 8)]
+ ldr x14, [x3, #(9 * 8)]
+ ldr x13, [x3, #(8 * 8)]
+ ldr x12, [x3, #(7 * 8)]
+ ldr x11, [x3, #(6 * 8)]
+ ldr x10, [x3, #(5 * 8)]
+ ldr x9, [x3, #(4 * 8)]
+ ldr x8, [x3, #(3 * 8)]
+ ldr x7, [x3, #(2 * 8)]
+ ldr x6, [x3, #(1 * 8)]
+ ldr x5, [x3, #(0 * 8)]
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+1:
+ msr dbgbvr15_el1, x20
+ msr dbgbvr14_el1, x19
+ msr dbgbvr13_el1, x18
+ msr dbgbvr12_el1, x17
+ msr dbgbvr11_el1, x16
+ msr dbgbvr10_el1, x15
+ msr dbgbvr9_el1, x14
+ msr dbgbvr8_el1, x13
+ msr dbgbvr7_el1, x12
+ msr dbgbvr6_el1, x11
+ msr dbgbvr5_el1, x10
+ msr dbgbvr4_el1, x9
+ msr dbgbvr3_el1, x8
+ msr dbgbvr2_el1, x7
+ msr dbgbvr1_el1, x6
+ msr dbgbvr0_el1, x5
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+1:
+ ldr x20, [x3, #(15 * 8)]
+ ldr x19, [x3, #(14 * 8)]
+ ldr x18, [x3, #(13 * 8)]
+ ldr x17, [x3, #(12 * 8)]
+ ldr x16, [x3, #(11 * 8)]
+ ldr x15, [x3, #(10 * 8)]
+ ldr x14, [x3, #(9 * 8)]
+ ldr x13, [x3, #(8 * 8)]
+ ldr x12, [x3, #(7 * 8)]
+ ldr x11, [x3, #(6 * 8)]
+ ldr x10, [x3, #(5 * 8)]
+ ldr x9, [x3, #(4 * 8)]
+ ldr x8, [x3, #(3 * 8)]
+ ldr x7, [x3, #(2 * 8)]
+ ldr x6, [x3, #(1 * 8)]
+ ldr x5, [x3, #(0 * 8)]
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+1:
+ msr dbgwcr15_el1, x20
+ msr dbgwcr14_el1, x19
+ msr dbgwcr13_el1, x18
+ msr dbgwcr12_el1, x17
+ msr dbgwcr11_el1, x16
+ msr dbgwcr10_el1, x15
+ msr dbgwcr9_el1, x14
+ msr dbgwcr8_el1, x13
+ msr dbgwcr7_el1, x12
+ msr dbgwcr6_el1, x11
+ msr dbgwcr5_el1, x10
+ msr dbgwcr4_el1, x9
+ msr dbgwcr3_el1, x8
+ msr dbgwcr2_el1, x7
+ msr dbgwcr1_el1, x6
+ msr dbgwcr0_el1, x5
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+1:
+ ldr x20, [x3, #(15 * 8)]
+ ldr x19, [x3, #(14 * 8)]
+ ldr x18, [x3, #(13 * 8)]
+ ldr x17, [x3, #(12 * 8)]
+ ldr x16, [x3, #(11 * 8)]
+ ldr x15, [x3, #(10 * 8)]
+ ldr x14, [x3, #(9 * 8)]
+ ldr x13, [x3, #(8 * 8)]
+ ldr x12, [x3, #(7 * 8)]
+ ldr x11, [x3, #(6 * 8)]
+ ldr x10, [x3, #(5 * 8)]
+ ldr x9, [x3, #(4 * 8)]
+ ldr x8, [x3, #(3 * 8)]
+ ldr x7, [x3, #(2 * 8)]
+ ldr x6, [x3, #(1 * 8)]
+ ldr x5, [x3, #(0 * 8)]
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+1:
+ msr dbgwvr15_el1, x20
+ msr dbgwvr14_el1, x19
+ msr dbgwvr13_el1, x18
+ msr dbgwvr12_el1, x17
+ msr dbgwvr11_el1, x16
+ msr dbgwvr10_el1, x15
+ msr dbgwvr9_el1, x14
+ msr dbgwvr8_el1, x13
+ msr dbgwvr7_el1, x12
+ msr dbgwvr6_el1, x11
+ msr dbgwvr5_el1, x10
+ msr dbgwvr4_el1, x9
+ msr dbgwvr3_el1, x8
+ msr dbgwvr2_el1, x7
+ msr dbgwvr1_el1, x6
+ msr dbgwvr0_el1, x5
+
+ ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
+ msr mdccint_el1, x21
.endm
.macro skip_32bit_state tmp, target
@@ -282,6 +667,35 @@ __kvm_hyp_code_start:
tbz \tmp, #12, \target
.endm
+.macro skip_debug_state tmp, target
+ ldr \tmp, [x0, #VCPU_DEBUG_FLAGS]
+ tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target
+.endm
+
+.macro compute_debug_state target
+ // Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY
+ // is set, we do a full save/restore cycle and disable trapping.
+ add x25, x0, #VCPU_CONTEXT
+
+ // Check the state of MDSCR_EL1
+ ldr x25, [x25, #CPU_SYSREG_OFFSET(MDSCR_EL1)]
+ and x26, x25, #DBG_MDSCR_KDE
+ and x25, x25, #DBG_MDSCR_MDE
+ adds xzr, x25, x26
+ b.eq 9998f // Nothing to see there
+
+ // If any interesting bits was set, we must set the flag
+ mov x26, #KVM_ARM64_DEBUG_DIRTY
+ str x26, [x0, #VCPU_DEBUG_FLAGS]
+ b 9999f // Don't skip restore
+
+9998:
+ // Otherwise load the flags from memory in case we recently
+ // trapped
+ skip_debug_state x25, \target
+9999:
+.endm
+
.macro save_guest_32bit_state
skip_32bit_state x3, 1f
@@ -297,10 +711,13 @@ __kvm_hyp_code_start:
mrs x4, dacr32_el2
mrs x5, ifsr32_el2
mrs x6, fpexc32_el2
- mrs x7, dbgvcr32_el2
stp x4, x5, [x3]
- stp x6, x7, [x3, #16]
+ str x6, [x3, #16]
+ skip_debug_state x8, 2f
+ mrs x7, dbgvcr32_el2
+ str x7, [x3, #24]
+2:
skip_tee_state x8, 1f
add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1)
@@ -323,12 +740,15 @@ __kvm_hyp_code_start:
add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
ldp x4, x5, [x3]
- ldp x6, x7, [x3, #16]
+ ldr x6, [x3, #16]
msr dacr32_el2, x4
msr ifsr32_el2, x5
msr fpexc32_el2, x6
- msr dbgvcr32_el2, x7
+ skip_debug_state x8, 2f
+ ldr x7, [x3, #24]
+ msr dbgvcr32_el2, x7
+2:
skip_tee_state x8, 1f
add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1)
@@ -339,11 +759,8 @@ __kvm_hyp_code_start:
.endm
.macro activate_traps
- ldr x2, [x0, #VCPU_IRQ_LINES]
- ldr x1, [x0, #VCPU_HCR_EL2]
- orr x2, x2, x1
- msr hcr_el2, x2
-
+ ldr x2, [x0, #VCPU_HCR_EL2]
+ msr hcr_el2, x2
ldr x2, =(CPTR_EL2_TTA)
msr cptr_el2, x2
@@ -353,6 +770,14 @@ __kvm_hyp_code_start:
mrs x2, mdcr_el2
and x2, x2, #MDCR_EL2_HPMN_MASK
orr x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR)
+ orr x2, x2, #(MDCR_EL2_TDRA | MDCR_EL2_TDOSA)
+
+ // Check for KVM_ARM64_DEBUG_DIRTY, and set debug to trap
+ // if not dirty.
+ ldr x3, [x0, #VCPU_DEBUG_FLAGS]
+ tbnz x3, #KVM_ARM64_DEBUG_DIRTY_SHIFT, 1f
+ orr x2, x2, #MDCR_EL2_TDA
+1:
msr mdcr_el2, x2
.endm
@@ -379,100 +804,33 @@ __kvm_hyp_code_start:
.endm
/*
- * Save the VGIC CPU state into memory
- * x0: Register pointing to VCPU struct
- * Do not corrupt x1!!!
+ * Call into the vgic backend for state saving
*/
.macro save_vgic_state
- /* Get VGIC VCTRL base into x2 */
- ldr x2, [x0, #VCPU_KVM]
- kern_hyp_va x2
- ldr x2, [x2, #KVM_VGIC_VCTRL]
- kern_hyp_va x2
- cbz x2, 2f // disabled
-
- /* Compute the address of struct vgic_cpu */
- add x3, x0, #VCPU_VGIC_CPU
-
- /* Save all interesting registers */
- ldr w4, [x2, #GICH_HCR]
- ldr w5, [x2, #GICH_VMCR]
- ldr w6, [x2, #GICH_MISR]
- ldr w7, [x2, #GICH_EISR0]
- ldr w8, [x2, #GICH_EISR1]
- ldr w9, [x2, #GICH_ELRSR0]
- ldr w10, [x2, #GICH_ELRSR1]
- ldr w11, [x2, #GICH_APR]
-CPU_BE( rev w4, w4 )
-CPU_BE( rev w5, w5 )
-CPU_BE( rev w6, w6 )
-CPU_BE( rev w7, w7 )
-CPU_BE( rev w8, w8 )
-CPU_BE( rev w9, w9 )
-CPU_BE( rev w10, w10 )
-CPU_BE( rev w11, w11 )
-
- str w4, [x3, #VGIC_CPU_HCR]
- str w5, [x3, #VGIC_CPU_VMCR]
- str w6, [x3, #VGIC_CPU_MISR]
- str w7, [x3, #VGIC_CPU_EISR]
- str w8, [x3, #(VGIC_CPU_EISR + 4)]
- str w9, [x3, #VGIC_CPU_ELRSR]
- str w10, [x3, #(VGIC_CPU_ELRSR + 4)]
- str w11, [x3, #VGIC_CPU_APR]
-
- /* Clear GICH_HCR */
- str wzr, [x2, #GICH_HCR]
-
- /* Save list registers */
- add x2, x2, #GICH_LR0
- ldr w4, [x3, #VGIC_CPU_NR_LR]
- add x3, x3, #VGIC_CPU_LR
-1: ldr w5, [x2], #4
-CPU_BE( rev w5, w5 )
- str w5, [x3], #4
- sub w4, w4, #1
- cbnz w4, 1b
-2:
+ adr x24, __vgic_sr_vectors
+ ldr x24, [x24, VGIC_SAVE_FN]
+ kern_hyp_va x24
+ blr x24
+ mrs x24, hcr_el2
+ mov x25, #HCR_INT_OVERRIDE
+ neg x25, x25
+ and x24, x24, x25
+ msr hcr_el2, x24
.endm
/*
- * Restore the VGIC CPU state from memory
- * x0: Register pointing to VCPU struct
+ * Call into the vgic backend for state restoring
*/
.macro restore_vgic_state
- /* Get VGIC VCTRL base into x2 */
- ldr x2, [x0, #VCPU_KVM]
- kern_hyp_va x2
- ldr x2, [x2, #KVM_VGIC_VCTRL]
- kern_hyp_va x2
- cbz x2, 2f // disabled
-
- /* Compute the address of struct vgic_cpu */
- add x3, x0, #VCPU_VGIC_CPU
-
- /* We only restore a minimal set of registers */
- ldr w4, [x3, #VGIC_CPU_HCR]
- ldr w5, [x3, #VGIC_CPU_VMCR]
- ldr w6, [x3, #VGIC_CPU_APR]
-CPU_BE( rev w4, w4 )
-CPU_BE( rev w5, w5 )
-CPU_BE( rev w6, w6 )
-
- str w4, [x2, #GICH_HCR]
- str w5, [x2, #GICH_VMCR]
- str w6, [x2, #GICH_APR]
-
- /* Restore list registers */
- add x2, x2, #GICH_LR0
- ldr w4, [x3, #VGIC_CPU_NR_LR]
- add x3, x3, #VGIC_CPU_LR
-1: ldr w5, [x3], #4
-CPU_BE( rev w5, w5 )
- str w5, [x2], #4
- sub w4, w4, #1
- cbnz w4, 1b
-2:
+ mrs x24, hcr_el2
+ ldr x25, [x0, #VCPU_IRQ_LINES]
+ orr x24, x24, #HCR_INT_OVERRIDE
+ orr x24, x24, x25
+ msr hcr_el2, x24
+ adr x24, __vgic_sr_vectors
+ ldr x24, [x24, #VGIC_RESTORE_FN]
+ kern_hyp_va x24
+ blr x24
.endm
.macro save_timer_state
@@ -537,6 +895,14 @@ __restore_sysregs:
restore_sysregs
ret
+__save_debug:
+ save_debug
+ ret
+
+__restore_debug:
+ restore_debug
+ ret
+
__save_fpsimd:
save_fpsimd
ret
@@ -568,6 +934,9 @@ ENTRY(__kvm_vcpu_run)
bl __save_fpsimd
bl __save_sysregs
+ compute_debug_state 1f
+ bl __save_debug
+1:
activate_traps
activate_vm
@@ -579,6 +948,10 @@ ENTRY(__kvm_vcpu_run)
bl __restore_sysregs
bl __restore_fpsimd
+
+ skip_debug_state x3, 1f
+ bl __restore_debug
+1:
restore_guest_32bit_state
restore_guest_regs
@@ -595,6 +968,10 @@ __kvm_vcpu_return:
save_guest_regs
bl __save_fpsimd
bl __save_sysregs
+
+ skip_debug_state x3, 1f
+ bl __save_debug
+1:
save_guest_32bit_state
save_timer_state
@@ -609,6 +986,14 @@ __kvm_vcpu_return:
bl __restore_sysregs
bl __restore_fpsimd
+
+ skip_debug_state x3, 1f
+ // Clear the dirty flag for the next run, as all the state has
+ // already been saved. Note that we nuke the whole 64bit word.
+ // If we ever add more flags, we'll have to be more careful...
+ str xzr, [x0, #VCPU_DEBUG_FLAGS]
+ bl __restore_debug
+1:
restore_host_regs
mov x0, x1
@@ -630,9 +1015,15 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
* whole of Stage-1. Weep...
*/
tlbi ipas2e1is, x1
- dsb sy
+ /*
+ * We have to ensure completion of the invalidation at Stage-2,
+ * since a table walk on another CPU could refill a TLB with a
+ * complete (S1 + S2) walk based on the old Stage-2 mapping if
+ * the Stage-1 invalidation happened first.
+ */
+ dsb ish
tlbi vmalle1is
- dsb sy
+ dsb ish
isb
msr vttbr_el2, xzr
@@ -643,10 +1034,16 @@ ENTRY(__kvm_flush_vm_context)
dsb ishst
tlbi alle1is
ic ialluis
- dsb sy
+ dsb ish
ret
ENDPROC(__kvm_flush_vm_context)
+ // struct vgic_sr_vectors __vgi_sr_vectors;
+ .align 3
+ENTRY(__vgic_sr_vectors)
+ .skip VGIC_SR_VECTOR_SZ
+ENDPROC(__vgic_sr_vectors)
+
__kvm_hyp_panic:
// Guess the context by looking at VTTBR:
// If zero, then we're already a host.
@@ -824,7 +1221,7 @@ el1_trap:
mrs x2, far_el2
2: mrs x0, tpidr_el2
- str x1, [x0, #VCPU_ESR_EL2]
+ str w1, [x0, #VCPU_ESR_EL2]
str x2, [x0, #VCPU_FAR_EL2]
str x3, [x0, #VCPU_HPFAR_EL2]
@@ -874,7 +1271,4 @@ ENTRY(__kvm_hyp_vector)
ventry el1_error_invalid // Error 32-bit EL1
ENDPROC(__kvm_hyp_vector)
-__kvm_hyp_code_end:
- .globl __kvm_hyp_code_end
-
.popsection
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 02e9d09e1d80..4cc3b719208e 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -27,8 +27,10 @@
#include <asm/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
+#include <asm/kvm_mmu.h>
#include <asm/cacheflush.h>
#include <asm/cputype.h>
+#include <asm/debug-monitors.h>
#include <trace/events/kvm.h>
#include "sys_regs.h"
@@ -70,13 +72,13 @@ static u32 get_ccsidr(u32 csselr)
static void do_dc_cisw(u32 val)
{
asm volatile("dc cisw, %x0" : : "r" (val));
- dsb();
+ dsb(ish);
}
static void do_dc_csw(u32 val)
{
asm volatile("dc csw, %x0" : : "r" (val));
- dsb();
+ dsb(ish);
}
/* See note at ARM ARM B1.14.4 */
@@ -121,17 +123,51 @@ done:
}
/*
- * We could trap ID_DFR0 and tell the guest we don't support performance
- * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was
- * NAKed, so it will read the PMCR anyway.
- *
- * Therefore we tell the guest we have 0 counters. Unfortunately, we
- * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
- * all PM registers, which doesn't crash the guest kernel at least.
+ * Generic accessor for VM registers. Only called as long as HCR_TVM
+ * is set.
+ */
+static bool access_vm_reg(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ unsigned long val;
+
+ BUG_ON(!p->is_write);
+
+ val = *vcpu_reg(vcpu, p->Rt);
+ if (!p->is_aarch32) {
+ vcpu_sys_reg(vcpu, r->reg) = val;
+ } else {
+ if (!p->is_32bit)
+ vcpu_cp15_64_high(vcpu, r->reg) = val >> 32;
+ vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL;
+ }
+
+ return true;
+}
+
+/*
+ * SCTLR_EL1 accessor. Only called as long as HCR_TVM is set. If the
+ * guest enables the MMU, we stop trapping the VM sys_regs and leave
+ * it in complete control of the caches.
*/
-static bool pm_fake(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *p,
- const struct sys_reg_desc *r)
+static bool access_sctlr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ access_vm_reg(vcpu, p, r);
+
+ if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */
+ vcpu->arch.hcr_el2 &= ~HCR_TVM;
+ stage2_flush_vm(vcpu->kvm);
+ }
+
+ return true;
+}
+
+static bool trap_raz_wi(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
{
if (p->is_write)
return ignore_write(vcpu, p);
@@ -139,6 +175,73 @@ static bool pm_fake(struct kvm_vcpu *vcpu,
return read_zero(vcpu, p);
}
+static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ return ignore_write(vcpu, p);
+ } else {
+ *vcpu_reg(vcpu, p->Rt) = (1 << 3);
+ return true;
+ }
+}
+
+static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ return ignore_write(vcpu, p);
+ } else {
+ u32 val;
+ asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val));
+ *vcpu_reg(vcpu, p->Rt) = val;
+ return true;
+ }
+}
+
+/*
+ * We want to avoid world-switching all the DBG registers all the
+ * time:
+ *
+ * - If we've touched any debug register, it is likely that we're
+ * going to touch more of them. It then makes sense to disable the
+ * traps and start doing the save/restore dance
+ * - If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is
+ * then mandatory to save/restore the registers, as the guest
+ * depends on them.
+ *
+ * For this, we use a DIRTY bit, indicating the guest has modified the
+ * debug registers, used as follow:
+ *
+ * On guest entry:
+ * - If the dirty bit is set (because we're coming back from trapping),
+ * disable the traps, save host registers, restore guest registers.
+ * - If debug is actively in use (DBG_MDSCR_KDE or DBG_MDSCR_MDE set),
+ * set the dirty bit, disable the traps, save host registers,
+ * restore guest registers.
+ * - Otherwise, enable the traps
+ *
+ * On guest exit:
+ * - If the dirty bit is set, save guest registers, restore host
+ * registers and clear the dirty bit. This ensure that the host can
+ * now use the debug registers.
+ */
+static bool trap_debug_regs(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
+ vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+ } else {
+ *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
+ }
+
+ return true;
+}
+
static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
u64 amair;
@@ -155,9 +258,39 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff);
}
+/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
+#define DBG_BCR_BVR_WCR_WVR_EL1(n) \
+ /* DBGBVRn_EL1 */ \
+ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100), \
+ trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 }, \
+ /* DBGBCRn_EL1 */ \
+ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101), \
+ trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 }, \
+ /* DBGWVRn_EL1 */ \
+ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110), \
+ trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 }, \
+ /* DBGWCRn_EL1 */ \
+ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \
+ trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 }
+
/*
* Architected system registers.
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
+ *
+ * We could trap ID_DFR0 and tell the guest we don't support performance
+ * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was
+ * NAKed, so it will read the PMCR anyway.
+ *
+ * Therefore we tell the guest we have 0 counters. Unfortunately, we
+ * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
+ * all PM registers, which doesn't crash the guest kernel at least.
+ *
+ * Debug handling: We do trap most, if not all debug related system
+ * registers. The implementation is good enough to ensure that a guest
+ * can use these with minimal performance degradation. The drawback is
+ * that we don't implement any of the external debug, none of the
+ * OSlock protocol. This should be revisited if we ever encounter a
+ * more demanding guest...
*/
static const struct sys_reg_desc sys_reg_descs[] = {
/* DC ISW */
@@ -170,12 +303,71 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010),
access_dcsw },
+ DBG_BCR_BVR_WCR_WVR_EL1(0),
+ DBG_BCR_BVR_WCR_WVR_EL1(1),
+ /* MDCCINT_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000),
+ trap_debug_regs, reset_val, MDCCINT_EL1, 0 },
+ /* MDSCR_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010),
+ trap_debug_regs, reset_val, MDSCR_EL1, 0 },
+ DBG_BCR_BVR_WCR_WVR_EL1(2),
+ DBG_BCR_BVR_WCR_WVR_EL1(3),
+ DBG_BCR_BVR_WCR_WVR_EL1(4),
+ DBG_BCR_BVR_WCR_WVR_EL1(5),
+ DBG_BCR_BVR_WCR_WVR_EL1(6),
+ DBG_BCR_BVR_WCR_WVR_EL1(7),
+ DBG_BCR_BVR_WCR_WVR_EL1(8),
+ DBG_BCR_BVR_WCR_WVR_EL1(9),
+ DBG_BCR_BVR_WCR_WVR_EL1(10),
+ DBG_BCR_BVR_WCR_WVR_EL1(11),
+ DBG_BCR_BVR_WCR_WVR_EL1(12),
+ DBG_BCR_BVR_WCR_WVR_EL1(13),
+ DBG_BCR_BVR_WCR_WVR_EL1(14),
+ DBG_BCR_BVR_WCR_WVR_EL1(15),
+
+ /* MDRAR_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
+ trap_raz_wi },
+ /* OSLAR_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b100),
+ trap_raz_wi },
+ /* OSLSR_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0001), Op2(0b100),
+ trap_oslsr_el1 },
+ /* OSDLR_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0011), Op2(0b100),
+ trap_raz_wi },
+ /* DBGPRCR_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0100), Op2(0b100),
+ trap_raz_wi },
+ /* DBGCLAIMSET_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1000), Op2(0b110),
+ trap_raz_wi },
+ /* DBGCLAIMCLR_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1001), Op2(0b110),
+ trap_raz_wi },
+ /* DBGAUTHSTATUS_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110),
+ trap_dbgauthstatus_el1 },
+
/* TEECR32_EL1 */
{ Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
NULL, reset_val, TEECR32_EL1, 0 },
/* TEEHBR32_EL1 */
{ Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000),
NULL, reset_val, TEEHBR32_EL1, 0 },
+
+ /* MDCCSR_EL1 */
+ { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000),
+ trap_raz_wi },
+ /* DBGDTR_EL0 */
+ { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0100), Op2(0b000),
+ trap_raz_wi },
+ /* DBGDTR[TR]X_EL0 */
+ { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0101), Op2(0b000),
+ trap_raz_wi },
+
/* DBGVCR32_EL2 */
{ Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000),
NULL, reset_val, DBGVCR32_EL2, 0 },
@@ -185,56 +377,56 @@ static const struct sys_reg_desc sys_reg_descs[] = {
NULL, reset_mpidr, MPIDR_EL1 },
/* SCTLR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
- NULL, reset_val, SCTLR_EL1, 0x00C50078 },
+ access_sctlr, reset_val, SCTLR_EL1, 0x00C50078 },
/* CPACR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010),
NULL, reset_val, CPACR_EL1, 0 },
/* TTBR0_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b000),
- NULL, reset_unknown, TTBR0_EL1 },
+ access_vm_reg, reset_unknown, TTBR0_EL1 },
/* TTBR1_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b001),
- NULL, reset_unknown, TTBR1_EL1 },
+ access_vm_reg, reset_unknown, TTBR1_EL1 },
/* TCR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b010),
- NULL, reset_val, TCR_EL1, 0 },
+ access_vm_reg, reset_val, TCR_EL1, 0 },
/* AFSR0_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b000),
- NULL, reset_unknown, AFSR0_EL1 },
+ access_vm_reg, reset_unknown, AFSR0_EL1 },
/* AFSR1_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b001),
- NULL, reset_unknown, AFSR1_EL1 },
+ access_vm_reg, reset_unknown, AFSR1_EL1 },
/* ESR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0010), Op2(0b000),
- NULL, reset_unknown, ESR_EL1 },
+ access_vm_reg, reset_unknown, ESR_EL1 },
/* FAR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000),
- NULL, reset_unknown, FAR_EL1 },
+ access_vm_reg, reset_unknown, FAR_EL1 },
/* PAR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b0111), CRm(0b0100), Op2(0b000),
NULL, reset_unknown, PAR_EL1 },
/* PMINTENSET_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001),
- pm_fake },
+ trap_raz_wi },
/* PMINTENCLR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010),
- pm_fake },
+ trap_raz_wi },
/* MAIR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000),
- NULL, reset_unknown, MAIR_EL1 },
+ access_vm_reg, reset_unknown, MAIR_EL1 },
/* AMAIR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0011), Op2(0b000),
- NULL, reset_amair_el1, AMAIR_EL1 },
+ access_vm_reg, reset_amair_el1, AMAIR_EL1 },
/* VBAR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000),
NULL, reset_val, VBAR_EL1, 0 },
/* CONTEXTIDR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001),
- NULL, reset_val, CONTEXTIDR_EL1, 0 },
+ access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
/* TPIDR_EL1 */
{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b100),
NULL, reset_unknown, TPIDR_EL1 },
@@ -249,43 +441,43 @@ static const struct sys_reg_desc sys_reg_descs[] = {
/* PMCR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000),
- pm_fake },
+ trap_raz_wi },
/* PMCNTENSET_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001),
- pm_fake },
+ trap_raz_wi },
/* PMCNTENCLR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010),
- pm_fake },
+ trap_raz_wi },
/* PMOVSCLR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011),
- pm_fake },
+ trap_raz_wi },
/* PMSWINC_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100),
- pm_fake },
+ trap_raz_wi },
/* PMSELR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101),
- pm_fake },
+ trap_raz_wi },
/* PMCEID0_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110),
- pm_fake },
+ trap_raz_wi },
/* PMCEID1_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111),
- pm_fake },
+ trap_raz_wi },
/* PMCCNTR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000),
- pm_fake },
+ trap_raz_wi },
/* PMXEVTYPER_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001),
- pm_fake },
+ trap_raz_wi },
/* PMXEVCNTR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010),
- pm_fake },
+ trap_raz_wi },
/* PMUSERENR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000),
- pm_fake },
+ trap_raz_wi },
/* PMOVSSET_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011),
- pm_fake },
+ trap_raz_wi },
/* TPIDR_EL0 */
{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010),
@@ -305,27 +497,205 @@ static const struct sys_reg_desc sys_reg_descs[] = {
NULL, reset_val, FPEXC32_EL2, 0x70 },
};
-/* Trapped cp15 registers */
+static bool trap_dbgidr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ return ignore_write(vcpu, p);
+ } else {
+ u64 dfr = read_cpuid(ID_AA64DFR0_EL1);
+ u64 pfr = read_cpuid(ID_AA64PFR0_EL1);
+ u32 el3 = !!((pfr >> 12) & 0xf);
+
+ *vcpu_reg(vcpu, p->Rt) = ((((dfr >> 20) & 0xf) << 28) |
+ (((dfr >> 12) & 0xf) << 24) |
+ (((dfr >> 28) & 0xf) << 20) |
+ (6 << 16) | (el3 << 14) | (el3 << 12));
+ return true;
+ }
+}
+
+static bool trap_debug32(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ vcpu_cp14(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
+ vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+ } else {
+ *vcpu_reg(vcpu, p->Rt) = vcpu_cp14(vcpu, r->reg);
+ }
+
+ return true;
+}
+
+#define DBG_BCR_BVR_WCR_WVR(n) \
+ /* DBGBVRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_debug32, \
+ NULL, (cp14_DBGBVR0 + (n) * 2) }, \
+ /* DBGBCRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_debug32, \
+ NULL, (cp14_DBGBCR0 + (n) * 2) }, \
+ /* DBGWVRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_debug32, \
+ NULL, (cp14_DBGWVR0 + (n) * 2) }, \
+ /* DBGWCRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_debug32, \
+ NULL, (cp14_DBGWCR0 + (n) * 2) }
+
+#define DBGBXVR(n) \
+ { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_debug32, \
+ NULL, cp14_DBGBXVR0 + n * 2 }
+
+/*
+ * Trapped cp14 registers. We generally ignore most of the external
+ * debug, on the principle that they don't really make sense to a
+ * guest. Revisit this one day, whould this principle change.
+ */
+static const struct sys_reg_desc cp14_regs[] = {
+ /* DBGIDR */
+ { Op1( 0), CRn( 0), CRm( 0), Op2( 0), trap_dbgidr },
+ /* DBGDTRRXext */
+ { Op1( 0), CRn( 0), CRm( 0), Op2( 2), trap_raz_wi },
+
+ DBG_BCR_BVR_WCR_WVR(0),
+ /* DBGDSCRint */
+ { Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi },
+ DBG_BCR_BVR_WCR_WVR(1),
+ /* DBGDCCINT */
+ { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32 },
+ /* DBGDSCRext */
+ { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32 },
+ DBG_BCR_BVR_WCR_WVR(2),
+ /* DBGDTR[RT]Xint */
+ { Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi },
+ /* DBGDTR[RT]Xext */
+ { Op1( 0), CRn( 0), CRm( 3), Op2( 2), trap_raz_wi },
+ DBG_BCR_BVR_WCR_WVR(3),
+ DBG_BCR_BVR_WCR_WVR(4),
+ DBG_BCR_BVR_WCR_WVR(5),
+ /* DBGWFAR */
+ { Op1( 0), CRn( 0), CRm( 6), Op2( 0), trap_raz_wi },
+ /* DBGOSECCR */
+ { Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi },
+ DBG_BCR_BVR_WCR_WVR(6),
+ /* DBGVCR */
+ { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32 },
+ DBG_BCR_BVR_WCR_WVR(7),
+ DBG_BCR_BVR_WCR_WVR(8),
+ DBG_BCR_BVR_WCR_WVR(9),
+ DBG_BCR_BVR_WCR_WVR(10),
+ DBG_BCR_BVR_WCR_WVR(11),
+ DBG_BCR_BVR_WCR_WVR(12),
+ DBG_BCR_BVR_WCR_WVR(13),
+ DBG_BCR_BVR_WCR_WVR(14),
+ DBG_BCR_BVR_WCR_WVR(15),
+
+ /* DBGDRAR (32bit) */
+ { Op1( 0), CRn( 1), CRm( 0), Op2( 0), trap_raz_wi },
+
+ DBGBXVR(0),
+ /* DBGOSLAR */
+ { Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_raz_wi },
+ DBGBXVR(1),
+ /* DBGOSLSR */
+ { Op1( 0), CRn( 1), CRm( 1), Op2( 4), trap_oslsr_el1 },
+ DBGBXVR(2),
+ DBGBXVR(3),
+ /* DBGOSDLR */
+ { Op1( 0), CRn( 1), CRm( 3), Op2( 4), trap_raz_wi },
+ DBGBXVR(4),
+ /* DBGPRCR */
+ { Op1( 0), CRn( 1), CRm( 4), Op2( 4), trap_raz_wi },
+ DBGBXVR(5),
+ DBGBXVR(6),
+ DBGBXVR(7),
+ DBGBXVR(8),
+ DBGBXVR(9),
+ DBGBXVR(10),
+ DBGBXVR(11),
+ DBGBXVR(12),
+ DBGBXVR(13),
+ DBGBXVR(14),
+ DBGBXVR(15),
+
+ /* DBGDSAR (32bit) */
+ { Op1( 0), CRn( 2), CRm( 0), Op2( 0), trap_raz_wi },
+
+ /* DBGDEVID2 */
+ { Op1( 0), CRn( 7), CRm( 0), Op2( 7), trap_raz_wi },
+ /* DBGDEVID1 */
+ { Op1( 0), CRn( 7), CRm( 1), Op2( 7), trap_raz_wi },
+ /* DBGDEVID */
+ { Op1( 0), CRn( 7), CRm( 2), Op2( 7), trap_raz_wi },
+ /* DBGCLAIMSET */
+ { Op1( 0), CRn( 7), CRm( 8), Op2( 6), trap_raz_wi },
+ /* DBGCLAIMCLR */
+ { Op1( 0), CRn( 7), CRm( 9), Op2( 6), trap_raz_wi },
+ /* DBGAUTHSTATUS */
+ { Op1( 0), CRn( 7), CRm(14), Op2( 6), trap_dbgauthstatus_el1 },
+};
+
+/* Trapped cp14 64bit registers */
+static const struct sys_reg_desc cp14_64_regs[] = {
+ /* DBGDRAR (64bit) */
+ { Op1( 0), CRm( 1), .access = trap_raz_wi },
+
+ /* DBGDSAR (64bit) */
+ { Op1( 0), CRm( 2), .access = trap_raz_wi },
+};
+
+/*
+ * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
+ * depending on the way they are accessed (as a 32bit or a 64bit
+ * register).
+ */
static const struct sys_reg_desc cp15_regs[] = {
+ { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR },
+ { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
+ { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
+ { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR },
+ { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR },
+ { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR },
+ { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR },
+ { Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, c5_ADFSR },
+ { Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, c5_AIFSR },
+ { Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, c6_DFAR },
+ { Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, c6_IFAR },
+
/*
* DC{C,I,CI}SW operations:
*/
{ Op1( 0), CRn( 7), CRm( 6), Op2( 2), access_dcsw },
{ Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw },
{ Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
- { Op1( 0), CRn( 9), CRm(12), Op2( 0), pm_fake },
- { Op1( 0), CRn( 9), CRm(12), Op2( 1), pm_fake },
- { Op1( 0), CRn( 9), CRm(12), Op2( 2), pm_fake },
- { Op1( 0), CRn( 9), CRm(12), Op2( 3), pm_fake },
- { Op1( 0), CRn( 9), CRm(12), Op2( 5), pm_fake },
- { Op1( 0), CRn( 9), CRm(12), Op2( 6), pm_fake },
- { Op1( 0), CRn( 9), CRm(12), Op2( 7), pm_fake },
- { Op1( 0), CRn( 9), CRm(13), Op2( 0), pm_fake },
- { Op1( 0), CRn( 9), CRm(13), Op2( 1), pm_fake },
- { Op1( 0), CRn( 9), CRm(13), Op2( 2), pm_fake },
- { Op1( 0), CRn( 9), CRm(14), Op2( 0), pm_fake },
- { Op1( 0), CRn( 9), CRm(14), Op2( 1), pm_fake },
- { Op1( 0), CRn( 9), CRm(14), Op2( 2), pm_fake },
+
+ /* PMU */
+ { Op1( 0), CRn( 9), CRm(12), Op2( 0), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 1), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 2), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 5), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 6), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 7), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(13), Op2( 1), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(13), Op2( 2), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(14), Op2( 0), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(14), Op2( 1), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(14), Op2( 2), trap_raz_wi },
+
+ { Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR },
+ { Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR },
+ { Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 },
+ { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
+ { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
+};
+
+static const struct sys_reg_desc cp15_64_regs[] = {
+ { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
+ { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
};
/* Target specific emulation tables */
@@ -385,26 +755,29 @@ int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 1;
}
-int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
- kvm_inject_undefined(vcpu);
- return 1;
-}
-
-static void emulate_cp15(struct kvm_vcpu *vcpu,
- const struct sys_reg_params *params)
+/*
+ * emulate_cp -- tries to match a sys_reg access in a handling table, and
+ * call the corresponding trap handler.
+ *
+ * @params: pointer to the descriptor of the access
+ * @table: array of trap descriptors
+ * @num: size of the trap descriptor array
+ *
+ * Return 0 if the access has been handled, and -1 if not.
+ */
+static int emulate_cp(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *params,
+ const struct sys_reg_desc *table,
+ size_t num)
{
- size_t num;
- const struct sys_reg_desc *table, *r;
+ const struct sys_reg_desc *r;
- table = get_target_table(vcpu->arch.target, false, &num);
+ if (!table)
+ return -1; /* Not handled */
- /* Search target-specific then generic table. */
r = find_reg(params, table, num);
- if (!r)
- r = find_reg(params, cp15_regs, ARRAY_SIZE(cp15_regs));
- if (likely(r)) {
+ if (r) {
/*
* Not having an accessor means that we have
* configured a trap that we don't know how to
@@ -416,27 +789,58 @@ static void emulate_cp15(struct kvm_vcpu *vcpu,
if (likely(r->access(vcpu, params, r))) {
/* Skip instruction, since it was emulated */
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
- return;
}
- /* If access function fails, it should complain. */
+
+ /* Handled */
+ return 0;
}
- kvm_err("Unsupported guest CP15 access at: %08lx\n", *vcpu_pc(vcpu));
+ /* Not handled */
+ return -1;
+}
+
+static void unhandled_cp_access(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *params)
+{
+ u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
+ int cp;
+
+ switch(hsr_ec) {
+ case ESR_EL2_EC_CP15_32:
+ case ESR_EL2_EC_CP15_64:
+ cp = 15;
+ break;
+ case ESR_EL2_EC_CP14_MR:
+ case ESR_EL2_EC_CP14_64:
+ cp = 14;
+ break;
+ default:
+ WARN_ON((cp = -1));
+ }
+
+ kvm_err("Unsupported guest CP%d access at: %08lx\n",
+ cp, *vcpu_pc(vcpu));
print_sys_reg_instr(params);
kvm_inject_undefined(vcpu);
}
/**
- * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access
+ * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP15 access
* @vcpu: The VCPU pointer
* @run: The kvm_run struct
*/
-int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *global,
+ size_t nr_global,
+ const struct sys_reg_desc *target_specific,
+ size_t nr_specific)
{
struct sys_reg_params params;
u32 hsr = kvm_vcpu_get_hsr(vcpu);
int Rt2 = (hsr >> 10) & 0xf;
+ params.is_aarch32 = true;
+ params.is_32bit = false;
params.CRm = (hsr >> 1) & 0xf;
params.Rt = (hsr >> 5) & 0xf;
params.is_write = ((hsr & 1) == 0);
@@ -458,8 +862,14 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
*vcpu_reg(vcpu, params.Rt) = val;
}
- emulate_cp15(vcpu, &params);
+ if (!emulate_cp(vcpu, &params, target_specific, nr_specific))
+ goto out;
+ if (!emulate_cp(vcpu, &params, global, nr_global))
+ goto out;
+
+ unhandled_cp_access(vcpu, &params);
+out:
/* Do the opposite hack for the read side */
if (!params.is_write) {
u64 val = *vcpu_reg(vcpu, params.Rt);
@@ -475,11 +885,17 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
* @vcpu: The VCPU pointer
* @run: The kvm_run struct
*/
-int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *global,
+ size_t nr_global,
+ const struct sys_reg_desc *target_specific,
+ size_t nr_specific)
{
struct sys_reg_params params;
u32 hsr = kvm_vcpu_get_hsr(vcpu);
+ params.is_aarch32 = true;
+ params.is_32bit = true;
params.CRm = (hsr >> 1) & 0xf;
params.Rt = (hsr >> 5) & 0xf;
params.is_write = ((hsr & 1) == 0);
@@ -488,10 +904,51 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
params.Op1 = (hsr >> 14) & 0x7;
params.Op2 = (hsr >> 17) & 0x7;
- emulate_cp15(vcpu, &params);
+ if (!emulate_cp(vcpu, &params, target_specific, nr_specific))
+ return 1;
+ if (!emulate_cp(vcpu, &params, global, nr_global))
+ return 1;
+
+ unhandled_cp_access(vcpu, &params);
return 1;
}
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ const struct sys_reg_desc *target_specific;
+ size_t num;
+
+ target_specific = get_target_table(vcpu->arch.target, false, &num);
+ return kvm_handle_cp_64(vcpu,
+ cp15_64_regs, ARRAY_SIZE(cp15_64_regs),
+ target_specific, num);
+}
+
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ const struct sys_reg_desc *target_specific;
+ size_t num;
+
+ target_specific = get_target_table(vcpu->arch.target, false, &num);
+ return kvm_handle_cp_32(vcpu,
+ cp15_regs, ARRAY_SIZE(cp15_regs),
+ target_specific, num);
+}
+
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ return kvm_handle_cp_64(vcpu,
+ cp14_64_regs, ARRAY_SIZE(cp14_64_regs),
+ NULL, 0);
+}
+
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ return kvm_handle_cp_32(vcpu,
+ cp14_regs, ARRAY_SIZE(cp14_regs),
+ NULL, 0);
+}
+
static int emulate_sys_reg(struct kvm_vcpu *vcpu,
const struct sys_reg_params *params)
{
@@ -549,6 +1006,8 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
struct sys_reg_params params;
unsigned long esr = kvm_vcpu_get_hsr(vcpu);
+ params.is_aarch32 = false;
+ params.is_32bit = false;
params.Op0 = (esr >> 20) & 3;
params.Op1 = (esr >> 14) & 0x7;
params.CRn = (esr >> 10) & 0xf;
@@ -701,17 +1160,15 @@ static struct sys_reg_desc invariant_sys_regs[] = {
NULL, get_ctr_el0 },
};
-static int reg_from_user(void *val, const void __user *uaddr, u64 id)
+static int reg_from_user(u64 *val, const void __user *uaddr, u64 id)
{
- /* This Just Works because we are little endian. */
if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
return -EFAULT;
return 0;
}
-static int reg_to_user(void __user *uaddr, const void *val, u64 id)
+static int reg_to_user(void __user *uaddr, const u64 *val, u64 id)
{
- /* This Just Works because we are little endian. */
if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
return -EFAULT;
return 0;
@@ -761,7 +1218,7 @@ static bool is_valid_cache(u32 val)
u32 level, ctype;
if (val >= CSSELR_MAX)
- return -ENOENT;
+ return false;
/* Bottom bit is Instruction or Data bit. Next 3 bits are level. */
level = (val >> 1);
@@ -887,7 +1344,7 @@ static unsigned int num_demux_regs(void)
static int write_demux_regids(u64 __user *uindices)
{
- u64 val = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX;
+ u64 val = KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX;
unsigned int i;
val |= KVM_REG_ARM_DEMUX_ID_CCSIDR;
@@ -994,14 +1451,32 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
return write_demux_regids(uindices);
}
+static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n)
+{
+ unsigned int i;
+
+ for (i = 1; i < n; i++) {
+ if (cmp_sys_reg(&table[i-1], &table[i]) >= 0) {
+ kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
void kvm_sys_reg_table_init(void)
{
unsigned int i;
struct sys_reg_desc clidr;
/* Make sure tables are unique and in order. */
- for (i = 1; i < ARRAY_SIZE(sys_reg_descs); i++)
- BUG_ON(cmp_sys_reg(&sys_reg_descs[i-1], &sys_reg_descs[i]) >= 0);
+ BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs)));
+ BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs)));
+ BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs)));
+ BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs)));
+ BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs)));
+ BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)));
/* We abuse the reset function to overwrite the table itself. */
for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index d50d3722998e..d411e251412c 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -30,6 +30,8 @@ struct sys_reg_params {
u8 Op2;
u8 Rt;
bool is_write;
+ bool is_aarch32;
+ bool is_32bit; /* Only valid if is_aarch32 is true */
};
struct sys_reg_desc {
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index 8fe6f76b0edc..475fd2929310 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -88,6 +88,8 @@ static int __init sys_reg_genericv8_init(void)
&genericv8_target_table);
kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8,
&genericv8_target_table);
+ kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A53,
+ &genericv8_target_table);
kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57,
&genericv8_target_table);
kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA,
diff --git a/arch/arm64/kvm/vgic-v2-switch.S b/arch/arm64/kvm/vgic-v2-switch.S
new file mode 100644
index 000000000000..ae211772f991
--- /dev/null
+++ b/arch/arm64/kvm/vgic-v2-switch.S
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/asm-offsets.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+ .text
+ .pushsection .hyp.text, "ax"
+
+/*
+ * Save the VGIC CPU state into memory
+ * x0: Register pointing to VCPU struct
+ * Do not corrupt x1!!!
+ */
+ENTRY(__save_vgic_v2_state)
+__save_vgic_v2_state:
+ /* Get VGIC VCTRL base into x2 */
+ ldr x2, [x0, #VCPU_KVM]
+ kern_hyp_va x2
+ ldr x2, [x2, #KVM_VGIC_VCTRL]
+ kern_hyp_va x2
+ cbz x2, 2f // disabled
+
+ /* Compute the address of struct vgic_cpu */
+ add x3, x0, #VCPU_VGIC_CPU
+
+ /* Save all interesting registers */
+ ldr w4, [x2, #GICH_HCR]
+ ldr w5, [x2, #GICH_VMCR]
+ ldr w6, [x2, #GICH_MISR]
+ ldr w7, [x2, #GICH_EISR0]
+ ldr w8, [x2, #GICH_EISR1]
+ ldr w9, [x2, #GICH_ELRSR0]
+ ldr w10, [x2, #GICH_ELRSR1]
+ ldr w11, [x2, #GICH_APR]
+CPU_BE( rev w4, w4 )
+CPU_BE( rev w5, w5 )
+CPU_BE( rev w6, w6 )
+CPU_BE( rev w7, w7 )
+CPU_BE( rev w8, w8 )
+CPU_BE( rev w9, w9 )
+CPU_BE( rev w10, w10 )
+CPU_BE( rev w11, w11 )
+
+ str w4, [x3, #VGIC_V2_CPU_HCR]
+ str w5, [x3, #VGIC_V2_CPU_VMCR]
+ str w6, [x3, #VGIC_V2_CPU_MISR]
+ str w7, [x3, #VGIC_V2_CPU_EISR]
+ str w8, [x3, #(VGIC_V2_CPU_EISR + 4)]
+ str w9, [x3, #VGIC_V2_CPU_ELRSR]
+ str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)]
+ str w11, [x3, #VGIC_V2_CPU_APR]
+
+ /* Clear GICH_HCR */
+ str wzr, [x2, #GICH_HCR]
+
+ /* Save list registers */
+ add x2, x2, #GICH_LR0
+ ldr w4, [x3, #VGIC_CPU_NR_LR]
+ add x3, x3, #VGIC_V2_CPU_LR
+1: ldr w5, [x2], #4
+CPU_BE( rev w5, w5 )
+ str w5, [x3], #4
+ sub w4, w4, #1
+ cbnz w4, 1b
+2:
+ ret
+ENDPROC(__save_vgic_v2_state)
+
+/*
+ * Restore the VGIC CPU state from memory
+ * x0: Register pointing to VCPU struct
+ */
+ENTRY(__restore_vgic_v2_state)
+__restore_vgic_v2_state:
+ /* Get VGIC VCTRL base into x2 */
+ ldr x2, [x0, #VCPU_KVM]
+ kern_hyp_va x2
+ ldr x2, [x2, #KVM_VGIC_VCTRL]
+ kern_hyp_va x2
+ cbz x2, 2f // disabled
+
+ /* Compute the address of struct vgic_cpu */
+ add x3, x0, #VCPU_VGIC_CPU
+
+ /* We only restore a minimal set of registers */
+ ldr w4, [x3, #VGIC_V2_CPU_HCR]
+ ldr w5, [x3, #VGIC_V2_CPU_VMCR]
+ ldr w6, [x3, #VGIC_V2_CPU_APR]
+CPU_BE( rev w4, w4 )
+CPU_BE( rev w5, w5 )
+CPU_BE( rev w6, w6 )
+
+ str w4, [x2, #GICH_HCR]
+ str w5, [x2, #GICH_VMCR]
+ str w6, [x2, #GICH_APR]
+
+ /* Restore list registers */
+ add x2, x2, #GICH_LR0
+ ldr w4, [x3, #VGIC_CPU_NR_LR]
+ add x3, x3, #VGIC_V2_CPU_LR
+1: ldr w5, [x3], #4
+CPU_BE( rev w5, w5 )
+ str w5, [x2], #4
+ sub w4, w4, #1
+ cbnz w4, 1b
+2:
+ ret
+ENDPROC(__restore_vgic_v2_state)
+
+ .popsection
diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S
new file mode 100644
index 000000000000..d16046999e06
--- /dev/null
+++ b/arch/arm64/kvm/vgic-v3-switch.S
@@ -0,0 +1,267 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/irqchip/arm-gic-v3.h>
+
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/asm-offsets.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
+ .text
+ .pushsection .hyp.text, "ax"
+
+/*
+ * We store LRs in reverse order to let the CPU deal with streaming
+ * access. Use this macro to make it look saner...
+ */
+#define LR_OFFSET(n) (VGIC_V3_CPU_LR + (15 - n) * 8)
+
+/*
+ * Save the VGIC CPU state into memory
+ * x0: Register pointing to VCPU struct
+ * Do not corrupt x1!!!
+ */
+.macro save_vgic_v3_state
+ // Compute the address of struct vgic_cpu
+ add x3, x0, #VCPU_VGIC_CPU
+
+ // Make sure stores to the GIC via the memory mapped interface
+ // are now visible to the system register interface
+ dsb st
+
+ // Save all interesting registers
+ mrs_s x4, ICH_HCR_EL2
+ mrs_s x5, ICH_VMCR_EL2
+ mrs_s x6, ICH_MISR_EL2
+ mrs_s x7, ICH_EISR_EL2
+ mrs_s x8, ICH_ELSR_EL2
+
+ str w4, [x3, #VGIC_V3_CPU_HCR]
+ str w5, [x3, #VGIC_V3_CPU_VMCR]
+ str w6, [x3, #VGIC_V3_CPU_MISR]
+ str w7, [x3, #VGIC_V3_CPU_EISR]
+ str w8, [x3, #VGIC_V3_CPU_ELRSR]
+
+ msr_s ICH_HCR_EL2, xzr
+
+ mrs_s x21, ICH_VTR_EL2
+ mvn w22, w21
+ ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4
+
+ adr x24, 1f
+ add x24, x24, x23
+ br x24
+
+1:
+ mrs_s x20, ICH_LR15_EL2
+ mrs_s x19, ICH_LR14_EL2
+ mrs_s x18, ICH_LR13_EL2
+ mrs_s x17, ICH_LR12_EL2
+ mrs_s x16, ICH_LR11_EL2
+ mrs_s x15, ICH_LR10_EL2
+ mrs_s x14, ICH_LR9_EL2
+ mrs_s x13, ICH_LR8_EL2
+ mrs_s x12, ICH_LR7_EL2
+ mrs_s x11, ICH_LR6_EL2
+ mrs_s x10, ICH_LR5_EL2
+ mrs_s x9, ICH_LR4_EL2
+ mrs_s x8, ICH_LR3_EL2
+ mrs_s x7, ICH_LR2_EL2
+ mrs_s x6, ICH_LR1_EL2
+ mrs_s x5, ICH_LR0_EL2
+
+ adr x24, 1f
+ add x24, x24, x23
+ br x24
+
+1:
+ str x20, [x3, #LR_OFFSET(15)]
+ str x19, [x3, #LR_OFFSET(14)]
+ str x18, [x3, #LR_OFFSET(13)]
+ str x17, [x3, #LR_OFFSET(12)]
+ str x16, [x3, #LR_OFFSET(11)]
+ str x15, [x3, #LR_OFFSET(10)]
+ str x14, [x3, #LR_OFFSET(9)]
+ str x13, [x3, #LR_OFFSET(8)]
+ str x12, [x3, #LR_OFFSET(7)]
+ str x11, [x3, #LR_OFFSET(6)]
+ str x10, [x3, #LR_OFFSET(5)]
+ str x9, [x3, #LR_OFFSET(4)]
+ str x8, [x3, #LR_OFFSET(3)]
+ str x7, [x3, #LR_OFFSET(2)]
+ str x6, [x3, #LR_OFFSET(1)]
+ str x5, [x3, #LR_OFFSET(0)]
+
+ tbnz w21, #29, 6f // 6 bits
+ tbz w21, #30, 5f // 5 bits
+ // 7 bits
+ mrs_s x20, ICH_AP0R3_EL2
+ str w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
+ mrs_s x19, ICH_AP0R2_EL2
+ str w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
+6: mrs_s x18, ICH_AP0R1_EL2
+ str w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
+5: mrs_s x17, ICH_AP0R0_EL2
+ str w17, [x3, #VGIC_V3_CPU_AP0R]
+
+ tbnz w21, #29, 6f // 6 bits
+ tbz w21, #30, 5f // 5 bits
+ // 7 bits
+ mrs_s x20, ICH_AP1R3_EL2
+ str w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
+ mrs_s x19, ICH_AP1R2_EL2
+ str w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
+6: mrs_s x18, ICH_AP1R1_EL2
+ str w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
+5: mrs_s x17, ICH_AP1R0_EL2
+ str w17, [x3, #VGIC_V3_CPU_AP1R]
+
+ // Restore SRE_EL1 access and re-enable SRE at EL1.
+ mrs_s x5, ICC_SRE_EL2
+ orr x5, x5, #ICC_SRE_EL2_ENABLE
+ msr_s ICC_SRE_EL2, x5
+ isb
+ mov x5, #1
+ msr_s ICC_SRE_EL1, x5
+.endm
+
+/*
+ * Restore the VGIC CPU state from memory
+ * x0: Register pointing to VCPU struct
+ */
+.macro restore_vgic_v3_state
+ // Disable SRE_EL1 access. Necessary, otherwise
+ // ICH_VMCR_EL2.VFIQEn becomes one, and FIQ happens...
+ msr_s ICC_SRE_EL1, xzr
+ isb
+
+ // Compute the address of struct vgic_cpu
+ add x3, x0, #VCPU_VGIC_CPU
+
+ // Restore all interesting registers
+ ldr w4, [x3, #VGIC_V3_CPU_HCR]
+ ldr w5, [x3, #VGIC_V3_CPU_VMCR]
+
+ msr_s ICH_HCR_EL2, x4
+ msr_s ICH_VMCR_EL2, x5
+
+ mrs_s x21, ICH_VTR_EL2
+
+ tbnz w21, #29, 6f // 6 bits
+ tbz w21, #30, 5f // 5 bits
+ // 7 bits
+ ldr w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
+ msr_s ICH_AP1R3_EL2, x20
+ ldr w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
+ msr_s ICH_AP1R2_EL2, x19
+6: ldr w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
+ msr_s ICH_AP1R1_EL2, x18
+5: ldr w17, [x3, #VGIC_V3_CPU_AP1R]
+ msr_s ICH_AP1R0_EL2, x17
+
+ tbnz w21, #29, 6f // 6 bits
+ tbz w21, #30, 5f // 5 bits
+ // 7 bits
+ ldr w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
+ msr_s ICH_AP0R3_EL2, x20
+ ldr w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
+ msr_s ICH_AP0R2_EL2, x19
+6: ldr w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
+ msr_s ICH_AP0R1_EL2, x18
+5: ldr w17, [x3, #VGIC_V3_CPU_AP0R]
+ msr_s ICH_AP0R0_EL2, x17
+
+ and w22, w21, #0xf
+ mvn w22, w21
+ ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4
+
+ adr x24, 1f
+ add x24, x24, x23
+ br x24
+
+1:
+ ldr x20, [x3, #LR_OFFSET(15)]
+ ldr x19, [x3, #LR_OFFSET(14)]
+ ldr x18, [x3, #LR_OFFSET(13)]
+ ldr x17, [x3, #LR_OFFSET(12)]
+ ldr x16, [x3, #LR_OFFSET(11)]
+ ldr x15, [x3, #LR_OFFSET(10)]
+ ldr x14, [x3, #LR_OFFSET(9)]
+ ldr x13, [x3, #LR_OFFSET(8)]
+ ldr x12, [x3, #LR_OFFSET(7)]
+ ldr x11, [x3, #LR_OFFSET(6)]
+ ldr x10, [x3, #LR_OFFSET(5)]
+ ldr x9, [x3, #LR_OFFSET(4)]
+ ldr x8, [x3, #LR_OFFSET(3)]
+ ldr x7, [x3, #LR_OFFSET(2)]
+ ldr x6, [x3, #LR_OFFSET(1)]
+ ldr x5, [x3, #LR_OFFSET(0)]
+
+ adr x24, 1f
+ add x24, x24, x23
+ br x24
+
+1:
+ msr_s ICH_LR15_EL2, x20
+ msr_s ICH_LR14_EL2, x19
+ msr_s ICH_LR13_EL2, x18
+ msr_s ICH_LR12_EL2, x17
+ msr_s ICH_LR11_EL2, x16
+ msr_s ICH_LR10_EL2, x15
+ msr_s ICH_LR9_EL2, x14
+ msr_s ICH_LR8_EL2, x13
+ msr_s ICH_LR7_EL2, x12
+ msr_s ICH_LR6_EL2, x11
+ msr_s ICH_LR5_EL2, x10
+ msr_s ICH_LR4_EL2, x9
+ msr_s ICH_LR3_EL2, x8
+ msr_s ICH_LR2_EL2, x7
+ msr_s ICH_LR1_EL2, x6
+ msr_s ICH_LR0_EL2, x5
+
+ // Ensure that the above will have reached the
+ // (re)distributors. This ensure the guest will read
+ // the correct values from the memory-mapped interface.
+ isb
+ dsb sy
+
+ // Prevent the guest from touching the GIC system registers
+ mrs_s x5, ICC_SRE_EL2
+ and x5, x5, #~ICC_SRE_EL2_ENABLE
+ msr_s ICC_SRE_EL2, x5
+.endm
+
+ENTRY(__save_vgic_v3_state)
+ save_vgic_v3_state
+ ret
+ENDPROC(__save_vgic_v3_state)
+
+ENTRY(__restore_vgic_v3_state)
+ restore_vgic_v3_state
+ ret
+ENDPROC(__restore_vgic_v3_state)
+
+ENTRY(__vgic_v3_get_ich_vtr_el2)
+ mrs_s x0, ICH_VTR_EL2
+ ret
+ENDPROC(__vgic_v3_get_ich_vtr_el2)
+
+ .popsection
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index b51d36401d83..3ecb56c624d3 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -1,5 +1,5 @@
obj-y := dma-mapping.o extable.o fault.o init.o \
cache.o copypage.o flush.o \
ioremap.o mmap.o pgd.o mmu.o \
- context.o tlb.o proc.o
+ context.o proc.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 1ea9f26d1b70..fda756875fa6 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -30,7 +30,7 @@
*
* Corrupted registers: x0-x7, x9-x11
*/
-ENTRY(__flush_dcache_all)
+__flush_dcache_all:
dsb sy // ensure ordering with previous memory accesses
mrs x0, clidr_el1 // read clidr
and x3, x0, #0x7000000 // extract loc from clidr
@@ -166,3 +166,97 @@ ENTRY(__flush_dcache_area)
dsb sy
ret
ENDPROC(__flush_dcache_area)
+
+/*
+ * __inval_cache_range(start, end)
+ * - start - start address of region
+ * - end - end address of region
+ */
+ENTRY(__inval_cache_range)
+ /* FALLTHROUGH */
+
+/*
+ * __dma_inv_range(start, end)
+ * - start - virtual start address of region
+ * - end - virtual end address of region
+ */
+__dma_inv_range:
+ dcache_line_size x2, x3
+ sub x3, x2, #1
+ tst x1, x3 // end cache line aligned?
+ bic x1, x1, x3
+ b.eq 1f
+ dc civac, x1 // clean & invalidate D / U line
+1: tst x0, x3 // start cache line aligned?
+ bic x0, x0, x3
+ b.eq 2f
+ dc civac, x0 // clean & invalidate D / U line
+ b 3f
+2: dc ivac, x0 // invalidate D / U line
+3: add x0, x0, x2
+ cmp x0, x1
+ b.lo 2b
+ dsb sy
+ ret
+ENDPROC(__inval_cache_range)
+ENDPROC(__dma_inv_range)
+
+/*
+ * __dma_clean_range(start, end)
+ * - start - virtual start address of region
+ * - end - virtual end address of region
+ */
+__dma_clean_range:
+ dcache_line_size x2, x3
+ sub x3, x2, #1
+ bic x0, x0, x3
+1: dc cvac, x0 // clean D / U line
+ add x0, x0, x2
+ cmp x0, x1
+ b.lo 1b
+ dsb sy
+ ret
+ENDPROC(__dma_clean_range)
+
+/*
+ * __dma_flush_range(start, end)
+ * - start - virtual start address of region
+ * - end - virtual end address of region
+ */
+ENTRY(__dma_flush_range)
+ dcache_line_size x2, x3
+ sub x3, x2, #1
+ bic x0, x0, x3
+1: dc civac, x0 // clean & invalidate D / U line
+ add x0, x0, x2
+ cmp x0, x1
+ b.lo 1b
+ dsb sy
+ ret
+ENDPROC(__dma_flush_range)
+
+/*
+ * __dma_map_area(start, size, dir)
+ * - start - kernel virtual start address
+ * - size - size of region
+ * - dir - DMA direction
+ */
+ENTRY(__dma_map_area)
+ add x1, x1, x0
+ cmp w2, #DMA_FROM_DEVICE
+ b.eq __dma_inv_range
+ b __dma_clean_range
+ENDPROC(__dma_map_area)
+
+/*
+ * __dma_unmap_area(start, size, dir)
+ * - start - kernel virtual start address
+ * - size - size of region
+ * - dir - DMA direction
+ */
+ENTRY(__dma_unmap_area)
+ add x1, x1, x0
+ cmp w2, #DMA_TO_DEVICE
+ b.ne __dma_inv_range
+ ret
+ENDPROC(__dma_unmap_area)
diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c
index 9aecbace4128..13bbc3be6f5a 100644
--- a/arch/arm64/mm/copypage.c
+++ b/arch/arm64/mm/copypage.c
@@ -27,8 +27,10 @@ void __cpu_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr)
copy_page(kto, kfrom);
__flush_dcache_area(kto, PAGE_SIZE);
}
+EXPORT_SYMBOL_GPL(__cpu_copy_user_page);
void __cpu_clear_user_page(void *kaddr, unsigned long vaddr)
{
clear_page(kaddr);
}
+EXPORT_SYMBOL_GPL(__cpu_clear_user_page);
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index fbd76785c5db..4164c5ace9f8 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -22,26 +22,37 @@
#include <linux/slab.h>
#include <linux/dma-mapping.h>
#include <linux/dma-contiguous.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
#include <linux/vmalloc.h>
#include <linux/swiotlb.h>
+#include <linux/amba/bus.h>
#include <asm/cacheflush.h>
struct dma_map_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);
-static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags,
- struct dma_attrs *attrs)
+static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
+ bool coherent)
+{
+ if (!coherent || dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs))
+ return pgprot_writecombine(prot);
+ return prot;
+}
+
+static void *__dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flags,
+ struct dma_attrs *attrs)
{
if (dev == NULL) {
WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
return NULL;
}
- if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
+ if (IS_ENABLED(CONFIG_ZONE_DMA) &&
dev->coherent_dma_mask <= DMA_BIT_MASK(32))
- flags |= GFP_DMA32;
+ flags |= GFP_DMA;
if (IS_ENABLED(CONFIG_DMA_CMA)) {
struct page *page;
@@ -58,9 +69,9 @@ static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size,
}
}
-static void arm64_swiotlb_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
+static void __dma_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
{
if (dev == NULL) {
WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
@@ -78,9 +89,212 @@ static void arm64_swiotlb_free_coherent(struct device *dev, size_t size,
}
}
-static struct dma_map_ops arm64_swiotlb_dma_ops = {
- .alloc = arm64_swiotlb_alloc_coherent,
- .free = arm64_swiotlb_free_coherent,
+static void *__dma_alloc_noncoherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flags,
+ struct dma_attrs *attrs)
+{
+ struct page *page, **map;
+ void *ptr, *coherent_ptr;
+ int order, i;
+
+ size = PAGE_ALIGN(size);
+ order = get_order(size);
+
+ ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs);
+ if (!ptr)
+ goto no_mem;
+ map = kmalloc(sizeof(struct page *) << order, flags & ~GFP_DMA);
+ if (!map)
+ goto no_map;
+
+ /* remove any dirty cache lines on the kernel alias */
+ __dma_flush_range(ptr, ptr + size);
+
+ /* create a coherent mapping */
+ page = virt_to_page(ptr);
+ for (i = 0; i < (size >> PAGE_SHIFT); i++)
+ map[i] = page + i;
+ coherent_ptr = vmap(map, size >> PAGE_SHIFT, VM_MAP,
+ __get_dma_pgprot(attrs, __pgprot(PROT_NORMAL_NC), false));
+ kfree(map);
+ if (!coherent_ptr)
+ goto no_map;
+
+ return coherent_ptr;
+
+no_map:
+ __dma_free_coherent(dev, size, ptr, *dma_handle, attrs);
+no_mem:
+ *dma_handle = ~0;
+ return NULL;
+}
+
+static void __dma_free_noncoherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
+{
+ void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));
+
+ vunmap(vaddr);
+ __dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs);
+}
+
+static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ dma_addr_t dev_addr;
+
+ dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
+ __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+
+ return dev_addr;
+}
+
+
+static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
+ size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+ swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
+}
+
+static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
+ int nelems, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ struct scatterlist *sg;
+ int i, ret;
+
+ ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
+ for_each_sg(sgl, sg, ret, i)
+ __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+ sg->length, dir);
+
+ return ret;
+}
+
+static void __swiotlb_unmap_sg_attrs(struct device *dev,
+ struct scatterlist *sgl, int nelems,
+ enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgl, sg, nelems, i)
+ __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+ sg->length, dir);
+ swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
+}
+
+static void __swiotlb_sync_single_for_cpu(struct device *dev,
+ dma_addr_t dev_addr, size_t size,
+ enum dma_data_direction dir)
+{
+ __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+ swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir);
+}
+
+static void __swiotlb_sync_single_for_device(struct device *dev,
+ dma_addr_t dev_addr, size_t size,
+ enum dma_data_direction dir)
+{
+ swiotlb_sync_single_for_device(dev, dev_addr, size, dir);
+ __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+}
+
+static void __swiotlb_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sgl, int nelems,
+ enum dma_data_direction dir)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgl, sg, nelems, i)
+ __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+ sg->length, dir);
+ swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
+}
+
+static void __swiotlb_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sgl, int nelems,
+ enum dma_data_direction dir)
+{
+ struct scatterlist *sg;
+ int i;
+
+ swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
+ for_each_sg(sgl, sg, nelems, i)
+ __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+ sg->length, dir);
+}
+
+/* vma->vm_page_prot must be set appropriately before calling this function */
+static int __dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+ int ret = -ENXIO;
+ unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >>
+ PAGE_SHIFT;
+ unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT;
+ unsigned long off = vma->vm_pgoff;
+
+ if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
+ return ret;
+
+ if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
+ ret = remap_pfn_range(vma, vma->vm_start,
+ pfn + off,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+ }
+
+ return ret;
+}
+
+static int __swiotlb_mmap_noncoherent(struct device *dev,
+ struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ struct dma_attrs *attrs)
+{
+ vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, false);
+ return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+
+static int __swiotlb_mmap_coherent(struct device *dev,
+ struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ struct dma_attrs *attrs)
+{
+ /* Just use whatever page_prot attributes were specified */
+ return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+
+struct dma_map_ops noncoherent_swiotlb_dma_ops = {
+ .alloc = __dma_alloc_noncoherent,
+ .free = __dma_free_noncoherent,
+ .mmap = __swiotlb_mmap_noncoherent,
+ .map_page = __swiotlb_map_page,
+ .unmap_page = __swiotlb_unmap_page,
+ .map_sg = __swiotlb_map_sg_attrs,
+ .unmap_sg = __swiotlb_unmap_sg_attrs,
+ .sync_single_for_cpu = __swiotlb_sync_single_for_cpu,
+ .sync_single_for_device = __swiotlb_sync_single_for_device,
+ .sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
+ .sync_sg_for_device = __swiotlb_sync_sg_for_device,
+ .dma_supported = swiotlb_dma_supported,
+ .mapping_error = swiotlb_dma_mapping_error,
+};
+EXPORT_SYMBOL(noncoherent_swiotlb_dma_ops);
+
+struct dma_map_ops coherent_swiotlb_dma_ops = {
+ .alloc = __dma_alloc_coherent,
+ .free = __dma_free_coherent,
+ .mmap = __swiotlb_mmap_coherent,
.map_page = swiotlb_map_page,
.unmap_page = swiotlb_unmap_page,
.map_sg = swiotlb_map_sg_attrs,
@@ -92,12 +306,47 @@ static struct dma_map_ops arm64_swiotlb_dma_ops = {
.dma_supported = swiotlb_dma_supported,
.mapping_error = swiotlb_dma_mapping_error,
};
+EXPORT_SYMBOL(coherent_swiotlb_dma_ops);
-void __init arm64_swiotlb_init(void)
+static int dma_bus_notifier(struct notifier_block *nb,
+ unsigned long event, void *_dev)
{
- dma_ops = &arm64_swiotlb_dma_ops;
- swiotlb_init(1);
+ struct device *dev = _dev;
+
+ if (event != BUS_NOTIFY_ADD_DEVICE)
+ return NOTIFY_DONE;
+
+ if (of_property_read_bool(dev->of_node, "dma-coherent"))
+ set_dma_ops(dev, &coherent_swiotlb_dma_ops);
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block platform_bus_nb = {
+ .notifier_call = dma_bus_notifier,
+};
+
+static struct notifier_block amba_bus_nb = {
+ .notifier_call = dma_bus_notifier,
+};
+
+extern int swiotlb_late_init_with_default_size(size_t default_size);
+
+static int __init swiotlb_late_init(void)
+{
+ size_t swiotlb_size = min(SZ_64M, MAX_ORDER_NR_PAGES << PAGE_SHIFT);
+
+ /*
+ * These must be registered before of_platform_populate().
+ */
+ bus_register_notifier(&platform_bus_type, &platform_bus_nb);
+ bus_register_notifier(&amba_bustype, &amba_bus_nb);
+
+ dma_ops = &noncoherent_swiotlb_dma_ops;
+
+ return swiotlb_late_init_with_default_size(swiotlb_size);
}
+arch_initcall(swiotlb_late_init);
#define PREALLOC_DMA_DEBUG_ENTRIES 4096
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index d0b4c2efda90..cc3339d0a276 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -30,6 +30,7 @@
#include <linux/memblock.h>
#include <linux/sort.h>
#include <linux/of_fdt.h>
+#include <linux/dma-mapping.h>
#include <linux/dma-contiguous.h>
#include <asm/sections.h>
@@ -59,22 +60,22 @@ static int __init early_initrd(char *p)
early_param("initrd", early_initrd);
#endif
-#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
-
static void __init zone_sizes_init(unsigned long min, unsigned long max)
{
struct memblock_region *reg;
unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
- unsigned long max_dma32 = min;
+ unsigned long max_dma = min;
memset(zone_size, 0, sizeof(zone_size));
-#ifdef CONFIG_ZONE_DMA32
/* 4GB maximum for 32-bit only capable devices */
- max_dma32 = max(min, min(max, MAX_DMA32_PFN));
- zone_size[ZONE_DMA32] = max_dma32 - min;
-#endif
- zone_size[ZONE_NORMAL] = max - max_dma32;
+ if (IS_ENABLED(CONFIG_ZONE_DMA)) {
+ unsigned long max_dma_phys =
+ (unsigned long)dma_to_phys(NULL, DMA_BIT_MASK(32) + 1);
+ max_dma = max(min, min(max, max_dma_phys >> PAGE_SHIFT));
+ zone_size[ZONE_DMA] = max_dma - min;
+ }
+ zone_size[ZONE_NORMAL] = max - max_dma;
memcpy(zhole_size, zone_size, sizeof(zhole_size));
@@ -84,15 +85,15 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
if (start >= max)
continue;
-#ifdef CONFIG_ZONE_DMA32
- if (start < max_dma32) {
- unsigned long dma_end = min(end, max_dma32);
- zhole_size[ZONE_DMA32] -= dma_end - start;
+
+ if (IS_ENABLED(CONFIG_ZONE_DMA) && start < max_dma) {
+ unsigned long dma_end = min(end, max_dma);
+ zhole_size[ZONE_DMA] -= dma_end - start;
}
-#endif
- if (end > max_dma32) {
+
+ if (end > max_dma) {
unsigned long normal_end = min(end, max);
- unsigned long normal_start = max(start, max_dma32);
+ unsigned long normal_start = max(start, max_dma);
zhole_size[ZONE_NORMAL] -= normal_end - normal_start;
}
}
@@ -127,20 +128,16 @@ void __init arm64_memblock_init(void)
{
u64 *reserve_map, base, size;
- /* Register the kernel text, kernel data and initrd with memblock */
+ /*
+ * Register the kernel text, kernel data, initrd, and initial
+ * pagetables with memblock.
+ */
memblock_reserve(__pa(_text), _end - _text);
#ifdef CONFIG_BLK_DEV_INITRD
if (initrd_start)
memblock_reserve(__virt_to_phys(initrd_start), initrd_end - initrd_start);
#endif
- /*
- * Reserve the page tables. These are already in use,
- * and can only be in node 0.
- */
- memblock_reserve(__pa(swapper_pg_dir), SWAPPER_DIR_SIZE);
- memblock_reserve(__pa(idmap_pg_dir), IDMAP_DIR_SIZE);
-
/* Reserve the dtb region */
memblock_reserve(virt_to_phys(initial_boot_params),
be32_to_cpu(initial_boot_params->totalsize));
@@ -160,6 +157,7 @@ void __init arm64_memblock_init(void)
memblock_reserve(base, size);
}
+ early_init_fdt_scan_reserved_mem();
dma_contiguous_reserve(0);
memblock_allow_resize();
@@ -261,8 +259,6 @@ static void __init free_unused_memmap(void)
*/
void __init mem_init(void)
{
- arm64_swiotlb_init();
-
max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map;
#ifndef CONFIG_SPARSEMEM_VMEMMAP
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index 2bb1d586664c..7ec328392ae0 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -25,6 +25,10 @@
#include <linux/vmalloc.h>
#include <linux/io.h>
+#include <asm/fixmap.h>
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+
static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size,
pgprot_t prot, void *caller)
{
@@ -98,3 +102,84 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size)
__builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_cache);
+
+#ifndef CONFIG_ARM64_64K_PAGES
+static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
+#endif
+
+static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+
+ pgd = pgd_offset_k(addr);
+ BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
+
+ pud = pud_offset(pgd, addr);
+ BUG_ON(pud_none(*pud) || pud_bad(*pud));
+
+ return pmd_offset(pud, addr);
+}
+
+static inline pte_t * __init early_ioremap_pte(unsigned long addr)
+{
+ pmd_t *pmd = early_ioremap_pmd(addr);
+
+ BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
+
+ return pte_offset_kernel(pmd, addr);
+}
+
+void __init early_ioremap_init(void)
+{
+ pmd_t *pmd;
+
+ pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
+#ifndef CONFIG_ARM64_64K_PAGES
+ /* need to populate pmd for 4k pagesize only */
+ pmd_populate_kernel(&init_mm, pmd, bm_pte);
+#endif
+ /*
+ * The boot-ioremap range spans multiple pmds, for which
+ * we are not prepared:
+ */
+ BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+ != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
+
+ if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
+ WARN_ON(1);
+ pr_warn("pmd %p != %p\n",
+ pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
+ pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+ fix_to_virt(FIX_BTMAP_BEGIN));
+ pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n",
+ fix_to_virt(FIX_BTMAP_END));
+
+ pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
+ pr_warn("FIX_BTMAP_BEGIN: %d\n",
+ FIX_BTMAP_BEGIN);
+ }
+
+ early_ioremap_setup();
+}
+
+void __init __early_set_fixmap(enum fixed_addresses idx,
+ phys_addr_t phys, pgprot_t flags)
+{
+ unsigned long addr = __fix_to_virt(idx);
+ pte_t *pte;
+
+ if (idx >= __end_of_fixed_addresses) {
+ BUG();
+ return;
+ }
+
+ pte = early_ioremap_pte(addr);
+
+ if (pgprot_val(flags))
+ set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
+ else {
+ pte_clear(&init_mm, addr, pte);
+ flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
+ }
+}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index f8dc7e8fce6f..a2155ca6921c 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -43,11 +43,6 @@
struct page *empty_zero_page;
EXPORT_SYMBOL(empty_zero_page);
-pgprot_t pgprot_default;
-EXPORT_SYMBOL(pgprot_default);
-
-static pmdval_t prot_sect_kernel;
-
struct cachepolicy {
const char policy[16];
u64 mair;
@@ -122,33 +117,6 @@ static int __init early_cachepolicy(char *p)
}
early_param("cachepolicy", early_cachepolicy);
-/*
- * Adjust the PMD section entries according to the CPU in use.
- */
-static void __init init_mem_pgprot(void)
-{
- pteval_t default_pgprot;
- int i;
-
- default_pgprot = PTE_ATTRINDX(MT_NORMAL);
- prot_sect_kernel = PMD_TYPE_SECT | PMD_SECT_AF | PMD_ATTRINDX(MT_NORMAL);
-
-#ifdef CONFIG_SMP
- /*
- * Mark memory with the "shared" attribute for SMP systems
- */
- default_pgprot |= PTE_SHARED;
- prot_sect_kernel |= PMD_SECT_S;
-#endif
-
- for (i = 0; i < 16; i++) {
- unsigned long v = pgprot_val(protection_map[i]);
- protection_map[i] = __pgprot(v | default_pgprot);
- }
-
- pgprot_default = __pgprot(PTE_TYPE_PAGE | PTE_AF | default_pgprot);
-}
-
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot)
{
@@ -168,7 +136,8 @@ static void __init *early_alloc(unsigned long sz)
}
static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
- unsigned long end, unsigned long pfn)
+ unsigned long end, unsigned long pfn,
+ pgprot_t prot)
{
pte_t *pte;
@@ -180,16 +149,28 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
pte = pte_offset_kernel(pmd, addr);
do {
- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+ set_pte(pte, pfn_pte(pfn, prot));
pfn++;
} while (pte++, addr += PAGE_SIZE, addr != end);
}
static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
- unsigned long end, phys_addr_t phys)
+ unsigned long end, phys_addr_t phys,
+ int map_io)
{
pmd_t *pmd;
unsigned long next;
+ pmdval_t prot_sect;
+ pgprot_t prot_pte;
+
+ if (map_io) {
+ prot_sect = PMD_TYPE_SECT | PMD_SECT_AF |
+ PMD_ATTRINDX(MT_DEVICE_nGnRE);
+ prot_pte = __pgprot(PROT_DEVICE_nGnRE);
+ } else {
+ prot_sect = PROT_SECT_NORMAL_EXEC;
+ prot_pte = PAGE_KERNEL_EXEC;
+ }
/*
* Check for initial section mappings in the pgd/pud and remove them.
@@ -205,7 +186,7 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
/* try section mapping first */
if (((addr | next | phys) & ~SECTION_MASK) == 0) {
pmd_t old_pmd =*pmd;
- set_pmd(pmd, __pmd(phys | prot_sect_kernel));
+ set_pmd(pmd, __pmd(phys | prot_sect));
/*
* Check for previous table entries created during
* boot (__create_page_tables) and flush them.
@@ -213,21 +194,23 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
if (!pmd_none(old_pmd))
flush_tlb_all();
} else {
- alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys));
+ alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
+ prot_pte);
}
phys += next - addr;
} while (pmd++, addr = next, addr != end);
}
static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
- unsigned long end, unsigned long phys)
+ unsigned long end, unsigned long phys,
+ int map_io)
{
pud_t *pud = pud_offset(pgd, addr);
unsigned long next;
do {
next = pud_addr_end(addr, end);
- alloc_init_pmd(pud, addr, next, phys);
+ alloc_init_pmd(pud, addr, next, phys, map_io);
phys += next - addr;
} while (pud++, addr = next, addr != end);
}
@@ -236,70 +219,43 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
* Create the page directory entries and any necessary page tables for the
* mapping specified by 'md'.
*/
-static void __init create_mapping(phys_addr_t phys, unsigned long virt,
- phys_addr_t size)
+static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys,
+ unsigned long virt, phys_addr_t size,
+ int map_io)
{
unsigned long addr, length, end, next;
- pgd_t *pgd;
-
- if (virt < VMALLOC_START) {
- pr_warning("BUG: not creating mapping for 0x%016llx at 0x%016lx - outside kernel range\n",
- phys, virt);
- return;
- }
addr = virt & PAGE_MASK;
length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));
- pgd = pgd_offset_k(addr);
end = addr + length;
do {
next = pgd_addr_end(addr, end);
- alloc_init_pud(pgd, addr, next, phys);
+ alloc_init_pud(pgd, addr, next, phys, map_io);
phys += next - addr;
} while (pgd++, addr = next, addr != end);
}
-#ifdef CONFIG_EARLY_PRINTK
-/*
- * Create an early I/O mapping using the pgd/pmd entries already populated
- * in head.S as this function is called too early to allocated any memory. The
- * mapping size is 2MB with 4KB pages or 64KB or 64KB pages.
- */
-void __iomem * __init early_io_map(phys_addr_t phys, unsigned long virt)
+static void __init create_mapping(phys_addr_t phys, unsigned long virt,
+ phys_addr_t size)
{
- unsigned long size, mask;
- bool page64k = IS_ENABLED(CONFIG_ARM64_64K_PAGES);
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- /*
- * No early pte entries with !ARM64_64K_PAGES configuration, so using
- * sections (pmd).
- */
- size = page64k ? PAGE_SIZE : SECTION_SIZE;
- mask = ~(size - 1);
-
- pgd = pgd_offset_k(virt);
- pud = pud_offset(pgd, virt);
- if (pud_none(*pud))
- return NULL;
- pmd = pmd_offset(pud, virt);
-
- if (page64k) {
- if (pmd_none(*pmd))
- return NULL;
- pte = pte_offset_kernel(pmd, virt);
- set_pte(pte, __pte((phys & mask) | PROT_DEVICE_nGnRE));
- } else {
- set_pmd(pmd, __pmd((phys & mask) | PROT_SECT_DEVICE_nGnRE));
+ if (virt < VMALLOC_START) {
+ pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
+ &phys, virt);
+ return;
}
+ __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt, size, 0);
+}
- return (void __iomem *)((virt & mask) + (phys & ~mask));
+void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io)
+{
+ if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) {
+ pr_warn("BUG: not creating id mapping for %pa\n", &addr);
+ return;
+ }
+ __create_mapping(&idmap_pg_dir[pgd_index(addr)],
+ addr, addr, size, map_io);
}
-#endif
static void __init map_mem(void)
{
@@ -357,7 +313,6 @@ void __init paging_init(void)
{
void *zero_page;
- init_mem_pgprot();
map_mem();
/*
@@ -416,6 +371,9 @@ int kern_addr_valid(unsigned long addr)
if (pmd_none(*pmd))
return 0;
+ if (pmd_sect(*pmd))
+ return pfn_valid(pmd_pfn(*pmd));
+
pte = pte_offset_kernel(pmd, addr);
if (pte_none(*pte))
return 0;
@@ -456,7 +414,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
if (!p)
return -ENOMEM;
- set_pmd(pmd, __pmd(__pa(p) | prot_sect_kernel));
+ set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL));
} else
vmemmap_verify((pte_t *)pmd, node, addr, next);
} while (addr = next, addr != end);
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 1333e6f9a8e5..e085ee6ef4e2 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -173,12 +173,6 @@ ENDPROC(cpu_do_switch_mm)
* value of the SCTLR_EL1 register.
*/
ENTRY(__cpu_setup)
- /*
- * Preserve the link register across the function call.
- */
- mov x28, lr
- bl __flush_dcache_all
- mov lr, x28
ic iallu // I+BTB cache invalidate
tlbi vmalle1is // invalidate I + D TLBs
dsb sy
@@ -215,8 +209,14 @@ ENTRY(__cpu_setup)
* Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for
* both user and kernel.
*/
- ldr x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | TCR_IPS_40BIT | \
+ ldr x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | \
TCR_ASID16 | TCR_TBI0 | (1 << 31)
+ /*
+ * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in
+ * TCR_EL1.
+ */
+ mrs x9, ID_AA64MMFR0_EL1
+ bfi x10, x9, #32, #3
#ifdef CONFIG_ARM64_64K_PAGES
orr x10, x10, TCR_TG0_64K
orr x10, x10, TCR_TG1_64K
diff --git a/arch/arm64/mm/tlb.S b/arch/arm64/mm/tlb.S
deleted file mode 100644
index 19da91e0cd27..000000000000
--- a/arch/arm64/mm/tlb.S
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Based on arch/arm/mm/tlb.S
- *
- * Copyright (C) 1997-2002 Russell King
- * Copyright (C) 2012 ARM Ltd.
- * Written by Catalin Marinas <catalin.marinas@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/asm-offsets.h>
-#include <asm/page.h>
-#include <asm/tlbflush.h>
-#include "proc-macros.S"
-
-/*
- * __cpu_flush_user_tlb_range(start, end, vma)
- *
- * Invalidate a range of TLB entries in the specified address space.
- *
- * - start - start address (may not be aligned)
- * - end - end address (exclusive, may not be aligned)
- * - vma - vma_struct describing address range
- */
-ENTRY(__cpu_flush_user_tlb_range)
- vma_vm_mm x3, x2 // get vma->vm_mm
- mmid w3, x3 // get vm_mm->context.id
- dsb sy
- lsr x0, x0, #12 // align address
- lsr x1, x1, #12
- bfi x0, x3, #48, #16 // start VA and ASID
- bfi x1, x3, #48, #16 // end VA and ASID
-1: tlbi vae1is, x0 // TLB invalidate by address and ASID
- add x0, x0, #1
- cmp x0, x1
- b.lo 1b
- dsb sy
- ret
-ENDPROC(__cpu_flush_user_tlb_range)
-
-/*
- * __cpu_flush_kern_tlb_range(start,end)
- *
- * Invalidate a range of kernel TLB entries.
- *
- * - start - start address (may not be aligned)
- * - end - end address (exclusive, may not be aligned)
- */
-ENTRY(__cpu_flush_kern_tlb_range)
- dsb sy
- lsr x0, x0, #12 // align address
- lsr x1, x1, #12
-1: tlbi vaae1is, x0 // TLB invalidate by address
- add x0, x0, #1
- cmp x0, x1
- b.lo 1b
- dsb sy
- isb
- ret
-ENDPROC(__cpu_flush_kern_tlb_range)