aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig47
-rw-r--r--arch/x86/Makefile2
-rw-r--r--arch/x86/boot/compressed/cmdline.c2
-rw-r--r--arch/x86/boot/compressed/eboot.c112
-rw-r--r--arch/x86/boot/compressed/head_64.S80
-rw-r--r--arch/x86/boot/compressed/kaslr.c4
-rw-r--r--arch/x86/boot/compressed/misc.h6
-rw-r--r--arch/x86/boot/compressed/pgtable_64.c28
-rw-r--r--arch/x86/crypto/Makefile25
-rw-r--r--arch/x86/crypto/aegis128-aesni-asm.S749
-rw-r--r--arch/x86/crypto/aegis128-aesni-glue.c407
-rw-r--r--arch/x86/crypto/aegis128l-aesni-asm.S825
-rw-r--r--arch/x86/crypto/aegis128l-aesni-glue.c407
-rw-r--r--arch/x86/crypto/aegis256-aesni-asm.S702
-rw-r--r--arch/x86/crypto/aegis256-aesni-glue.c407
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c2
-rw-r--r--arch/x86/crypto/morus1280-avx2-asm.S621
-rw-r--r--arch/x86/crypto/morus1280-avx2-glue.c68
-rw-r--r--arch/x86/crypto/morus1280-sse2-asm.S895
-rw-r--r--arch/x86/crypto/morus1280-sse2-glue.c68
-rw-r--r--arch/x86/crypto/morus1280_glue.c302
-rw-r--r--arch/x86/crypto/morus640-sse2-asm.S614
-rw-r--r--arch/x86/crypto/morus640-sse2-glue.c68
-rw-r--r--arch/x86/crypto/morus640_glue.c298
-rw-r--r--arch/x86/crypto/salsa20-i586-asm_32.S938
-rw-r--r--arch/x86/crypto/salsa20-x86_64-asm_64.S805
-rw-r--r--arch/x86/crypto/salsa20_glue.c91
-rw-r--r--arch/x86/entry/entry_64_compat.S8
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl1
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl1
-rw-r--r--arch/x86/entry/vdso/Makefile11
-rw-r--r--arch/x86/entry/vdso/vdso32/vdso-fakesections.c1
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c2
-rw-r--r--arch/x86/events/amd/ibs.c2
-rw-r--r--arch/x86/events/amd/uncore.c21
-rw-r--r--arch/x86/events/core.c10
-rw-r--r--arch/x86/events/intel/core.c9
-rw-r--r--arch/x86/events/intel/cstate.c2
-rw-r--r--arch/x86/events/intel/pt.c4
-rw-r--r--arch/x86/events/intel/uncore.c76
-rw-r--r--arch/x86/events/intel/uncore.h127
-rw-r--r--arch/x86/events/intel/uncore_nhmex.c2
-rw-r--r--arch/x86/events/intel/uncore_snb.c101
-rw-r--r--arch/x86/events/intel/uncore_snbep.c119
-rw-r--r--arch/x86/events/msr.c9
-rw-r--r--arch/x86/hyperv/Makefile3
-rw-r--r--arch/x86/hyperv/hv_apic.c256
-rw-r--r--arch/x86/hyperv/hv_init.c32
-rw-r--r--arch/x86/hyperv/mmu.c75
-rw-r--r--arch/x86/include/asm/asm.h2
-rw-r--r--arch/x86/include/asm/cacheinfo.h7
-rw-r--r--arch/x86/include/asm/compat.h43
-rw-r--r--arch/x86/include/asm/cpufeature.h15
-rw-r--r--arch/x86/include/asm/cpufeatures.h21
-rw-r--r--arch/x86/include/asm/dma-mapping.h5
-rw-r--r--arch/x86/include/asm/ftrace.h21
-rw-r--r--arch/x86/include/asm/hardirq.h8
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h30
-rw-r--r--arch/x86/include/asm/insn.h18
-rw-r--r--arch/x86/include/asm/intel_mid_vrtc.h4
-rw-r--r--arch/x86/include/asm/io.h8
-rw-r--r--arch/x86/include/asm/irq_vectors.h7
-rw-r--r--arch/x86/include/asm/jailhouse_para.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h3
-rw-r--r--arch/x86/include/asm/mc146818rtc.h4
-rw-r--r--arch/x86/include/asm/mmu_context.h17
-rw-r--r--arch/x86/include/asm/mshyperv.h44
-rw-r--r--arch/x86/include/asm/msr-index.h13
-rw-r--r--arch/x86/include/asm/nospec-branch.h84
-rw-r--r--arch/x86/include/asm/page_64_types.h2
-rw-r--r--arch/x86/include/asm/paravirt.h4
-rw-r--r--arch/x86/include/asm/pci.h3
-rw-r--r--arch/x86/include/asm/pgalloc.h4
-rw-r--r--arch/x86/include/asm/pgtable.h17
-rw-r--r--arch/x86/include/asm/pgtable_32_types.h2
-rw-r--r--arch/x86/include/asm/pgtable_64.h2
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h33
-rw-r--r--arch/x86/include/asm/pkeys.h31
-rw-r--r--arch/x86/include/asm/processor.h11
-rw-r--r--arch/x86/include/asm/pvclock.h2
-rw-r--r--arch/x86/include/asm/qspinlock.h21
-rw-r--r--arch/x86/include/asm/qspinlock_paravirt.h3
-rw-r--r--arch/x86/include/asm/smp.h1
-rw-r--r--arch/x86/include/asm/sparsemem.h4
-rw-r--r--arch/x86/include/asm/spec-ctrl.h80
-rw-r--r--arch/x86/include/asm/stacktrace.h2
-rw-r--r--arch/x86/include/asm/string_64.h10
-rw-r--r--arch/x86/include/asm/thread_info.h4
-rw-r--r--arch/x86/include/asm/uaccess_64.h14
-rw-r--r--arch/x86/include/asm/x86_init.h6
-rw-r--r--arch/x86/include/uapi/asm/kvm_para.h2
-rw-r--r--arch/x86/include/uapi/asm/msgbuf.h31
-rw-r--r--arch/x86/include/uapi/asm/sembuf.h11
-rw-r--r--arch/x86/include/uapi/asm/shmbuf.h42
-rw-r--r--arch/x86/kernel/acpi/boot.c4
-rw-r--r--arch/x86/kernel/amd_nb.c6
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c1
-rw-r--r--arch/x86/kernel/apm_32.c17
-rw-r--r--arch/x86/kernel/cpu/Makefile2
-rw-r--r--arch/x86/kernel/cpu/amd.c58
-rw-r--r--arch/x86/kernel/cpu/bugs.c397
-rw-r--r--arch/x86/kernel/cpu/cacheinfo.c (renamed from arch/x86/kernel/cpu/intel_cacheinfo.c)46
-rw-r--r--arch/x86/kernel/cpu/centaur.c53
-rw-r--r--arch/x86/kernel/cpu/common.c108
-rw-r--r--arch/x86/kernel/cpu/cpu.h12
-rw-r--r--arch/x86/kernel/cpu/intel.c40
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.c50
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.h18
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c24
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_monitor.c170
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_rdtgroup.c33
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c18
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c44
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c6
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/Makefile2
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.c (renamed from arch/x86/kernel/cpu/mtrr/main.c)37
-rw-r--r--arch/x86/kernel/cpu/topology.c8
-rw-r--r--arch/x86/kernel/dumpstack.c144
-rw-r--r--arch/x86/kernel/e820.c32
-rw-r--r--arch/x86/kernel/early-quirks.c8
-rw-r--r--arch/x86/kernel/head64.c35
-rw-r--r--arch/x86/kernel/hpet.c5
-rw-r--r--arch/x86/kernel/jailhouse.c4
-rw-r--r--arch/x86/kernel/kexec-bzimage64.c5
-rw-r--r--arch/x86/kernel/kprobes/core.c4
-rw-r--r--arch/x86/kernel/kvm.c8
-rw-r--r--arch/x86/kernel/kvmclock.c4
-rw-r--r--arch/x86/kernel/ldt.c2
-rw-r--r--arch/x86/kernel/machine_kexec_32.c6
-rw-r--r--arch/x86/kernel/machine_kexec_64.c8
-rw-r--r--arch/x86/kernel/pci-dma.c58
-rw-r--r--arch/x86/kernel/pci-nommu.c90
-rw-r--r--arch/x86/kernel/perf_regs.c10
-rw-r--r--arch/x86/kernel/process.c146
-rw-r--r--arch/x86/kernel/process_32.c8
-rw-r--r--arch/x86/kernel/process_64.c1
-rw-r--r--arch/x86/kernel/ptrace.c2
-rw-r--r--arch/x86/kernel/pvclock.c15
-rw-r--r--arch/x86/kernel/rtc.c10
-rw-r--r--arch/x86/kernel/setup.c14
-rw-r--r--arch/x86/kernel/signal_compat.c2
-rw-r--r--arch/x86/kernel/smpboot.c59
-rw-r--r--arch/x86/kernel/sys_x86_64.c2
-rw-r--r--arch/x86/kernel/traps.c3
-rw-r--r--arch/x86/kernel/tsc.c24
-rw-r--r--arch/x86/kernel/umip.c1
-rw-r--r--arch/x86/kernel/uprobes.c8
-rw-r--r--arch/x86/kernel/vmlinux.lds.S4
-rw-r--r--arch/x86/kvm/cpuid.c26
-rw-r--r--arch/x86/kvm/hyperv.c23
-rw-r--r--arch/x86/kvm/lapic.c53
-rw-r--r--arch/x86/kvm/mmu.c1
-rw-r--r--arch/x86/kvm/svm.c97
-rw-r--r--arch/x86/kvm/vmx.c168
-rw-r--r--arch/x86/kvm/x86.c47
-rw-r--r--arch/x86/kvm/x86.h7
-rw-r--r--arch/x86/lib/memcpy_64.S102
-rw-r--r--arch/x86/lib/usercopy_64.c30
-rw-r--r--arch/x86/mm/dump_pagetables.c17
-rw-r--r--arch/x86/mm/fault.c12
-rw-r--r--arch/x86/mm/ident_map.c2
-rw-r--r--arch/x86/mm/init_64.c8
-rw-r--r--arch/x86/mm/kasan_init_64.c14
-rw-r--r--arch/x86/mm/kaslr.c8
-rw-r--r--arch/x86/mm/numa.c22
-rw-r--r--arch/x86/mm/pageattr.c44
-rw-r--r--arch/x86/mm/pkeys.c21
-rw-r--r--arch/x86/mm/pti.c26
-rw-r--r--arch/x86/mm/tlb.c2
-rw-r--r--arch/x86/net/Makefile7
-rw-r--r--arch/x86/net/bpf_jit.S154
-rw-r--r--arch/x86/net/bpf_jit_comp.c361
-rw-r--r--arch/x86/net/bpf_jit_comp32.c2419
-rw-r--r--arch/x86/platform/efi/efi_64.c2
-rw-r--r--arch/x86/platform/intel-mid/intel_mid_vrtc.c12
-rw-r--r--arch/x86/power/hibernate_64.c4
-rw-r--r--arch/x86/um/Kconfig10
-rw-r--r--arch/x86/um/vdso/Makefile4
-rw-r--r--arch/x86/xen/efi.c57
-rw-r--r--arch/x86/xen/enlighten_hvm.c13
-rw-r--r--arch/x86/xen/enlighten_pv.c86
-rw-r--r--arch/x86/xen/mmu.c4
-rw-r--r--arch/x86/xen/mmu_pv.c4
-rw-r--r--arch/x86/xen/time.c10
186 files changed, 12271 insertions, 3629 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 00fcf81f2c56..cb6e3a219294 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
# Select 32 or 64 bit
config 64BIT
- bool "64-bit kernel" if ARCH = "x86"
- default ARCH != "i386"
+ bool "64-bit kernel" if "$(ARCH)" = "x86"
+ default "$(ARCH)" != "i386"
---help---
Say yes to build a 64-bit kernel - formerly known as x86_64
Say no to build a 32-bit kernel - formerly known as i386
@@ -28,6 +28,8 @@ config X86_64
select ARCH_USE_CMPXCHG_LOCKREF
select HAVE_ARCH_SOFT_DIRTY
select MODULES_USE_ELF_RELA
+ select NEED_DMA_MAP_STATE
+ select SWIOTLB
select X86_DEV_DMA_OPS
select ARCH_HAS_SYSCALL_WRAPPER
@@ -52,6 +54,7 @@ config X86
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FAST_MULTIPLIER
+ select ARCH_HAS_FILTER_PGPROT
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOV if X86_64
@@ -59,6 +62,7 @@ config X86
select ARCH_HAS_PMEM_API if X86_64
select ARCH_HAS_REFCOUNT
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
+ select ARCH_HAS_UACCESS_MCSAFE if X86_64
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SG_CHAIN
select ARCH_HAS_STRICT_KERNEL_RWX
@@ -133,11 +137,10 @@ config X86
select HAVE_C_RECORDMCOUNT
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_STACKOVERFLOW
- select HAVE_DMA_API_DEBUG
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
- select HAVE_EBPF_JIT if X86_64
+ select HAVE_EBPF_JIT
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_EXIT_THREAD
select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE
@@ -183,6 +186,7 @@ config X86
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_USER_RETURN_NOTIFIER
select IRQ_FORCED_THREADING
+ select NEED_SG_DMA_LENGTH
select PCI_LOCKLESS_CONFIG
select PERF_EVENTS
select RTC_LIB
@@ -235,13 +239,6 @@ config ARCH_MMAP_RND_COMPAT_BITS_MAX
config SBUS
bool
-config NEED_DMA_MAP_STATE
- def_bool y
- depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG || SWIOTLB
-
-config NEED_SG_DMA_LENGTH
- def_bool y
-
config GENERIC_ISA_DMA
def_bool y
depends on ISA_DMA_API
@@ -273,6 +270,9 @@ config ARCH_HAS_CPU_RELAX
config ARCH_HAS_CACHE_LINE_SIZE
def_bool y
+config ARCH_HAS_FILTER_PGPROT
+ def_bool y
+
config HAVE_SETUP_PER_CPU_AREA
def_bool y
@@ -871,6 +871,7 @@ config DMI
config GART_IOMMU
bool "Old AMD GART IOMMU support"
+ select IOMMU_HELPER
select SWIOTLB
depends on X86_64 && PCI && AMD_NB
---help---
@@ -892,6 +893,7 @@ config GART_IOMMU
config CALGARY_IOMMU
bool "IBM Calgary IOMMU support"
+ select IOMMU_HELPER
select SWIOTLB
depends on X86_64 && PCI
---help---
@@ -919,20 +921,6 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
Calgary anyway, pass 'iommu=calgary' on the kernel command line.
If unsure, say Y.
-# need this always selected by IOMMU for the VIA workaround
-config SWIOTLB
- def_bool y if X86_64
- ---help---
- Support for software bounce buffers used on x86-64 systems
- which don't have a hardware IOMMU. Using this PCI devices
- which can only access 32-bits of memory can be used on systems
- with more than 3 GB of memory.
- If unsure, say Y.
-
-config IOMMU_HELPER
- def_bool y
- depends on CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU
-
config MAXSMP
bool "Enable Maximum number of SMP Processors and NUMA Nodes"
depends on X86_64 && SMP && DEBUG_KERNEL
@@ -1454,6 +1442,7 @@ config HIGHMEM
config X86_PAE
bool "PAE (Physical Address Extension) Support"
depends on X86_32 && !HIGHMEM4G
+ select PHYS_ADDR_T_64BIT
select SWIOTLB
---help---
PAE is required for NX support, and furthermore enables
@@ -1481,14 +1470,6 @@ config X86_5LEVEL
Say N if unsure.
-config ARCH_PHYS_ADDR_T_64BIT
- def_bool y
- depends on X86_64 || X86_PAE
-
-config ARCH_DMA_ADDR_T_64BIT
- def_bool y
- depends on X86_64 || HIGHMEM64G
-
config X86_DIRECT_GBPAGES
def_bool y
depends on X86_64 && !DEBUG_PAGEALLOC
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 60135cbd905c..f0a6ea22429d 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -94,7 +94,7 @@ ifeq ($(CONFIG_X86_32),y)
else
BITS := 64
UTS_MACHINE := x86_64
- CHECKFLAGS += -D__x86_64__ -m64
+ CHECKFLAGS += -D__x86_64__
biarch := -m64
KBUILD_AFLAGS += -m64
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index 0cb325734cfb..af6cda0b7900 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "misc.h"
-#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE
+#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE || CONFIG_X86_5LEVEL
static unsigned long fs;
static inline void set_fs(unsigned long seg)
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 47d3efff6805..a8a8642d2b0b 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -109,23 +109,34 @@ void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str)
}
static efi_status_t
-__setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom)
+__setup_efi_pci(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
{
struct pci_setup_rom *rom = NULL;
efi_status_t status;
unsigned long size;
- uint64_t attributes;
+ uint64_t attributes, romsize;
+ void *romimage;
- status = efi_early->call(pci->attributes, pci,
- EfiPciIoAttributeOperationGet, 0, 0,
- &attributes);
+ status = efi_call_proto(efi_pci_io_protocol, attributes, pci,
+ EfiPciIoAttributeOperationGet, 0, 0,
+ &attributes);
if (status != EFI_SUCCESS)
return status;
- if (!pci->romimage || !pci->romsize)
+ /*
+ * Some firmware images contain EFI function pointers at the place where the
+ * romimage and romsize fields are supposed to be. Typically the EFI
+ * code is mapped at high addresses, translating to an unrealistically
+ * large romsize. The UEFI spec limits the size of option ROMs to 16
+ * MiB so we reject any ROMs over 16 MiB in size to catch this.
+ */
+ romimage = (void *)(unsigned long)efi_table_attr(efi_pci_io_protocol,
+ romimage, pci);
+ romsize = efi_table_attr(efi_pci_io_protocol, romsize, pci);
+ if (!romimage || !romsize || romsize > SZ_16M)
return EFI_INVALID_PARAMETER;
- size = pci->romsize + sizeof(*rom);
+ size = romsize + sizeof(*rom);
status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
if (status != EFI_SUCCESS) {
@@ -141,29 +152,32 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom)
rom->pcilen = pci->romsize;
*__rom = rom;
- status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
- PCI_VENDOR_ID, 1, &(rom->vendor));
+ status = efi_call_proto(efi_pci_io_protocol, pci.read, pci,
+ EfiPciIoWidthUint16, PCI_VENDOR_ID, 1,
+ &rom->vendor);
if (status != EFI_SUCCESS) {
efi_printk(sys_table, "Failed to read rom->vendor\n");
goto free_struct;
}
- status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
- PCI_DEVICE_ID, 1, &(rom->devid));
+ status = efi_call_proto(efi_pci_io_protocol, pci.read, pci,
+ EfiPciIoWidthUint16, PCI_DEVICE_ID, 1,
+ &rom->devid);
if (status != EFI_SUCCESS) {
efi_printk(sys_table, "Failed to read rom->devid\n");
goto free_struct;
}
- status = efi_early->call(pci->get_location, pci, &(rom->segment),
- &(rom->bus), &(rom->device), &(rom->function));
+ status = efi_call_proto(efi_pci_io_protocol, get_location, pci,
+ &rom->segment, &rom->bus, &rom->device,
+ &rom->function);
if (status != EFI_SUCCESS)
goto free_struct;
- memcpy(rom->romdata, pci->romimage, pci->romsize);
+ memcpy(rom->romdata, romimage, romsize);
return status;
free_struct:
@@ -175,7 +189,7 @@ static void
setup_efi_pci32(struct boot_params *params, void **pci_handle,
unsigned long size)
{
- efi_pci_io_protocol_32 *pci = NULL;
+ efi_pci_io_protocol_t *pci = NULL;
efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
u32 *handles = (u32 *)(unsigned long)pci_handle;
efi_status_t status;
@@ -202,7 +216,7 @@ setup_efi_pci32(struct boot_params *params, void **pci_handle,
if (!pci)
continue;
- status = __setup_efi_pci32(pci, &rom);
+ status = __setup_efi_pci(pci, &rom);
if (status != EFI_SUCCESS)
continue;
@@ -216,73 +230,11 @@ setup_efi_pci32(struct boot_params *params, void **pci_handle,
}
}
-static efi_status_t
-__setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom)
-{
- struct pci_setup_rom *rom;
- efi_status_t status;
- unsigned long size;
- uint64_t attributes;
-
- status = efi_early->call(pci->attributes, pci,
- EfiPciIoAttributeOperationGet, 0,
- &attributes);
- if (status != EFI_SUCCESS)
- return status;
-
- if (!pci->romimage || !pci->romsize)
- return EFI_INVALID_PARAMETER;
-
- size = pci->romsize + sizeof(*rom);
-
- status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
- if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to alloc mem for rom\n");
- return status;
- }
-
- rom->data.type = SETUP_PCI;
- rom->data.len = size - sizeof(struct setup_data);
- rom->data.next = 0;
- rom->pcilen = pci->romsize;
- *__rom = rom;
-
- status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
- PCI_VENDOR_ID, 1, &(rom->vendor));
-
- if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to read rom->vendor\n");
- goto free_struct;
- }
-
- status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
- PCI_DEVICE_ID, 1, &(rom->devid));
-
- if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to read rom->devid\n");
- goto free_struct;
- }
-
- status = efi_early->call(pci->get_location, pci, &(rom->segment),
- &(rom->bus), &(rom->device), &(rom->function));
-
- if (status != EFI_SUCCESS)
- goto free_struct;
-
- memcpy(rom->romdata, pci->romimage, pci->romsize);
- return status;
-
-free_struct:
- efi_call_early(free_pool, rom);
- return status;
-
-}
-
static void
setup_efi_pci64(struct boot_params *params, void **pci_handle,
unsigned long size)
{
- efi_pci_io_protocol_64 *pci = NULL;
+ efi_pci_io_protocol_t *pci = NULL;
efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
u64 *handles = (u64 *)(unsigned long)pci_handle;
efi_status_t status;
@@ -309,7 +261,7 @@ setup_efi_pci64(struct boot_params *params, void **pci_handle,
if (!pci)
continue;
- status = __setup_efi_pci64(pci, &rom);
+ status = __setup_efi_pci(pci, &rom);
if (status != EFI_SUCCESS)
continue;
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index fca012baba19..64037895b085 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -306,6 +306,25 @@ ENTRY(startup_64)
leaq boot_stack_end(%rbx), %rsp
/*
+ * paging_prepare() and cleanup_trampoline() below can have GOT
+ * references. Adjust the table with address we are running at.
+ *
+ * Zero RAX for adjust_got: the GOT was not adjusted before;
+ * there's no adjustment to undo.
+ */
+ xorq %rax, %rax
+
+ /*
+ * Calculate the address the binary is loaded at and use it as
+ * a GOT adjustment.
+ */
+ call 1f
+1: popq %rdi
+ subq $1b, %rdi
+
+ call adjust_got
+
+ /*
* At this point we are in long mode with 4-level paging enabled,
* but we might want to enable 5-level paging or vice versa.
*
@@ -346,6 +365,7 @@ ENTRY(startup_64)
* this function call.
*/
pushq %rsi
+ movq %rsi, %rdi /* real mode address */
call paging_prepare
popq %rsi
@@ -370,10 +390,14 @@ trampoline_return:
/*
* cleanup_trampoline() would restore trampoline memory.
*
+ * RDI is address of the page table to use instead of page table
+ * in trampoline memory (if required).
+ *
* RSI holds real mode data and needs to be preserved across
* this function call.
*/
pushq %rsi
+ leaq top_pgtable(%rbx), %rdi
call cleanup_trampoline
popq %rsi
@@ -381,6 +405,21 @@ trampoline_return:
pushq $0
popfq
+ /*
+ * Previously we've adjusted the GOT with address the binary was
+ * loaded at. Now we need to re-adjust for relocation address.
+ *
+ * Calculate the address the binary is loaded at, so that we can
+ * undo the previous GOT adjustment.
+ */
+ call 1f
+1: popq %rax
+ subq $1b, %rax
+
+ /* The new adjustment is the relocation address */
+ movq %rbx, %rdi
+ call adjust_got
+
/*
* Copy the compressed kernel to the end of our buffer
* where decompression in place becomes safe.
@@ -482,19 +521,6 @@ relocated:
rep stosq
/*
- * Adjust our own GOT
- */
- leaq _got(%rip), %rdx
- leaq _egot(%rip), %rcx
-1:
- cmpq %rcx, %rdx
- jae 2f
- addq %rbx, (%rdx)
- addq $8, %rdx
- jmp 1b
-2:
-
-/*
* Do the extraction, and jump to the new kernel..
*/
pushq %rsi /* Save the real mode argument */
@@ -512,6 +538,27 @@ relocated:
*/
jmp *%rax
+/*
+ * Adjust the global offset table
+ *
+ * RAX is the previous adjustment of the table to undo (use 0 if it's the
+ * first time we touch GOT).
+ * RDI is the new adjustment to apply.
+ */
+adjust_got:
+ /* Walk through the GOT adding the address to the entries */
+ leaq _got(%rip), %rdx
+ leaq _egot(%rip), %rcx
+1:
+ cmpq %rcx, %rdx
+ jae 2f
+ subq %rax, (%rdx) /* Undo previous adjustment */
+ addq %rdi, (%rdx) /* Apply the new adjustment */
+ addq $8, %rdx
+ jmp 1b
+2:
+ ret
+
.code32
/*
* This is the 32-bit trampoline that will be copied over to low memory.
@@ -649,3 +696,10 @@ boot_stack_end:
.balign 4096
pgtable:
.fill BOOT_PGT_SIZE, 1, 0
+
+/*
+ * The page table is going to be used instead of page table in the trampoline
+ * memory.
+ */
+top_pgtable:
+ .fill PAGE_SIZE, 1, 0
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index a0a50b91ecef..b87a7582853d 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -47,7 +47,7 @@
#include <linux/decompress/mm.h>
#ifdef CONFIG_X86_5LEVEL
-unsigned int pgtable_l5_enabled __ro_after_init;
+unsigned int __pgtable_l5_enabled;
unsigned int pgdir_shift __ro_after_init = 39;
unsigned int ptrs_per_p4d __ro_after_init = 1;
#endif
@@ -734,7 +734,7 @@ void choose_random_location(unsigned long input,
#ifdef CONFIG_X86_5LEVEL
if (__read_cr4() & X86_CR4_LA57) {
- pgtable_l5_enabled = 1;
+ __pgtable_l5_enabled = 1;
pgdir_shift = 48;
ptrs_per_p4d = 512;
}
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 9e11be4cae19..a423bdb42686 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -12,10 +12,8 @@
#undef CONFIG_PARAVIRT_SPINLOCKS
#undef CONFIG_KASAN
-#ifdef CONFIG_X86_5LEVEL
-/* cpu_feature_enabled() cannot be used that early */
-#define pgtable_l5_enabled __pgtable_l5_enabled
-#endif
+/* cpu_feature_enabled() cannot be used this early */
+#define USE_EARLY_PGTABLE_L5
#include <linux/linkage.h>
#include <linux/screen_info.h>
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 32af1cbcd903..8c5107545251 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -23,14 +23,6 @@ struct paging_config {
static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
/*
- * The page table is going to be used instead of page table in the trampoline
- * memory.
- *
- * It must not be in BSS as BSS is cleared after cleanup_trampoline().
- */
-static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data);
-
-/*
* Trampoline address will be printed by extract_kernel() for debugging
* purposes.
*
@@ -39,16 +31,23 @@ static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data);
*/
unsigned long *trampoline_32bit __section(.data);
-struct paging_config paging_prepare(void)
+extern struct boot_params *boot_params;
+int cmdline_find_option_bool(const char *option);
+
+struct paging_config paging_prepare(void *rmode)
{
struct paging_config paging_config = {};
unsigned long bios_start, ebda_start;
+ /* Initialize boot_params. Required for cmdline_find_option_bool(). */
+ boot_params = rmode;
+
/*
* Check if LA57 is desired and supported.
*
- * There are two parts to the check:
+ * There are several parts to the check:
* - if the kernel supports 5-level paging: CONFIG_X86_5LEVEL=y
+ * - if user asked to disable 5-level paging: no5lvl in cmdline
* - if the machine supports 5-level paging:
* + CPUID leaf 7 is supported
* + the leaf has the feature bit set
@@ -56,6 +55,7 @@ struct paging_config paging_prepare(void)
* That's substitute for boot_cpu_has() in early boot code.
*/
if (IS_ENABLED(CONFIG_X86_5LEVEL) &&
+ !cmdline_find_option_bool("no5lvl") &&
native_cpuid_eax(0) >= 7 &&
(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
paging_config.l5_required = 1;
@@ -134,19 +134,19 @@ out:
return paging_config;
}
-void cleanup_trampoline(void)
+void cleanup_trampoline(void *pgtable)
{
void *trampoline_pgtable;
- trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET;
+ trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long);
/*
* Move the top level page table out of trampoline memory,
* if it's there.
*/
if ((void *)__native_read_cr3() == trampoline_pgtable) {
- memcpy(top_pgtable, trampoline_pgtable, PAGE_SIZE);
- native_write_cr3((unsigned long)top_pgtable);
+ memcpy(pgtable, trampoline_pgtable, PAGE_SIZE);
+ native_write_cr3((unsigned long)pgtable);
}
/* Restore trampoline memory */
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 5f07333bb224..a450ad573dcb 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -15,7 +15,6 @@ obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
-obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
@@ -24,7 +23,6 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
-obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha20-x86_64.o
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
@@ -38,6 +36,16 @@ obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o
+obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o
+obj-$(CONFIG_CRYPTO_AEGIS128L_AESNI_SSE2) += aegis128l-aesni.o
+obj-$(CONFIG_CRYPTO_AEGIS256_AESNI_SSE2) += aegis256-aesni.o
+
+obj-$(CONFIG_CRYPTO_MORUS640_GLUE) += morus640_glue.o
+obj-$(CONFIG_CRYPTO_MORUS1280_GLUE) += morus1280_glue.o
+
+obj-$(CONFIG_CRYPTO_MORUS640_SSE2) += morus640-sse2.o
+obj-$(CONFIG_CRYPTO_MORUS1280_SSE2) += morus1280-sse2.o
+
# These modules require assembler to support AVX.
ifeq ($(avx_supported),yes)
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += \
@@ -55,11 +63,12 @@ ifeq ($(avx2_supported),yes)
obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb/
obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/
obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb/
+
+ obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o
endif
aes-i586-y := aes-i586-asm_32.o aes_glue.o
twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
-salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
@@ -68,10 +77,16 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
-salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
chacha20-x86_64-y := chacha20-ssse3-x86_64.o chacha20_glue.o
serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
+aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
+aegis128l-aesni-y := aegis128l-aesni-asm.o aegis128l-aesni-glue.o
+aegis256-aesni-y := aegis256-aesni-asm.o aegis256-aesni-glue.o
+
+morus640-sse2-y := morus640-sse2-asm.o morus640-sse2-glue.o
+morus1280-sse2-y := morus1280-sse2-asm.o morus1280-sse2-glue.o
+
ifeq ($(avx_supported),yes)
camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
camellia_aesni_avx_glue.o
@@ -87,6 +102,8 @@ ifeq ($(avx2_supported),yes)
camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
chacha20-x86_64-y += chacha20-avx2-x86_64.o
serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
+
+ morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o
endif
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S
new file mode 100644
index 000000000000..9254e0b6cc06
--- /dev/null
+++ b/arch/x86/crypto/aegis128-aesni-asm.S
@@ -0,0 +1,749 @@
+/*
+ * AES-NI + SSE2 implementation of AEGIS-128
+ *
+ * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+#define STATE0 %xmm0
+#define STATE1 %xmm1
+#define STATE2 %xmm2
+#define STATE3 %xmm3
+#define STATE4 %xmm4
+#define KEY %xmm5
+#define MSG %xmm5
+#define T0 %xmm6
+#define T1 %xmm7
+
+#define STATEP %rdi
+#define LEN %rsi
+#define SRC %rdx
+#define DST %rcx
+
+.section .rodata.cst16.aegis128_const, "aM", @progbits, 32
+.align 16
+.Laegis128_const_0:
+ .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
+ .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
+.Laegis128_const_1:
+ .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
+ .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
+
+.section .rodata.cst16.aegis128_counter, "aM", @progbits, 16
+.align 16
+.Laegis128_counter:
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+ .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+
+.text
+
+/*
+ * aegis128_update
+ * input:
+ * STATE[0-4] - input state
+ * output:
+ * STATE[0-4] - output state (shifted positions)
+ * changed:
+ * T0
+ */
+.macro aegis128_update
+ movdqa STATE4, T0
+ aesenc STATE0, STATE4
+ aesenc STATE1, STATE0
+ aesenc STATE2, STATE1
+ aesenc STATE3, STATE2
+ aesenc T0, STATE3
+.endm
+
+/*
+ * __load_partial: internal ABI
+ * input:
+ * LEN - bytes
+ * SRC - src
+ * output:
+ * MSG - message block
+ * changed:
+ * T0
+ * %r8
+ * %r9
+ */
+__load_partial:
+ xor %r9, %r9
+ pxor MSG, MSG
+
+ mov LEN, %r8
+ and $0x1, %r8
+ jz .Lld_partial_1
+
+ mov LEN, %r8
+ and $0x1E, %r8
+ add SRC, %r8
+ mov (%r8), %r9b
+
+.Lld_partial_1:
+ mov LEN, %r8
+ and $0x2, %r8
+ jz .Lld_partial_2
+
+ mov LEN, %r8
+ and $0x1C, %r8
+ add SRC, %r8
+ shl $0x10, %r9
+ mov (%r8), %r9w
+
+.Lld_partial_2:
+ mov LEN, %r8
+ and $0x4, %r8
+ jz .Lld_partial_4
+
+ mov LEN, %r8
+ and $0x18, %r8
+ add SRC, %r8
+ shl $32, %r9
+ mov (%r8), %r8d
+ xor %r8, %r9
+
+.Lld_partial_4:
+ movq %r9, MSG
+
+ mov LEN, %r8
+ and $0x8, %r8
+ jz .Lld_partial_8
+
+ mov LEN, %r8
+ and $0x10, %r8
+ add SRC, %r8
+ pslldq $8, MSG
+ movq (%r8), T0
+ pxor T0, MSG
+
+.Lld_partial_8:
+ ret
+ENDPROC(__load_partial)
+
+/*
+ * __store_partial: internal ABI
+ * input:
+ * LEN - bytes
+ * DST - dst
+ * output:
+ * T0 - message block
+ * changed:
+ * %r8
+ * %r9
+ * %r10
+ */
+__store_partial:
+ mov LEN, %r8
+ mov DST, %r9
+
+ movq T0, %r10
+
+ cmp $8, %r8
+ jl .Lst_partial_8
+
+ mov %r10, (%r9)
+ psrldq $8, T0
+ movq T0, %r10
+
+ sub $8, %r8
+ add $8, %r9
+
+.Lst_partial_8:
+ cmp $4, %r8
+ jl .Lst_partial_4
+
+ mov %r10d, (%r9)
+ shr $32, %r10
+
+ sub $4, %r8
+ add $4, %r9
+
+.Lst_partial_4:
+ cmp $2, %r8
+ jl .Lst_partial_2
+
+ mov %r10w, (%r9)
+ shr $0x10, %r10
+
+ sub $2, %r8
+ add $2, %r9
+
+.Lst_partial_2:
+ cmp $1, %r8
+ jl .Lst_partial_1
+
+ mov %r10b, (%r9)
+
+.Lst_partial_1:
+ ret
+ENDPROC(__store_partial)
+
+/*
+ * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv);
+ */
+ENTRY(crypto_aegis128_aesni_init)
+ FRAME_BEGIN
+
+ /* load IV: */
+ movdqu (%rdx), T1
+
+ /* load key: */
+ movdqa (%rsi), KEY
+ pxor KEY, T1
+ movdqa T1, STATE0
+ movdqa KEY, STATE3
+ movdqa KEY, STATE4
+
+ /* load the constants: */
+ movdqa .Laegis128_const_0, STATE2
+ movdqa .Laegis128_const_1, STATE1
+ pxor STATE2, STATE3
+ pxor STATE1, STATE4
+
+ /* update 10 times with KEY / KEY xor IV: */
+ aegis128_update; pxor KEY, STATE4
+ aegis128_update; pxor T1, STATE3
+ aegis128_update; pxor KEY, STATE2
+ aegis128_update; pxor T1, STATE1
+ aegis128_update; pxor KEY, STATE0
+ aegis128_update; pxor T1, STATE4
+ aegis128_update; pxor KEY, STATE3
+ aegis128_update; pxor T1, STATE2
+ aegis128_update; pxor KEY, STATE1
+ aegis128_update; pxor T1, STATE0
+
+ /* store the state: */
+ movdqu STATE0, 0x00(STATEP)
+ movdqu STATE1, 0x10(STATEP)
+ movdqu STATE2, 0x20(STATEP)
+ movdqu STATE3, 0x30(STATEP)
+ movdqu STATE4, 0x40(STATEP)
+
+ FRAME_END
+ ret
+ENDPROC(crypto_aegis128_aesni_init)
+
+/*
+ * void crypto_aegis128_aesni_ad(void *state, unsigned int length,
+ * const void *data);
+ */
+ENTRY(crypto_aegis128_aesni_ad)
+ FRAME_BEGIN
+
+ cmp $0x10, LEN
+ jb .Lad_out
+
+ /* load the state: */
+ movdqu 0x00(STATEP), STATE0
+ movdqu 0x10(STATEP), STATE1
+ movdqu 0x20(STATEP), STATE2
+ movdqu 0x30(STATEP), STATE3
+ movdqu 0x40(STATEP), STATE4
+
+ mov SRC, %r8
+ and $0xF, %r8
+ jnz .Lad_u_loop
+
+.align 8
+.Lad_a_loop:
+ movdqa 0x00(SRC), MSG
+ aegis128_update
+ pxor MSG, STATE4
+ sub $0x10, LEN
+ cmp $0x10, LEN
+ jl .Lad_out_1
+
+ movdqa 0x10(SRC), MSG
+ aegis128_update
+ pxor MSG, STATE3
+ sub $0x10, LEN
+ cmp $0x10, LEN
+ jl .Lad_out_2
+
+ movdqa 0x20(SRC), MSG
+ aegis128_update
+ pxor MSG, STATE2
+ sub $0x10, LEN
+ cmp $0x10, LEN
+ jl .Lad_out_3
+
+ movdqa 0x30(SRC), MSG
+ aegis128_update
+ pxor MSG, STATE1
+ sub $0x10, LEN
+ cmp $0x10, LEN
+ jl .Lad_out_4
+
+ movdqa 0x40(SRC), MSG
+ aegis128_update
+ pxor MSG, STATE0
+ sub $0x10, LEN
+ cmp $0x10, LEN
+ jl .Lad_out_0
+
+ add $0x50, SRC
+ jmp .Lad_a_loop
+
+.align 8
+.Lad_u_loop:
+ movdqu 0x00(SRC), MSG
+ aegis128_update
+ pxor MSG, STATE4
+ sub $0x10, LEN
+ cmp $0x10, LEN
+ jl .Lad_out_1
+
+ movdqu 0x10(SRC), MSG
+ aegis128_update
+ pxor MSG, STATE3
+ sub $0x10, LEN
+ cmp $0x10, LEN
+ jl .Lad_out_2
+
+ movdqu 0x20(SRC), MSG
+ aegis128_update
+ pxor MSG, STATE2
+ sub $0x10, LEN
+ cmp $0x10, LEN
+ jl .Lad_out_3
+
+ movdqu 0x30(SRC), MSG
+ aegis128_update
+ pxor MSG, STATE1
+ sub $0x10, LEN
+ cmp $0x10, LEN
+ jl .Lad_out_4
+
+ movdqu 0x40(SRC), MSG
+ aegis128_update
+ pxor MSG, STATE0
+ sub $0x10, LEN
+ cmp $0x10, LEN
+ jl .Lad_out_0
+
+ add $0x50, SRC
+ jmp .Lad_u_loop
+
+ /* store the state: */
+.Lad_out_0:
+ movdqu STATE0, 0x00(STATEP)
+ movdqu STATE1, 0x10(STATEP)
+ movdqu STATE2, 0x20(STATEP)
+ movdqu STATE3, 0x30(STATEP)
+ movdqu STATE4, 0x40(STATEP)
+ FRAME_END
+ ret
+
+.Lad_out_1:
+ movdqu STATE4, 0x00(STATEP)
+ movdqu STATE0, 0x10(STATEP)
+ movdqu STATE1, 0x20(STATEP)
+ movdqu STATE2, 0x30(STATEP)
+ movdqu STATE3, 0x40(STATEP)
+ FRAME_END
+ ret
+
+.Lad_out_2:
+ movdqu STATE3, 0x00(STATEP)
+ movdqu STATE4, 0x10(STATEP)
+ movdqu STATE0, 0x20(STATEP)
+ movdqu STATE1, 0x30(STATEP)
+ movdqu STATE2, 0x40(STATEP)
+ FRAME_END
+ ret
+
+.Lad_out_3:
+ movdqu STATE2, 0x00(STATEP)
+ movdqu STATE3, 0x10(STATEP)
+ movdqu STATE4, 0x20(STATEP)
+ movdqu STATE0, 0x30(STATEP)
+ movdqu STATE1, 0x40(STATEP)
+ FRAME_END
+ ret
+
+.Lad_out_4:
+ movdqu STATE1, 0x00(STATEP)
+ movdqu STATE2, 0x10(STATEP)
+ movdqu STATE3, 0x20(STATEP)
+ movdqu STATE4, 0x30(STATEP)
+ movdqu STATE0, 0x40(STATEP)
+ FRAME_END
+ ret
+
+.Lad_out:
+ FRAME_END
+ ret
+ENDPROC(crypto_aegis128_aesni_ad)
+
+.macro encrypt_block a s0 s1 s2 s3 s4 i
+ movdq\a (\i * 0x10)(SRC), MSG
+ movdqa MSG, T0
+ pxor \s1, T0
+ pxor \s4, T0
+ movdqa \s2, T1
+ pand \s3, T1
+ pxor T1, T0
+ movdq\a T0, (\i * 0x10)(DST)
+
+ aegis128_update
+ pxor MSG, \s4
+
+ sub $0x10, LEN
+ cmp $0x10, LEN
+ jl .Lenc_out_\i
+.endm
+
+/*
+ * void crypto_aegis128_aesni_enc(void *state, unsigned int length,
+ * const void *src, void *dst);
+ */
+ENTRY(crypto_aegis128_aesni_enc)
+ FRAME_BEGIN
+
+ cmp $0x10, LEN
+ jb .Lenc_out
+
+ /* load the state: */
+ movdqu 0x00(STATEP), STATE0
+ movdqu 0x10(STATEP), STATE1
+ movdqu 0x20(STATEP), STATE2
+ movdqu 0x30(STATEP), STATE3
+ movdqu 0x40(STATEP), STATE4
+
+ mov SRC, %r8
+ or DST, %r8
+ and $0xF, %r8
+ jnz .Lenc_u_loop
+
+.align 8
+.Lenc_a_loop:
+ encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
+ encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
+ encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
+ encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
+ encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
+
+ add $0x50, SRC
+ add $0x50, DST
+ jmp .Lenc_a_loop
+
+.align 8
+.Lenc_u_loop:
+ encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
+ encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
+ encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
+ encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
+ encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
+
+ add $0x50, SRC
+ add $0x50, DST
+ jmp .Lenc_u_loop
+
+ /* store the state: */
+.Lenc_out_0:
+ movdqu STATE4, 0x00(STATEP)
+ movdqu STATE0, 0x10(STATEP)
+ movdqu STATE1, 0x20(STATEP)
+ movdqu STATE2, 0x30(STATEP)
+ movdqu STATE3, 0x40(STATEP)
+ FRAME_END
+ ret
+
+.Lenc_out_1:
+ movdqu STATE3, 0x00(STATEP)
+ movdqu STATE4, 0x10(STATEP)
+ movdqu STATE0, 0x20(STATEP)
+ movdqu STATE1, 0x30(STATEP)
+ movdqu STATE2, 0x40(STATEP)
+ FRAME_END
+ ret
+
+.Lenc_out_2:
+ movdqu STATE2, 0x00(STATEP)
+ movdqu STATE3, 0x10(STATEP)
+ movdqu STATE4, 0x20(STATEP)
+ movdqu STATE0, 0x30(STATEP)
+ movdqu STATE1, 0x40(STATEP)
+ FRAME_END
+ ret
+
+.Lenc_out_3:
+ movdqu STATE1, 0x00(STATEP)
+ movdqu STATE2, 0x10(STATEP)
+ movdqu STATE3, 0x20(STATEP)
+ movdqu STATE4, 0x30(STATEP)
+ movdqu STATE0, 0x40(STATEP)
+ FRAME_END
+ ret
+
+.Lenc_out_4:
+ movdqu STATE0, 0x00(STATEP)
+ movdqu STATE1, 0x10(STATEP)
+ movdqu STATE2, 0x20(STATEP)
+ movdqu STATE3, 0x30(STATEP)
+ movdqu STATE4, 0x40(STATEP)
+ FRAME_END
+ ret
+
+.Lenc_out:
+ FRAME_END
+ ret
+ENDPROC(crypto_aegis128_aesni_enc)
+
+/*
+ * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
+ * const void *src, void *dst);
+ */
+ENTRY(crypto_aegis128_aesni_enc_tail)
+ FRAME_BEGIN
+
+ /* load the state: */
+ movdqu 0x00(STATEP), STATE0
+ movdqu 0x10(STATEP), STATE1
+ movdqu 0x20(STATEP), STATE2
+ movdqu 0x30(STATEP), STATE3
+ movdqu 0x40(STATEP), STATE4
+
+ /* encrypt message: */
+ call __load_partial
+
+ movdqa MSG, T0
+ pxor STATE1, T0
+ pxor STATE4, T0
+ movdqa STATE2, T1
+ pand STATE3, T1
+ pxor T1, T0
+
+ call __store_partial
+
+ aegis128_update
+ pxor MSG, STATE4
+
+ /* store the state: */
+ movdqu STATE4, 0x00(STATEP)
+ movdqu STATE0, 0x10(STATEP)
+ movdqu STATE1, 0x20(STATEP)
+ movdqu STATE2, 0x30(STATEP)
+ movdqu STATE3, 0x40(STATEP)
+
+ FRAME_END
+ENDPROC(crypto_aegis128_aesni_enc_tail)
+
+.macro decrypt_block a s0 s1 s2 s3 s4 i
+ movdq\a (\i * 0x10)(SRC), MSG
+ pxor \s1, MSG
+ pxor \s4, MSG
+ movdqa \s2, T1
+ pand \s3, T1
+ pxor T1, MSG
+ movdq\a MSG, (\i * 0x10)(DST)
+
+ aegis128_update
+ pxor MSG, \s4
+
+ sub $0x10, LEN
+ cmp $0x10, LEN
+ jl .Ldec_out_\i
+.endm
+
+/*
+ * void crypto_aegis128_aesni_dec(void *state, unsigned int length,
+ * const void *src, void *dst);
+ */
+ENTRY(crypto_aegis128_aesni_dec)
+ FRAME_BEGIN
+
+ cmp $0x10, LEN
+ jb .Ldec_out
+
+ /* load the state: */
+ movdqu 0x00(STATEP), STATE0
+ movdqu 0x10(STATEP), STATE1
+ movdqu 0x20(STATEP), STATE2
+ movdqu 0x30(STATEP), STATE3
+ movdqu 0x40(STATEP), STATE4
+
+ mov SRC, %r8
+ or DST, %r8
+ and $0xF, %r8
+ jnz .Ldec_u_loop
+
+.align 8
+.Ldec_a_loop:
+ decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
+ decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
+ decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
+ decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
+ decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
+
+ add $0x50, SRC
+ add $0x50, DST
+ jmp .Ldec_a_loop
+
+.align 8
+.Ldec_u_loop:
+ decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
+ decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
+ decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
+ decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
+ decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
+
+ add $0x50, SRC
+ add $0x50, DST
+ jmp .Ldec_u_loop
+
+ /* store the state: */
+.Ldec_out_0:
+ movdqu STATE4, 0x00(STATEP)
+ movdqu STATE0, 0x10(STATEP)
+ movdqu STATE1, 0x20(STATEP)
+ movdqu STATE2, 0x30(STATEP)
+ movdqu STATE3, 0x40(STATEP)
+ FRAME_END
+ ret
+
+.Ldec_out_1:
+ movdqu STATE3, 0x00(STATEP)
+ movdqu STATE4, 0x10(STATEP)
+ movdqu STATE0, 0x20(STATEP)
+ movdqu STATE1, 0x30(STATEP)
+ movdqu STATE2, 0x40(STATEP)
+ FRAME_END
+ ret
+
+.Ldec_out_2:
+ movdqu STATE2, 0x00(STATEP)
+ movdqu STATE3, 0x10(STATEP)
+ movdqu STATE4, 0x20(STATEP)
+ movdqu STATE0, 0x30(STATEP)
+ movdqu STATE1, 0x40(STATEP)
+ FRAME_END
+ ret
+
+.Ldec_out_3:
+ movdqu STATE1, 0x00(STATEP)
+ movdqu STATE2, 0x10(STATEP)
+ movdqu STATE3, 0x20(STATEP)
+ movdqu STATE4, 0x30(STATEP)
+ movdqu STATE0, 0x40(STATEP)
+ FRAME_END
+ ret
+
+.Ldec_out_4:
+ movdqu STATE0, 0x00(STATEP)
+ movdqu STATE1, 0x10(STATEP)
+ movdqu STATE2, 0x20(STATEP)
+ movdqu STATE3, 0x30(STATEP)
+ movdqu STATE4, 0x40(STATEP)
+ FRAME_END
+ ret
+
+.Ldec_out:
+ FRAME_END
+ ret
+ENDPROC(crypto_aegis128_aesni_dec)
+
+/*
+ * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
+ * const void *src, void *dst);
+ */
+ENTRY(crypto_aegis128_aesni_dec_tail)
+ FRAME_BEGIN
+
+ /* load the state: */
+ movdqu 0x00(STATEP), STATE0
+ movdqu 0x10(STATEP), STATE1
+ movdqu 0x20(STATEP), STATE2
+ movdqu 0x30(STATEP), STATE3
+ movdqu 0x40(STATEP), STATE4
+
+ /* decrypt message: */
+ call __load_partial
+
+ pxor STATE1, MSG
+ pxor STATE4, MSG
+ movdqa STATE2, T1
+ pand STATE3, T1
+ pxor T1, MSG
+
+ movdqa MSG, T0
+ call __store_partial
+
+ /* mask with byte count: */
+ movq LEN, T0
+ punpcklbw T0, T0
+ punpcklbw T0, T0
+ punpcklbw T0, T0
+ punpcklbw T0, T0
+ movdqa .Laegis128_counter, T1
+ pcmpgtb T1, T0
+ pand T0, MSG
+
+ aegis128_update
+ pxor MSG, STATE4
+
+ /* store the state: */
+ movdqu STATE4, 0x00(STATEP)
+ movdqu STATE0, 0x10(STATEP)
+ movdqu STATE1, 0x20(STATEP)
+ movdqu STATE2, 0x30(STATEP)
+ movdqu STATE3, 0x40(STATEP)
+
+ FRAME_END
+ ret
+ENDPROC(crypto_aegis128_aesni_dec_tail)
+
+/*
+ * void crypto_aegis128_aesni_final(void *state, void *tag_xor,
+ * u64 assoclen, u64 cryptlen);
+ */
+ENTRY(crypto_aegis128_aesni_final)
+ FRAME_BEGIN
+
+ /* load the state: */
+ movdqu 0x00(STATEP), STATE0
+ movdqu 0x10(STATEP), STATE1
+ movdqu 0x20(STATEP), STATE2
+ movdqu 0x30(STATEP), STATE3
+ movdqu 0x40(STATEP), STATE4
+
+ /* prepare length block: */
+ movq %rdx, MSG
+ movq %rcx, T0
+ pslldq $8, T0
+ pxor T0, MSG
+ psllq $3, MSG /* multiply by 8 (to get bit count) */
+
+ pxor STATE3, MSG
+
+ /* update state: */
+ aegis128_update; pxor MSG, STATE4
+ aegis128_update; pxor MSG, STATE3
+ aegis128_update; pxor MSG, STATE2
+ aegis128_update; pxor MSG, STATE1
+ aegis128_update; pxor MSG, STATE0
+ aegis128_update; pxor MSG, STATE4
+ aegis128_update; pxor MSG, STATE3
+
+ /* xor tag: */
+ movdqu (%rsi), MSG
+
+ pxor STATE0, MSG
+ pxor STATE1, MSG
+ pxor STATE2, MSG
+ pxor STATE3, MSG
+ pxor STATE4, MSG
+
+ movdqu MSG, (%rsi)
+
+ FRAME_END
+ ret
+ENDPROC(crypto_aegis128_aesni_final)
diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
new file mode 100644
index 000000000000..5de7c0d46edf
--- /dev/null
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -0,0 +1,407 @@
+/*
+ * The AEGIS-128 Authenticated-Encryption Algorithm
+ * Glue for AES-NI + SSE2 implementation
+ *
+ * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/cryptd.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <linux/module.h>
+#include <asm/fpu/api.h>
+#include <asm/cpu_device_id.h>
+
+#define AEGIS128_BLOCK_ALIGN 16
+#define AEGIS128_BLOCK_SIZE 16
+#define AEGIS128_NONCE_SIZE 16
+#define AEGIS128_STATE_BLOCKS 5
+#define AEGIS128_KEY_SIZE 16
+#define AEGIS128_MIN_AUTH_SIZE 8
+#define AEGIS128_MAX_AUTH_SIZE 16
+
+asmlinkage void crypto_aegis128_aesni_init(void *state, void *key, void *iv);
+
+asmlinkage void crypto_aegis128_aesni_ad(
+ void *state, unsigned int length, const void *data);
+
+asmlinkage void crypto_aegis128_aesni_enc(
+ void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis128_aesni_dec(
+ void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis128_aesni_enc_tail(
+ void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis128_aesni_dec_tail(
+ void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis128_aesni_final(
+ void *state, void *tag_xor, unsigned int cryptlen,
+ unsigned int assoclen);
+
+struct aegis_block {
+ u8 bytes[AEGIS128_BLOCK_SIZE] __aligned(AEGIS128_BLOCK_ALIGN);
+};
+
+struct aegis_state {
+ struct aegis_block blocks[AEGIS128_STATE_BLOCKS];
+};
+
+struct aegis_ctx {
+ struct aegis_block key;
+};
+
+struct aegis_crypt_ops {
+ int (*skcipher_walk_init)(struct skcipher_walk *walk,
+ struct aead_request *req, bool atomic);
+
+ void (*crypt_blocks)(void *state, unsigned int length, const void *src,
+ void *dst);
+ void (*crypt_tail)(void *state, unsigned int length, const void *src,
+ void *dst);
+};
+
+static void crypto_aegis128_aesni_process_ad(
+ struct aegis_state *state, struct scatterlist *sg_src,
+ unsigned int assoclen)
+{
+ struct scatter_walk walk;
+ struct aegis_block buf;
+ unsigned int pos = 0;
+
+ scatterwalk_start(&walk, sg_src);
+ while (assoclen != 0) {
+ unsigned int size = scatterwalk_clamp(&walk, assoclen);
+ unsigned int left = size;
+ void *mapped = scatterwalk_map(&walk);
+ const u8 *src = (const u8 *)mapped;
+
+ if (pos + size >= AEGIS128_BLOCK_SIZE) {
+ if (pos > 0) {
+ unsigned int fill = AEGIS128_BLOCK_SIZE - pos;
+ memcpy(buf.bytes + pos, src, fill);
+ crypto_aegis128_aesni_ad(state,
+ AEGIS128_BLOCK_SIZE,
+ buf.bytes);
+ pos = 0;
+ left -= fill;
+ src += fill;
+ }
+
+ crypto_aegis128_aesni_ad(state, left, src);
+
+ src += left & ~(AEGIS128_BLOCK_SIZE - 1);
+ left &= AEGIS128_BLOCK_SIZE - 1;
+ }
+
+ memcpy(buf.bytes + pos, src, left);
+ pos += left;
+ assoclen -= size;
+
+ scatterwalk_unmap(mapped);
+ scatterwalk_advance(&walk, size);
+ scatterwalk_done(&walk, 0, assoclen);
+ }
+
+ if (pos > 0) {
+ memset(buf.bytes + pos, 0, AEGIS128_BLOCK_SIZE - pos);
+ crypto_aegis128_aesni_ad(state, AEGIS128_BLOCK_SIZE, buf.bytes);
+ }
+}
+
+static void crypto_aegis128_aesni_process_crypt(
+ struct aegis_state *state, struct aead_request *req,
+ const struct aegis_crypt_ops *ops)
+{
+ struct skcipher_walk walk;
+ u8 *src, *dst;
+ unsigned int chunksize, base;
+
+ ops->skcipher_walk_init(&walk, req, false);
+
+ while (walk.nbytes) {
+ src = walk.src.virt.addr;
+ dst = walk.dst.virt.addr;
+ chunksize = walk.nbytes;
+
+ ops->crypt_blocks(state, chunksize, src, dst);
+
+ base = chunksize & ~(AEGIS128_BLOCK_SIZE - 1);
+ src += base;
+ dst += base;
+ chunksize &= AEGIS128_BLOCK_SIZE - 1;
+
+ if (chunksize > 0)
+ ops->crypt_tail(state, chunksize, src, dst);
+
+ skcipher_walk_done(&walk, 0);
+ }
+}
+
+static struct aegis_ctx *crypto_aegis128_aesni_ctx(struct crypto_aead *aead)
+{
+ u8 *ctx = crypto_aead_ctx(aead);
+ ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx));
+ return (void *)ctx;
+}
+
+static int crypto_aegis128_aesni_setkey(struct crypto_aead *aead, const u8 *key,
+ unsigned int keylen)
+{
+ struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(aead);
+
+ if (keylen != AEGIS128_KEY_SIZE) {
+ crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+
+ memcpy(ctx->key.bytes, key, AEGIS128_KEY_SIZE);
+
+ return 0;
+}
+
+static int crypto_aegis128_aesni_setauthsize(struct crypto_aead *tfm,
+ unsigned int authsize)
+{
+ if (authsize > AEGIS128_MAX_AUTH_SIZE)
+ return -EINVAL;
+ if (authsize < AEGIS128_MIN_AUTH_SIZE)
+ return -EINVAL;
+ return 0;
+}
+
+static void crypto_aegis128_aesni_crypt(struct aead_request *req,
+ struct aegis_block *tag_xor,
+ unsigned int cryptlen,
+ const struct aegis_crypt_ops *ops)
+{
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(tfm);
+ struct aegis_state state;
+
+ kernel_fpu_begin();
+
+ crypto_aegis128_aesni_init(&state, ctx->key.bytes, req->iv);
+ crypto_aegis128_aesni_process_ad(&state, req->src, req->assoclen);
+ crypto_aegis128_aesni_process_crypt(&state, req, ops);
+ crypto_aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
+
+ kernel_fpu_end();
+}
+
+static int crypto_aegis128_aesni_encrypt(struct aead_request *req)
+{
+ static const struct aegis_crypt_ops OPS = {
+ .skcipher_walk_init = skcipher_walk_aead_encrypt,
+ .crypt_blocks = crypto_aegis128_aesni_enc,
+ .crypt_tail = crypto_aegis128_aesni_enc_tail,
+ };
+
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct aegis_block tag = {};
+ unsigned int authsize = crypto_aead_authsize(tfm);
+ unsigned int cryptlen = req->cryptlen;
+
+ crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS);
+
+ scatterwalk_map_and_copy(tag.bytes, req->dst,
+ req->assoclen + cryptlen, authsize, 1);
+ return 0;
+}
+
+static int crypto_aegis128_aesni_decrypt(struct aead_request *req)
+{
+ static const struct aegis_block zeros = {};
+
+ static const struct aegis_crypt_ops OPS = {
+ .skcipher_walk_init = skcipher_walk_aead_decrypt,
+ .crypt_blocks = crypto_aegis128_aesni_dec,
+ .crypt_tail = crypto_aegis128_aesni_dec_tail,
+ };
+
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct aegis_block tag;
+ unsigned int authsize = crypto_aead_authsize(tfm);
+ unsigned int cryptlen = req->cryptlen - authsize;
+
+ scatterwalk_map_and_copy(tag.bytes, req->src,
+ req->assoclen + cryptlen, authsize, 0);
+
+ crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS);
+
+ return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0;
+}
+
+static int crypto_aegis128_aesni_init_tfm(struct crypto_aead *aead)
+{
+ return 0;
+}
+
+static void crypto_aegis128_aesni_exit_tfm(struct crypto_aead *aead)
+{
+}
+
+static int cryptd_aegis128_aesni_setkey(struct crypto_aead *aead,
+ const u8 *key, unsigned int keylen)
+{
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+ struct cryptd_aead *cryptd_tfm = *ctx;
+
+ return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
+}
+
+static int cryptd_aegis128_aesni_setauthsize(struct crypto_aead *aead,
+ unsigned int authsize)
+{
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+ struct cryptd_aead *cryptd_tfm = *ctx;
+
+ return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
+}
+
+static int cryptd_aegis128_aesni_encrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+ struct cryptd_aead *cryptd_tfm = *ctx;
+
+ aead = &cryptd_tfm->base;
+ if (irq_fpu_usable() && (!in_atomic() ||
+ !cryptd_aead_queued(cryptd_tfm)))
+ aead = cryptd_aead_child(cryptd_tfm);
+
+ aead_request_set_tfm(req, aead);
+
+ return crypto_aead_encrypt(req);
+}
+
+static int cryptd_aegis128_aesni_decrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+ struct cryptd_aead *cryptd_tfm = *ctx;
+
+ aead = &cryptd_tfm->base;
+ if (irq_fpu_usable() && (!in_atomic() ||
+ !cryptd_aead_queued(cryptd_tfm)))
+ aead = cryptd_aead_child(cryptd_tfm);
+
+ aead_request_set_tfm(req, aead);
+
+ return crypto_aead_decrypt(req);
+}
+
+static int cryptd_aegis128_aesni_init_tfm(struct crypto_aead *aead)
+{
+ struct cryptd_aead *cryptd_tfm;
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+ cryptd_tfm = cryptd_alloc_aead("__aegis128-aesni", CRYPTO_ALG_INTERNAL,
+ CRYPTO_ALG_INTERNAL);
+ if (IS_ERR(cryptd_tfm))
+ return PTR_ERR(cryptd_tfm);
+
+ *ctx = cryptd_tfm;
+ crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
+ return 0;
+}
+
+static void cryptd_aegis128_aesni_exit_tfm(struct crypto_aead *aead)
+{
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+ cryptd_free_aead(*ctx);
+}
+
+static struct aead_alg crypto_aegis128_aesni_alg[] = {
+ {
+ .setkey = crypto_aegis128_aesni_setkey,
+ .setauthsize = crypto_aegis128_aesni_setauthsize,
+ .encrypt = crypto_aegis128_aesni_encrypt,
+ .decrypt = crypto_aegis128_aesni_decrypt,
+ .init = crypto_aegis128_aesni_init_tfm,
+ .exit = crypto_aegis128_aesni_exit_tfm,
+
+ .ivsize = AEGIS128_NONCE_SIZE,
+ .maxauthsize = AEGIS128_MAX_AUTH_SIZE,
+ .chunksize = AEGIS128_BLOCK_SIZE,
+
+ .base = {
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct aegis_ctx) +
+ __alignof__(struct aegis_ctx),
+ .cra_alignmask = 0,
+
+ .cra_name = "__aegis128",
+ .cra_driver_name = "__aegis128-aesni",
+
+ .cra_module = THIS_MODULE,
+ }
+ }, {
+ .setkey = cryptd_aegis128_aesni_setkey,
+ .setauthsize = cryptd_aegis128_aesni_setauthsize,
+ .encrypt = cryptd_aegis128_aesni_encrypt,
+ .decrypt = cryptd_aegis128_aesni_decrypt,
+ .init = cryptd_aegis128_aesni_init_tfm,
+ .exit = cryptd_aegis128_aesni_exit_tfm,
+
+ .ivsize = AEGIS128_NONCE_SIZE,
+ .maxauthsize = AEGIS128_MAX_AUTH_SIZE,
+ .chunksize = AEGIS128_BLOCK_SIZE,
+
+ .base = {
+ .cra_flags = CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct cryptd_aead *),
+ .cra_alignmask = 0,
+
+ .cra_priority = 400,
+
+ .cra_name = "aegis128",
+ .cra_driver_name = "aegis128-aesni",
+
+ .cra_module = THIS_MODULE,
+ }
+ }
+};
+
+static const struct x86_cpu_id aesni_cpu_id[] = {
+ X86_FEATURE_MATCH(X86_FEATURE_AES),
+ X86_FEATURE_MATCH(X86_FEATURE_XMM2),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
+
+static int __init crypto_aegis128_aesni_module_init(void)
+{
+ if (!x86_match_cpu(aesni_cpu_id))
+ return -ENODEV;
+
+ return crypto_register_aeads(crypto_aegis128_aesni_alg,
+ ARRAY_SIZE(crypto_aegis128_aesni_alg));
+}
+
+static void __exit crypto_aegis128_aesni_module_exit(void)
+{
+ crypto_unregister_aeads(crypto_aegis128_aesni_alg,
+ ARRAY_SIZE(crypto_aegis128_aesni_alg));
+}
+
+module_init(crypto_aegis128_aesni_module_init);
+module_exit(crypto_aegis128_aesni_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
+MODULE_DESCRIPTION("AEGIS-128 AEAD algorithm -- AESNI+SSE2 implementation");
+MODULE_ALIAS_CRYPTO("aegis128");
+MODULE_ALIAS_CRYPTO("aegis128-aesni");
diff --git a/arch/x86/crypto/aegis128l-aesni-asm.S b/arch/x86/crypto/aegis128l-aesni-asm.S
new file mode 100644
index 000000000000..9263c344f2c7
--- /dev/null
+++ b/arch/x86/crypto/aegis128l-aesni-asm.S
@@ -0,0 +1,825 @@
+/*
+ * AES-NI + SSE2 implementation of AEGIS-128L
+ *
+ * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+#define STATE0 %xmm0
+#define STATE1 %xmm1
+#define STATE2 %xmm2
+#define STATE3 %xmm3
+#define STATE4 %xmm4
+#define STATE5 %xmm5
+#define STATE6 %xmm6
+#define STATE7 %xmm7
+#define MSG0 %xmm8
+#define MSG1 %xmm9
+#define T0 %xmm10
+#define T1 %xmm11
+#define T2 %xmm12
+#define T3 %xmm13
+
+#define STATEP %rdi
+#define LEN %rsi
+#define SRC %rdx
+#define DST %rcx
+
+.section .rodata.cst16.aegis128l_const, "aM", @progbits, 32
+.align 16
+.Laegis128l_const_0:
+ .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
+ .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
+.Laegis128l_const_1:
+ .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
+ .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
+
+.section .rodata.cst16.aegis128l_counter, "aM", @progbits, 16
+.align 16
+.Laegis128l_counter0:
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+ .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+.Laegis128l_counter1:
+ .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
+ .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
+
+.text
+
+/*
+ * __load_partial: internal ABI
+ * input:
+ * LEN - bytes
+ * SRC - src
+ * output:
+ * MSG0 - first message block
+ * MSG1 - second message block
+ * changed:
+ * T0
+ * %r8
+ * %r9
+ */
+__load_partial:
+ xor %r9, %r9
+ pxor MSG0, MSG0
+ pxor MSG1, MSG1
+
+ mov LEN, %r8
+ and $0x1, %r8
+ jz .Lld_partial_1
+
+ mov LEN, %r8
+ and $0x1E, %r8
+ add SRC, %r8
+ mov (%r8), %r9b
+
+.Lld_partial_1:
+ mov LEN, %r8
+ and $0x2, %r8
+ jz .Lld_partial_2
+
+ mov LEN, %r8
+ and $0x1C, %r8
+ add SRC, %r8
+ shl $0x10, %r9
+ mov (%r8), %r9w
+
+.Lld_partial_2:
+ mov LEN, %r8
+ and $0x4, %r8
+ jz .Lld_partial_4
+
+ mov LEN, %r8
+ and $0x18, %r8
+ add SRC, %r8
+ shl $32, %r9
+ mov (%r8), %r8d
+ xor %r8, %r9
+
+.Lld_partial_4:
+ movq %r9, MSG0
+
+ mov LEN, %r8
+ and $0x8, %r8
+ jz .Lld_partial_8
+
+ mov LEN, %r8
+ and $0x10, %r8
+ add SRC, %r8
+ pslldq $8, MSG0
+ movq (%r8), T0
+ pxor T0, MSG0
+
+.Lld_partial_8:
+ mov LEN, %r8
+ and $0x10, %r8
+ jz .Lld_partial_16
+
+ movdqa MSG0, MSG1
+ movdqu (SRC), MSG0
+
+.Lld_partial_16:
+ ret
+ENDPROC(__load_partial)
+
+/*
+ * __store_partial: internal ABI
+ * input:
+ * LEN - bytes
+ * DST - dst
+ * output:
+ * T0 - first message block
+ * T1 - second message block
+ * changed:
+ * %r8
+ * %r9
+ * %r10
+ */
+__store_partial:
+ mov LEN, %r8
+ mov DST, %r9
+
+ cmp $16, %r8
+ jl .Lst_partial_16
+
+ movdqu T0, (%r9)
+ movdqa T1, T0
+
+ sub $16, %r8
+ add $16, %r9
+
+.Lst_partial_16:
+ movq T0, %r10
+
+ cmp $8, %r8
+ jl .Lst_partial_8
+
+ mov %r10, (%r9)
+ psrldq $8, T0
+ movq T0, %r10
+
+ sub $8, %r8
+ add $8, %r9
+
+.Lst_partial_8:
+ cmp $4, %r8
+ jl .Lst_partial_4
+
+ mov %r10d, (%r9)
+ shr $32, %r10
+
+ sub $4, %r8
+ add $4, %r9
+
+.Lst_partial_4:
+ cmp $2, %r8
+ jl .Lst_partial_2
+
+ mov %r10w, (%r9)
+ shr $0x10, %r10
+
+ sub $2, %r8
+ add $2, %r9
+
+.Lst_partial_2:
+ cmp $1, %r8
+ jl .Lst_partial_1
+
+ mov %r10b, (%r9)
+
+.Lst_partial_1:
+ ret
+ENDPROC(__store_partial)
+
+.macro update
+ movdqa STATE7, T0
+ aesenc STATE0, STATE7
+ aesenc STATE1, STATE0
+ aesenc STATE2, STATE1
+ aesenc STATE3, STATE2
+ aesenc STATE4, STATE3
+ aesenc STATE5, STATE4
+ aesenc STATE6, STATE5
+ aesenc T0, STATE6
+.endm
+
+.macro update0
+ update
+ pxor MSG0, STATE7
+ pxor MSG1, STATE3
+.endm
+
+.macro update1
+ update
+ pxor MSG0, STATE6
+ pxor MSG1, STATE2
+.endm
+
+.macro update2
+ update
+ pxor MSG0, STATE5
+ pxor MSG1, STATE1
+.endm
+
+.macro update3
+ update
+ pxor MSG0, STATE4
+ pxor MSG1, STATE0
+.endm
+
+.macro update4
+ update
+ pxor MSG0, STATE3
+ pxor MSG1, STATE7
+.endm
+
+.macro update5
+ update
+ pxor MSG0, STATE2
+ pxor MSG1, STATE6
+.endm
+
+.macro update6
+ update
+ pxor MSG0, STATE1
+ pxor MSG1, STATE5
+.endm
+
+.macro update7
+ update
+ pxor MSG0, STATE0
+ pxor MSG1, STATE4
+.endm
+
+.macro state_load
+ movdqu 0x00(STATEP), STATE0
+ movdqu 0x10(STATEP), STATE1
+ movdqu 0x20(STATEP), STATE2
+ movdqu 0x30(STATEP), STATE3
+ movdqu 0x40(STATEP), STATE4
+ movdqu 0x50(STATEP), STATE5
+ movdqu 0x60(STATEP), STATE6
+ movdqu 0x70(STATEP), STATE7
+.endm
+
+.macro state_store s0 s1 s2 s3 s4 s5 s6 s7
+ movdqu \s7, 0x00(STATEP)
+ movdqu \s0, 0x10(STATEP)
+ movdqu \s1, 0x20(STATEP)
+ movdqu \s2, 0x30(STATEP)
+ movdqu \s3, 0x40(STATEP)
+ movdqu \s4, 0x50(STATEP)
+ movdqu \s5, 0x60(STATEP)
+ movdqu \s6, 0x70(STATEP)
+.endm
+
+.macro state_store0
+ state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7
+.endm
+
+.macro state_store1
+ state_store STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6
+.endm
+
+.macro state_store2
+ state_store STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
+.endm
+
+.macro state_store3
+ state_store STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4
+.endm
+
+.macro state_store4
+ state_store STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3
+.endm
+
+.macro state_store5
+ state_store STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2
+.endm
+
+.macro state_store6
+ state_store STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1
+.endm
+
+.macro state_store7
+ state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0
+.endm
+
+/*
+ * void crypto_aegis128l_aesni_init(void *state, const void *key, const void *iv);
+ */
+ENTRY(crypto_aegis128l_aesni_init)
+ FRAME_BEGIN
+
+ /* load key: */
+ movdqa (%rsi), MSG1
+ movdqa MSG1, STATE0
+ movdqa MSG1, STATE4
+ movdqa MSG1, STATE5
+ movdqa MSG1, STATE6
+ movdqa MSG1, STATE7
+
+ /* load IV: */
+ movdqu (%rdx), MSG0
+ pxor MSG0, STATE0
+ pxor MSG0, STATE4
+
+ /* load the constants: */
+ movdqa .Laegis128l_const_0, STATE2
+ movdqa .Laegis128l_const_1, STATE1
+ movdqa STATE1, STATE3
+ pxor STATE2, STATE5
+ pxor STATE1, STATE6
+ pxor STATE2, STATE7
+
+ /* update 10 times with IV and KEY: */
+ update0
+ update1
+ update2
+ update3
+ update4
+ update5
+ update6
+ update7
+ update0
+ update1
+
+ state_store1
+
+ FRAME_END
+ ret
+ENDPROC(crypto_aegis128l_aesni_init)
+
+.macro ad_block a i
+ movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
+ movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
+ update\i
+ sub $0x20, LEN
+ cmp $0x20, LEN
+ jl .Lad_out_\i
+.endm
+
+/*
+ * void crypto_aegis128l_aesni_ad(void *state, unsigned int length,
+ * const void *data);
+ */
+ENTRY(crypto_aegis128l_aesni_ad)
+ FRAME_BEGIN
+
+ cmp $0x20, LEN
+ jb .Lad_out
+
+ state_load
+
+ mov SRC, %r8
+ and $0xf, %r8
+ jnz .Lad_u_loop
+
+.align 8
+.Lad_a_loop:
+ ad_block a 0
+ ad_block a 1
+ ad_block a 2
+ ad_block a 3
+ ad_block a 4
+ ad_block a 5
+ ad_block a 6
+ ad_block a 7
+
+ add $0x100, SRC
+ jmp .Lad_a_loop
+
+.align 8
+.Lad_u_loop:
+ ad_block u 0
+ ad_block u 1
+ ad_block u 2
+ ad_block u 3
+ ad_block u 4
+ ad_block u 5
+ ad_block u 6
+ ad_block u 7
+
+ add $0x100, SRC
+ jmp .Lad_u_loop
+
+.Lad_out_0:
+ state_store0
+ FRAME_END
+ ret
+
+.Lad_out_1:
+ state_store1
+ FRAME_END
+ ret
+
+.Lad_out_2:
+ state_store2
+ FRAME_END
+ ret
+
+.Lad_out_3:
+ state_store3
+ FRAME_END
+ ret
+
+.Lad_out_4:
+ state_store4
+ FRAME_END
+ ret
+
+.Lad_out_5:
+ state_store5
+ FRAME_END
+ ret
+
+.Lad_out_6:
+ state_store6
+ FRAME_END
+ ret
+
+.Lad_out_7:
+ state_store7
+ FRAME_END
+ ret
+
+.Lad_out:
+ FRAME_END
+ ret
+ENDPROC(crypto_aegis128l_aesni_ad)
+
+.macro crypt m0 m1 s0 s1 s2 s3 s4 s5 s6 s7
+ pxor \s1, \m0
+ pxor \s6, \m0
+ movdqa \s2, T3
+ pand \s3, T3
+ pxor T3, \m0
+
+ pxor \s2, \m1
+ pxor \s5, \m1
+ movdqa \s6, T3
+ pand \s7, T3
+ pxor T3, \m1
+.endm
+
+.macro crypt0 m0 m1
+ crypt \m0 \m1 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7
+.endm
+
+.macro crypt1 m0 m1
+ crypt \m0 \m1 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6
+.endm
+
+.macro crypt2 m0 m1
+ crypt \m0 \m1 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
+.endm
+
+.macro crypt3 m0 m1
+ crypt \m0 \m1 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4
+.endm
+
+.macro crypt4 m0 m1
+ crypt \m0 \m1 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3
+.endm
+
+.macro crypt5 m0 m1
+ crypt \m0 \m1 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2
+.endm
+
+.macro crypt6 m0 m1
+ crypt \m0 \m1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1
+.endm
+
+.macro crypt7 m0 m1
+ crypt \m0 \m1 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0
+.endm
+
+.macro encrypt_block a i
+ movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
+ movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
+ movdqa MSG0, T0
+ movdqa MSG1, T1
+ crypt\i T0, T1
+ movdq\a T0, (\i * 0x20 + 0x00)(DST)
+ movdq\a T1, (\i * 0x20 + 0x10)(DST)
+
+ update\i
+
+ sub $0x20, LEN
+ cmp $0x20, LEN
+ jl .Lenc_out_\i
+.endm
+
+.macro decrypt_block a i
+ movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
+ movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
+ crypt\i MSG0, MSG1
+ movdq\a MSG0, (\i * 0x20 + 0x00)(DST)
+ movdq\a MSG1, (\i * 0x20 + 0x10)(DST)
+
+ update\i
+
+ sub $0x20, LEN
+ cmp $0x20, LEN
+ jl .Ldec_out_\i
+.endm
+
+/*
+ * void crypto_aegis128l_aesni_enc(void *state, unsigned int length,
+ * const void *src, void *dst);
+ */
+ENTRY(crypto_aegis128l_aesni_enc)
+ FRAME_BEGIN
+
+ cmp $0x20, LEN
+ jb .Lenc_out
+
+ state_load
+
+ mov SRC, %r8
+ or DST, %r8
+ and $0xf, %r8
+ jnz .Lenc_u_loop
+
+.align 8
+.Lenc_a_loop:
+ encrypt_block a 0
+ encrypt_block a 1
+ encrypt_block a 2
+ encrypt_block a 3
+ encrypt_block a 4
+ encrypt_block a 5
+ encrypt_block a 6
+ encrypt_block a 7
+
+ add $0x100, SRC
+ add $0x100, DST
+ jmp .Lenc_a_loop
+
+.align 8
+.Lenc_u_loop:
+ encrypt_block u 0
+ encrypt_block u 1
+ encrypt_block u 2
+ encrypt_block u 3
+ encrypt_block u 4
+ encrypt_block u 5
+ encrypt_block u 6
+ encrypt_block u 7
+
+ add $0x100, SRC
+ add $0x100, DST
+ jmp .Lenc_u_loop
+
+.Lenc_out_0:
+ state_store0
+ FRAME_END
+ ret
+
+.Lenc_out_1:
+ state_store1
+ FRAME_END
+ ret
+
+.Lenc_out_2:
+ state_store2
+ FRAME_END
+ ret
+
+.Lenc_out_3:
+ state_store3
+ FRAME_END
+ ret
+
+.Lenc_out_4:
+ state_store4
+ FRAME_END
+ ret
+
+.Lenc_out_5:
+ state_store5
+ FRAME_END
+ ret
+
+.Lenc_out_6:
+ state_store6
+ FRAME_END
+ ret
+
+.Lenc_out_7:
+ state_store7
+ FRAME_END
+ ret
+
+.Lenc_out:
+ FRAME_END
+ ret
+ENDPROC(crypto_aegis128l_aesni_enc)
+
+/*
+ * void crypto_aegis128l_aesni_enc_tail(void *state, unsigned int length,
+ * const void *src, void *dst);
+ */
+ENTRY(crypto_aegis128l_aesni_enc_tail)
+ FRAME_BEGIN
+
+ state_load
+
+ /* encrypt message: */
+ call __load_partial
+
+ movdqa MSG0, T0
+ movdqa MSG1, T1
+ crypt0 T0, T1
+
+ call __store_partial
+
+ update0
+
+ state_store0
+
+ FRAME_END
+ENDPROC(crypto_aegis128l_aesni_enc_tail)
+
+/*
+ * void crypto_aegis128l_aesni_dec(void *state, unsigned int length,
+ * const void *src, void *dst);
+ */
+ENTRY(crypto_aegis128l_aesni_dec)
+ FRAME_BEGIN
+
+ cmp $0x20, LEN
+ jb .Ldec_out
+
+ state_load
+
+ mov SRC, %r8
+ or DST, %r8
+ and $0xF, %r8
+ jnz .Ldec_u_loop
+
+.align 8
+.Ldec_a_loop:
+ decrypt_block a 0
+ decrypt_block a 1
+ decrypt_block a 2
+ decrypt_block a 3
+ decrypt_block a 4
+ decrypt_block a 5
+ decrypt_block a 6
+ decrypt_block a 7
+
+ add $0x100, SRC
+ add $0x100, DST
+ jmp .Ldec_a_loop
+
+.align 8
+.Ldec_u_loop:
+ decrypt_block u 0
+ decrypt_block u 1
+ decrypt_block u 2
+ decrypt_block u 3
+ decrypt_block u 4
+ decrypt_block u 5
+ decrypt_block u 6
+ decrypt_block u 7
+
+ add $0x100, SRC
+ add $0x100, DST
+ jmp .Ldec_u_loop
+
+.Ldec_out_0:
+ state_store0
+ FRAME_END
+ ret
+
+.Ldec_out_1:
+ state_store1
+ FRAME_END
+ ret
+
+.Ldec_out_2:
+ state_store2
+ FRAME_END
+ ret
+
+.Ldec_out_3:
+ state_store3
+ FRAME_END
+ ret
+
+.Ldec_out_4:
+ state_store4
+ FRAME_END
+ ret
+
+.Ldec_out_5:
+ state_store5
+ FRAME_END
+ ret
+
+.Ldec_out_6:
+ state_store6
+ FRAME_END
+ ret
+
+.Ldec_out_7:
+ state_store7
+ FRAME_END
+ ret
+
+.Ldec_out:
+ FRAME_END
+ ret
+ENDPROC(crypto_aegis128l_aesni_dec)
+
+/*
+ * void crypto_aegis128l_aesni_dec_tail(void *state, unsigned int length,
+ * const void *src, void *dst);
+ */
+ENTRY(crypto_aegis128l_aesni_dec_tail)
+ FRAME_BEGIN
+
+ state_load
+
+ /* decrypt message: */
+ call __load_partial
+
+ crypt0 MSG0, MSG1
+
+ movdqa MSG0, T0
+ movdqa MSG1, T1
+ call __store_partial
+
+ /* mask with byte count: */
+ movq LEN, T0
+ punpcklbw T0, T0
+ punpcklbw T0, T0
+ punpcklbw T0, T0
+ punpcklbw T0, T0
+ movdqa T0, T1
+ movdqa .Laegis128l_counter0, T2
+ movdqa .Laegis128l_counter1, T3
+ pcmpgtb T2, T0
+ pcmpgtb T3, T1
+ pand T0, MSG0
+ pand T1, MSG1
+
+ update0
+
+ state_store0
+
+ FRAME_END
+ ret
+ENDPROC(crypto_aegis128l_aesni_dec_tail)
+
+/*
+ * void crypto_aegis128l_aesni_final(void *state, void *tag_xor,
+ * u64 assoclen, u64 cryptlen);
+ */
+ENTRY(crypto_aegis128l_aesni_final)
+ FRAME_BEGIN
+
+ state_load
+
+ /* prepare length block: */
+ movq %rdx, MSG0
+ movq %rcx, T0
+ pslldq $8, T0
+ pxor T0, MSG0
+ psllq $3, MSG0 /* multiply by 8 (to get bit count) */
+
+ pxor STATE2, MSG0
+ movdqa MSG0, MSG1
+
+ /* update state: */
+ update0
+ update1
+ update2
+ update3
+ update4
+ update5
+ update6
+
+ /* xor tag: */
+ movdqu (%rsi), T0
+
+ pxor STATE1, T0
+ pxor STATE2, T0
+ pxor STATE3, T0
+ pxor STATE4, T0
+ pxor STATE5, T0
+ pxor STATE6, T0
+ pxor STATE7, T0
+
+ movdqu T0, (%rsi)
+
+ FRAME_END
+ ret
+ENDPROC(crypto_aegis128l_aesni_final)
diff --git a/arch/x86/crypto/aegis128l-aesni-glue.c b/arch/x86/crypto/aegis128l-aesni-glue.c
new file mode 100644
index 000000000000..876e4866e633
--- /dev/null
+++ b/arch/x86/crypto/aegis128l-aesni-glue.c
@@ -0,0 +1,407 @@
+/*
+ * The AEGIS-128L Authenticated-Encryption Algorithm
+ * Glue for AES-NI + SSE2 implementation
+ *
+ * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/cryptd.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <linux/module.h>
+#include <asm/fpu/api.h>
+#include <asm/cpu_device_id.h>
+
+#define AEGIS128L_BLOCK_ALIGN 16
+#define AEGIS128L_BLOCK_SIZE 32
+#define AEGIS128L_NONCE_SIZE 16
+#define AEGIS128L_STATE_BLOCKS 8
+#define AEGIS128L_KEY_SIZE 16
+#define AEGIS128L_MIN_AUTH_SIZE 8
+#define AEGIS128L_MAX_AUTH_SIZE 16
+
+asmlinkage void crypto_aegis128l_aesni_init(void *state, void *key, void *iv);
+
+asmlinkage void crypto_aegis128l_aesni_ad(
+ void *state, unsigned int length, const void *data);
+
+asmlinkage void crypto_aegis128l_aesni_enc(
+ void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis128l_aesni_dec(
+ void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis128l_aesni_enc_tail(
+ void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis128l_aesni_dec_tail(
+ void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis128l_aesni_final(
+ void *state, void *tag_xor, unsigned int cryptlen,
+ unsigned int assoclen);
+
+struct aegis_block {
+ u8 bytes[AEGIS128L_BLOCK_SIZE] __aligned(AEGIS128L_BLOCK_ALIGN);
+};
+
+struct aegis_state {
+ struct aegis_block blocks[AEGIS128L_STATE_BLOCKS];
+};
+
+struct aegis_ctx {
+ struct aegis_block key;
+};
+
+struct aegis_crypt_ops {
+ int (*skcipher_walk_init)(struct skcipher_walk *walk,
+ struct aead_request *req, bool atomic);
+
+ void (*crypt_blocks)(void *state, unsigned int length, const void *src,
+ void *dst);
+ void (*crypt_tail)(void *state, unsigned int length, const void *src,
+ void *dst);
+};
+
+static void crypto_aegis128l_aesni_process_ad(
+ struct aegis_state *state, struct scatterlist *sg_src,
+ unsigned int assoclen)
+{
+ struct scatter_walk walk;
+ struct aegis_block buf;
+ unsigned int pos = 0;
+
+ scatterwalk_start(&walk, sg_src);
+ while (assoclen != 0) {
+ unsigned int size = scatterwalk_clamp(&walk, assoclen);
+ unsigned int left = size;
+ void *mapped = scatterwalk_map(&walk);
+ const u8 *src = (const u8 *)mapped;
+
+ if (pos + size >= AEGIS128L_BLOCK_SIZE) {
+ if (pos > 0) {
+ unsigned int fill = AEGIS128L_BLOCK_SIZE - pos;
+ memcpy(buf.bytes + pos, src, fill);
+ crypto_aegis128l_aesni_ad(state,
+ AEGIS128L_BLOCK_SIZE,
+ buf.bytes);
+ pos = 0;
+ left -= fill;
+ src += fill;
+ }
+
+ crypto_aegis128l_aesni_ad(state, left, src);
+
+ src += left & ~(AEGIS128L_BLOCK_SIZE - 1);
+ left &= AEGIS128L_BLOCK_SIZE - 1;
+ }
+
+ memcpy(buf.bytes + pos, src, left);
+ pos += left;
+ assoclen -= size;
+
+ scatterwalk_unmap(mapped);
+ scatterwalk_advance(&walk, size);
+ scatterwalk_done(&walk, 0, assoclen);
+ }
+
+ if (pos > 0) {
+ memset(buf.bytes + pos, 0, AEGIS128L_BLOCK_SIZE - pos);
+ crypto_aegis128l_aesni_ad(state, AEGIS128L_BLOCK_SIZE, buf.bytes);
+ }
+}
+
+static void crypto_aegis128l_aesni_process_crypt(
+ struct aegis_state *state, struct aead_request *req,
+ const struct aegis_crypt_ops *ops)
+{
+ struct skcipher_walk walk;
+ u8 *src, *dst;
+ unsigned int chunksize, base;
+
+ ops->skcipher_walk_init(&walk, req, false);
+
+ while (walk.nbytes) {
+ src = walk.src.virt.addr;
+ dst = walk.dst.virt.addr;
+ chunksize = walk.nbytes;
+
+ ops->crypt_blocks(state, chunksize, src, dst);
+
+ base = chunksize & ~(AEGIS128L_BLOCK_SIZE - 1);
+ src += base;
+ dst += base;
+ chunksize &= AEGIS128L_BLOCK_SIZE - 1;
+
+ if (chunksize > 0)
+ ops->crypt_tail(state, chunksize, src, dst);
+
+ skcipher_walk_done(&walk, 0);
+ }
+}
+
+static struct aegis_ctx *crypto_aegis128l_aesni_ctx(struct crypto_aead *aead)
+{
+ u8 *ctx = crypto_aead_ctx(aead);
+ ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx));
+ return (void *)ctx;
+}
+
+static int crypto_aegis128l_aesni_setkey(struct crypto_aead *aead,
+ const u8 *key, unsigned int keylen)
+{
+ struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(aead);
+
+ if (keylen != AEGIS128L_KEY_SIZE) {
+ crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+
+ memcpy(ctx->key.bytes, key, AEGIS128L_KEY_SIZE);
+
+ return 0;
+}
+
+static int crypto_aegis128l_aesni_setauthsize(struct crypto_aead *tfm,
+ unsigned int authsize)
+{
+ if (authsize > AEGIS128L_MAX_AUTH_SIZE)
+ return -EINVAL;
+ if (authsize < AEGIS128L_MIN_AUTH_SIZE)
+ return -EINVAL;
+ return 0;
+}
+
+static void crypto_aegis128l_aesni_crypt(struct aead_request *req,
+ struct aegis_block *tag_xor,
+ unsigned int cryptlen,
+ const struct aegis_crypt_ops *ops)
+{
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(tfm);
+ struct aegis_state state;
+
+ kernel_fpu_begin();
+
+ crypto_aegis128l_aesni_init(&state, ctx->key.bytes, req->iv);
+ crypto_aegis128l_aesni_process_ad(&state, req->src, req->assoclen);
+ crypto_aegis128l_aesni_process_crypt(&state, req, ops);
+ crypto_aegis128l_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
+
+ kernel_fpu_end();
+}
+
+static int crypto_aegis128l_aesni_encrypt(struct aead_request *req)
+{
+ static const struct aegis_crypt_ops OPS = {
+ .skcipher_walk_init = skcipher_walk_aead_encrypt,
+ .crypt_blocks = crypto_aegis128l_aesni_enc,
+ .crypt_tail = crypto_aegis128l_aesni_enc_tail,
+ };
+
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct aegis_block tag = {};
+ unsigned int authsize = crypto_aead_authsize(tfm);
+ unsigned int cryptlen = req->cryptlen;
+
+ crypto_aegis128l_aesni_crypt(req, &tag, cryptlen, &OPS);
+
+ scatterwalk_map_and_copy(tag.bytes, req->dst,
+ req->assoclen + cryptlen, authsize, 1);
+ return 0;
+}
+
+static int crypto_aegis128l_aesni_decrypt(struct aead_request *req)
+{
+ static const struct aegis_block zeros = {};
+
+ static const struct aegis_crypt_ops OPS = {
+ .skcipher_walk_init = skcipher_walk_aead_decrypt,
+ .crypt_blocks = crypto_aegis128l_aesni_dec,
+ .crypt_tail = crypto_aegis128l_aesni_dec_tail,
+ };
+
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct aegis_block tag;
+ unsigned int authsize = crypto_aead_authsize(tfm);
+ unsigned int cryptlen = req->cryptlen - authsize;
+
+ scatterwalk_map_and_copy(tag.bytes, req->src,
+ req->assoclen + cryptlen, authsize, 0);
+
+ crypto_aegis128l_aesni_crypt(req, &tag, cryptlen, &OPS);
+
+ return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0;
+}
+
+static int crypto_aegis128l_aesni_init_tfm(struct crypto_aead *aead)
+{
+ return 0;
+}
+
+static void crypto_aegis128l_aesni_exit_tfm(struct crypto_aead *aead)
+{
+}
+
+static int cryptd_aegis128l_aesni_setkey(struct crypto_aead *aead,
+ const u8 *key, unsigned int keylen)
+{
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+ struct cryptd_aead *cryptd_tfm = *ctx;
+
+ return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
+}
+
+static int cryptd_aegis128l_aesni_setauthsize(struct crypto_aead *aead,
+ unsigned int authsize)
+{
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+ struct cryptd_aead *cryptd_tfm = *ctx;
+
+ return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
+}
+
+static int cryptd_aegis128l_aesni_encrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+ struct cryptd_aead *cryptd_tfm = *ctx;
+
+ aead = &cryptd_tfm->base;
+ if (irq_fpu_usable() && (!in_atomic() ||
+ !cryptd_aead_queued(cryptd_tfm)))
+ aead = cryptd_aead_child(cryptd_tfm);
+
+ aead_request_set_tfm(req, aead);
+
+ return crypto_aead_encrypt(req);
+}
+
+static int cryptd_aegis128l_aesni_decrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+ struct cryptd_aead *cryptd_tfm = *ctx;
+
+ aead = &cryptd_tfm->base;
+ if (irq_fpu_usable() && (!in_atomic() ||
+ !cryptd_aead_queued(cryptd_tfm)))
+ aead = cryptd_aead_child(cryptd_tfm);
+
+ aead_request_set_tfm(req, aead);
+
+ return crypto_aead_decrypt(req);
+}
+
+static int cryptd_aegis128l_aesni_init_tfm(struct crypto_aead *aead)
+{
+ struct cryptd_aead *cryptd_tfm;
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+ cryptd_tfm = cryptd_alloc_aead("__aegis128l-aesni", CRYPTO_ALG_INTERNAL,
+ CRYPTO_ALG_INTERNAL);
+ if (IS_ERR(cryptd_tfm))
+ return PTR_ERR(cryptd_tfm);
+
+ *ctx = cryptd_tfm;
+ crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
+ return 0;
+}
+
+static void cryptd_aegis128l_aesni_exit_tfm(struct crypto_aead *aead)
+{
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+ cryptd_free_aead(*ctx);
+}
+
+static struct aead_alg crypto_aegis128l_aesni_alg[] = {
+ {
+ .setkey = crypto_aegis128l_aesni_setkey,
+ .setauthsize = crypto_aegis128l_aesni_setauthsize,
+ .encrypt = crypto_aegis128l_aesni_encrypt,
+ .decrypt = crypto_aegis128l_aesni_decrypt,
+ .init = crypto_aegis128l_aesni_init_tfm,
+ .exit = crypto_aegis128l_aesni_exit_tfm,
+
+ .ivsize = AEGIS128L_NONCE_SIZE,
+ .maxauthsize = AEGIS128L_MAX_AUTH_SIZE,
+ .chunksize = AEGIS128L_BLOCK_SIZE,
+
+ .base = {
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct aegis_ctx) +
+ __alignof__(struct aegis_ctx),
+ .cra_alignmask = 0,
+
+ .cra_name = "__aegis128l",
+ .cra_driver_name = "__aegis128l-aesni",
+
+ .cra_module = THIS_MODULE,
+ }
+ }, {
+ .setkey = cryptd_aegis128l_aesni_setkey,
+ .setauthsize = cryptd_aegis128l_aesni_setauthsize,
+ .encrypt = cryptd_aegis128l_aesni_encrypt,
+ .decrypt = cryptd_aegis128l_aesni_decrypt,
+ .init = cryptd_aegis128l_aesni_init_tfm,
+ .exit = cryptd_aegis128l_aesni_exit_tfm,
+
+ .ivsize = AEGIS128L_NONCE_SIZE,
+ .maxauthsize = AEGIS128L_MAX_AUTH_SIZE,
+ .chunksize = AEGIS128L_BLOCK_SIZE,
+
+ .base = {
+ .cra_flags = CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct cryptd_aead *),
+ .cra_alignmask = 0,
+
+ .cra_priority = 400,
+
+ .cra_name = "aegis128l",
+ .cra_driver_name = "aegis128l-aesni",
+
+ .cra_module = THIS_MODULE,
+ }
+ }
+};
+
+static const struct x86_cpu_id aesni_cpu_id[] = {
+ X86_FEATURE_MATCH(X86_FEATURE_AES),
+ X86_FEATURE_MATCH(X86_FEATURE_XMM2),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
+
+static int __init crypto_aegis128l_aesni_module_init(void)
+{
+ if (!x86_match_cpu(aesni_cpu_id))
+ return -ENODEV;
+
+ return crypto_register_aeads(crypto_aegis128l_aesni_alg,
+ ARRAY_SIZE(crypto_aegis128l_aesni_alg));
+}
+
+static void __exit crypto_aegis128l_aesni_module_exit(void)
+{
+ crypto_unregister_aeads(crypto_aegis128l_aesni_alg,
+ ARRAY_SIZE(crypto_aegis128l_aesni_alg));
+}
+
+module_init(crypto_aegis128l_aesni_module_init);
+module_exit(crypto_aegis128l_aesni_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
+MODULE_DESCRIPTION("AEGIS-128L AEAD algorithm -- AESNI+SSE2 implementation");
+MODULE_ALIAS_CRYPTO("aegis128l");
+MODULE_ALIAS_CRYPTO("aegis128l-aesni");
diff --git a/arch/x86/crypto/aegis256-aesni-asm.S b/arch/x86/crypto/aegis256-aesni-asm.S
new file mode 100644
index 000000000000..1d977d515bf9
--- /dev/null
+++ b/arch/x86/crypto/aegis256-aesni-asm.S
@@ -0,0 +1,702 @@
+/*
+ * AES-NI + SSE2 implementation of AEGIS-128L
+ *
+ * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+#define STATE0 %xmm0
+#define STATE1 %xmm1
+#define STATE2 %xmm2
+#define STATE3 %xmm3
+#define STATE4 %xmm4
+#define STATE5 %xmm5
+#define MSG %xmm6
+#define T0 %xmm7
+#define T1 %xmm8
+#define T2 %xmm9
+#define T3 %xmm10
+
+#define STATEP %rdi
+#define LEN %rsi
+#define SRC %rdx
+#define DST %rcx
+
+.section .rodata.cst16.aegis256_const, "aM", @progbits, 32
+.align 16
+.Laegis256_const_0:
+ .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
+ .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
+.Laegis256_const_1:
+ .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
+ .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
+
+.section .rodata.cst16.aegis256_counter, "aM", @progbits, 16
+.align 16
+.Laegis256_counter:
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+ .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+
+.text
+
+/*
+ * __load_partial: internal ABI
+ * input:
+ * LEN - bytes
+ * SRC - src
+ * output:
+ * MSG - message block
+ * changed:
+ * T0
+ * %r8
+ * %r9
+ */
+__load_partial:
+ xor %r9, %r9
+ pxor MSG, MSG
+
+ mov LEN, %r8
+ and $0x1, %r8
+ jz .Lld_partial_1
+
+ mov LEN, %r8
+ and $0x1E, %r8
+ add SRC, %r8
+ mov (%r8), %r9b
+
+.Lld_partial_1:
+ mov LEN, %r8
+ and $0x2, %r8
+ jz .Lld_partial_2
+
+ mov LEN, %r8
+ and $0x1C, %r8
+ add SRC, %r8
+ shl $0x10, %r9
+ mov (%r8), %r9w
+
+.Lld_partial_2:
+ mov LEN, %r8
+ and $0x4, %r8
+ jz .Lld_partial_4
+
+ mov LEN, %r8
+ and $0x18, %r8
+ add SRC, %r8
+ shl $32, %r9
+ mov (%r8), %r8d
+ xor %r8, %r9
+
+.Lld_partial_4:
+ movq %r9, MSG
+
+ mov LEN, %r8
+ and $0x8, %r8
+ jz .Lld_partial_8
+
+ mov LEN, %r8
+ and $0x10, %r8
+ add SRC, %r8
+ pslldq $8, MSG
+ movq (%r8), T0
+ pxor T0, MSG
+
+.Lld_partial_8:
+ ret
+ENDPROC(__load_partial)
+
+/*
+ * __store_partial: internal ABI
+ * input:
+ * LEN - bytes
+ * DST - dst
+ * output:
+ * T0 - message block
+ * changed:
+ * %r8
+ * %r9
+ * %r10
+ */
+__store_partial:
+ mov LEN, %r8
+ mov DST, %r9
+
+ movq T0, %r10
+
+ cmp $8, %r8
+ jl .Lst_partial_8
+
+ mov %r10, (%r9)
+ psrldq $8, T0
+ movq T0, %r10
+
+ sub $8, %r8
+ add $8, %r9
+
+.Lst_partial_8:
+ cmp $4, %r8
+ jl .Lst_partial_4
+
+ mov %r10d, (%r9)
+ shr $32, %r10
+
+ sub $4, %r8
+ add $4, %r9
+
+.Lst_partial_4:
+ cmp $2, %r8
+ jl .Lst_partial_2
+
+ mov %r10w, (%r9)
+ shr $0x10, %r10
+
+ sub $2, %r8
+ add $2, %r9
+
+.Lst_partial_2:
+ cmp $1, %r8
+ jl .Lst_partial_1
+
+ mov %r10b, (%r9)
+
+.Lst_partial_1:
+ ret
+ENDPROC(__store_partial)
+
+.macro update
+ movdqa STATE5, T0
+ aesenc STATE0, STATE5
+ aesenc STATE1, STATE0
+ aesenc STATE2, STATE1
+ aesenc STATE3, STATE2
+ aesenc STATE4, STATE3
+ aesenc T0, STATE4
+.endm
+
+.macro update0 m
+ update
+ pxor \m, STATE5
+.endm
+
+.macro update1 m
+ update
+ pxor \m, STATE4
+.endm
+
+.macro update2 m
+ update
+ pxor \m, STATE3
+.endm
+
+.macro update3 m
+ update
+ pxor \m, STATE2
+.endm
+
+.macro update4 m
+ update
+ pxor \m, STATE1
+.endm
+
+.macro update5 m
+ update
+ pxor \m, STATE0
+.endm
+
+.macro state_load
+ movdqu 0x00(STATEP), STATE0
+ movdqu 0x10(STATEP), STATE1
+ movdqu 0x20(STATEP), STATE2
+ movdqu 0x30(STATEP), STATE3
+ movdqu 0x40(STATEP), STATE4
+ movdqu 0x50(STATEP), STATE5
+.endm
+
+.macro state_store s0 s1 s2 s3 s4 s5
+ movdqu \s5, 0x00(STATEP)
+ movdqu \s0, 0x10(STATEP)
+ movdqu \s1, 0x20(STATEP)
+ movdqu \s2, 0x30(STATEP)
+ movdqu \s3, 0x40(STATEP)
+ movdqu \s4, 0x50(STATEP)
+.endm
+
+.macro state_store0
+ state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
+.endm
+
+.macro state_store1
+ state_store STATE5 STATE0 STATE1 STATE2 STATE3 STATE4
+.endm
+
+.macro state_store2
+ state_store STATE4 STATE5 STATE0 STATE1 STATE2 STATE3
+.endm
+
+.macro state_store3
+ state_store STATE3 STATE4 STATE5 STATE0 STATE1 STATE2
+.endm
+
+.macro state_store4
+ state_store STATE2 STATE3 STATE4 STATE5 STATE0 STATE1
+.endm
+
+.macro state_store5
+ state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE0
+.endm
+
+/*
+ * void crypto_aegis256_aesni_init(void *state, const void *key, const void *iv);
+ */
+ENTRY(crypto_aegis256_aesni_init)
+ FRAME_BEGIN
+
+ /* load key: */
+ movdqa 0x00(%rsi), MSG
+ movdqa 0x10(%rsi), T1
+ movdqa MSG, STATE4
+ movdqa T1, STATE5
+
+ /* load IV: */
+ movdqu 0x00(%rdx), T2
+ movdqu 0x10(%rdx), T3
+ pxor MSG, T2
+ pxor T1, T3
+ movdqa T2, STATE0
+ movdqa T3, STATE1
+
+ /* load the constants: */
+ movdqa .Laegis256_const_0, STATE3
+ movdqa .Laegis256_const_1, STATE2
+ pxor STATE3, STATE4
+ pxor STATE2, STATE5
+
+ /* update 10 times with IV and KEY: */
+ update0 MSG
+ update1 T1
+ update2 T2
+ update3 T3
+ update4 MSG
+ update5 T1
+ update0 T2
+ update1 T3
+ update2 MSG
+ update3 T1
+ update4 T2
+ update5 T3
+ update0 MSG
+ update1 T1
+ update2 T2
+ update3 T3
+
+ state_store3
+
+ FRAME_END
+ ret
+ENDPROC(crypto_aegis256_aesni_init)
+
+.macro ad_block a i
+ movdq\a (\i * 0x10)(SRC), MSG
+ update\i MSG
+ sub $0x10, LEN
+ cmp $0x10, LEN
+ jl .Lad_out_\i
+.endm
+
+/*
+ * void crypto_aegis256_aesni_ad(void *state, unsigned int length,
+ * const void *data);
+ */
+ENTRY(crypto_aegis256_aesni_ad)
+ FRAME_BEGIN
+
+ cmp $0x10, LEN
+ jb .Lad_out
+
+ state_load
+
+ mov SRC, %r8
+ and $0xf, %r8
+ jnz .Lad_u_loop
+
+.align 8
+.Lad_a_loop:
+ ad_block a 0
+ ad_block a 1
+ ad_block a 2
+ ad_block a 3
+ ad_block a 4
+ ad_block a 5
+
+ add $0x60, SRC
+ jmp .Lad_a_loop
+
+.align 8
+.Lad_u_loop:
+ ad_block u 0
+ ad_block u 1
+ ad_block u 2
+ ad_block u 3
+ ad_block u 4
+ ad_block u 5
+
+ add $0x60, SRC
+ jmp .Lad_u_loop
+
+.Lad_out_0:
+ state_store0
+ FRAME_END
+ ret
+
+.Lad_out_1:
+ state_store1
+ FRAME_END
+ ret
+
+.Lad_out_2:
+ state_store2
+ FRAME_END
+ ret
+
+.Lad_out_3:
+ state_store3
+ FRAME_END
+ ret
+
+.Lad_out_4:
+ state_store4
+ FRAME_END
+ ret
+
+.Lad_out_5:
+ state_store5
+ FRAME_END
+ ret
+
+.Lad_out:
+ FRAME_END
+ ret
+ENDPROC(crypto_aegis256_aesni_ad)
+
+.macro crypt m s0 s1 s2 s3 s4 s5
+ pxor \s1, \m
+ pxor \s4, \m
+ pxor \s5, \m
+ movdqa \s2, T3
+ pand \s3, T3
+ pxor T3, \m
+.endm
+
+.macro crypt0 m
+ crypt \m STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
+.endm
+
+.macro crypt1 m
+ crypt \m STATE5 STATE0 STATE1 STATE2 STATE3 STATE4
+.endm
+
+.macro crypt2 m
+ crypt \m STATE4 STATE5 STATE0 STATE1 STATE2 STATE3
+.endm
+
+.macro crypt3 m
+ crypt \m STATE3 STATE4 STATE5 STATE0 STATE1 STATE2
+.endm
+
+.macro crypt4 m
+ crypt \m STATE2 STATE3 STATE4 STATE5 STATE0 STATE1
+.endm
+
+.macro crypt5 m
+ crypt \m STATE1 STATE2 STATE3 STATE4 STATE5 STATE0
+.endm
+
+.macro encrypt_block a i
+ movdq\a (\i * 0x10)(SRC), MSG
+ movdqa MSG, T0
+ crypt\i T0
+ movdq\a T0, (\i * 0x10)(DST)
+
+ update\i MSG
+
+ sub $0x10, LEN
+ cmp $0x10, LEN
+ jl .Lenc_out_\i
+.endm
+
+.macro decrypt_block a i
+ movdq\a (\i * 0x10)(SRC), MSG
+ crypt\i MSG
+ movdq\a MSG, (\i * 0x10)(DST)
+
+ update\i MSG
+
+ sub $0x10, LEN
+ cmp $0x10, LEN
+ jl .Ldec_out_\i
+.endm
+
+/*
+ * void crypto_aegis256_aesni_enc(void *state, unsigned int length,
+ * const void *src, void *dst);
+ */
+ENTRY(crypto_aegis256_aesni_enc)
+ FRAME_BEGIN
+
+ cmp $0x10, LEN
+ jb .Lenc_out
+
+ state_load
+
+ mov SRC, %r8
+ or DST, %r8
+ and $0xf, %r8
+ jnz .Lenc_u_loop
+
+.align 8
+.Lenc_a_loop:
+ encrypt_block a 0
+ encrypt_block a 1
+ encrypt_block a 2
+ encrypt_block a 3
+ encrypt_block a 4
+ encrypt_block a 5
+
+ add $0x60, SRC
+ add $0x60, DST
+ jmp .Lenc_a_loop
+
+.align 8
+.Lenc_u_loop:
+ encrypt_block u 0
+ encrypt_block u 1
+ encrypt_block u 2
+ encrypt_block u 3
+ encrypt_block u 4
+ encrypt_block u 5
+
+ add $0x60, SRC
+ add $0x60, DST
+ jmp .Lenc_u_loop
+
+.Lenc_out_0:
+ state_store0
+ FRAME_END
+ ret
+
+.Lenc_out_1:
+ state_store1
+ FRAME_END
+ ret
+
+.Lenc_out_2:
+ state_store2
+ FRAME_END
+ ret
+
+.Lenc_out_3:
+ state_store3
+ FRAME_END
+ ret
+
+.Lenc_out_4:
+ state_store4
+ FRAME_END
+ ret
+
+.Lenc_out_5:
+ state_store5
+ FRAME_END
+ ret
+
+.Lenc_out:
+ FRAME_END
+ ret
+ENDPROC(crypto_aegis256_aesni_enc)
+
+/*
+ * void crypto_aegis256_aesni_enc_tail(void *state, unsigned int length,
+ * const void *src, void *dst);
+ */
+ENTRY(crypto_aegis256_aesni_enc_tail)
+ FRAME_BEGIN
+
+ state_load
+
+ /* encrypt message: */
+ call __load_partial
+
+ movdqa MSG, T0
+ crypt0 T0
+
+ call __store_partial
+
+ update0 MSG
+
+ state_store0
+
+ FRAME_END
+ENDPROC(crypto_aegis256_aesni_enc_tail)
+
+/*
+ * void crypto_aegis256_aesni_dec(void *state, unsigned int length,
+ * const void *src, void *dst);
+ */
+ENTRY(crypto_aegis256_aesni_dec)
+ FRAME_BEGIN
+
+ cmp $0x10, LEN
+ jb .Ldec_out
+
+ state_load
+
+ mov SRC, %r8
+ or DST, %r8
+ and $0xF, %r8
+ jnz .Ldec_u_loop
+
+.align 8
+.Ldec_a_loop:
+ decrypt_block a 0
+ decrypt_block a 1
+ decrypt_block a 2
+ decrypt_block a 3
+ decrypt_block a 4
+ decrypt_block a 5
+
+ add $0x60, SRC
+ add $0x60, DST
+ jmp .Ldec_a_loop
+
+.align 8
+.Ldec_u_loop:
+ decrypt_block u 0
+ decrypt_block u 1
+ decrypt_block u 2
+ decrypt_block u 3
+ decrypt_block u 4
+ decrypt_block u 5
+
+ add $0x60, SRC
+ add $0x60, DST
+ jmp .Ldec_u_loop
+
+.Ldec_out_0:
+ state_store0
+ FRAME_END
+ ret
+
+.Ldec_out_1:
+ state_store1
+ FRAME_END
+ ret
+
+.Ldec_out_2:
+ state_store2
+ FRAME_END
+ ret
+
+.Ldec_out_3:
+ state_store3
+ FRAME_END
+ ret
+
+.Ldec_out_4:
+ state_store4
+ FRAME_END
+ ret
+
+.Ldec_out_5:
+ state_store5
+ FRAME_END
+ ret
+
+.Ldec_out:
+ FRAME_END
+ ret
+ENDPROC(crypto_aegis256_aesni_dec)
+
+/*
+ * void crypto_aegis256_aesni_dec_tail(void *state, unsigned int length,
+ * const void *src, void *dst);
+ */
+ENTRY(crypto_aegis256_aesni_dec_tail)
+ FRAME_BEGIN
+
+ state_load
+
+ /* decrypt message: */
+ call __load_partial
+
+ crypt0 MSG
+
+ movdqa MSG, T0
+ call __store_partial
+
+ /* mask with byte count: */
+ movq LEN, T0
+ punpcklbw T0, T0
+ punpcklbw T0, T0
+ punpcklbw T0, T0
+ punpcklbw T0, T0
+ movdqa .Laegis256_counter, T1
+ pcmpgtb T1, T0
+ pand T0, MSG
+
+ update0 MSG
+
+ state_store0
+
+ FRAME_END
+ ret
+ENDPROC(crypto_aegis256_aesni_dec_tail)
+
+/*
+ * void crypto_aegis256_aesni_final(void *state, void *tag_xor,
+ * u64 assoclen, u64 cryptlen);
+ */
+ENTRY(crypto_aegis256_aesni_final)
+ FRAME_BEGIN
+
+ state_load
+
+ /* prepare length block: */
+ movq %rdx, MSG
+ movq %rcx, T0
+ pslldq $8, T0
+ pxor T0, MSG
+ psllq $3, MSG /* multiply by 8 (to get bit count) */
+
+ pxor STATE3, MSG
+
+ /* update state: */
+ update0 MSG
+ update1 MSG
+ update2 MSG
+ update3 MSG
+ update4 MSG
+ update5 MSG
+ update0 MSG
+
+ /* xor tag: */
+ movdqu (%rsi), MSG
+
+ pxor STATE0, MSG
+ pxor STATE1, MSG
+ pxor STATE2, MSG
+ pxor STATE3, MSG
+ pxor STATE4, MSG
+ pxor STATE5, MSG
+
+ movdqu MSG, (%rsi)
+
+ FRAME_END
+ ret
+ENDPROC(crypto_aegis256_aesni_final)
diff --git a/arch/x86/crypto/aegis256-aesni-glue.c b/arch/x86/crypto/aegis256-aesni-glue.c
new file mode 100644
index 000000000000..2b5dd3af8f4d
--- /dev/null
+++ b/arch/x86/crypto/aegis256-aesni-glue.c
@@ -0,0 +1,407 @@
+/*
+ * The AEGIS-256 Authenticated-Encryption Algorithm
+ * Glue for AES-NI + SSE2 implementation
+ *
+ * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/cryptd.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <linux/module.h>
+#include <asm/fpu/api.h>
+#include <asm/cpu_device_id.h>
+
+#define AEGIS256_BLOCK_ALIGN 16
+#define AEGIS256_BLOCK_SIZE 16
+#define AEGIS256_NONCE_SIZE 32
+#define AEGIS256_STATE_BLOCKS 6
+#define AEGIS256_KEY_SIZE 32
+#define AEGIS256_MIN_AUTH_SIZE 8
+#define AEGIS256_MAX_AUTH_SIZE 16
+
+asmlinkage void crypto_aegis256_aesni_init(void *state, void *key, void *iv);
+
+asmlinkage void crypto_aegis256_aesni_ad(
+ void *state, unsigned int length, const void *data);
+
+asmlinkage void crypto_aegis256_aesni_enc(
+ void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis256_aesni_dec(
+ void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis256_aesni_enc_tail(
+ void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis256_aesni_dec_tail(
+ void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis256_aesni_final(
+ void *state, void *tag_xor, unsigned int cryptlen,
+ unsigned int assoclen);
+
+struct aegis_block {
+ u8 bytes[AEGIS256_BLOCK_SIZE] __aligned(AEGIS256_BLOCK_ALIGN);
+};
+
+struct aegis_state {
+ struct aegis_block blocks[AEGIS256_STATE_BLOCKS];
+};
+
+struct aegis_ctx {
+ struct aegis_block key[AEGIS256_KEY_SIZE / AEGIS256_BLOCK_SIZE];
+};
+
+struct aegis_crypt_ops {
+ int (*skcipher_walk_init)(struct skcipher_walk *walk,
+ struct aead_request *req, bool atomic);
+
+ void (*crypt_blocks)(void *state, unsigned int length, const void *src,
+ void *dst);
+ void (*crypt_tail)(void *state, unsigned int length, const void *src,
+ void *dst);
+};
+
+static void crypto_aegis256_aesni_process_ad(
+ struct aegis_state *state, struct scatterlist *sg_src,
+ unsigned int assoclen)
+{
+ struct scatter_walk walk;
+ struct aegis_block buf;
+ unsigned int pos = 0;
+
+ scatterwalk_start(&walk, sg_src);
+ while (assoclen != 0) {
+ unsigned int size = scatterwalk_clamp(&walk, assoclen);
+ unsigned int left = size;
+ void *mapped = scatterwalk_map(&walk);
+ const u8 *src = (const u8 *)mapped;
+
+ if (pos + size >= AEGIS256_BLOCK_SIZE) {
+ if (pos > 0) {
+ unsigned int fill = AEGIS256_BLOCK_SIZE - pos;
+ memcpy(buf.bytes + pos, src, fill);
+ crypto_aegis256_aesni_ad(state,
+ AEGIS256_BLOCK_SIZE,
+ buf.bytes);
+ pos = 0;
+ left -= fill;
+ src += fill;
+ }
+
+ crypto_aegis256_aesni_ad(state, left, src);
+
+ src += left & ~(AEGIS256_BLOCK_SIZE - 1);
+ left &= AEGIS256_BLOCK_SIZE - 1;
+ }
+
+ memcpy(buf.bytes + pos, src, left);
+ pos += left;
+ assoclen -= size;
+
+ scatterwalk_unmap(mapped);
+ scatterwalk_advance(&walk, size);
+ scatterwalk_done(&walk, 0, assoclen);
+ }
+
+ if (pos > 0) {
+ memset(buf.bytes + pos, 0, AEGIS256_BLOCK_SIZE - pos);
+ crypto_aegis256_aesni_ad(state, AEGIS256_BLOCK_SIZE, buf.bytes);
+ }
+}
+
+static void crypto_aegis256_aesni_process_crypt(
+ struct aegis_state *state, struct aead_request *req,
+ const struct aegis_crypt_ops *ops)
+{
+ struct skcipher_walk walk;
+ u8 *src, *dst;
+ unsigned int chunksize, base;
+
+ ops->skcipher_walk_init(&walk, req, false);
+
+ while (walk.nbytes) {
+ src = walk.src.virt.addr;
+ dst = walk.dst.virt.addr;
+ chunksize = walk.nbytes;
+
+ ops->crypt_blocks(state, chunksize, src, dst);
+
+ base = chunksize & ~(AEGIS256_BLOCK_SIZE - 1);
+ src += base;
+ dst += base;
+ chunksize &= AEGIS256_BLOCK_SIZE - 1;
+
+ if (chunksize > 0)
+ ops->crypt_tail(state, chunksize, src, dst);
+
+ skcipher_walk_done(&walk, 0);
+ }
+}
+
+static struct aegis_ctx *crypto_aegis256_aesni_ctx(struct crypto_aead *aead)
+{
+ u8 *ctx = crypto_aead_ctx(aead);
+ ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx));
+ return (void *)ctx;
+}
+
+static int crypto_aegis256_aesni_setkey(struct crypto_aead *aead, const u8 *key,
+ unsigned int keylen)
+{
+ struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(aead);
+
+ if (keylen != AEGIS256_KEY_SIZE) {
+ crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+
+ memcpy(ctx->key, key, AEGIS256_KEY_SIZE);
+
+ return 0;
+}
+
+static int crypto_aegis256_aesni_setauthsize(struct crypto_aead *tfm,
+ unsigned int authsize)
+{
+ if (authsize > AEGIS256_MAX_AUTH_SIZE)
+ return -EINVAL;
+ if (authsize < AEGIS256_MIN_AUTH_SIZE)
+ return -EINVAL;
+ return 0;
+}
+
+static void crypto_aegis256_aesni_crypt(struct aead_request *req,
+ struct aegis_block *tag_xor,
+ unsigned int cryptlen,
+ const struct aegis_crypt_ops *ops)
+{
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(tfm);
+ struct aegis_state state;
+
+ kernel_fpu_begin();
+
+ crypto_aegis256_aesni_init(&state, ctx->key, req->iv);
+ crypto_aegis256_aesni_process_ad(&state, req->src, req->assoclen);
+ crypto_aegis256_aesni_process_crypt(&state, req, ops);
+ crypto_aegis256_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
+
+ kernel_fpu_end();
+}
+
+static int crypto_aegis256_aesni_encrypt(struct aead_request *req)
+{
+ static const struct aegis_crypt_ops OPS = {
+ .skcipher_walk_init = skcipher_walk_aead_encrypt,
+ .crypt_blocks = crypto_aegis256_aesni_enc,
+ .crypt_tail = crypto_aegis256_aesni_enc_tail,
+ };
+
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct aegis_block tag = {};
+ unsigned int authsize = crypto_aead_authsize(tfm);
+ unsigned int cryptlen = req->cryptlen;
+
+ crypto_aegis256_aesni_crypt(req, &tag, cryptlen, &OPS);
+
+ scatterwalk_map_and_copy(tag.bytes, req->dst,
+ req->assoclen + cryptlen, authsize, 1);
+ return 0;
+}
+
+static int crypto_aegis256_aesni_decrypt(struct aead_request *req)
+{
+ static const struct aegis_block zeros = {};
+
+ static const struct aegis_crypt_ops OPS = {
+ .skcipher_walk_init = skcipher_walk_aead_decrypt,
+ .crypt_blocks = crypto_aegis256_aesni_dec,
+ .crypt_tail = crypto_aegis256_aesni_dec_tail,
+ };
+
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct aegis_block tag;
+ unsigned int authsize = crypto_aead_authsize(tfm);
+ unsigned int cryptlen = req->cryptlen - authsize;
+
+ scatterwalk_map_and_copy(tag.bytes, req->src,
+ req->assoclen + cryptlen, authsize, 0);
+
+ crypto_aegis256_aesni_crypt(req, &tag, cryptlen, &OPS);
+
+ return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0;
+}
+
+static int crypto_aegis256_aesni_init_tfm(struct crypto_aead *aead)
+{
+ return 0;
+}
+
+static void crypto_aegis256_aesni_exit_tfm(struct crypto_aead *aead)
+{
+}
+
+static int cryptd_aegis256_aesni_setkey(struct crypto_aead *aead,
+ const u8 *key, unsigned int keylen)
+{
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+ struct cryptd_aead *cryptd_tfm = *ctx;
+
+ return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
+}
+
+static int cryptd_aegis256_aesni_setauthsize(struct crypto_aead *aead,
+ unsigned int authsize)
+{
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+ struct cryptd_aead *cryptd_tfm = *ctx;
+
+ return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
+}
+
+static int cryptd_aegis256_aesni_encrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+ struct cryptd_aead *cryptd_tfm = *ctx;
+
+ aead = &cryptd_tfm->base;
+ if (irq_fpu_usable() && (!in_atomic() ||
+ !cryptd_aead_queued(cryptd_tfm)))
+ aead = cryptd_aead_child(cryptd_tfm);
+
+ aead_request_set_tfm(req, aead);
+
+ return crypto_aead_encrypt(req);
+}
+
+static int cryptd_aegis256_aesni_decrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+ struct cryptd_aead *cryptd_tfm = *ctx;
+
+ aead = &cryptd_tfm->base;
+ if (irq_fpu_usable() && (!in_atomic() ||
+ !cryptd_aead_queued(cryptd_tfm)))
+ aead = cryptd_aead_child(cryptd_tfm);
+
+ aead_request_set_tfm(req, aead);
+
+ return crypto_aead_decrypt(req);
+}
+
+static int cryptd_aegis256_aesni_init_tfm(struct crypto_aead *aead)
+{
+ struct cryptd_aead *cryptd_tfm;
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+ cryptd_tfm = cryptd_alloc_aead("__aegis256-aesni", CRYPTO_ALG_INTERNAL,
+ CRYPTO_ALG_INTERNAL);
+ if (IS_ERR(cryptd_tfm))
+ return PTR_ERR(cryptd_tfm);
+
+ *ctx = cryptd_tfm;
+ crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
+ return 0;
+}
+
+static void cryptd_aegis256_aesni_exit_tfm(struct crypto_aead *aead)
+{
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+ cryptd_free_aead(*ctx);
+}
+
+static struct aead_alg crypto_aegis256_aesni_alg[] = {
+ {
+ .setkey = crypto_aegis256_aesni_setkey,
+ .setauthsize = crypto_aegis256_aesni_setauthsize,
+ .encrypt = crypto_aegis256_aesni_encrypt,
+ .decrypt = crypto_aegis256_aesni_decrypt,
+ .init = crypto_aegis256_aesni_init_tfm,
+ .exit = crypto_aegis256_aesni_exit_tfm,
+
+ .ivsize = AEGIS256_NONCE_SIZE,
+ .maxauthsize = AEGIS256_MAX_AUTH_SIZE,
+ .chunksize = AEGIS256_BLOCK_SIZE,
+
+ .base = {
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct aegis_ctx) +
+ __alignof__(struct aegis_ctx),
+ .cra_alignmask = 0,
+
+ .cra_name = "__aegis256",
+ .cra_driver_name = "__aegis256-aesni",
+
+ .cra_module = THIS_MODULE,
+ }
+ }, {
+ .setkey = cryptd_aegis256_aesni_setkey,
+ .setauthsize = cryptd_aegis256_aesni_setauthsize,
+ .encrypt = cryptd_aegis256_aesni_encrypt,
+ .decrypt = cryptd_aegis256_aesni_decrypt,
+ .init = cryptd_aegis256_aesni_init_tfm,
+ .exit = cryptd_aegis256_aesni_exit_tfm,
+
+ .ivsize = AEGIS256_NONCE_SIZE,
+ .maxauthsize = AEGIS256_MAX_AUTH_SIZE,
+ .chunksize = AEGIS256_BLOCK_SIZE,
+
+ .base = {
+ .cra_flags = CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct cryptd_aead *),
+ .cra_alignmask = 0,
+
+ .cra_priority = 400,
+
+ .cra_name = "aegis256",
+ .cra_driver_name = "aegis256-aesni",
+
+ .cra_module = THIS_MODULE,
+ }
+ }
+};
+
+static const struct x86_cpu_id aesni_cpu_id[] = {
+ X86_FEATURE_MATCH(X86_FEATURE_AES),
+ X86_FEATURE_MATCH(X86_FEATURE_XMM2),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
+
+static int __init crypto_aegis256_aesni_module_init(void)
+{
+ if (!x86_match_cpu(aesni_cpu_id))
+ return -ENODEV;
+
+ return crypto_register_aeads(crypto_aegis256_aesni_alg,
+ ARRAY_SIZE(crypto_aegis256_aesni_alg));
+}
+
+static void __exit crypto_aegis256_aesni_module_exit(void)
+{
+ crypto_unregister_aeads(crypto_aegis256_aesni_alg,
+ ARRAY_SIZE(crypto_aegis256_aesni_alg));
+}
+
+module_init(crypto_aegis256_aesni_module_init);
+module_exit(crypto_aegis256_aesni_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
+MODULE_DESCRIPTION("AEGIS-256 AEAD algorithm -- AESNI+SSE2 implementation");
+MODULE_ALIAS_CRYPTO("aegis256");
+MODULE_ALIAS_CRYPTO("aegis256-aesni");
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 0420bab19efb..2ddbe3a1868b 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -364,5 +364,5 @@ module_exit(ghash_pclmulqdqni_mod_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("GHASH Message Digest Algorithm, "
- "acclerated by PCLMULQDQ-NI");
+ "accelerated by PCLMULQDQ-NI");
MODULE_ALIAS_CRYPTO("ghash");
diff --git a/arch/x86/crypto/morus1280-avx2-asm.S b/arch/x86/crypto/morus1280-avx2-asm.S
new file mode 100644
index 000000000000..37d422e77931
--- /dev/null
+++ b/arch/x86/crypto/morus1280-avx2-asm.S
@@ -0,0 +1,621 @@
+/*
+ * AVX2 implementation of MORUS-1280
+ *
+ * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+#define SHUFFLE_MASK(i0, i1, i2, i3) \
+ (i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
+
+#define MASK1 SHUFFLE_MASK(3, 0, 1, 2)
+#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
+#define MASK3 SHUFFLE_MASK(1, 2, 3, 0)
+
+#define STATE0 %ymm0
+#define STATE0_LOW %xmm0
+#define STATE1 %ymm1
+#define STATE2 %ymm2
+#define STATE3 %ymm3
+#define STATE4 %ymm4
+#define KEY %ymm5
+#define MSG %ymm5
+#define MSG_LOW %xmm5
+#define T0 %ymm6
+#define T0_LOW %xmm6
+#define T1 %ymm7
+
+.section .rodata.cst32.morus1280_const, "aM", @progbits, 32
+.align 32
+.Lmorus1280_const:
+ .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
+ .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
+ .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
+ .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
+
+.section .rodata.cst32.morus1280_counter, "aM", @progbits, 32
+.align 32
+.Lmorus1280_counter:
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+ .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+ .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
+ .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
+
+.text
+
+.macro morus1280_round s0, s1, s2, s3, s4, b, w
+ vpand \s1, \s2, T0
+ vpxor T0, \s0, \s0
+ vpxor \s3, \s0, \s0
+ vpsllq $\b, \s0, T0
+ vpsrlq $(64 - \b), \s0, \s0
+ vpxor T0, \s0, \s0
+ vpermq $\w, \s3, \s3
+.endm
+
+/*
+ * __morus1280_update: internal ABI
+ * input:
+ * STATE[0-4] - input state
+ * MSG - message block
+ * output:
+ * STATE[0-4] - output state
+ * changed:
+ * T0
+ */
+__morus1280_update:
+ morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1
+ vpxor MSG, STATE1, STATE1
+ morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2
+ vpxor MSG, STATE2, STATE2
+ morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3
+ vpxor MSG, STATE3, STATE3
+ morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2, 7, MASK2
+ vpxor MSG, STATE4, STATE4
+ morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3, 4, MASK1
+ ret
+ENDPROC(__morus1280_update)
+
+/*
+ * __morus1280_update_zero: internal ABI
+ * input:
+ * STATE[0-4] - input state
+ * output:
+ * STATE[0-4] - output state
+ * changed:
+ * T0
+ */
+__morus1280_update_zero:
+ morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1
+ morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2
+ morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3
+ morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2, 7, MASK2
+ morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3, 4, MASK1
+ ret
+ENDPROC(__morus1280_update_zero)
+
+/*
+ * __load_partial: internal ABI
+ * input:
+ * %rsi - src
+ * %rcx - bytes
+ * output:
+ * MSG - message block
+ * changed:
+ * %r8
+ * %r9
+ */
+__load_partial:
+ xor %r9, %r9
+ vpxor MSG, MSG, MSG
+
+ mov %rcx, %r8
+ and $0x1, %r8
+ jz .Lld_partial_1
+
+ mov %rcx, %r8
+ and $0x1E, %r8
+ add %rsi, %r8
+ mov (%r8), %r9b
+
+.Lld_partial_1:
+ mov %rcx, %r8
+ and $0x2, %r8
+ jz .Lld_partial_2
+
+ mov %rcx, %r8
+ and $0x1C, %r8
+ add %rsi, %r8
+ shl $16, %r9
+ mov (%r8), %r9w
+
+.Lld_partial_2:
+ mov %rcx, %r8
+ and $0x4, %r8
+ jz .Lld_partial_4
+
+ mov %rcx, %r8
+ and $0x18, %r8
+ add %rsi, %r8
+ shl $32, %r9
+ mov (%r8), %r8d
+ xor %r8, %r9
+
+.Lld_partial_4:
+ movq %r9, MSG_LOW
+
+ mov %rcx, %r8
+ and $0x8, %r8
+ jz .Lld_partial_8
+
+ mov %rcx, %r8
+ and $0x10, %r8
+ add %rsi, %r8
+ pshufd $MASK2, MSG_LOW, MSG_LOW
+ pinsrq $0, (%r8), MSG_LOW
+
+.Lld_partial_8:
+ mov %rcx, %r8
+ and $0x10, %r8
+ jz .Lld_partial_16
+
+ vpermq $MASK2, MSG, MSG
+ movdqu (%rsi), MSG_LOW
+
+.Lld_partial_16:
+ ret
+ENDPROC(__load_partial)
+
+/*
+ * __store_partial: internal ABI
+ * input:
+ * %rdx - dst
+ * %rcx - bytes
+ * output:
+ * T0 - message block
+ * changed:
+ * %r8
+ * %r9
+ * %r10
+ */
+__store_partial:
+ mov %rcx, %r8
+ mov %rdx, %r9
+
+ cmp $16, %r8
+ jl .Lst_partial_16
+
+ movdqu T0_LOW, (%r9)
+ vpermq $MASK2, T0, T0
+
+ sub $16, %r8
+ add $16, %r9
+
+.Lst_partial_16:
+ movq T0_LOW, %r10
+
+ cmp $8, %r8
+ jl .Lst_partial_8
+
+ mov %r10, (%r9)
+ pextrq $1, T0_LOW, %r10
+
+ sub $8, %r8
+ add $8, %r9
+
+.Lst_partial_8:
+ cmp $4, %r8
+ jl .Lst_partial_4
+
+ mov %r10d, (%r9)
+ shr $32, %r10
+
+ sub $4, %r8
+ add $4, %r9
+
+.Lst_partial_4:
+ cmp $2, %r8
+ jl .Lst_partial_2
+
+ mov %r10w, (%r9)
+ shr $16, %r10
+
+ sub $2, %r8
+ add $2, %r9
+
+.Lst_partial_2:
+ cmp $1, %r8
+ jl .Lst_partial_1
+
+ mov %r10b, (%r9)
+
+.Lst_partial_1:
+ ret
+ENDPROC(__store_partial)
+
+/*
+ * void crypto_morus1280_avx2_init(void *state, const void *key,
+ * const void *iv);
+ */
+ENTRY(crypto_morus1280_avx2_init)
+ FRAME_BEGIN
+
+ /* load IV: */
+ vpxor STATE0, STATE0, STATE0
+ movdqu (%rdx), STATE0_LOW
+ /* load key: */
+ vmovdqu (%rsi), KEY
+ vmovdqa KEY, STATE1
+ /* load all ones: */
+ vpcmpeqd STATE2, STATE2, STATE2
+ /* load all zeros: */
+ vpxor STATE3, STATE3, STATE3
+ /* load the constant: */
+ vmovdqa .Lmorus1280_const, STATE4
+
+ /* update 16 times with zero: */
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+
+ /* xor-in the key again after updates: */
+ vpxor KEY, STATE1, STATE1
+
+ /* store the state: */
+ vmovdqu STATE0, (0 * 32)(%rdi)
+ vmovdqu STATE1, (1 * 32)(%rdi)
+ vmovdqu STATE2, (2 * 32)(%rdi)
+ vmovdqu STATE3, (3 * 32)(%rdi)
+ vmovdqu STATE4, (4 * 32)(%rdi)
+
+ FRAME_END
+ ret
+ENDPROC(crypto_morus1280_avx2_init)
+
+/*
+ * void crypto_morus1280_avx2_ad(void *state, const void *data,
+ * unsigned int length);
+ */
+ENTRY(crypto_morus1280_avx2_ad)
+ FRAME_BEGIN
+
+ cmp $32, %rdx
+ jb .Lad_out
+
+ /* load the state: */
+ vmovdqu (0 * 32)(%rdi), STATE0
+ vmovdqu (1 * 32)(%rdi), STATE1
+ vmovdqu (2 * 32)(%rdi), STATE2
+ vmovdqu (3 * 32)(%rdi), STATE3
+ vmovdqu (4 * 32)(%rdi), STATE4
+
+ mov %rsi, %r8
+ and $0x1F, %r8
+ jnz .Lad_u_loop
+
+.align 4
+.Lad_a_loop:
+ vmovdqa (%rsi), MSG
+ call __morus1280_update
+ sub $32, %rdx
+ add $32, %rsi
+ cmp $32, %rdx
+ jge .Lad_a_loop
+
+ jmp .Lad_cont
+.align 4
+.Lad_u_loop:
+ vmovdqu (%rsi), MSG
+ call __morus1280_update
+ sub $32, %rdx
+ add $32, %rsi
+ cmp $32, %rdx
+ jge .Lad_u_loop
+
+.Lad_cont:
+ /* store the state: */
+ vmovdqu STATE0, (0 * 32)(%rdi)
+ vmovdqu STATE1, (1 * 32)(%rdi)
+ vmovdqu STATE2, (2 * 32)(%rdi)
+ vmovdqu STATE3, (3 * 32)(%rdi)
+ vmovdqu STATE4, (4 * 32)(%rdi)
+
+.Lad_out:
+ FRAME_END
+ ret
+ENDPROC(crypto_morus1280_avx2_ad)
+
+/*
+ * void crypto_morus1280_avx2_enc(void *state, const void *src, void *dst,
+ * unsigned int length);
+ */
+ENTRY(crypto_morus1280_avx2_enc)
+ FRAME_BEGIN
+
+ cmp $32, %rcx
+ jb .Lenc_out
+
+ /* load the state: */
+ vmovdqu (0 * 32)(%rdi), STATE0
+ vmovdqu (1 * 32)(%rdi), STATE1
+ vmovdqu (2 * 32)(%rdi), STATE2
+ vmovdqu (3 * 32)(%rdi), STATE3
+ vmovdqu (4 * 32)(%rdi), STATE4
+
+ mov %rsi, %r8
+ or %rdx, %r8
+ and $0x1F, %r8
+ jnz .Lenc_u_loop
+
+.align 4
+.Lenc_a_loop:
+ vmovdqa (%rsi), MSG
+ vmovdqa MSG, T0
+ vpxor STATE0, T0, T0
+ vpermq $MASK3, STATE1, T1
+ vpxor T1, T0, T0
+ vpand STATE2, STATE3, T1
+ vpxor T1, T0, T0
+ vmovdqa T0, (%rdx)
+
+ call __morus1280_update
+ sub $32, %rcx
+ add $32, %rsi
+ add $32, %rdx
+ cmp $32, %rcx
+ jge .Lenc_a_loop
+
+ jmp .Lenc_cont
+.align 4
+.Lenc_u_loop:
+ vmovdqu (%rsi), MSG
+ vmovdqa MSG, T0
+ vpxor STATE0, T0, T0
+ vpermq $MASK3, STATE1, T1
+ vpxor T1, T0, T0
+ vpand STATE2, STATE3, T1
+ vpxor T1, T0, T0
+ vmovdqu T0, (%rdx)
+
+ call __morus1280_update
+ sub $32, %rcx
+ add $32, %rsi
+ add $32, %rdx
+ cmp $32, %rcx
+ jge .Lenc_u_loop
+
+.Lenc_cont:
+ /* store the state: */
+ vmovdqu STATE0, (0 * 32)(%rdi)
+ vmovdqu STATE1, (1 * 32)(%rdi)
+ vmovdqu STATE2, (2 * 32)(%rdi)
+ vmovdqu STATE3, (3 * 32)(%rdi)
+ vmovdqu STATE4, (4 * 32)(%rdi)
+
+.Lenc_out:
+ FRAME_END
+ ret
+ENDPROC(crypto_morus1280_avx2_enc)
+
+/*
+ * void crypto_morus1280_avx2_enc_tail(void *state, const void *src, void *dst,
+ * unsigned int length);
+ */
+ENTRY(crypto_morus1280_avx2_enc_tail)
+ FRAME_BEGIN
+
+ /* load the state: */
+ vmovdqu (0 * 32)(%rdi), STATE0
+ vmovdqu (1 * 32)(%rdi), STATE1
+ vmovdqu (2 * 32)(%rdi), STATE2
+ vmovdqu (3 * 32)(%rdi), STATE3
+ vmovdqu (4 * 32)(%rdi), STATE4
+
+ /* encrypt message: */
+ call __load_partial
+
+ vmovdqa MSG, T0
+ vpxor STATE0, T0, T0
+ vpermq $MASK3, STATE1, T1
+ vpxor T1, T0, T0
+ vpand STATE2, STATE3, T1
+ vpxor T1, T0, T0
+
+ call __store_partial
+
+ call __morus1280_update
+
+ /* store the state: */
+ vmovdqu STATE0, (0 * 32)(%rdi)
+ vmovdqu STATE1, (1 * 32)(%rdi)
+ vmovdqu STATE2, (2 * 32)(%rdi)
+ vmovdqu STATE3, (3 * 32)(%rdi)
+ vmovdqu STATE4, (4 * 32)(%rdi)
+
+ FRAME_END
+ENDPROC(crypto_morus1280_avx2_enc_tail)
+
+/*
+ * void crypto_morus1280_avx2_dec(void *state, const void *src, void *dst,
+ * unsigned int length);
+ */
+ENTRY(crypto_morus1280_avx2_dec)
+ FRAME_BEGIN
+
+ cmp $32, %rcx
+ jb .Ldec_out
+
+ /* load the state: */
+ vmovdqu (0 * 32)(%rdi), STATE0
+ vmovdqu (1 * 32)(%rdi), STATE1
+ vmovdqu (2 * 32)(%rdi), STATE2
+ vmovdqu (3 * 32)(%rdi), STATE3
+ vmovdqu (4 * 32)(%rdi), STATE4
+
+ mov %rsi, %r8
+ or %rdx, %r8
+ and $0x1F, %r8
+ jnz .Ldec_u_loop
+
+.align 4
+.Ldec_a_loop:
+ vmovdqa (%rsi), MSG
+ vpxor STATE0, MSG, MSG
+ vpermq $MASK3, STATE1, T0
+ vpxor T0, MSG, MSG
+ vpand STATE2, STATE3, T0
+ vpxor T0, MSG, MSG
+ vmovdqa MSG, (%rdx)
+
+ call __morus1280_update
+ sub $32, %rcx
+ add $32, %rsi
+ add $32, %rdx
+ cmp $32, %rcx
+ jge .Ldec_a_loop
+
+ jmp .Ldec_cont
+.align 4
+.Ldec_u_loop:
+ vmovdqu (%rsi), MSG
+ vpxor STATE0, MSG, MSG
+ vpermq $MASK3, STATE1, T0
+ vpxor T0, MSG, MSG
+ vpand STATE2, STATE3, T0
+ vpxor T0, MSG, MSG
+ vmovdqu MSG, (%rdx)
+
+ call __morus1280_update
+ sub $32, %rcx
+ add $32, %rsi
+ add $32, %rdx
+ cmp $32, %rcx
+ jge .Ldec_u_loop
+
+.Ldec_cont:
+ /* store the state: */
+ vmovdqu STATE0, (0 * 32)(%rdi)
+ vmovdqu STATE1, (1 * 32)(%rdi)
+ vmovdqu STATE2, (2 * 32)(%rdi)
+ vmovdqu STATE3, (3 * 32)(%rdi)
+ vmovdqu STATE4, (4 * 32)(%rdi)
+
+.Ldec_out:
+ FRAME_END
+ ret
+ENDPROC(crypto_morus1280_avx2_dec)
+
+/*
+ * void crypto_morus1280_avx2_dec_tail(void *state, const void *src, void *dst,
+ * unsigned int length);
+ */
+ENTRY(crypto_morus1280_avx2_dec_tail)
+ FRAME_BEGIN
+
+ /* load the state: */
+ vmovdqu (0 * 32)(%rdi), STATE0
+ vmovdqu (1 * 32)(%rdi), STATE1
+ vmovdqu (2 * 32)(%rdi), STATE2
+ vmovdqu (3 * 32)(%rdi), STATE3
+ vmovdqu (4 * 32)(%rdi), STATE4
+
+ /* decrypt message: */
+ call __load_partial
+
+ vpxor STATE0, MSG, MSG
+ vpermq $MASK3, STATE1, T0
+ vpxor T0, MSG, MSG
+ vpand STATE2, STATE3, T0
+ vpxor T0, MSG, MSG
+ vmovdqa MSG, T0
+
+ call __store_partial
+
+ /* mask with byte count: */
+ movq %rcx, T0_LOW
+ vpbroadcastb T0_LOW, T0
+ vmovdqa .Lmorus1280_counter, T1
+ vpcmpgtb T1, T0, T0
+ vpand T0, MSG, MSG
+
+ call __morus1280_update
+
+ /* store the state: */
+ vmovdqu STATE0, (0 * 32)(%rdi)
+ vmovdqu STATE1, (1 * 32)(%rdi)
+ vmovdqu STATE2, (2 * 32)(%rdi)
+ vmovdqu STATE3, (3 * 32)(%rdi)
+ vmovdqu STATE4, (4 * 32)(%rdi)
+
+ FRAME_END
+ ret
+ENDPROC(crypto_morus1280_avx2_dec_tail)
+
+/*
+ * void crypto_morus1280_avx2_final(void *state, void *tag_xor,
+ * u64 assoclen, u64 cryptlen);
+ */
+ENTRY(crypto_morus1280_avx2_final)
+ FRAME_BEGIN
+
+ /* load the state: */
+ vmovdqu (0 * 32)(%rdi), STATE0
+ vmovdqu (1 * 32)(%rdi), STATE1
+ vmovdqu (2 * 32)(%rdi), STATE2
+ vmovdqu (3 * 32)(%rdi), STATE3
+ vmovdqu (4 * 32)(%rdi), STATE4
+
+ /* xor state[0] into state[4]: */
+ vpxor STATE0, STATE4, STATE4
+
+ /* prepare length block: */
+ vpxor MSG, MSG, MSG
+ vpinsrq $0, %rdx, MSG_LOW, MSG_LOW
+ vpinsrq $1, %rcx, MSG_LOW, MSG_LOW
+ vpsllq $3, MSG, MSG /* multiply by 8 (to get bit count) */
+
+ /* update state: */
+ call __morus1280_update
+ call __morus1280_update
+ call __morus1280_update
+ call __morus1280_update
+ call __morus1280_update
+ call __morus1280_update
+ call __morus1280_update
+ call __morus1280_update
+ call __morus1280_update
+ call __morus1280_update
+
+ /* xor tag: */
+ vmovdqu (%rsi), MSG
+
+ vpxor STATE0, MSG, MSG
+ vpermq $MASK3, STATE1, T0
+ vpxor T0, MSG, MSG
+ vpand STATE2, STATE3, T0
+ vpxor T0, MSG, MSG
+ vmovdqu MSG, (%rsi)
+
+ FRAME_END
+ ret
+ENDPROC(crypto_morus1280_avx2_final)
diff --git a/arch/x86/crypto/morus1280-avx2-glue.c b/arch/x86/crypto/morus1280-avx2-glue.c
new file mode 100644
index 000000000000..f111f36d26dc
--- /dev/null
+++ b/arch/x86/crypto/morus1280-avx2-glue.c
@@ -0,0 +1,68 @@
+/*
+ * The MORUS-1280 Authenticated-Encryption Algorithm
+ * Glue for AVX2 implementation
+ *
+ * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/internal/aead.h>
+#include <crypto/morus1280_glue.h>
+#include <linux/module.h>
+#include <asm/fpu/api.h>
+#include <asm/cpu_device_id.h>
+
+asmlinkage void crypto_morus1280_avx2_init(void *state, const void *key,
+ const void *iv);
+asmlinkage void crypto_morus1280_avx2_ad(void *state, const void *data,
+ unsigned int length);
+
+asmlinkage void crypto_morus1280_avx2_enc(void *state, const void *src,
+ void *dst, unsigned int length);
+asmlinkage void crypto_morus1280_avx2_dec(void *state, const void *src,
+ void *dst, unsigned int length);
+
+asmlinkage void crypto_morus1280_avx2_enc_tail(void *state, const void *src,
+ void *dst, unsigned int length);
+asmlinkage void crypto_morus1280_avx2_dec_tail(void *state, const void *src,
+ void *dst, unsigned int length);
+
+asmlinkage void crypto_morus1280_avx2_final(void *state, void *tag_xor,
+ u64 assoclen, u64 cryptlen);
+
+MORUS1280_DECLARE_ALGS(avx2, "morus1280-avx2", 400);
+
+static const struct x86_cpu_id avx2_cpu_id[] = {
+ X86_FEATURE_MATCH(X86_FEATURE_AVX2),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, avx2_cpu_id);
+
+static int __init crypto_morus1280_avx2_module_init(void)
+{
+ if (!x86_match_cpu(avx2_cpu_id))
+ return -ENODEV;
+
+ return crypto_register_aeads(crypto_morus1280_avx2_algs,
+ ARRAY_SIZE(crypto_morus1280_avx2_algs));
+}
+
+static void __exit crypto_morus1280_avx2_module_exit(void)
+{
+ crypto_unregister_aeads(crypto_morus1280_avx2_algs,
+ ARRAY_SIZE(crypto_morus1280_avx2_algs));
+}
+
+module_init(crypto_morus1280_avx2_module_init);
+module_exit(crypto_morus1280_avx2_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
+MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm -- AVX2 implementation");
+MODULE_ALIAS_CRYPTO("morus1280");
+MODULE_ALIAS_CRYPTO("morus1280-avx2");
diff --git a/arch/x86/crypto/morus1280-sse2-asm.S b/arch/x86/crypto/morus1280-sse2-asm.S
new file mode 100644
index 000000000000..1fe637c7be9d
--- /dev/null
+++ b/arch/x86/crypto/morus1280-sse2-asm.S
@@ -0,0 +1,895 @@
+/*
+ * SSE2 implementation of MORUS-1280
+ *
+ * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+#define SHUFFLE_MASK(i0, i1, i2, i3) \
+ (i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
+
+#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
+
+#define STATE0_LO %xmm0
+#define STATE0_HI %xmm1
+#define STATE1_LO %xmm2
+#define STATE1_HI %xmm3
+#define STATE2_LO %xmm4
+#define STATE2_HI %xmm5
+#define STATE3_LO %xmm6
+#define STATE3_HI %xmm7
+#define STATE4_LO %xmm8
+#define STATE4_HI %xmm9
+#define KEY_LO %xmm10
+#define KEY_HI %xmm11
+#define MSG_LO %xmm10
+#define MSG_HI %xmm11
+#define T0_LO %xmm12
+#define T0_HI %xmm13
+#define T1_LO %xmm14
+#define T1_HI %xmm15
+
+.section .rodata.cst16.morus640_const, "aM", @progbits, 16
+.align 16
+.Lmorus640_const_0:
+ .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
+ .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
+.Lmorus640_const_1:
+ .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
+ .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
+
+.section .rodata.cst16.morus640_counter, "aM", @progbits, 16
+.align 16
+.Lmorus640_counter_0:
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+ .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+.Lmorus640_counter_1:
+ .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
+ .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
+
+.text
+
+.macro rol1 hi, lo
+ /*
+ * HI_1 | HI_0 || LO_1 | LO_0
+ * ==>
+ * HI_0 | HI_1 || LO_1 | LO_0
+ * ==>
+ * HI_0 | LO_1 || LO_0 | HI_1
+ */
+ pshufd $MASK2, \hi, \hi
+ movdqa \hi, T0_LO
+ punpcklqdq \lo, T0_LO
+ punpckhqdq \hi, \lo
+ movdqa \lo, \hi
+ movdqa T0_LO, \lo
+.endm
+
+.macro rol2 hi, lo
+ movdqa \lo, T0_LO
+ movdqa \hi, \lo
+ movdqa T0_LO, \hi
+.endm
+
+.macro rol3 hi, lo
+ /*
+ * HI_1 | HI_0 || LO_1 | LO_0
+ * ==>
+ * HI_0 | HI_1 || LO_1 | LO_0
+ * ==>
+ * LO_0 | HI_1 || HI_0 | LO_1
+ */
+ pshufd $MASK2, \hi, \hi
+ movdqa \lo, T0_LO
+ punpckhqdq \hi, T0_LO
+ punpcklqdq \lo, \hi
+ movdqa T0_LO, \lo
+.endm
+
+.macro morus1280_round s0_l, s0_h, s1_l, s1_h, s2_l, s2_h, s3_l, s3_h, s4_l, s4_h, b, w
+ movdqa \s1_l, T0_LO
+ pand \s2_l, T0_LO
+ pxor T0_LO, \s0_l
+
+ movdqa \s1_h, T0_LO
+ pand \s2_h, T0_LO
+ pxor T0_LO, \s0_h
+
+ pxor \s3_l, \s0_l
+ pxor \s3_h, \s0_h
+
+ movdqa \s0_l, T0_LO
+ psllq $\b, T0_LO
+ psrlq $(64 - \b), \s0_l
+ pxor T0_LO, \s0_l
+
+ movdqa \s0_h, T0_LO
+ psllq $\b, T0_LO
+ psrlq $(64 - \b), \s0_h
+ pxor T0_LO, \s0_h
+
+ \w \s3_h, \s3_l
+.endm
+
+/*
+ * __morus1280_update: internal ABI
+ * input:
+ * STATE[0-4] - input state
+ * MSG - message block
+ * output:
+ * STATE[0-4] - output state
+ * changed:
+ * T0
+ */
+__morus1280_update:
+ morus1280_round \
+ STATE0_LO, STATE0_HI, \
+ STATE1_LO, STATE1_HI, \
+ STATE2_LO, STATE2_HI, \
+ STATE3_LO, STATE3_HI, \
+ STATE4_LO, STATE4_HI, \
+ 13, rol1
+ pxor MSG_LO, STATE1_LO
+ pxor MSG_HI, STATE1_HI
+ morus1280_round \
+ STATE1_LO, STATE1_HI, \
+ STATE2_LO, STATE2_HI, \
+ STATE3_LO, STATE3_HI, \
+ STATE4_LO, STATE4_HI, \
+ STATE0_LO, STATE0_HI, \
+ 46, rol2
+ pxor MSG_LO, STATE2_LO
+ pxor MSG_HI, STATE2_HI
+ morus1280_round \
+ STATE2_LO, STATE2_HI, \
+ STATE3_LO, STATE3_HI, \
+ STATE4_LO, STATE4_HI, \
+ STATE0_LO, STATE0_HI, \
+ STATE1_LO, STATE1_HI, \
+ 38, rol3
+ pxor MSG_LO, STATE3_LO
+ pxor MSG_HI, STATE3_HI
+ morus1280_round \
+ STATE3_LO, STATE3_HI, \
+ STATE4_LO, STATE4_HI, \
+ STATE0_LO, STATE0_HI, \
+ STATE1_LO, STATE1_HI, \
+ STATE2_LO, STATE2_HI, \
+ 7, rol2
+ pxor MSG_LO, STATE4_LO
+ pxor MSG_HI, STATE4_HI
+ morus1280_round \
+ STATE4_LO, STATE4_HI, \
+ STATE0_LO, STATE0_HI, \
+ STATE1_LO, STATE1_HI, \
+ STATE2_LO, STATE2_HI, \
+ STATE3_LO, STATE3_HI, \
+ 4, rol1
+ ret
+ENDPROC(__morus1280_update)
+
+/*
+ * __morus1280_update_zero: internal ABI
+ * input:
+ * STATE[0-4] - input state
+ * output:
+ * STATE[0-4] - output state
+ * changed:
+ * T0
+ */
+__morus1280_update_zero:
+ morus1280_round \
+ STATE0_LO, STATE0_HI, \
+ STATE1_LO, STATE1_HI, \
+ STATE2_LO, STATE2_HI, \
+ STATE3_LO, STATE3_HI, \
+ STATE4_LO, STATE4_HI, \
+ 13, rol1
+ morus1280_round \
+ STATE1_LO, STATE1_HI, \
+ STATE2_LO, STATE2_HI, \
+ STATE3_LO, STATE3_HI, \
+ STATE4_LO, STATE4_HI, \
+ STATE0_LO, STATE0_HI, \
+ 46, rol2
+ morus1280_round \
+ STATE2_LO, STATE2_HI, \
+ STATE3_LO, STATE3_HI, \
+ STATE4_LO, STATE4_HI, \
+ STATE0_LO, STATE0_HI, \
+ STATE1_LO, STATE1_HI, \
+ 38, rol3
+ morus1280_round \
+ STATE3_LO, STATE3_HI, \
+ STATE4_LO, STATE4_HI, \
+ STATE0_LO, STATE0_HI, \
+ STATE1_LO, STATE1_HI, \
+ STATE2_LO, STATE2_HI, \
+ 7, rol2
+ morus1280_round \
+ STATE4_LO, STATE4_HI, \
+ STATE0_LO, STATE0_HI, \
+ STATE1_LO, STATE1_HI, \
+ STATE2_LO, STATE2_HI, \
+ STATE3_LO, STATE3_HI, \
+ 4, rol1
+ ret
+ENDPROC(__morus1280_update_zero)
+
+/*
+ * __load_partial: internal ABI
+ * input:
+ * %rsi - src
+ * %rcx - bytes
+ * output:
+ * MSG - message block
+ * changed:
+ * %r8
+ * %r9
+ */
+__load_partial:
+ xor %r9, %r9
+ pxor MSG_LO, MSG_LO
+ pxor MSG_HI, MSG_HI
+
+ mov %rcx, %r8
+ and $0x1, %r8
+ jz .Lld_partial_1
+
+ mov %rcx, %r8
+ and $0x1E, %r8
+ add %rsi, %r8
+ mov (%r8), %r9b
+
+.Lld_partial_1:
+ mov %rcx, %r8
+ and $0x2, %r8
+ jz .Lld_partial_2
+
+ mov %rcx, %r8
+ and $0x1C, %r8
+ add %rsi, %r8
+ shl $16, %r9
+ mov (%r8), %r9w
+
+.Lld_partial_2:
+ mov %rcx, %r8
+ and $0x4, %r8
+ jz .Lld_partial_4
+
+ mov %rcx, %r8
+ and $0x18, %r8
+ add %rsi, %r8
+ shl $32, %r9
+ mov (%r8), %r8d
+ xor %r8, %r9
+
+.Lld_partial_4:
+ movq %r9, MSG_LO
+
+ mov %rcx, %r8
+ and $0x8, %r8
+ jz .Lld_partial_8
+
+ mov %rcx, %r8
+ and $0x10, %r8
+ add %rsi, %r8
+ pslldq $8, MSG_LO
+ movq (%r8), T0_LO
+ pxor T0_LO, MSG_LO
+
+.Lld_partial_8:
+ mov %rcx, %r8
+ and $0x10, %r8
+ jz .Lld_partial_16
+
+ movdqa MSG_LO, MSG_HI
+ movdqu (%rsi), MSG_LO
+
+.Lld_partial_16:
+ ret
+ENDPROC(__load_partial)
+
+/*
+ * __store_partial: internal ABI
+ * input:
+ * %rdx - dst
+ * %rcx - bytes
+ * output:
+ * T0 - message block
+ * changed:
+ * %r8
+ * %r9
+ * %r10
+ */
+__store_partial:
+ mov %rcx, %r8
+ mov %rdx, %r9
+
+ cmp $16, %r8
+ jl .Lst_partial_16
+
+ movdqu T0_LO, (%r9)
+ movdqa T0_HI, T0_LO
+
+ sub $16, %r8
+ add $16, %r9
+
+.Lst_partial_16:
+ movq T0_LO, %r10
+
+ cmp $8, %r8
+ jl .Lst_partial_8
+
+ mov %r10, (%r9)
+ psrldq $8, T0_LO
+ movq T0_LO, %r10
+
+ sub $8, %r8
+ add $8, %r9
+
+.Lst_partial_8:
+ cmp $4, %r8
+ jl .Lst_partial_4
+
+ mov %r10d, (%r9)
+ shr $32, %r10
+
+ sub $4, %r8
+ add $4, %r9
+
+.Lst_partial_4:
+ cmp $2, %r8
+ jl .Lst_partial_2
+
+ mov %r10w, (%r9)
+ shr $16, %r10
+
+ sub $2, %r8
+ add $2, %r9
+
+.Lst_partial_2:
+ cmp $1, %r8
+ jl .Lst_partial_1
+
+ mov %r10b, (%r9)
+
+.Lst_partial_1:
+ ret
+ENDPROC(__store_partial)
+
+/*
+ * void crypto_morus1280_sse2_init(void *state, const void *key,
+ * const void *iv);
+ */
+ENTRY(crypto_morus1280_sse2_init)
+ FRAME_BEGIN
+
+ /* load IV: */
+ pxor STATE0_HI, STATE0_HI
+ movdqu (%rdx), STATE0_LO
+ /* load key: */
+ movdqu 0(%rsi), KEY_LO
+ movdqu 16(%rsi), KEY_HI
+ movdqa KEY_LO, STATE1_LO
+ movdqa KEY_HI, STATE1_HI
+ /* load all ones: */
+ pcmpeqd STATE2_LO, STATE2_LO
+ pcmpeqd STATE2_HI, STATE2_HI
+ /* load all zeros: */
+ pxor STATE3_LO, STATE3_LO
+ pxor STATE3_HI, STATE3_HI
+ /* load the constant: */
+ movdqa .Lmorus640_const_0, STATE4_LO
+ movdqa .Lmorus640_const_1, STATE4_HI
+
+ /* update 16 times with zero: */
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+ call __morus1280_update_zero
+
+ /* xor-in the key again after updates: */
+ pxor KEY_LO, STATE1_LO
+ pxor KEY_HI, STATE1_HI
+
+ /* store the state: */
+ movdqu STATE0_LO, (0 * 16)(%rdi)
+ movdqu STATE0_HI, (1 * 16)(%rdi)
+ movdqu STATE1_LO, (2 * 16)(%rdi)
+ movdqu STATE1_HI, (3 * 16)(%rdi)
+ movdqu STATE2_LO, (4 * 16)(%rdi)
+ movdqu STATE2_HI, (5 * 16)(%rdi)
+ movdqu STATE3_LO, (6 * 16)(%rdi)
+ movdqu STATE3_HI, (7 * 16)(%rdi)
+ movdqu STATE4_LO, (8 * 16)(%rdi)
+ movdqu STATE4_HI, (9 * 16)(%rdi)
+
+ FRAME_END
+ ret
+ENDPROC(crypto_morus1280_sse2_init)
+
+/*
+ * void crypto_morus1280_sse2_ad(void *state, const void *data,
+ * unsigned int length);
+ */
+ENTRY(crypto_morus1280_sse2_ad)
+ FRAME_BEGIN
+
+ cmp $32, %rdx
+ jb .Lad_out
+
+ /* load the state: */
+ movdqu (0 * 16)(%rdi), STATE0_LO
+ movdqu (1 * 16)(%rdi), STATE0_HI
+ movdqu (2 * 16)(%rdi), STATE1_LO
+ movdqu (3 * 16)(%rdi), STATE1_HI
+ movdqu (4 * 16)(%rdi), STATE2_LO
+ movdqu (5 * 16)(%rdi), STATE2_HI
+ movdqu (6 * 16)(%rdi), STATE3_LO
+ movdqu (7 * 16)(%rdi), STATE3_HI
+ movdqu (8 * 16)(%rdi), STATE4_LO
+ movdqu (9 * 16)(%rdi), STATE4_HI
+
+ mov %rsi, %r8
+ and $0xF, %r8
+ jnz .Lad_u_loop
+
+.align 4
+.Lad_a_loop:
+ movdqa 0(%rsi), MSG_LO
+ movdqa 16(%rsi), MSG_HI
+ call __morus1280_update
+ sub $32, %rdx
+ add $32, %rsi
+ cmp $32, %rdx
+ jge .Lad_a_loop
+
+ jmp .Lad_cont
+.align 4
+.Lad_u_loop:
+ movdqu 0(%rsi), MSG_LO
+ movdqu 16(%rsi), MSG_HI
+ call __morus1280_update
+ sub $32, %rdx
+ add $32, %rsi
+ cmp $32, %rdx
+ jge .Lad_u_loop
+
+.Lad_cont:
+ /* store the state: */
+ movdqu STATE0_LO, (0 * 16)(%rdi)
+ movdqu STATE0_HI, (1 * 16)(%rdi)
+ movdqu STATE1_LO, (2 * 16)(%rdi)
+ movdqu STATE1_HI, (3 * 16)(%rdi)
+ movdqu STATE2_LO, (4 * 16)(%rdi)
+ movdqu STATE2_HI, (5 * 16)(%rdi)
+ movdqu STATE3_LO, (6 * 16)(%rdi)
+ movdqu STATE3_HI, (7 * 16)(%rdi)
+ movdqu STATE4_LO, (8 * 16)(%rdi)
+ movdqu STATE4_HI, (9 * 16)(%rdi)
+
+.Lad_out:
+ FRAME_END
+ ret
+ENDPROC(crypto_morus1280_sse2_ad)
+
+/*
+ * void crypto_morus1280_sse2_enc(void *state, const void *src, void *dst,
+ * unsigned int length);
+ */
+ENTRY(crypto_morus1280_sse2_enc)
+ FRAME_BEGIN
+
+ cmp $32, %rcx
+ jb .Lenc_out
+
+ /* load the state: */
+ movdqu (0 * 16)(%rdi), STATE0_LO
+ movdqu (1 * 16)(%rdi), STATE0_HI
+ movdqu (2 * 16)(%rdi), STATE1_LO
+ movdqu (3 * 16)(%rdi), STATE1_HI
+ movdqu (4 * 16)(%rdi), STATE2_LO
+ movdqu (5 * 16)(%rdi), STATE2_HI
+ movdqu (6 * 16)(%rdi), STATE3_LO
+ movdqu (7 * 16)(%rdi), STATE3_HI
+ movdqu (8 * 16)(%rdi), STATE4_LO
+ movdqu (9 * 16)(%rdi), STATE4_HI
+
+ mov %rsi, %r8
+ or %rdx, %r8
+ and $0xF, %r8
+ jnz .Lenc_u_loop
+
+.align 4
+.Lenc_a_loop:
+ movdqa 0(%rsi), MSG_LO
+ movdqa 16(%rsi), MSG_HI
+ movdqa STATE1_LO, T1_LO
+ movdqa STATE1_HI, T1_HI
+ rol3 T1_HI, T1_LO
+ movdqa MSG_LO, T0_LO
+ movdqa MSG_HI, T0_HI
+ pxor T1_LO, T0_LO
+ pxor T1_HI, T0_HI
+ pxor STATE0_LO, T0_LO
+ pxor STATE0_HI, T0_HI
+ movdqa STATE2_LO, T1_LO
+ movdqa STATE2_HI, T1_HI
+ pand STATE3_LO, T1_LO
+ pand STATE3_HI, T1_HI
+ pxor T1_LO, T0_LO
+ pxor T1_HI, T0_HI
+ movdqa T0_LO, 0(%rdx)
+ movdqa T0_HI, 16(%rdx)
+
+ call __morus1280_update
+ sub $32, %rcx
+ add $32, %rsi
+ add $32, %rdx
+ cmp $32, %rcx
+ jge .Lenc_a_loop
+
+ jmp .Lenc_cont
+.align 4
+.Lenc_u_loop:
+ movdqu 0(%rsi), MSG_LO
+ movdqu 16(%rsi), MSG_HI
+ movdqa STATE1_LO, T1_LO
+ movdqa STATE1_HI, T1_HI
+ rol3 T1_HI, T1_LO
+ movdqa MSG_LO, T0_LO
+ movdqa MSG_HI, T0_HI
+ pxor T1_LO, T0_LO
+ pxor T1_HI, T0_HI
+ pxor STATE0_LO, T0_LO
+ pxor STATE0_HI, T0_HI
+ movdqa STATE2_LO, T1_LO
+ movdqa STATE2_HI, T1_HI
+ pand STATE3_LO, T1_LO
+ pand STATE3_HI, T1_HI
+ pxor T1_LO, T0_LO
+ pxor T1_HI, T0_HI
+ movdqu T0_LO, 0(%rdx)
+ movdqu T0_HI, 16(%rdx)
+
+ call __morus1280_update
+ sub $32, %rcx
+ add $32, %rsi
+ add $32, %rdx
+ cmp $32, %rcx
+ jge .Lenc_u_loop
+
+.Lenc_cont:
+ /* store the state: */
+ movdqu STATE0_LO, (0 * 16)(%rdi)
+ movdqu STATE0_HI, (1 * 16)(%rdi)
+ movdqu STATE1_LO, (2 * 16)(%rdi)
+ movdqu STATE1_HI, (3 * 16)(%rdi)
+ movdqu STATE2_LO, (4 * 16)(%rdi)
+ movdqu STATE2_HI, (5 * 16)(%rdi)
+ movdqu STATE3_LO, (6 * 16)(%rdi)
+ movdqu STATE3_HI, (7 * 16)(%rdi)
+ movdqu STATE4_LO, (8 * 16)(%rdi)
+ movdqu STATE4_HI, (9 * 16)(%rdi)
+
+.Lenc_out:
+ FRAME_END
+ ret
+ENDPROC(crypto_morus1280_sse2_enc)
+
+/*
+ * void crypto_morus1280_sse2_enc_tail(void *state, const void *src, void *dst,
+ * unsigned int length);
+ */
+ENTRY(crypto_morus1280_sse2_enc_tail)
+ FRAME_BEGIN
+
+ /* load the state: */
+ movdqu (0 * 16)(%rdi), STATE0_LO
+ movdqu (1 * 16)(%rdi), STATE0_HI
+ movdqu (2 * 16)(%rdi), STATE1_LO
+ movdqu (3 * 16)(%rdi), STATE1_HI
+ movdqu (4 * 16)(%rdi), STATE2_LO
+ movdqu (5 * 16)(%rdi), STATE2_HI
+ movdqu (6 * 16)(%rdi), STATE3_LO
+ movdqu (7 * 16)(%rdi), STATE3_HI
+ movdqu (8 * 16)(%rdi), STATE4_LO
+ movdqu (9 * 16)(%rdi), STATE4_HI
+
+ /* encrypt message: */
+ call __load_partial
+
+ movdqa STATE1_LO, T1_LO
+ movdqa STATE1_HI, T1_HI
+ rol3 T1_HI, T1_LO
+ movdqa MSG_LO, T0_LO
+ movdqa MSG_HI, T0_HI
+ pxor T1_LO, T0_LO
+ pxor T1_HI, T0_HI
+ pxor STATE0_LO, T0_LO
+ pxor STATE0_HI, T0_HI
+ movdqa STATE2_LO, T1_LO
+ movdqa STATE2_HI, T1_HI
+ pand STATE3_LO, T1_LO
+ pand STATE3_HI, T1_HI
+ pxor T1_LO, T0_LO
+ pxor T1_HI, T0_HI
+
+ call __store_partial
+
+ call __morus1280_update
+
+ /* store the state: */
+ movdqu STATE0_LO, (0 * 16)(%rdi)
+ movdqu STATE0_HI, (1 * 16)(%rdi)
+ movdqu STATE1_LO, (2 * 16)(%rdi)
+ movdqu STATE1_HI, (3 * 16)(%rdi)
+ movdqu STATE2_LO, (4 * 16)(%rdi)
+ movdqu STATE2_HI, (5 * 16)(%rdi)
+ movdqu STATE3_LO, (6 * 16)(%rdi)
+ movdqu STATE3_HI, (7 * 16)(%rdi)
+ movdqu STATE4_LO, (8 * 16)(%rdi)
+ movdqu STATE4_HI, (9 * 16)(%rdi)
+
+ FRAME_END
+ENDPROC(crypto_morus1280_sse2_enc_tail)
+
+/*
+ * void crypto_morus1280_sse2_dec(void *state, const void *src, void *dst,
+ * unsigned int length);
+ */
+ENTRY(crypto_morus1280_sse2_dec)
+ FRAME_BEGIN
+
+ cmp $32, %rcx
+ jb .Ldec_out
+
+ /* load the state: */
+ movdqu (0 * 16)(%rdi), STATE0_LO
+ movdqu (1 * 16)(%rdi), STATE0_HI
+ movdqu (2 * 16)(%rdi), STATE1_LO
+ movdqu (3 * 16)(%rdi), STATE1_HI
+ movdqu (4 * 16)(%rdi), STATE2_LO
+ movdqu (5 * 16)(%rdi), STATE2_HI
+ movdqu (6 * 16)(%rdi), STATE3_LO
+ movdqu (7 * 16)(%rdi), STATE3_HI
+ movdqu (8 * 16)(%rdi), STATE4_LO
+ movdqu (9 * 16)(%rdi), STATE4_HI
+
+ mov %rsi, %r8
+ or %rdx, %r8
+ and $0xF, %r8
+ jnz .Ldec_u_loop
+
+.align 4
+.Ldec_a_loop:
+ movdqa 0(%rsi), MSG_LO
+ movdqa 16(%rsi), MSG_HI
+ pxor STATE0_LO, MSG_LO
+ pxor STATE0_HI, MSG_HI
+ movdqa STATE1_LO, T1_LO
+ movdqa STATE1_HI, T1_HI
+ rol3 T1_HI, T1_LO
+ pxor T1_LO, MSG_LO
+ pxor T1_HI, MSG_HI
+ movdqa STATE2_LO, T1_LO
+ movdqa STATE2_HI, T1_HI
+ pand STATE3_LO, T1_LO
+ pand STATE3_HI, T1_HI
+ pxor T1_LO, MSG_LO
+ pxor T1_HI, MSG_HI
+ movdqa MSG_LO, 0(%rdx)
+ movdqa MSG_HI, 16(%rdx)
+
+ call __morus1280_update
+ sub $32, %rcx
+ add $32, %rsi
+ add $32, %rdx
+ cmp $32, %rcx
+ jge .Ldec_a_loop
+
+ jmp .Ldec_cont
+.align 4
+.Ldec_u_loop:
+ movdqu 0(%rsi), MSG_LO
+ movdqu 16(%rsi), MSG_HI
+ pxor STATE0_LO, MSG_LO
+ pxor STATE0_HI, MSG_HI
+ movdqa STATE1_LO, T1_LO
+ movdqa STATE1_HI, T1_HI
+ rol3 T1_HI, T1_LO
+ pxor T1_LO, MSG_LO
+ pxor T1_HI, MSG_HI
+ movdqa STATE2_LO, T1_LO
+ movdqa STATE2_HI, T1_HI
+ pand STATE3_LO, T1_LO
+ pand STATE3_HI, T1_HI
+ pxor T1_LO, MSG_LO
+ pxor T1_HI, MSG_HI
+ movdqu MSG_LO, 0(%rdx)
+ movdqu MSG_HI, 16(%rdx)
+
+ call __morus1280_update
+ sub $32, %rcx
+ add $32, %rsi
+ add $32, %rdx
+ cmp $32, %rcx
+ jge .Ldec_u_loop
+
+.Ldec_cont:
+ /* store the state: */
+ movdqu STATE0_LO, (0 * 16)(%rdi)
+ movdqu STATE0_HI, (1 * 16)(%rdi)
+ movdqu STATE1_LO, (2 * 16)(%rdi)
+ movdqu STATE1_HI, (3 * 16)(%rdi)
+ movdqu STATE2_LO, (4 * 16)(%rdi)
+ movdqu STATE2_HI, (5 * 16)(%rdi)
+ movdqu STATE3_LO, (6 * 16)(%rdi)
+ movdqu STATE3_HI, (7 * 16)(%rdi)
+ movdqu STATE4_LO, (8 * 16)(%rdi)
+ movdqu STATE4_HI, (9 * 16)(%rdi)
+
+.Ldec_out:
+ FRAME_END
+ ret
+ENDPROC(crypto_morus1280_sse2_dec)
+
+/*
+ * void crypto_morus1280_sse2_dec_tail(void *state, const void *src, void *dst,
+ * unsigned int length);
+ */
+ENTRY(crypto_morus1280_sse2_dec_tail)
+ FRAME_BEGIN
+
+ /* load the state: */
+ movdqu (0 * 16)(%rdi), STATE0_LO
+ movdqu (1 * 16)(%rdi), STATE0_HI
+ movdqu (2 * 16)(%rdi), STATE1_LO
+ movdqu (3 * 16)(%rdi), STATE1_HI
+ movdqu (4 * 16)(%rdi), STATE2_LO
+ movdqu (5 * 16)(%rdi), STATE2_HI
+ movdqu (6 * 16)(%rdi), STATE3_LO
+ movdqu (7 * 16)(%rdi), STATE3_HI
+ movdqu (8 * 16)(%rdi), STATE4_LO
+ movdqu (9 * 16)(%rdi), STATE4_HI
+
+ /* decrypt message: */
+ call __load_partial
+
+ pxor STATE0_LO, MSG_LO
+ pxor STATE0_HI, MSG_HI
+ movdqa STATE1_LO, T1_LO
+ movdqa STATE1_HI, T1_HI
+ rol3 T1_HI, T1_LO
+ pxor T1_LO, MSG_LO
+ pxor T1_HI, MSG_HI
+ movdqa STATE2_LO, T1_LO
+ movdqa STATE2_HI, T1_HI
+ pand STATE3_LO, T1_LO
+ pand STATE3_HI, T1_HI
+ pxor T1_LO, MSG_LO
+ pxor T1_HI, MSG_HI
+ movdqa MSG_LO, T0_LO
+ movdqa MSG_HI, T0_HI
+
+ call __store_partial
+
+ /* mask with byte count: */
+ movq %rcx, T0_LO
+ punpcklbw T0_LO, T0_LO
+ punpcklbw T0_LO, T0_LO
+ punpcklbw T0_LO, T0_LO
+ punpcklbw T0_LO, T0_LO
+ movdqa T0_LO, T0_HI
+ movdqa .Lmorus640_counter_0, T1_LO
+ movdqa .Lmorus640_counter_1, T1_HI
+ pcmpgtb T1_LO, T0_LO
+ pcmpgtb T1_HI, T0_HI
+ pand T0_LO, MSG_LO
+ pand T0_HI, MSG_HI
+
+ call __morus1280_update
+
+ /* store the state: */
+ movdqu STATE0_LO, (0 * 16)(%rdi)
+ movdqu STATE0_HI, (1 * 16)(%rdi)
+ movdqu STATE1_LO, (2 * 16)(%rdi)
+ movdqu STATE1_HI, (3 * 16)(%rdi)
+ movdqu STATE2_LO, (4 * 16)(%rdi)
+ movdqu STATE2_HI, (5 * 16)(%rdi)
+ movdqu STATE3_LO, (6 * 16)(%rdi)
+ movdqu STATE3_HI, (7 * 16)(%rdi)
+ movdqu STATE4_LO, (8 * 16)(%rdi)
+ movdqu STATE4_HI, (9 * 16)(%rdi)
+
+ FRAME_END
+ ret
+ENDPROC(crypto_morus1280_sse2_dec_tail)
+
+/*
+ * void crypto_morus1280_sse2_final(void *state, void *tag_xor,
+ * u64 assoclen, u64 cryptlen);
+ */
+ENTRY(crypto_morus1280_sse2_final)
+ FRAME_BEGIN
+
+ /* load the state: */
+ movdqu (0 * 16)(%rdi), STATE0_LO
+ movdqu (1 * 16)(%rdi), STATE0_HI
+ movdqu (2 * 16)(%rdi), STATE1_LO
+ movdqu (3 * 16)(%rdi), STATE1_HI
+ movdqu (4 * 16)(%rdi), STATE2_LO
+ movdqu (5 * 16)(%rdi), STATE2_HI
+ movdqu (6 * 16)(%rdi), STATE3_LO
+ movdqu (7 * 16)(%rdi), STATE3_HI
+ movdqu (8 * 16)(%rdi), STATE4_LO
+ movdqu (9 * 16)(%rdi), STATE4_HI
+
+ /* xor state[0] into state[4]: */
+ pxor STATE0_LO, STATE4_LO
+ pxor STATE0_HI, STATE4_HI
+
+ /* prepare length block: */
+ movq %rdx, MSG_LO
+ movq %rcx, T0_LO
+ pslldq $8, T0_LO
+ pxor T0_LO, MSG_LO
+ psllq $3, MSG_LO /* multiply by 8 (to get bit count) */
+ pxor MSG_HI, MSG_HI
+
+ /* update state: */
+ call __morus1280_update
+ call __morus1280_update
+ call __morus1280_update
+ call __morus1280_update
+ call __morus1280_update
+ call __morus1280_update
+ call __morus1280_update
+ call __morus1280_update
+ call __morus1280_update
+ call __morus1280_update
+
+ /* xor tag: */
+ movdqu 0(%rsi), MSG_LO
+ movdqu 16(%rsi), MSG_HI
+
+ pxor STATE0_LO, MSG_LO
+ pxor STATE0_HI, MSG_HI
+ movdqa STATE1_LO, T0_LO
+ movdqa STATE1_HI, T0_HI
+ rol3 T0_HI, T0_LO
+ pxor T0_LO, MSG_LO
+ pxor T0_HI, MSG_HI
+ movdqa STATE2_LO, T0_LO
+ movdqa STATE2_HI, T0_HI
+ pand STATE3_LO, T0_LO
+ pand STATE3_HI, T0_HI
+ pxor T0_LO, MSG_LO
+ pxor T0_HI, MSG_HI
+
+ movdqu MSG_LO, 0(%rsi)
+ movdqu MSG_HI, 16(%rsi)
+
+ FRAME_END
+ ret
+ENDPROC(crypto_morus1280_sse2_final)
diff --git a/arch/x86/crypto/morus1280-sse2-glue.c b/arch/x86/crypto/morus1280-sse2-glue.c
new file mode 100644
index 000000000000..839270aa713c
--- /dev/null
+++ b/arch/x86/crypto/morus1280-sse2-glue.c
@@ -0,0 +1,68 @@
+/*
+ * The MORUS-1280 Authenticated-Encryption Algorithm
+ * Glue for SSE2 implementation
+ *
+ * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/internal/aead.h>
+#include <crypto/morus1280_glue.h>
+#include <linux/module.h>
+#include <asm/fpu/api.h>
+#include <asm/cpu_device_id.h>
+
+asmlinkage void crypto_morus1280_sse2_init(void *state, const void *key,
+ const void *iv);
+asmlinkage void crypto_morus1280_sse2_ad(void *state, const void *data,
+ unsigned int length);
+
+asmlinkage void crypto_morus1280_sse2_enc(void *state, const void *src,
+ void *dst, unsigned int length);
+asmlinkage void crypto_morus1280_sse2_dec(void *state, const void *src,
+ void *dst, unsigned int length);
+
+asmlinkage void crypto_morus1280_sse2_enc_tail(void *state, const void *src,
+ void *dst, unsigned int length);
+asmlinkage void crypto_morus1280_sse2_dec_tail(void *state, const void *src,
+ void *dst, unsigned int length);
+
+asmlinkage void crypto_morus1280_sse2_final(void *state, void *tag_xor,
+ u64 assoclen, u64 cryptlen);
+
+MORUS1280_DECLARE_ALGS(sse2, "morus1280-sse2", 350);
+
+static const struct x86_cpu_id sse2_cpu_id[] = {
+ X86_FEATURE_MATCH(X86_FEATURE_XMM2),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, sse2_cpu_id);
+
+static int __init crypto_morus1280_sse2_module_init(void)
+{
+ if (!x86_match_cpu(sse2_cpu_id))
+ return -ENODEV;
+
+ return crypto_register_aeads(crypto_morus1280_sse2_algs,
+ ARRAY_SIZE(crypto_morus1280_sse2_algs));
+}
+
+static void __exit crypto_morus1280_sse2_module_exit(void)
+{
+ crypto_unregister_aeads(crypto_morus1280_sse2_algs,
+ ARRAY_SIZE(crypto_morus1280_sse2_algs));
+}
+
+module_init(crypto_morus1280_sse2_module_init);
+module_exit(crypto_morus1280_sse2_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
+MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm -- SSE2 implementation");
+MODULE_ALIAS_CRYPTO("morus1280");
+MODULE_ALIAS_CRYPTO("morus1280-sse2");
diff --git a/arch/x86/crypto/morus1280_glue.c b/arch/x86/crypto/morus1280_glue.c
new file mode 100644
index 000000000000..0dccdda1eb3a
--- /dev/null
+++ b/arch/x86/crypto/morus1280_glue.c
@@ -0,0 +1,302 @@
+/*
+ * The MORUS-1280 Authenticated-Encryption Algorithm
+ * Common x86 SIMD glue skeleton
+ *
+ * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/cryptd.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/morus1280_glue.h>
+#include <crypto/scatterwalk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <asm/fpu/api.h>
+
+struct morus1280_state {
+ struct morus1280_block s[MORUS_STATE_BLOCKS];
+};
+
+struct morus1280_ops {
+ int (*skcipher_walk_init)(struct skcipher_walk *walk,
+ struct aead_request *req, bool atomic);
+
+ void (*crypt_blocks)(void *state, const void *src, void *dst,
+ unsigned int length);
+ void (*crypt_tail)(void *state, const void *src, void *dst,
+ unsigned int length);
+};
+
+static void crypto_morus1280_glue_process_ad(
+ struct morus1280_state *state,
+ const struct morus1280_glue_ops *ops,
+ struct scatterlist *sg_src, unsigned int assoclen)
+{
+ struct scatter_walk walk;
+ struct morus1280_block buf;
+ unsigned int pos = 0;
+
+ scatterwalk_start(&walk, sg_src);
+ while (assoclen != 0) {
+ unsigned int size = scatterwalk_clamp(&walk, assoclen);
+ unsigned int left = size;
+ void *mapped = scatterwalk_map(&walk);
+ const u8 *src = (const u8 *)mapped;
+
+ if (pos + size >= MORUS1280_BLOCK_SIZE) {
+ if (pos > 0) {
+ unsigned int fill = MORUS1280_BLOCK_SIZE - pos;
+ memcpy(buf.bytes + pos, src, fill);
+ ops->ad(state, buf.bytes, MORUS1280_BLOCK_SIZE);
+ pos = 0;
+ left -= fill;
+ src += fill;
+ }
+
+ ops->ad(state, src, left);
+ src += left & ~(MORUS1280_BLOCK_SIZE - 1);
+ left &= MORUS1280_BLOCK_SIZE - 1;
+ }
+
+ memcpy(buf.bytes + pos, src, left);
+
+ pos += left;
+ assoclen -= size;
+ scatterwalk_unmap(mapped);
+ scatterwalk_advance(&walk, size);
+ scatterwalk_done(&walk, 0, assoclen);
+ }
+
+ if (pos > 0) {
+ memset(buf.bytes + pos, 0, MORUS1280_BLOCK_SIZE - pos);
+ ops->ad(state, buf.bytes, MORUS1280_BLOCK_SIZE);
+ }
+}
+
+static void crypto_morus1280_glue_process_crypt(struct morus1280_state *state,
+ struct morus1280_ops ops,
+ struct aead_request *req)
+{
+ struct skcipher_walk walk;
+ u8 *cursor_src, *cursor_dst;
+ unsigned int chunksize, base;
+
+ ops.skcipher_walk_init(&walk, req, false);
+
+ while (walk.nbytes) {
+ cursor_src = walk.src.virt.addr;
+ cursor_dst = walk.dst.virt.addr;
+ chunksize = walk.nbytes;
+
+ ops.crypt_blocks(state, cursor_src, cursor_dst, chunksize);
+
+ base = chunksize & ~(MORUS1280_BLOCK_SIZE - 1);
+ cursor_src += base;
+ cursor_dst += base;
+ chunksize &= MORUS1280_BLOCK_SIZE - 1;
+
+ if (chunksize > 0)
+ ops.crypt_tail(state, cursor_src, cursor_dst,
+ chunksize);
+
+ skcipher_walk_done(&walk, 0);
+ }
+}
+
+int crypto_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key,
+ unsigned int keylen)
+{
+ struct morus1280_ctx *ctx = crypto_aead_ctx(aead);
+
+ if (keylen == MORUS1280_BLOCK_SIZE) {
+ memcpy(ctx->key.bytes, key, MORUS1280_BLOCK_SIZE);
+ } else if (keylen == MORUS1280_BLOCK_SIZE / 2) {
+ memcpy(ctx->key.bytes, key, keylen);
+ memcpy(ctx->key.bytes + keylen, key, keylen);
+ } else {
+ crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_morus1280_glue_setkey);
+
+int crypto_morus1280_glue_setauthsize(struct crypto_aead *tfm,
+ unsigned int authsize)
+{
+ return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
+}
+EXPORT_SYMBOL_GPL(crypto_morus1280_glue_setauthsize);
+
+static void crypto_morus1280_glue_crypt(struct aead_request *req,
+ struct morus1280_ops ops,
+ unsigned int cryptlen,
+ struct morus1280_block *tag_xor)
+{
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
+ struct morus1280_state state;
+
+ kernel_fpu_begin();
+
+ ctx->ops->init(&state, &ctx->key, req->iv);
+ crypto_morus1280_glue_process_ad(&state, ctx->ops, req->src, req->assoclen);
+ crypto_morus1280_glue_process_crypt(&state, ops, req);
+ ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen);
+
+ kernel_fpu_end();
+}
+
+int crypto_morus1280_glue_encrypt(struct aead_request *req)
+{
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
+ struct morus1280_ops OPS = {
+ .skcipher_walk_init = skcipher_walk_aead_encrypt,
+ .crypt_blocks = ctx->ops->enc,
+ .crypt_tail = ctx->ops->enc_tail,
+ };
+
+ struct morus1280_block tag = {};
+ unsigned int authsize = crypto_aead_authsize(tfm);
+ unsigned int cryptlen = req->cryptlen;
+
+ crypto_morus1280_glue_crypt(req, OPS, cryptlen, &tag);
+
+ scatterwalk_map_and_copy(tag.bytes, req->dst,
+ req->assoclen + cryptlen, authsize, 1);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_morus1280_glue_encrypt);
+
+int crypto_morus1280_glue_decrypt(struct aead_request *req)
+{
+ static const u8 zeros[MORUS1280_BLOCK_SIZE] = {};
+
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
+ struct morus1280_ops OPS = {
+ .skcipher_walk_init = skcipher_walk_aead_decrypt,
+ .crypt_blocks = ctx->ops->dec,
+ .crypt_tail = ctx->ops->dec_tail,
+ };
+
+ struct morus1280_block tag;
+ unsigned int authsize = crypto_aead_authsize(tfm);
+ unsigned int cryptlen = req->cryptlen - authsize;
+
+ scatterwalk_map_and_copy(tag.bytes, req->src,
+ req->assoclen + cryptlen, authsize, 0);
+
+ crypto_morus1280_glue_crypt(req, OPS, cryptlen, &tag);
+
+ return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
+}
+EXPORT_SYMBOL_GPL(crypto_morus1280_glue_decrypt);
+
+void crypto_morus1280_glue_init_ops(struct crypto_aead *aead,
+ const struct morus1280_glue_ops *ops)
+{
+ struct morus1280_ctx *ctx = crypto_aead_ctx(aead);
+ ctx->ops = ops;
+}
+EXPORT_SYMBOL_GPL(crypto_morus1280_glue_init_ops);
+
+int cryptd_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key,
+ unsigned int keylen)
+{
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+ struct cryptd_aead *cryptd_tfm = *ctx;
+
+ return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
+}
+EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_setkey);
+
+int cryptd_morus1280_glue_setauthsize(struct crypto_aead *aead,
+ unsigned int authsize)
+{
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+ struct cryptd_aead *cryptd_tfm = *ctx;
+
+ return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
+}
+EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_setauthsize);
+
+int cryptd_morus1280_glue_encrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+ struct cryptd_aead *cryptd_tfm = *ctx;
+
+ aead = &cryptd_tfm->base;
+ if (irq_fpu_usable() && (!in_atomic() ||
+ !cryptd_aead_queued(cryptd_tfm)))
+ aead = cryptd_aead_child(cryptd_tfm);
+
+ aead_request_set_tfm(req, aead);
+
+ return crypto_aead_encrypt(req);
+}
+EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_encrypt);
+
+int cryptd_morus1280_glue_decrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+ struct cryptd_aead *cryptd_tfm = *ctx;
+
+ aead = &cryptd_tfm->base;
+ if (irq_fpu_usable() && (!in_atomic() ||
+ !cryptd_aead_queued(cryptd_tfm)))
+ aead = cryptd_aead_child(cryptd_tfm);
+
+ aead_request_set_tfm(req, aead);
+
+ return crypto_aead_decrypt(req);
+}
+EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_decrypt);
+
+int cryptd_morus1280_glue_init_tfm(struct crypto_aead *aead)
+{
+ struct cryptd_aead *cryptd_tfm;
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+ const char *name = crypto_aead_alg(aead)->base.cra_driver_name;
+ char internal_name[CRYPTO_MAX_ALG_NAME];
+
+ if (snprintf(internal_name, CRYPTO_MAX_ALG_NAME, "__%s", name)
+ >= CRYPTO_MAX_ALG_NAME)
+ return -ENAMETOOLONG;
+
+ cryptd_tfm = cryptd_alloc_aead(internal_name, CRYPTO_ALG_INTERNAL,
+ CRYPTO_ALG_INTERNAL);
+ if (IS_ERR(cryptd_tfm))
+ return PTR_ERR(cryptd_tfm);
+
+ *ctx = cryptd_tfm;
+ crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_init_tfm);
+
+void cryptd_morus1280_glue_exit_tfm(struct crypto_aead *aead)
+{
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+ cryptd_free_aead(*ctx);
+}
+EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_exit_tfm);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
+MODULE_DESCRIPTION("MORUS-1280 AEAD mode -- glue for x86 optimizations");
diff --git a/arch/x86/crypto/morus640-sse2-asm.S b/arch/x86/crypto/morus640-sse2-asm.S
new file mode 100644
index 000000000000..71c72a0a0862
--- /dev/null
+++ b/arch/x86/crypto/morus640-sse2-asm.S
@@ -0,0 +1,614 @@
+/*
+ * SSE2 implementation of MORUS-640
+ *
+ * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+#define SHUFFLE_MASK(i0, i1, i2, i3) \
+ (i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
+
+#define MASK1 SHUFFLE_MASK(3, 0, 1, 2)
+#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
+#define MASK3 SHUFFLE_MASK(1, 2, 3, 0)
+
+#define STATE0 %xmm0
+#define STATE1 %xmm1
+#define STATE2 %xmm2
+#define STATE3 %xmm3
+#define STATE4 %xmm4
+#define KEY %xmm5
+#define MSG %xmm5
+#define T0 %xmm6
+#define T1 %xmm7
+
+.section .rodata.cst16.morus640_const, "aM", @progbits, 32
+.align 16
+.Lmorus640_const_0:
+ .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
+ .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
+.Lmorus640_const_1:
+ .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
+ .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
+
+.section .rodata.cst16.morus640_counter, "aM", @progbits, 16
+.align 16
+.Lmorus640_counter:
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+ .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+
+.text
+
+.macro morus640_round s0, s1, s2, s3, s4, b, w
+ movdqa \s1, T0
+ pand \s2, T0
+ pxor T0, \s0
+ pxor \s3, \s0
+ movdqa \s0, T0
+ pslld $\b, T0
+ psrld $(32 - \b), \s0
+ pxor T0, \s0
+ pshufd $\w, \s3, \s3
+.endm
+
+/*
+ * __morus640_update: internal ABI
+ * input:
+ * STATE[0-4] - input state
+ * MSG - message block
+ * output:
+ * STATE[0-4] - output state
+ * changed:
+ * T0
+ */
+__morus640_update:
+ morus640_round STATE0, STATE1, STATE2, STATE3, STATE4, 5, MASK1
+ pxor MSG, STATE1
+ morus640_round STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2
+ pxor MSG, STATE2
+ morus640_round STATE2, STATE3, STATE4, STATE0, STATE1, 7, MASK3
+ pxor MSG, STATE3
+ morus640_round STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2
+ pxor MSG, STATE4
+ morus640_round STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1
+ ret
+ENDPROC(__morus640_update)
+
+
+/*
+ * __morus640_update_zero: internal ABI
+ * input:
+ * STATE[0-4] - input state
+ * output:
+ * STATE[0-4] - output state
+ * changed:
+ * T0
+ */
+__morus640_update_zero:
+ morus640_round STATE0, STATE1, STATE2, STATE3, STATE4, 5, MASK1
+ morus640_round STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2
+ morus640_round STATE2, STATE3, STATE4, STATE0, STATE1, 7, MASK3
+ morus640_round STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2
+ morus640_round STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1
+ ret
+ENDPROC(__morus640_update_zero)
+
+/*
+ * __load_partial: internal ABI
+ * input:
+ * %rsi - src
+ * %rcx - bytes
+ * output:
+ * MSG - message block
+ * changed:
+ * T0
+ * %r8
+ * %r9
+ */
+__load_partial:
+ xor %r9, %r9
+ pxor MSG, MSG
+
+ mov %rcx, %r8
+ and $0x1, %r8
+ jz .Lld_partial_1
+
+ mov %rcx, %r8
+ and $0x1E, %r8
+ add %rsi, %r8
+ mov (%r8), %r9b
+
+.Lld_partial_1:
+ mov %rcx, %r8
+ and $0x2, %r8
+ jz .Lld_partial_2
+
+ mov %rcx, %r8
+ and $0x1C, %r8
+ add %rsi, %r8
+ shl $16, %r9
+ mov (%r8), %r9w
+
+.Lld_partial_2:
+ mov %rcx, %r8
+ and $0x4, %r8
+ jz .Lld_partial_4
+
+ mov %rcx, %r8
+ and $0x18, %r8
+ add %rsi, %r8
+ shl $32, %r9
+ mov (%r8), %r8d
+ xor %r8, %r9
+
+.Lld_partial_4:
+ movq %r9, MSG
+
+ mov %rcx, %r8
+ and $0x8, %r8
+ jz .Lld_partial_8
+
+ mov %rcx, %r8
+ and $0x10, %r8
+ add %rsi, %r8
+ pslldq $8, MSG
+ movq (%r8), T0
+ pxor T0, MSG
+
+.Lld_partial_8:
+ ret
+ENDPROC(__load_partial)
+
+/*
+ * __store_partial: internal ABI
+ * input:
+ * %rdx - dst
+ * %rcx - bytes
+ * output:
+ * T0 - message block
+ * changed:
+ * %r8
+ * %r9
+ * %r10
+ */
+__store_partial:
+ mov %rcx, %r8
+ mov %rdx, %r9
+
+ movq T0, %r10
+
+ cmp $8, %r8
+ jl .Lst_partial_8
+
+ mov %r10, (%r9)
+ psrldq $8, T0
+ movq T0, %r10
+
+ sub $8, %r8
+ add $8, %r9
+
+.Lst_partial_8:
+ cmp $4, %r8
+ jl .Lst_partial_4
+
+ mov %r10d, (%r9)
+ shr $32, %r10
+
+ sub $4, %r8
+ add $4, %r9
+
+.Lst_partial_4:
+ cmp $2, %r8
+ jl .Lst_partial_2
+
+ mov %r10w, (%r9)
+ shr $16, %r10
+
+ sub $2, %r8
+ add $2, %r9
+
+.Lst_partial_2:
+ cmp $1, %r8
+ jl .Lst_partial_1
+
+ mov %r10b, (%r9)
+
+.Lst_partial_1:
+ ret
+ENDPROC(__store_partial)
+
+/*
+ * void crypto_morus640_sse2_init(void *state, const void *key, const void *iv);
+ */
+ENTRY(crypto_morus640_sse2_init)
+ FRAME_BEGIN
+
+ /* load IV: */
+ movdqu (%rdx), STATE0
+ /* load key: */
+ movdqu (%rsi), KEY
+ movdqa KEY, STATE1
+ /* load all ones: */
+ pcmpeqd STATE2, STATE2
+ /* load the constants: */
+ movdqa .Lmorus640_const_0, STATE3
+ movdqa .Lmorus640_const_1, STATE4
+
+ /* update 16 times with zero: */
+ call __morus640_update_zero
+ call __morus640_update_zero
+ call __morus640_update_zero
+ call __morus640_update_zero
+ call __morus640_update_zero
+ call __morus640_update_zero
+ call __morus640_update_zero
+ call __morus640_update_zero
+ call __morus640_update_zero
+ call __morus640_update_zero
+ call __morus640_update_zero
+ call __morus640_update_zero
+ call __morus640_update_zero
+ call __morus640_update_zero
+ call __morus640_update_zero
+ call __morus640_update_zero
+
+ /* xor-in the key again after updates: */
+ pxor KEY, STATE1
+
+ /* store the state: */
+ movdqu STATE0, (0 * 16)(%rdi)
+ movdqu STATE1, (1 * 16)(%rdi)
+ movdqu STATE2, (2 * 16)(%rdi)
+ movdqu STATE3, (3 * 16)(%rdi)
+ movdqu STATE4, (4 * 16)(%rdi)
+
+ FRAME_END
+ ret
+ENDPROC(crypto_morus640_sse2_init)
+
+/*
+ * void crypto_morus640_sse2_ad(void *state, const void *data,
+ * unsigned int length);
+ */
+ENTRY(crypto_morus640_sse2_ad)
+ FRAME_BEGIN
+
+ cmp $16, %rdx
+ jb .Lad_out
+
+ /* load the state: */
+ movdqu (0 * 16)(%rdi), STATE0
+ movdqu (1 * 16)(%rdi), STATE1
+ movdqu (2 * 16)(%rdi), STATE2
+ movdqu (3 * 16)(%rdi), STATE3
+ movdqu (4 * 16)(%rdi), STATE4
+
+ mov %rsi, %r8
+ and $0xF, %r8
+ jnz .Lad_u_loop
+
+.align 4
+.Lad_a_loop:
+ movdqa (%rsi), MSG
+ call __morus640_update
+ sub $16, %rdx
+ add $16, %rsi
+ cmp $16, %rdx
+ jge .Lad_a_loop
+
+ jmp .Lad_cont
+.align 4
+.Lad_u_loop:
+ movdqu (%rsi), MSG
+ call __morus640_update
+ sub $16, %rdx
+ add $16, %rsi
+ cmp $16, %rdx
+ jge .Lad_u_loop
+
+.Lad_cont:
+ /* store the state: */
+ movdqu STATE0, (0 * 16)(%rdi)
+ movdqu STATE1, (1 * 16)(%rdi)
+ movdqu STATE2, (2 * 16)(%rdi)
+ movdqu STATE3, (3 * 16)(%rdi)
+ movdqu STATE4, (4 * 16)(%rdi)
+
+.Lad_out:
+ FRAME_END
+ ret
+ENDPROC(crypto_morus640_sse2_ad)
+
+/*
+ * void crypto_morus640_sse2_enc(void *state, const void *src, void *dst,
+ * unsigned int length);
+ */
+ENTRY(crypto_morus640_sse2_enc)
+ FRAME_BEGIN
+
+ cmp $16, %rcx
+ jb .Lenc_out
+
+ /* load the state: */
+ movdqu (0 * 16)(%rdi), STATE0
+ movdqu (1 * 16)(%rdi), STATE1
+ movdqu (2 * 16)(%rdi), STATE2
+ movdqu (3 * 16)(%rdi), STATE3
+ movdqu (4 * 16)(%rdi), STATE4
+
+ mov %rsi, %r8
+ or %rdx, %r8
+ and $0xF, %r8
+ jnz .Lenc_u_loop
+
+.align 4
+.Lenc_a_loop:
+ movdqa (%rsi), MSG
+ movdqa MSG, T0
+ pxor STATE0, T0
+ pshufd $MASK3, STATE1, T1
+ pxor T1, T0
+ movdqa STATE2, T1
+ pand STATE3, T1
+ pxor T1, T0
+ movdqa T0, (%rdx)
+
+ call __morus640_update
+ sub $16, %rcx
+ add $16, %rsi
+ add $16, %rdx
+ cmp $16, %rcx
+ jge .Lenc_a_loop
+
+ jmp .Lenc_cont
+.align 4
+.Lenc_u_loop:
+ movdqu (%rsi), MSG
+ movdqa MSG, T0
+ pxor STATE0, T0
+ pshufd $MASK3, STATE1, T1
+ pxor T1, T0
+ movdqa STATE2, T1
+ pand STATE3, T1
+ pxor T1, T0
+ movdqu T0, (%rdx)
+
+ call __morus640_update
+ sub $16, %rcx
+ add $16, %rsi
+ add $16, %rdx
+ cmp $16, %rcx
+ jge .Lenc_u_loop
+
+.Lenc_cont:
+ /* store the state: */
+ movdqu STATE0, (0 * 16)(%rdi)
+ movdqu STATE1, (1 * 16)(%rdi)
+ movdqu STATE2, (2 * 16)(%rdi)
+ movdqu STATE3, (3 * 16)(%rdi)
+ movdqu STATE4, (4 * 16)(%rdi)
+
+.Lenc_out:
+ FRAME_END
+ ret
+ENDPROC(crypto_morus640_sse2_enc)
+
+/*
+ * void crypto_morus640_sse2_enc_tail(void *state, const void *src, void *dst,
+ * unsigned int length);
+ */
+ENTRY(crypto_morus640_sse2_enc_tail)
+ FRAME_BEGIN
+
+ /* load the state: */
+ movdqu (0 * 16)(%rdi), STATE0
+ movdqu (1 * 16)(%rdi), STATE1
+ movdqu (2 * 16)(%rdi), STATE2
+ movdqu (3 * 16)(%rdi), STATE3
+ movdqu (4 * 16)(%rdi), STATE4
+
+ /* encrypt message: */
+ call __load_partial
+
+ movdqa MSG, T0
+ pxor STATE0, T0
+ pshufd $MASK3, STATE1, T1
+ pxor T1, T0
+ movdqa STATE2, T1
+ pand STATE3, T1
+ pxor T1, T0
+
+ call __store_partial
+
+ call __morus640_update
+
+ /* store the state: */
+ movdqu STATE0, (0 * 16)(%rdi)
+ movdqu STATE1, (1 * 16)(%rdi)
+ movdqu STATE2, (2 * 16)(%rdi)
+ movdqu STATE3, (3 * 16)(%rdi)
+ movdqu STATE4, (4 * 16)(%rdi)
+
+ FRAME_END
+ENDPROC(crypto_morus640_sse2_enc_tail)
+
+/*
+ * void crypto_morus640_sse2_dec(void *state, const void *src, void *dst,
+ * unsigned int length);
+ */
+ENTRY(crypto_morus640_sse2_dec)
+ FRAME_BEGIN
+
+ cmp $16, %rcx
+ jb .Ldec_out
+
+ /* load the state: */
+ movdqu (0 * 16)(%rdi), STATE0
+ movdqu (1 * 16)(%rdi), STATE1
+ movdqu (2 * 16)(%rdi), STATE2
+ movdqu (3 * 16)(%rdi), STATE3
+ movdqu (4 * 16)(%rdi), STATE4
+
+ mov %rsi, %r8
+ or %rdx, %r8
+ and $0xF, %r8
+ jnz .Ldec_u_loop
+
+.align 4
+.Ldec_a_loop:
+ movdqa (%rsi), MSG
+ pxor STATE0, MSG
+ pshufd $MASK3, STATE1, T0
+ pxor T0, MSG
+ movdqa STATE2, T0
+ pand STATE3, T0
+ pxor T0, MSG
+ movdqa MSG, (%rdx)
+
+ call __morus640_update
+ sub $16, %rcx
+ add $16, %rsi
+ add $16, %rdx
+ cmp $16, %rcx
+ jge .Ldec_a_loop
+
+ jmp .Ldec_cont
+.align 4
+.Ldec_u_loop:
+ movdqu (%rsi), MSG
+ pxor STATE0, MSG
+ pshufd $MASK3, STATE1, T0
+ pxor T0, MSG
+ movdqa STATE2, T0
+ pand STATE3, T0
+ pxor T0, MSG
+ movdqu MSG, (%rdx)
+
+ call __morus640_update
+ sub $16, %rcx
+ add $16, %rsi
+ add $16, %rdx
+ cmp $16, %rcx
+ jge .Ldec_u_loop
+
+.Ldec_cont:
+ /* store the state: */
+ movdqu STATE0, (0 * 16)(%rdi)
+ movdqu STATE1, (1 * 16)(%rdi)
+ movdqu STATE2, (2 * 16)(%rdi)
+ movdqu STATE3, (3 * 16)(%rdi)
+ movdqu STATE4, (4 * 16)(%rdi)
+
+.Ldec_out:
+ FRAME_END
+ ret
+ENDPROC(crypto_morus640_sse2_dec)
+
+/*
+ * void crypto_morus640_sse2_dec_tail(void *state, const void *src, void *dst,
+ * unsigned int length);
+ */
+ENTRY(crypto_morus640_sse2_dec_tail)
+ FRAME_BEGIN
+
+ /* load the state: */
+ movdqu (0 * 16)(%rdi), STATE0
+ movdqu (1 * 16)(%rdi), STATE1
+ movdqu (2 * 16)(%rdi), STATE2
+ movdqu (3 * 16)(%rdi), STATE3
+ movdqu (4 * 16)(%rdi), STATE4
+
+ /* decrypt message: */
+ call __load_partial
+
+ pxor STATE0, MSG
+ pshufd $MASK3, STATE1, T0
+ pxor T0, MSG
+ movdqa STATE2, T0
+ pand STATE3, T0
+ pxor T0, MSG
+ movdqa MSG, T0
+
+ call __store_partial
+
+ /* mask with byte count: */
+ movq %rcx, T0
+ punpcklbw T0, T0
+ punpcklbw T0, T0
+ punpcklbw T0, T0
+ punpcklbw T0, T0
+ movdqa .Lmorus640_counter, T1
+ pcmpgtb T1, T0
+ pand T0, MSG
+
+ call __morus640_update
+
+ /* store the state: */
+ movdqu STATE0, (0 * 16)(%rdi)
+ movdqu STATE1, (1 * 16)(%rdi)
+ movdqu STATE2, (2 * 16)(%rdi)
+ movdqu STATE3, (3 * 16)(%rdi)
+ movdqu STATE4, (4 * 16)(%rdi)
+
+ FRAME_END
+ ret
+ENDPROC(crypto_morus640_sse2_dec_tail)
+
+/*
+ * void crypto_morus640_sse2_final(void *state, void *tag_xor,
+ * u64 assoclen, u64 cryptlen);
+ */
+ENTRY(crypto_morus640_sse2_final)
+ FRAME_BEGIN
+
+ /* load the state: */
+ movdqu (0 * 16)(%rdi), STATE0
+ movdqu (1 * 16)(%rdi), STATE1
+ movdqu (2 * 16)(%rdi), STATE2
+ movdqu (3 * 16)(%rdi), STATE3
+ movdqu (4 * 16)(%rdi), STATE4
+
+ /* xor state[0] into state[4]: */
+ pxor STATE0, STATE4
+
+ /* prepare length block: */
+ movq %rdx, MSG
+ movq %rcx, T0
+ pslldq $8, T0
+ pxor T0, MSG
+ psllq $3, MSG /* multiply by 8 (to get bit count) */
+
+ /* update state: */
+ call __morus640_update
+ call __morus640_update
+ call __morus640_update
+ call __morus640_update
+ call __morus640_update
+ call __morus640_update
+ call __morus640_update
+ call __morus640_update
+ call __morus640_update
+ call __morus640_update
+
+ /* xor tag: */
+ movdqu (%rsi), MSG
+
+ pxor STATE0, MSG
+ pshufd $MASK3, STATE1, T0
+ pxor T0, MSG
+ movdqa STATE2, T0
+ pand STATE3, T0
+ pxor T0, MSG
+
+ movdqu MSG, (%rsi)
+
+ FRAME_END
+ ret
+ENDPROC(crypto_morus640_sse2_final)
diff --git a/arch/x86/crypto/morus640-sse2-glue.c b/arch/x86/crypto/morus640-sse2-glue.c
new file mode 100644
index 000000000000..26b47e2db8d2
--- /dev/null
+++ b/arch/x86/crypto/morus640-sse2-glue.c
@@ -0,0 +1,68 @@
+/*
+ * The MORUS-640 Authenticated-Encryption Algorithm
+ * Glue for SSE2 implementation
+ *
+ * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/internal/aead.h>
+#include <crypto/morus640_glue.h>
+#include <linux/module.h>
+#include <asm/fpu/api.h>
+#include <asm/cpu_device_id.h>
+
+asmlinkage void crypto_morus640_sse2_init(void *state, const void *key,
+ const void *iv);
+asmlinkage void crypto_morus640_sse2_ad(void *state, const void *data,
+ unsigned int length);
+
+asmlinkage void crypto_morus640_sse2_enc(void *state, const void *src,
+ void *dst, unsigned int length);
+asmlinkage void crypto_morus640_sse2_dec(void *state, const void *src,
+ void *dst, unsigned int length);
+
+asmlinkage void crypto_morus640_sse2_enc_tail(void *state, const void *src,
+ void *dst, unsigned int length);
+asmlinkage void crypto_morus640_sse2_dec_tail(void *state, const void *src,
+ void *dst, unsigned int length);
+
+asmlinkage void crypto_morus640_sse2_final(void *state, void *tag_xor,
+ u64 assoclen, u64 cryptlen);
+
+MORUS640_DECLARE_ALGS(sse2, "morus640-sse2", 400);
+
+static const struct x86_cpu_id sse2_cpu_id[] = {
+ X86_FEATURE_MATCH(X86_FEATURE_XMM2),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, sse2_cpu_id);
+
+static int __init crypto_morus640_sse2_module_init(void)
+{
+ if (!x86_match_cpu(sse2_cpu_id))
+ return -ENODEV;
+
+ return crypto_register_aeads(crypto_morus640_sse2_algs,
+ ARRAY_SIZE(crypto_morus640_sse2_algs));
+}
+
+static void __exit crypto_morus640_sse2_module_exit(void)
+{
+ crypto_unregister_aeads(crypto_morus640_sse2_algs,
+ ARRAY_SIZE(crypto_morus640_sse2_algs));
+}
+
+module_init(crypto_morus640_sse2_module_init);
+module_exit(crypto_morus640_sse2_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
+MODULE_DESCRIPTION("MORUS-640 AEAD algorithm -- SSE2 implementation");
+MODULE_ALIAS_CRYPTO("morus640");
+MODULE_ALIAS_CRYPTO("morus640-sse2");
diff --git a/arch/x86/crypto/morus640_glue.c b/arch/x86/crypto/morus640_glue.c
new file mode 100644
index 000000000000..7b58fe4d9bd1
--- /dev/null
+++ b/arch/x86/crypto/morus640_glue.c
@@ -0,0 +1,298 @@
+/*
+ * The MORUS-640 Authenticated-Encryption Algorithm
+ * Common x86 SIMD glue skeleton
+ *
+ * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/cryptd.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/morus640_glue.h>
+#include <crypto/scatterwalk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <asm/fpu/api.h>
+
+struct morus640_state {
+ struct morus640_block s[MORUS_STATE_BLOCKS];
+};
+
+struct morus640_ops {
+ int (*skcipher_walk_init)(struct skcipher_walk *walk,
+ struct aead_request *req, bool atomic);
+
+ void (*crypt_blocks)(void *state, const void *src, void *dst,
+ unsigned int length);
+ void (*crypt_tail)(void *state, const void *src, void *dst,
+ unsigned int length);
+};
+
+static void crypto_morus640_glue_process_ad(
+ struct morus640_state *state,
+ const struct morus640_glue_ops *ops,
+ struct scatterlist *sg_src, unsigned int assoclen)
+{
+ struct scatter_walk walk;
+ struct morus640_block buf;
+ unsigned int pos = 0;
+
+ scatterwalk_start(&walk, sg_src);
+ while (assoclen != 0) {
+ unsigned int size = scatterwalk_clamp(&walk, assoclen);
+ unsigned int left = size;
+ void *mapped = scatterwalk_map(&walk);
+ const u8 *src = (const u8 *)mapped;
+
+ if (pos + size >= MORUS640_BLOCK_SIZE) {
+ if (pos > 0) {
+ unsigned int fill = MORUS640_BLOCK_SIZE - pos;
+ memcpy(buf.bytes + pos, src, fill);
+ ops->ad(state, buf.bytes, MORUS640_BLOCK_SIZE);
+ pos = 0;
+ left -= fill;
+ src += fill;
+ }
+
+ ops->ad(state, src, left);
+ src += left & ~(MORUS640_BLOCK_SIZE - 1);
+ left &= MORUS640_BLOCK_SIZE - 1;
+ }
+
+ memcpy(buf.bytes + pos, src, left);
+
+ pos += left;
+ assoclen -= size;
+ scatterwalk_unmap(mapped);
+ scatterwalk_advance(&walk, size);
+ scatterwalk_done(&walk, 0, assoclen);
+ }
+
+ if (pos > 0) {
+ memset(buf.bytes + pos, 0, MORUS640_BLOCK_SIZE - pos);
+ ops->ad(state, buf.bytes, MORUS640_BLOCK_SIZE);
+ }
+}
+
+static void crypto_morus640_glue_process_crypt(struct morus640_state *state,
+ struct morus640_ops ops,
+ struct aead_request *req)
+{
+ struct skcipher_walk walk;
+ u8 *cursor_src, *cursor_dst;
+ unsigned int chunksize, base;
+
+ ops.skcipher_walk_init(&walk, req, false);
+
+ while (walk.nbytes) {
+ cursor_src = walk.src.virt.addr;
+ cursor_dst = walk.dst.virt.addr;
+ chunksize = walk.nbytes;
+
+ ops.crypt_blocks(state, cursor_src, cursor_dst, chunksize);
+
+ base = chunksize & ~(MORUS640_BLOCK_SIZE - 1);
+ cursor_src += base;
+ cursor_dst += base;
+ chunksize &= MORUS640_BLOCK_SIZE - 1;
+
+ if (chunksize > 0)
+ ops.crypt_tail(state, cursor_src, cursor_dst,
+ chunksize);
+
+ skcipher_walk_done(&walk, 0);
+ }
+}
+
+int crypto_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key,
+ unsigned int keylen)
+{
+ struct morus640_ctx *ctx = crypto_aead_ctx(aead);
+
+ if (keylen != MORUS640_BLOCK_SIZE) {
+ crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+
+ memcpy(ctx->key.bytes, key, MORUS640_BLOCK_SIZE);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_morus640_glue_setkey);
+
+int crypto_morus640_glue_setauthsize(struct crypto_aead *tfm,
+ unsigned int authsize)
+{
+ return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
+}
+EXPORT_SYMBOL_GPL(crypto_morus640_glue_setauthsize);
+
+static void crypto_morus640_glue_crypt(struct aead_request *req,
+ struct morus640_ops ops,
+ unsigned int cryptlen,
+ struct morus640_block *tag_xor)
+{
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
+ struct morus640_state state;
+
+ kernel_fpu_begin();
+
+ ctx->ops->init(&state, &ctx->key, req->iv);
+ crypto_morus640_glue_process_ad(&state, ctx->ops, req->src, req->assoclen);
+ crypto_morus640_glue_process_crypt(&state, ops, req);
+ ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen);
+
+ kernel_fpu_end();
+}
+
+int crypto_morus640_glue_encrypt(struct aead_request *req)
+{
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
+ struct morus640_ops OPS = {
+ .skcipher_walk_init = skcipher_walk_aead_encrypt,
+ .crypt_blocks = ctx->ops->enc,
+ .crypt_tail = ctx->ops->enc_tail,
+ };
+
+ struct morus640_block tag = {};
+ unsigned int authsize = crypto_aead_authsize(tfm);
+ unsigned int cryptlen = req->cryptlen;
+
+ crypto_morus640_glue_crypt(req, OPS, cryptlen, &tag);
+
+ scatterwalk_map_and_copy(tag.bytes, req->dst,
+ req->assoclen + cryptlen, authsize, 1);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_morus640_glue_encrypt);
+
+int crypto_morus640_glue_decrypt(struct aead_request *req)
+{
+ static const u8 zeros[MORUS640_BLOCK_SIZE] = {};
+
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
+ struct morus640_ops OPS = {
+ .skcipher_walk_init = skcipher_walk_aead_decrypt,
+ .crypt_blocks = ctx->ops->dec,
+ .crypt_tail = ctx->ops->dec_tail,
+ };
+
+ struct morus640_block tag;
+ unsigned int authsize = crypto_aead_authsize(tfm);
+ unsigned int cryptlen = req->cryptlen - authsize;
+
+ scatterwalk_map_and_copy(tag.bytes, req->src,
+ req->assoclen + cryptlen, authsize, 0);
+
+ crypto_morus640_glue_crypt(req, OPS, cryptlen, &tag);
+
+ return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
+}
+EXPORT_SYMBOL_GPL(crypto_morus640_glue_decrypt);
+
+void crypto_morus640_glue_init_ops(struct crypto_aead *aead,
+ const struct morus640_glue_ops *ops)
+{
+ struct morus640_ctx *ctx = crypto_aead_ctx(aead);
+ ctx->ops = ops;
+}
+EXPORT_SYMBOL_GPL(crypto_morus640_glue_init_ops);
+
+int cryptd_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key,
+ unsigned int keylen)
+{
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+ struct cryptd_aead *cryptd_tfm = *ctx;
+
+ return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
+}
+EXPORT_SYMBOL_GPL(cryptd_morus640_glue_setkey);
+
+int cryptd_morus640_glue_setauthsize(struct crypto_aead *aead,
+ unsigned int authsize)
+{
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+ struct cryptd_aead *cryptd_tfm = *ctx;
+
+ return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
+}
+EXPORT_SYMBOL_GPL(cryptd_morus640_glue_setauthsize);
+
+int cryptd_morus640_glue_encrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+ struct cryptd_aead *cryptd_tfm = *ctx;
+
+ aead = &cryptd_tfm->base;
+ if (irq_fpu_usable() && (!in_atomic() ||
+ !cryptd_aead_queued(cryptd_tfm)))
+ aead = cryptd_aead_child(cryptd_tfm);
+
+ aead_request_set_tfm(req, aead);
+
+ return crypto_aead_encrypt(req);
+}
+EXPORT_SYMBOL_GPL(cryptd_morus640_glue_encrypt);
+
+int cryptd_morus640_glue_decrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+ struct cryptd_aead *cryptd_tfm = *ctx;
+
+ aead = &cryptd_tfm->base;
+ if (irq_fpu_usable() && (!in_atomic() ||
+ !cryptd_aead_queued(cryptd_tfm)))
+ aead = cryptd_aead_child(cryptd_tfm);
+
+ aead_request_set_tfm(req, aead);
+
+ return crypto_aead_decrypt(req);
+}
+EXPORT_SYMBOL_GPL(cryptd_morus640_glue_decrypt);
+
+int cryptd_morus640_glue_init_tfm(struct crypto_aead *aead)
+{
+ struct cryptd_aead *cryptd_tfm;
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+ const char *name = crypto_aead_alg(aead)->base.cra_driver_name;
+ char internal_name[CRYPTO_MAX_ALG_NAME];
+
+ if (snprintf(internal_name, CRYPTO_MAX_ALG_NAME, "__%s", name)
+ >= CRYPTO_MAX_ALG_NAME)
+ return -ENAMETOOLONG;
+
+ cryptd_tfm = cryptd_alloc_aead(internal_name, CRYPTO_ALG_INTERNAL,
+ CRYPTO_ALG_INTERNAL);
+ if (IS_ERR(cryptd_tfm))
+ return PTR_ERR(cryptd_tfm);
+
+ *ctx = cryptd_tfm;
+ crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cryptd_morus640_glue_init_tfm);
+
+void cryptd_morus640_glue_exit_tfm(struct crypto_aead *aead)
+{
+ struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+ cryptd_free_aead(*ctx);
+}
+EXPORT_SYMBOL_GPL(cryptd_morus640_glue_exit_tfm);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
+MODULE_DESCRIPTION("MORUS-640 AEAD mode -- glue for x86 optimizations");
diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S
deleted file mode 100644
index 6014b7b9e52a..000000000000
--- a/arch/x86/crypto/salsa20-i586-asm_32.S
+++ /dev/null
@@ -1,938 +0,0 @@
-# Derived from:
-# salsa20_pm.s version 20051229
-# D. J. Bernstein
-# Public domain.
-
-#include <linux/linkage.h>
-
-.text
-
-# enter salsa20_encrypt_bytes
-ENTRY(salsa20_encrypt_bytes)
- mov %esp,%eax
- and $31,%eax
- add $256,%eax
- sub %eax,%esp
- # eax_stack = eax
- movl %eax,80(%esp)
- # ebx_stack = ebx
- movl %ebx,84(%esp)
- # esi_stack = esi
- movl %esi,88(%esp)
- # edi_stack = edi
- movl %edi,92(%esp)
- # ebp_stack = ebp
- movl %ebp,96(%esp)
- # x = arg1
- movl 4(%esp,%eax),%edx
- # m = arg2
- movl 8(%esp,%eax),%esi
- # out = arg3
- movl 12(%esp,%eax),%edi
- # bytes = arg4
- movl 16(%esp,%eax),%ebx
- # bytes -= 0
- sub $0,%ebx
- # goto done if unsigned<=
- jbe ._done
-._start:
- # in0 = *(uint32 *) (x + 0)
- movl 0(%edx),%eax
- # in1 = *(uint32 *) (x + 4)
- movl 4(%edx),%ecx
- # in2 = *(uint32 *) (x + 8)
- movl 8(%edx),%ebp
- # j0 = in0
- movl %eax,164(%esp)
- # in3 = *(uint32 *) (x + 12)
- movl 12(%edx),%eax
- # j1 = in1
- movl %ecx,168(%esp)
- # in4 = *(uint32 *) (x + 16)
- movl 16(%edx),%ecx
- # j2 = in2
- movl %ebp,172(%esp)
- # in5 = *(uint32 *) (x + 20)
- movl 20(%edx),%ebp
- # j3 = in3
- movl %eax,176(%esp)
- # in6 = *(uint32 *) (x + 24)
- movl 24(%edx),%eax
- # j4 = in4
- movl %ecx,180(%esp)
- # in7 = *(uint32 *) (x + 28)
- movl 28(%edx),%ecx
- # j5 = in5
- movl %ebp,184(%esp)
- # in8 = *(uint32 *) (x + 32)
- movl 32(%edx),%ebp
- # j6 = in6
- movl %eax,188(%esp)
- # in9 = *(uint32 *) (x + 36)
- movl 36(%edx),%eax
- # j7 = in7
- movl %ecx,192(%esp)
- # in10 = *(uint32 *) (x + 40)
- movl 40(%edx),%ecx
- # j8 = in8
- movl %ebp,196(%esp)
- # in11 = *(uint32 *) (x + 44)
- movl 44(%edx),%ebp
- # j9 = in9
- movl %eax,200(%esp)
- # in12 = *(uint32 *) (x + 48)
- movl 48(%edx),%eax
- # j10 = in10
- movl %ecx,204(%esp)
- # in13 = *(uint32 *) (x + 52)
- movl 52(%edx),%ecx
- # j11 = in11
- movl %ebp,208(%esp)
- # in14 = *(uint32 *) (x + 56)
- movl 56(%edx),%ebp
- # j12 = in12
- movl %eax,212(%esp)
- # in15 = *(uint32 *) (x + 60)
- movl 60(%edx),%eax
- # j13 = in13
- movl %ecx,216(%esp)
- # j14 = in14
- movl %ebp,220(%esp)
- # j15 = in15
- movl %eax,224(%esp)
- # x_backup = x
- movl %edx,64(%esp)
-._bytesatleast1:
- # bytes - 64
- cmp $64,%ebx
- # goto nocopy if unsigned>=
- jae ._nocopy
- # ctarget = out
- movl %edi,228(%esp)
- # out = &tmp
- leal 0(%esp),%edi
- # i = bytes
- mov %ebx,%ecx
- # while (i) { *out++ = *m++; --i }
- rep movsb
- # out = &tmp
- leal 0(%esp),%edi
- # m = &tmp
- leal 0(%esp),%esi
-._nocopy:
- # out_backup = out
- movl %edi,72(%esp)
- # m_backup = m
- movl %esi,68(%esp)
- # bytes_backup = bytes
- movl %ebx,76(%esp)
- # in0 = j0
- movl 164(%esp),%eax
- # in1 = j1
- movl 168(%esp),%ecx
- # in2 = j2
- movl 172(%esp),%edx
- # in3 = j3
- movl 176(%esp),%ebx
- # x0 = in0
- movl %eax,100(%esp)
- # x1 = in1
- movl %ecx,104(%esp)
- # x2 = in2
- movl %edx,108(%esp)
- # x3 = in3
- movl %ebx,112(%esp)
- # in4 = j4
- movl 180(%esp),%eax
- # in5 = j5
- movl 184(%esp),%ecx
- # in6 = j6
- movl 188(%esp),%edx
- # in7 = j7
- movl 192(%esp),%ebx
- # x4 = in4
- movl %eax,116(%esp)
- # x5 = in5
- movl %ecx,120(%esp)
- # x6 = in6
- movl %edx,124(%esp)
- # x7 = in7
- movl %ebx,128(%esp)
- # in8 = j8
- movl 196(%esp),%eax
- # in9 = j9
- movl 200(%esp),%ecx
- # in10 = j10
- movl 204(%esp),%edx
- # in11 = j11
- movl 208(%esp),%ebx
- # x8 = in8
- movl %eax,132(%esp)
- # x9 = in9
- movl %ecx,136(%esp)
- # x10 = in10
- movl %edx,140(%esp)
- # x11 = in11
- movl %ebx,144(%esp)
- # in12 = j12
- movl 212(%esp),%eax
- # in13 = j13
- movl 216(%esp),%ecx
- # in14 = j14
- movl 220(%esp),%edx
- # in15 = j15
- movl 224(%esp),%ebx
- # x12 = in12
- movl %eax,148(%esp)
- # x13 = in13
- movl %ecx,152(%esp)
- # x14 = in14
- movl %edx,156(%esp)
- # x15 = in15
- movl %ebx,160(%esp)
- # i = 20
- mov $20,%ebp
- # p = x0
- movl 100(%esp),%eax
- # s = x5
- movl 120(%esp),%ecx
- # t = x10
- movl 140(%esp),%edx
- # w = x15
- movl 160(%esp),%ebx
-._mainloop:
- # x0 = p
- movl %eax,100(%esp)
- # x10 = t
- movl %edx,140(%esp)
- # p += x12
- addl 148(%esp),%eax
- # x5 = s
- movl %ecx,120(%esp)
- # t += x6
- addl 124(%esp),%edx
- # x15 = w
- movl %ebx,160(%esp)
- # r = x1
- movl 104(%esp),%esi
- # r += s
- add %ecx,%esi
- # v = x11
- movl 144(%esp),%edi
- # v += w
- add %ebx,%edi
- # p <<<= 7
- rol $7,%eax
- # p ^= x4
- xorl 116(%esp),%eax
- # t <<<= 7
- rol $7,%edx
- # t ^= x14
- xorl 156(%esp),%edx
- # r <<<= 7
- rol $7,%esi
- # r ^= x9
- xorl 136(%esp),%esi
- # v <<<= 7
- rol $7,%edi
- # v ^= x3
- xorl 112(%esp),%edi
- # x4 = p
- movl %eax,116(%esp)
- # x14 = t
- movl %edx,156(%esp)
- # p += x0
- addl 100(%esp),%eax
- # x9 = r
- movl %esi,136(%esp)
- # t += x10
- addl 140(%esp),%edx
- # x3 = v
- movl %edi,112(%esp)
- # p <<<= 9
- rol $9,%eax
- # p ^= x8
- xorl 132(%esp),%eax
- # t <<<= 9
- rol $9,%edx
- # t ^= x2
- xorl 108(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 9
- rol $9,%ecx
- # s ^= x13
- xorl 152(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 9
- rol $9,%ebx
- # w ^= x7
- xorl 128(%esp),%ebx
- # x8 = p
- movl %eax,132(%esp)
- # x2 = t
- movl %edx,108(%esp)
- # p += x4
- addl 116(%esp),%eax
- # x13 = s
- movl %ecx,152(%esp)
- # t += x14
- addl 156(%esp),%edx
- # x7 = w
- movl %ebx,128(%esp)
- # p <<<= 13
- rol $13,%eax
- # p ^= x12
- xorl 148(%esp),%eax
- # t <<<= 13
- rol $13,%edx
- # t ^= x6
- xorl 124(%esp),%edx
- # r += s
- add %ecx,%esi
- # r <<<= 13
- rol $13,%esi
- # r ^= x1
- xorl 104(%esp),%esi
- # v += w
- add %ebx,%edi
- # v <<<= 13
- rol $13,%edi
- # v ^= x11
- xorl 144(%esp),%edi
- # x12 = p
- movl %eax,148(%esp)
- # x6 = t
- movl %edx,124(%esp)
- # p += x8
- addl 132(%esp),%eax
- # x1 = r
- movl %esi,104(%esp)
- # t += x2
- addl 108(%esp),%edx
- # x11 = v
- movl %edi,144(%esp)
- # p <<<= 18
- rol $18,%eax
- # p ^= x0
- xorl 100(%esp),%eax
- # t <<<= 18
- rol $18,%edx
- # t ^= x10
- xorl 140(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 18
- rol $18,%ecx
- # s ^= x5
- xorl 120(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 18
- rol $18,%ebx
- # w ^= x15
- xorl 160(%esp),%ebx
- # x0 = p
- movl %eax,100(%esp)
- # x10 = t
- movl %edx,140(%esp)
- # p += x3
- addl 112(%esp),%eax
- # p <<<= 7
- rol $7,%eax
- # x5 = s
- movl %ecx,120(%esp)
- # t += x9
- addl 136(%esp),%edx
- # x15 = w
- movl %ebx,160(%esp)
- # r = x4
- movl 116(%esp),%esi
- # r += s
- add %ecx,%esi
- # v = x14
- movl 156(%esp),%edi
- # v += w
- add %ebx,%edi
- # p ^= x1
- xorl 104(%esp),%eax
- # t <<<= 7
- rol $7,%edx
- # t ^= x11
- xorl 144(%esp),%edx
- # r <<<= 7
- rol $7,%esi
- # r ^= x6
- xorl 124(%esp),%esi
- # v <<<= 7
- rol $7,%edi
- # v ^= x12
- xorl 148(%esp),%edi
- # x1 = p
- movl %eax,104(%esp)
- # x11 = t
- movl %edx,144(%esp)
- # p += x0
- addl 100(%esp),%eax
- # x6 = r
- movl %esi,124(%esp)
- # t += x10
- addl 140(%esp),%edx
- # x12 = v
- movl %edi,148(%esp)
- # p <<<= 9
- rol $9,%eax
- # p ^= x2
- xorl 108(%esp),%eax
- # t <<<= 9
- rol $9,%edx
- # t ^= x8
- xorl 132(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 9
- rol $9,%ecx
- # s ^= x7
- xorl 128(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 9
- rol $9,%ebx
- # w ^= x13
- xorl 152(%esp),%ebx
- # x2 = p
- movl %eax,108(%esp)
- # x8 = t
- movl %edx,132(%esp)
- # p += x1
- addl 104(%esp),%eax
- # x7 = s
- movl %ecx,128(%esp)
- # t += x11
- addl 144(%esp),%edx
- # x13 = w
- movl %ebx,152(%esp)
- # p <<<= 13
- rol $13,%eax
- # p ^= x3
- xorl 112(%esp),%eax
- # t <<<= 13
- rol $13,%edx
- # t ^= x9
- xorl 136(%esp),%edx
- # r += s
- add %ecx,%esi
- # r <<<= 13
- rol $13,%esi
- # r ^= x4
- xorl 116(%esp),%esi
- # v += w
- add %ebx,%edi
- # v <<<= 13
- rol $13,%edi
- # v ^= x14
- xorl 156(%esp),%edi
- # x3 = p
- movl %eax,112(%esp)
- # x9 = t
- movl %edx,136(%esp)
- # p += x2
- addl 108(%esp),%eax
- # x4 = r
- movl %esi,116(%esp)
- # t += x8
- addl 132(%esp),%edx
- # x14 = v
- movl %edi,156(%esp)
- # p <<<= 18
- rol $18,%eax
- # p ^= x0
- xorl 100(%esp),%eax
- # t <<<= 18
- rol $18,%edx
- # t ^= x10
- xorl 140(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 18
- rol $18,%ecx
- # s ^= x5
- xorl 120(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 18
- rol $18,%ebx
- # w ^= x15
- xorl 160(%esp),%ebx
- # x0 = p
- movl %eax,100(%esp)
- # x10 = t
- movl %edx,140(%esp)
- # p += x12
- addl 148(%esp),%eax
- # x5 = s
- movl %ecx,120(%esp)
- # t += x6
- addl 124(%esp),%edx
- # x15 = w
- movl %ebx,160(%esp)
- # r = x1
- movl 104(%esp),%esi
- # r += s
- add %ecx,%esi
- # v = x11
- movl 144(%esp),%edi
- # v += w
- add %ebx,%edi
- # p <<<= 7
- rol $7,%eax
- # p ^= x4
- xorl 116(%esp),%eax
- # t <<<= 7
- rol $7,%edx
- # t ^= x14
- xorl 156(%esp),%edx
- # r <<<= 7
- rol $7,%esi
- # r ^= x9
- xorl 136(%esp),%esi
- # v <<<= 7
- rol $7,%edi
- # v ^= x3
- xorl 112(%esp),%edi
- # x4 = p
- movl %eax,116(%esp)
- # x14 = t
- movl %edx,156(%esp)
- # p += x0
- addl 100(%esp),%eax
- # x9 = r
- movl %esi,136(%esp)
- # t += x10
- addl 140(%esp),%edx
- # x3 = v
- movl %edi,112(%esp)
- # p <<<= 9
- rol $9,%eax
- # p ^= x8
- xorl 132(%esp),%eax
- # t <<<= 9
- rol $9,%edx
- # t ^= x2
- xorl 108(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 9
- rol $9,%ecx
- # s ^= x13
- xorl 152(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 9
- rol $9,%ebx
- # w ^= x7
- xorl 128(%esp),%ebx
- # x8 = p
- movl %eax,132(%esp)
- # x2 = t
- movl %edx,108(%esp)
- # p += x4
- addl 116(%esp),%eax
- # x13 = s
- movl %ecx,152(%esp)
- # t += x14
- addl 156(%esp),%edx
- # x7 = w
- movl %ebx,128(%esp)
- # p <<<= 13
- rol $13,%eax
- # p ^= x12
- xorl 148(%esp),%eax
- # t <<<= 13
- rol $13,%edx
- # t ^= x6
- xorl 124(%esp),%edx
- # r += s
- add %ecx,%esi
- # r <<<= 13
- rol $13,%esi
- # r ^= x1
- xorl 104(%esp),%esi
- # v += w
- add %ebx,%edi
- # v <<<= 13
- rol $13,%edi
- # v ^= x11
- xorl 144(%esp),%edi
- # x12 = p
- movl %eax,148(%esp)
- # x6 = t
- movl %edx,124(%esp)
- # p += x8
- addl 132(%esp),%eax
- # x1 = r
- movl %esi,104(%esp)
- # t += x2
- addl 108(%esp),%edx
- # x11 = v
- movl %edi,144(%esp)
- # p <<<= 18
- rol $18,%eax
- # p ^= x0
- xorl 100(%esp),%eax
- # t <<<= 18
- rol $18,%edx
- # t ^= x10
- xorl 140(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 18
- rol $18,%ecx
- # s ^= x5
- xorl 120(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 18
- rol $18,%ebx
- # w ^= x15
- xorl 160(%esp),%ebx
- # x0 = p
- movl %eax,100(%esp)
- # x10 = t
- movl %edx,140(%esp)
- # p += x3
- addl 112(%esp),%eax
- # p <<<= 7
- rol $7,%eax
- # x5 = s
- movl %ecx,120(%esp)
- # t += x9
- addl 136(%esp),%edx
- # x15 = w
- movl %ebx,160(%esp)
- # r = x4
- movl 116(%esp),%esi
- # r += s
- add %ecx,%esi
- # v = x14
- movl 156(%esp),%edi
- # v += w
- add %ebx,%edi
- # p ^= x1
- xorl 104(%esp),%eax
- # t <<<= 7
- rol $7,%edx
- # t ^= x11
- xorl 144(%esp),%edx
- # r <<<= 7
- rol $7,%esi
- # r ^= x6
- xorl 124(%esp),%esi
- # v <<<= 7
- rol $7,%edi
- # v ^= x12
- xorl 148(%esp),%edi
- # x1 = p
- movl %eax,104(%esp)
- # x11 = t
- movl %edx,144(%esp)
- # p += x0
- addl 100(%esp),%eax
- # x6 = r
- movl %esi,124(%esp)
- # t += x10
- addl 140(%esp),%edx
- # x12 = v
- movl %edi,148(%esp)
- # p <<<= 9
- rol $9,%eax
- # p ^= x2
- xorl 108(%esp),%eax
- # t <<<= 9
- rol $9,%edx
- # t ^= x8
- xorl 132(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 9
- rol $9,%ecx
- # s ^= x7
- xorl 128(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 9
- rol $9,%ebx
- # w ^= x13
- xorl 152(%esp),%ebx
- # x2 = p
- movl %eax,108(%esp)
- # x8 = t
- movl %edx,132(%esp)
- # p += x1
- addl 104(%esp),%eax
- # x7 = s
- movl %ecx,128(%esp)
- # t += x11
- addl 144(%esp),%edx
- # x13 = w
- movl %ebx,152(%esp)
- # p <<<= 13
- rol $13,%eax
- # p ^= x3
- xorl 112(%esp),%eax
- # t <<<= 13
- rol $13,%edx
- # t ^= x9
- xorl 136(%esp),%edx
- # r += s
- add %ecx,%esi
- # r <<<= 13
- rol $13,%esi
- # r ^= x4
- xorl 116(%esp),%esi
- # v += w
- add %ebx,%edi
- # v <<<= 13
- rol $13,%edi
- # v ^= x14
- xorl 156(%esp),%edi
- # x3 = p
- movl %eax,112(%esp)
- # x9 = t
- movl %edx,136(%esp)
- # p += x2
- addl 108(%esp),%eax
- # x4 = r
- movl %esi,116(%esp)
- # t += x8
- addl 132(%esp),%edx
- # x14 = v
- movl %edi,156(%esp)
- # p <<<= 18
- rol $18,%eax
- # p ^= x0
- xorl 100(%esp),%eax
- # t <<<= 18
- rol $18,%edx
- # t ^= x10
- xorl 140(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 18
- rol $18,%ecx
- # s ^= x5
- xorl 120(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 18
- rol $18,%ebx
- # w ^= x15
- xorl 160(%esp),%ebx
- # i -= 4
- sub $4,%ebp
- # goto mainloop if unsigned >
- ja ._mainloop
- # x0 = p
- movl %eax,100(%esp)
- # x5 = s
- movl %ecx,120(%esp)
- # x10 = t
- movl %edx,140(%esp)
- # x15 = w
- movl %ebx,160(%esp)
- # out = out_backup
- movl 72(%esp),%edi
- # m = m_backup
- movl 68(%esp),%esi
- # in0 = x0
- movl 100(%esp),%eax
- # in1 = x1
- movl 104(%esp),%ecx
- # in0 += j0
- addl 164(%esp),%eax
- # in1 += j1
- addl 168(%esp),%ecx
- # in0 ^= *(uint32 *) (m + 0)
- xorl 0(%esi),%eax
- # in1 ^= *(uint32 *) (m + 4)
- xorl 4(%esi),%ecx
- # *(uint32 *) (out + 0) = in0
- movl %eax,0(%edi)
- # *(uint32 *) (out + 4) = in1
- movl %ecx,4(%edi)
- # in2 = x2
- movl 108(%esp),%eax
- # in3 = x3
- movl 112(%esp),%ecx
- # in2 += j2
- addl 172(%esp),%eax
- # in3 += j3
- addl 176(%esp),%ecx
- # in2 ^= *(uint32 *) (m + 8)
- xorl 8(%esi),%eax
- # in3 ^= *(uint32 *) (m + 12)
- xorl 12(%esi),%ecx
- # *(uint32 *) (out + 8) = in2
- movl %eax,8(%edi)
- # *(uint32 *) (out + 12) = in3
- movl %ecx,12(%edi)
- # in4 = x4
- movl 116(%esp),%eax
- # in5 = x5
- movl 120(%esp),%ecx
- # in4 += j4
- addl 180(%esp),%eax
- # in5 += j5
- addl 184(%esp),%ecx
- # in4 ^= *(uint32 *) (m + 16)
- xorl 16(%esi),%eax
- # in5 ^= *(uint32 *) (m + 20)
- xorl 20(%esi),%ecx
- # *(uint32 *) (out + 16) = in4
- movl %eax,16(%edi)
- # *(uint32 *) (out + 20) = in5
- movl %ecx,20(%edi)
- # in6 = x6
- movl 124(%esp),%eax
- # in7 = x7
- movl 128(%esp),%ecx
- # in6 += j6
- addl 188(%esp),%eax
- # in7 += j7
- addl 192(%esp),%ecx
- # in6 ^= *(uint32 *) (m + 24)
- xorl 24(%esi),%eax
- # in7 ^= *(uint32 *) (m + 28)
- xorl 28(%esi),%ecx
- # *(uint32 *) (out + 24) = in6
- movl %eax,24(%edi)
- # *(uint32 *) (out + 28) = in7
- movl %ecx,28(%edi)
- # in8 = x8
- movl 132(%esp),%eax
- # in9 = x9
- movl 136(%esp),%ecx
- # in8 += j8
- addl 196(%esp),%eax
- # in9 += j9
- addl 200(%esp),%ecx
- # in8 ^= *(uint32 *) (m + 32)
- xorl 32(%esi),%eax
- # in9 ^= *(uint32 *) (m + 36)
- xorl 36(%esi),%ecx
- # *(uint32 *) (out + 32) = in8
- movl %eax,32(%edi)
- # *(uint32 *) (out + 36) = in9
- movl %ecx,36(%edi)
- # in10 = x10
- movl 140(%esp),%eax
- # in11 = x11
- movl 144(%esp),%ecx
- # in10 += j10
- addl 204(%esp),%eax
- # in11 += j11
- addl 208(%esp),%ecx
- # in10 ^= *(uint32 *) (m + 40)
- xorl 40(%esi),%eax
- # in11 ^= *(uint32 *) (m + 44)
- xorl 44(%esi),%ecx
- # *(uint32 *) (out + 40) = in10
- movl %eax,40(%edi)
- # *(uint32 *) (out + 44) = in11
- movl %ecx,44(%edi)
- # in12 = x12
- movl 148(%esp),%eax
- # in13 = x13
- movl 152(%esp),%ecx
- # in12 += j12
- addl 212(%esp),%eax
- # in13 += j13
- addl 216(%esp),%ecx
- # in12 ^= *(uint32 *) (m + 48)
- xorl 48(%esi),%eax
- # in13 ^= *(uint32 *) (m + 52)
- xorl 52(%esi),%ecx
- # *(uint32 *) (out + 48) = in12
- movl %eax,48(%edi)
- # *(uint32 *) (out + 52) = in13
- movl %ecx,52(%edi)
- # in14 = x14
- movl 156(%esp),%eax
- # in15 = x15
- movl 160(%esp),%ecx
- # in14 += j14
- addl 220(%esp),%eax
- # in15 += j15
- addl 224(%esp),%ecx
- # in14 ^= *(uint32 *) (m + 56)
- xorl 56(%esi),%eax
- # in15 ^= *(uint32 *) (m + 60)
- xorl 60(%esi),%ecx
- # *(uint32 *) (out + 56) = in14
- movl %eax,56(%edi)
- # *(uint32 *) (out + 60) = in15
- movl %ecx,60(%edi)
- # bytes = bytes_backup
- movl 76(%esp),%ebx
- # in8 = j8
- movl 196(%esp),%eax
- # in9 = j9
- movl 200(%esp),%ecx
- # in8 += 1
- add $1,%eax
- # in9 += 0 + carry
- adc $0,%ecx
- # j8 = in8
- movl %eax,196(%esp)
- # j9 = in9
- movl %ecx,200(%esp)
- # bytes - 64
- cmp $64,%ebx
- # goto bytesatleast65 if unsigned>
- ja ._bytesatleast65
- # goto bytesatleast64 if unsigned>=
- jae ._bytesatleast64
- # m = out
- mov %edi,%esi
- # out = ctarget
- movl 228(%esp),%edi
- # i = bytes
- mov %ebx,%ecx
- # while (i) { *out++ = *m++; --i }
- rep movsb
-._bytesatleast64:
- # x = x_backup
- movl 64(%esp),%eax
- # in8 = j8
- movl 196(%esp),%ecx
- # in9 = j9
- movl 200(%esp),%edx
- # *(uint32 *) (x + 32) = in8
- movl %ecx,32(%eax)
- # *(uint32 *) (x + 36) = in9
- movl %edx,36(%eax)
-._done:
- # eax = eax_stack
- movl 80(%esp),%eax
- # ebx = ebx_stack
- movl 84(%esp),%ebx
- # esi = esi_stack
- movl 88(%esp),%esi
- # edi = edi_stack
- movl 92(%esp),%edi
- # ebp = ebp_stack
- movl 96(%esp),%ebp
- # leave
- add %eax,%esp
- ret
-._bytesatleast65:
- # bytes -= 64
- sub $64,%ebx
- # out += 64
- add $64,%edi
- # m += 64
- add $64,%esi
- # goto bytesatleast1
- jmp ._bytesatleast1
-ENDPROC(salsa20_encrypt_bytes)
diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S
deleted file mode 100644
index 03a4918f41ee..000000000000
--- a/arch/x86/crypto/salsa20-x86_64-asm_64.S
+++ /dev/null
@@ -1,805 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <linux/linkage.h>
-
-# enter salsa20_encrypt_bytes
-ENTRY(salsa20_encrypt_bytes)
- mov %rsp,%r11
- and $31,%r11
- add $256,%r11
- sub %r11,%rsp
- # x = arg1
- mov %rdi,%r8
- # m = arg2
- mov %rsi,%rsi
- # out = arg3
- mov %rdx,%rdi
- # bytes = arg4
- mov %rcx,%rdx
- # unsigned>? bytes - 0
- cmp $0,%rdx
- # comment:fp stack unchanged by jump
- # goto done if !unsigned>
- jbe ._done
- # comment:fp stack unchanged by fallthrough
-# start:
-._start:
- # r11_stack = r11
- movq %r11,0(%rsp)
- # r12_stack = r12
- movq %r12,8(%rsp)
- # r13_stack = r13
- movq %r13,16(%rsp)
- # r14_stack = r14
- movq %r14,24(%rsp)
- # r15_stack = r15
- movq %r15,32(%rsp)
- # rbx_stack = rbx
- movq %rbx,40(%rsp)
- # rbp_stack = rbp
- movq %rbp,48(%rsp)
- # in0 = *(uint64 *) (x + 0)
- movq 0(%r8),%rcx
- # in2 = *(uint64 *) (x + 8)
- movq 8(%r8),%r9
- # in4 = *(uint64 *) (x + 16)
- movq 16(%r8),%rax
- # in6 = *(uint64 *) (x + 24)
- movq 24(%r8),%r10
- # in8 = *(uint64 *) (x + 32)
- movq 32(%r8),%r11
- # in10 = *(uint64 *) (x + 40)
- movq 40(%r8),%r12
- # in12 = *(uint64 *) (x + 48)
- movq 48(%r8),%r13
- # in14 = *(uint64 *) (x + 56)
- movq 56(%r8),%r14
- # j0 = in0
- movq %rcx,56(%rsp)
- # j2 = in2
- movq %r9,64(%rsp)
- # j4 = in4
- movq %rax,72(%rsp)
- # j6 = in6
- movq %r10,80(%rsp)
- # j8 = in8
- movq %r11,88(%rsp)
- # j10 = in10
- movq %r12,96(%rsp)
- # j12 = in12
- movq %r13,104(%rsp)
- # j14 = in14
- movq %r14,112(%rsp)
- # x_backup = x
- movq %r8,120(%rsp)
-# bytesatleast1:
-._bytesatleast1:
- # unsigned<? bytes - 64
- cmp $64,%rdx
- # comment:fp stack unchanged by jump
- # goto nocopy if !unsigned<
- jae ._nocopy
- # ctarget = out
- movq %rdi,128(%rsp)
- # out = &tmp
- leaq 192(%rsp),%rdi
- # i = bytes
- mov %rdx,%rcx
- # while (i) { *out++ = *m++; --i }
- rep movsb
- # out = &tmp
- leaq 192(%rsp),%rdi
- # m = &tmp
- leaq 192(%rsp),%rsi
- # comment:fp stack unchanged by fallthrough
-# nocopy:
-._nocopy:
- # out_backup = out
- movq %rdi,136(%rsp)
- # m_backup = m
- movq %rsi,144(%rsp)
- # bytes_backup = bytes
- movq %rdx,152(%rsp)
- # x1 = j0
- movq 56(%rsp),%rdi
- # x0 = x1
- mov %rdi,%rdx
- # (uint64) x1 >>= 32
- shr $32,%rdi
- # x3 = j2
- movq 64(%rsp),%rsi
- # x2 = x3
- mov %rsi,%rcx
- # (uint64) x3 >>= 32
- shr $32,%rsi
- # x5 = j4
- movq 72(%rsp),%r8
- # x4 = x5
- mov %r8,%r9
- # (uint64) x5 >>= 32
- shr $32,%r8
- # x5_stack = x5
- movq %r8,160(%rsp)
- # x7 = j6
- movq 80(%rsp),%r8
- # x6 = x7
- mov %r8,%rax
- # (uint64) x7 >>= 32
- shr $32,%r8
- # x9 = j8
- movq 88(%rsp),%r10
- # x8 = x9
- mov %r10,%r11
- # (uint64) x9 >>= 32
- shr $32,%r10
- # x11 = j10
- movq 96(%rsp),%r12
- # x10 = x11
- mov %r12,%r13
- # x10_stack = x10
- movq %r13,168(%rsp)
- # (uint64) x11 >>= 32
- shr $32,%r12
- # x13 = j12
- movq 104(%rsp),%r13
- # x12 = x13
- mov %r13,%r14
- # (uint64) x13 >>= 32
- shr $32,%r13
- # x15 = j14
- movq 112(%rsp),%r15
- # x14 = x15
- mov %r15,%rbx
- # (uint64) x15 >>= 32
- shr $32,%r15
- # x15_stack = x15
- movq %r15,176(%rsp)
- # i = 20
- mov $20,%r15
-# mainloop:
-._mainloop:
- # i_backup = i
- movq %r15,184(%rsp)
- # x5 = x5_stack
- movq 160(%rsp),%r15
- # a = x12 + x0
- lea (%r14,%rdx),%rbp
- # (uint32) a <<<= 7
- rol $7,%ebp
- # x4 ^= a
- xor %rbp,%r9
- # b = x1 + x5
- lea (%rdi,%r15),%rbp
- # (uint32) b <<<= 7
- rol $7,%ebp
- # x9 ^= b
- xor %rbp,%r10
- # a = x0 + x4
- lea (%rdx,%r9),%rbp
- # (uint32) a <<<= 9
- rol $9,%ebp
- # x8 ^= a
- xor %rbp,%r11
- # b = x5 + x9
- lea (%r15,%r10),%rbp
- # (uint32) b <<<= 9
- rol $9,%ebp
- # x13 ^= b
- xor %rbp,%r13
- # a = x4 + x8
- lea (%r9,%r11),%rbp
- # (uint32) a <<<= 13
- rol $13,%ebp
- # x12 ^= a
- xor %rbp,%r14
- # b = x9 + x13
- lea (%r10,%r13),%rbp
- # (uint32) b <<<= 13
- rol $13,%ebp
- # x1 ^= b
- xor %rbp,%rdi
- # a = x8 + x12
- lea (%r11,%r14),%rbp
- # (uint32) a <<<= 18
- rol $18,%ebp
- # x0 ^= a
- xor %rbp,%rdx
- # b = x13 + x1
- lea (%r13,%rdi),%rbp
- # (uint32) b <<<= 18
- rol $18,%ebp
- # x5 ^= b
- xor %rbp,%r15
- # x10 = x10_stack
- movq 168(%rsp),%rbp
- # x5_stack = x5
- movq %r15,160(%rsp)
- # c = x6 + x10
- lea (%rax,%rbp),%r15
- # (uint32) c <<<= 7
- rol $7,%r15d
- # x14 ^= c
- xor %r15,%rbx
- # c = x10 + x14
- lea (%rbp,%rbx),%r15
- # (uint32) c <<<= 9
- rol $9,%r15d
- # x2 ^= c
- xor %r15,%rcx
- # c = x14 + x2
- lea (%rbx,%rcx),%r15
- # (uint32) c <<<= 13
- rol $13,%r15d
- # x6 ^= c
- xor %r15,%rax
- # c = x2 + x6
- lea (%rcx,%rax),%r15
- # (uint32) c <<<= 18
- rol $18,%r15d
- # x10 ^= c
- xor %r15,%rbp
- # x15 = x15_stack
- movq 176(%rsp),%r15
- # x10_stack = x10
- movq %rbp,168(%rsp)
- # d = x11 + x15
- lea (%r12,%r15),%rbp
- # (uint32) d <<<= 7
- rol $7,%ebp
- # x3 ^= d
- xor %rbp,%rsi
- # d = x15 + x3
- lea (%r15,%rsi),%rbp
- # (uint32) d <<<= 9
- rol $9,%ebp
- # x7 ^= d
- xor %rbp,%r8
- # d = x3 + x7
- lea (%rsi,%r8),%rbp
- # (uint32) d <<<= 13
- rol $13,%ebp
- # x11 ^= d
- xor %rbp,%r12
- # d = x7 + x11
- lea (%r8,%r12),%rbp
- # (uint32) d <<<= 18
- rol $18,%ebp
- # x15 ^= d
- xor %rbp,%r15
- # x15_stack = x15
- movq %r15,176(%rsp)
- # x5 = x5_stack
- movq 160(%rsp),%r15
- # a = x3 + x0
- lea (%rsi,%rdx),%rbp
- # (uint32) a <<<= 7
- rol $7,%ebp
- # x1 ^= a
- xor %rbp,%rdi
- # b = x4 + x5
- lea (%r9,%r15),%rbp
- # (uint32) b <<<= 7
- rol $7,%ebp
- # x6 ^= b
- xor %rbp,%rax
- # a = x0 + x1
- lea (%rdx,%rdi),%rbp
- # (uint32) a <<<= 9
- rol $9,%ebp
- # x2 ^= a
- xor %rbp,%rcx
- # b = x5 + x6
- lea (%r15,%rax),%rbp
- # (uint32) b <<<= 9
- rol $9,%ebp
- # x7 ^= b
- xor %rbp,%r8
- # a = x1 + x2
- lea (%rdi,%rcx),%rbp
- # (uint32) a <<<= 13
- rol $13,%ebp
- # x3 ^= a
- xor %rbp,%rsi
- # b = x6 + x7
- lea (%rax,%r8),%rbp
- # (uint32) b <<<= 13
- rol $13,%ebp
- # x4 ^= b
- xor %rbp,%r9
- # a = x2 + x3
- lea (%rcx,%rsi),%rbp
- # (uint32) a <<<= 18
- rol $18,%ebp
- # x0 ^= a
- xor %rbp,%rdx
- # b = x7 + x4
- lea (%r8,%r9),%rbp
- # (uint32) b <<<= 18
- rol $18,%ebp
- # x5 ^= b
- xor %rbp,%r15
- # x10 = x10_stack
- movq 168(%rsp),%rbp
- # x5_stack = x5
- movq %r15,160(%rsp)
- # c = x9 + x10
- lea (%r10,%rbp),%r15
- # (uint32) c <<<= 7
- rol $7,%r15d
- # x11 ^= c
- xor %r15,%r12
- # c = x10 + x11
- lea (%rbp,%r12),%r15
- # (uint32) c <<<= 9
- rol $9,%r15d
- # x8 ^= c
- xor %r15,%r11
- # c = x11 + x8
- lea (%r12,%r11),%r15
- # (uint32) c <<<= 13
- rol $13,%r15d
- # x9 ^= c
- xor %r15,%r10
- # c = x8 + x9
- lea (%r11,%r10),%r15
- # (uint32) c <<<= 18
- rol $18,%r15d
- # x10 ^= c
- xor %r15,%rbp
- # x15 = x15_stack
- movq 176(%rsp),%r15
- # x10_stack = x10
- movq %rbp,168(%rsp)
- # d = x14 + x15
- lea (%rbx,%r15),%rbp
- # (uint32) d <<<= 7
- rol $7,%ebp
- # x12 ^= d
- xor %rbp,%r14
- # d = x15 + x12
- lea (%r15,%r14),%rbp
- # (uint32) d <<<= 9
- rol $9,%ebp
- # x13 ^= d
- xor %rbp,%r13
- # d = x12 + x13
- lea (%r14,%r13),%rbp
- # (uint32) d <<<= 13
- rol $13,%ebp
- # x14 ^= d
- xor %rbp,%rbx
- # d = x13 + x14
- lea (%r13,%rbx),%rbp
- # (uint32) d <<<= 18
- rol $18,%ebp
- # x15 ^= d
- xor %rbp,%r15
- # x15_stack = x15
- movq %r15,176(%rsp)
- # x5 = x5_stack
- movq 160(%rsp),%r15
- # a = x12 + x0
- lea (%r14,%rdx),%rbp
- # (uint32) a <<<= 7
- rol $7,%ebp
- # x4 ^= a
- xor %rbp,%r9
- # b = x1 + x5
- lea (%rdi,%r15),%rbp
- # (uint32) b <<<= 7
- rol $7,%ebp
- # x9 ^= b
- xor %rbp,%r10
- # a = x0 + x4
- lea (%rdx,%r9),%rbp
- # (uint32) a <<<= 9
- rol $9,%ebp
- # x8 ^= a
- xor %rbp,%r11
- # b = x5 + x9
- lea (%r15,%r10),%rbp
- # (uint32) b <<<= 9
- rol $9,%ebp
- # x13 ^= b
- xor %rbp,%r13
- # a = x4 + x8
- lea (%r9,%r11),%rbp
- # (uint32) a <<<= 13
- rol $13,%ebp
- # x12 ^= a
- xor %rbp,%r14
- # b = x9 + x13
- lea (%r10,%r13),%rbp
- # (uint32) b <<<= 13
- rol $13,%ebp
- # x1 ^= b
- xor %rbp,%rdi
- # a = x8 + x12
- lea (%r11,%r14),%rbp
- # (uint32) a <<<= 18
- rol $18,%ebp
- # x0 ^= a
- xor %rbp,%rdx
- # b = x13 + x1
- lea (%r13,%rdi),%rbp
- # (uint32) b <<<= 18
- rol $18,%ebp
- # x5 ^= b
- xor %rbp,%r15
- # x10 = x10_stack
- movq 168(%rsp),%rbp
- # x5_stack = x5
- movq %r15,160(%rsp)
- # c = x6 + x10
- lea (%rax,%rbp),%r15
- # (uint32) c <<<= 7
- rol $7,%r15d
- # x14 ^= c
- xor %r15,%rbx
- # c = x10 + x14
- lea (%rbp,%rbx),%r15
- # (uint32) c <<<= 9
- rol $9,%r15d
- # x2 ^= c
- xor %r15,%rcx
- # c = x14 + x2
- lea (%rbx,%rcx),%r15
- # (uint32) c <<<= 13
- rol $13,%r15d
- # x6 ^= c
- xor %r15,%rax
- # c = x2 + x6
- lea (%rcx,%rax),%r15
- # (uint32) c <<<= 18
- rol $18,%r15d
- # x10 ^= c
- xor %r15,%rbp
- # x15 = x15_stack
- movq 176(%rsp),%r15
- # x10_stack = x10
- movq %rbp,168(%rsp)
- # d = x11 + x15
- lea (%r12,%r15),%rbp
- # (uint32) d <<<= 7
- rol $7,%ebp
- # x3 ^= d
- xor %rbp,%rsi
- # d = x15 + x3
- lea (%r15,%rsi),%rbp
- # (uint32) d <<<= 9
- rol $9,%ebp
- # x7 ^= d
- xor %rbp,%r8
- # d = x3 + x7
- lea (%rsi,%r8),%rbp
- # (uint32) d <<<= 13
- rol $13,%ebp
- # x11 ^= d
- xor %rbp,%r12
- # d = x7 + x11
- lea (%r8,%r12),%rbp
- # (uint32) d <<<= 18
- rol $18,%ebp
- # x15 ^= d
- xor %rbp,%r15
- # x15_stack = x15
- movq %r15,176(%rsp)
- # x5 = x5_stack
- movq 160(%rsp),%r15
- # a = x3 + x0
- lea (%rsi,%rdx),%rbp
- # (uint32) a <<<= 7
- rol $7,%ebp
- # x1 ^= a
- xor %rbp,%rdi
- # b = x4 + x5
- lea (%r9,%r15),%rbp
- # (uint32) b <<<= 7
- rol $7,%ebp
- # x6 ^= b
- xor %rbp,%rax
- # a = x0 + x1
- lea (%rdx,%rdi),%rbp
- # (uint32) a <<<= 9
- rol $9,%ebp
- # x2 ^= a
- xor %rbp,%rcx
- # b = x5 + x6
- lea (%r15,%rax),%rbp
- # (uint32) b <<<= 9
- rol $9,%ebp
- # x7 ^= b
- xor %rbp,%r8
- # a = x1 + x2
- lea (%rdi,%rcx),%rbp
- # (uint32) a <<<= 13
- rol $13,%ebp
- # x3 ^= a
- xor %rbp,%rsi
- # b = x6 + x7
- lea (%rax,%r8),%rbp
- # (uint32) b <<<= 13
- rol $13,%ebp
- # x4 ^= b
- xor %rbp,%r9
- # a = x2 + x3
- lea (%rcx,%rsi),%rbp
- # (uint32) a <<<= 18
- rol $18,%ebp
- # x0 ^= a
- xor %rbp,%rdx
- # b = x7 + x4
- lea (%r8,%r9),%rbp
- # (uint32) b <<<= 18
- rol $18,%ebp
- # x5 ^= b
- xor %rbp,%r15
- # x10 = x10_stack
- movq 168(%rsp),%rbp
- # x5_stack = x5
- movq %r15,160(%rsp)
- # c = x9 + x10
- lea (%r10,%rbp),%r15
- # (uint32) c <<<= 7
- rol $7,%r15d
- # x11 ^= c
- xor %r15,%r12
- # c = x10 + x11
- lea (%rbp,%r12),%r15
- # (uint32) c <<<= 9
- rol $9,%r15d
- # x8 ^= c
- xor %r15,%r11
- # c = x11 + x8
- lea (%r12,%r11),%r15
- # (uint32) c <<<= 13
- rol $13,%r15d
- # x9 ^= c
- xor %r15,%r10
- # c = x8 + x9
- lea (%r11,%r10),%r15
- # (uint32) c <<<= 18
- rol $18,%r15d
- # x10 ^= c
- xor %r15,%rbp
- # x15 = x15_stack
- movq 176(%rsp),%r15
- # x10_stack = x10
- movq %rbp,168(%rsp)
- # d = x14 + x15
- lea (%rbx,%r15),%rbp
- # (uint32) d <<<= 7
- rol $7,%ebp
- # x12 ^= d
- xor %rbp,%r14
- # d = x15 + x12
- lea (%r15,%r14),%rbp
- # (uint32) d <<<= 9
- rol $9,%ebp
- # x13 ^= d
- xor %rbp,%r13
- # d = x12 + x13
- lea (%r14,%r13),%rbp
- # (uint32) d <<<= 13
- rol $13,%ebp
- # x14 ^= d
- xor %rbp,%rbx
- # d = x13 + x14
- lea (%r13,%rbx),%rbp
- # (uint32) d <<<= 18
- rol $18,%ebp
- # x15 ^= d
- xor %rbp,%r15
- # x15_stack = x15
- movq %r15,176(%rsp)
- # i = i_backup
- movq 184(%rsp),%r15
- # unsigned>? i -= 4
- sub $4,%r15
- # comment:fp stack unchanged by jump
- # goto mainloop if unsigned>
- ja ._mainloop
- # (uint32) x2 += j2
- addl 64(%rsp),%ecx
- # x3 <<= 32
- shl $32,%rsi
- # x3 += j2
- addq 64(%rsp),%rsi
- # (uint64) x3 >>= 32
- shr $32,%rsi
- # x3 <<= 32
- shl $32,%rsi
- # x2 += x3
- add %rsi,%rcx
- # (uint32) x6 += j6
- addl 80(%rsp),%eax
- # x7 <<= 32
- shl $32,%r8
- # x7 += j6
- addq 80(%rsp),%r8
- # (uint64) x7 >>= 32
- shr $32,%r8
- # x7 <<= 32
- shl $32,%r8
- # x6 += x7
- add %r8,%rax
- # (uint32) x8 += j8
- addl 88(%rsp),%r11d
- # x9 <<= 32
- shl $32,%r10
- # x9 += j8
- addq 88(%rsp),%r10
- # (uint64) x9 >>= 32
- shr $32,%r10
- # x9 <<= 32
- shl $32,%r10
- # x8 += x9
- add %r10,%r11
- # (uint32) x12 += j12
- addl 104(%rsp),%r14d
- # x13 <<= 32
- shl $32,%r13
- # x13 += j12
- addq 104(%rsp),%r13
- # (uint64) x13 >>= 32
- shr $32,%r13
- # x13 <<= 32
- shl $32,%r13
- # x12 += x13
- add %r13,%r14
- # (uint32) x0 += j0
- addl 56(%rsp),%edx
- # x1 <<= 32
- shl $32,%rdi
- # x1 += j0
- addq 56(%rsp),%rdi
- # (uint64) x1 >>= 32
- shr $32,%rdi
- # x1 <<= 32
- shl $32,%rdi
- # x0 += x1
- add %rdi,%rdx
- # x5 = x5_stack
- movq 160(%rsp),%rdi
- # (uint32) x4 += j4
- addl 72(%rsp),%r9d
- # x5 <<= 32
- shl $32,%rdi
- # x5 += j4
- addq 72(%rsp),%rdi
- # (uint64) x5 >>= 32
- shr $32,%rdi
- # x5 <<= 32
- shl $32,%rdi
- # x4 += x5
- add %rdi,%r9
- # x10 = x10_stack
- movq 168(%rsp),%r8
- # (uint32) x10 += j10
- addl 96(%rsp),%r8d
- # x11 <<= 32
- shl $32,%r12
- # x11 += j10
- addq 96(%rsp),%r12
- # (uint64) x11 >>= 32
- shr $32,%r12
- # x11 <<= 32
- shl $32,%r12
- # x10 += x11
- add %r12,%r8
- # x15 = x15_stack
- movq 176(%rsp),%rdi
- # (uint32) x14 += j14
- addl 112(%rsp),%ebx
- # x15 <<= 32
- shl $32,%rdi
- # x15 += j14
- addq 112(%rsp),%rdi
- # (uint64) x15 >>= 32
- shr $32,%rdi
- # x15 <<= 32
- shl $32,%rdi
- # x14 += x15
- add %rdi,%rbx
- # out = out_backup
- movq 136(%rsp),%rdi
- # m = m_backup
- movq 144(%rsp),%rsi
- # x0 ^= *(uint64 *) (m + 0)
- xorq 0(%rsi),%rdx
- # *(uint64 *) (out + 0) = x0
- movq %rdx,0(%rdi)
- # x2 ^= *(uint64 *) (m + 8)
- xorq 8(%rsi),%rcx
- # *(uint64 *) (out + 8) = x2
- movq %rcx,8(%rdi)
- # x4 ^= *(uint64 *) (m + 16)
- xorq 16(%rsi),%r9
- # *(uint64 *) (out + 16) = x4
- movq %r9,16(%rdi)
- # x6 ^= *(uint64 *) (m + 24)
- xorq 24(%rsi),%rax
- # *(uint64 *) (out + 24) = x6
- movq %rax,24(%rdi)
- # x8 ^= *(uint64 *) (m + 32)
- xorq 32(%rsi),%r11
- # *(uint64 *) (out + 32) = x8
- movq %r11,32(%rdi)
- # x10 ^= *(uint64 *) (m + 40)
- xorq 40(%rsi),%r8
- # *(uint64 *) (out + 40) = x10
- movq %r8,40(%rdi)
- # x12 ^= *(uint64 *) (m + 48)
- xorq 48(%rsi),%r14
- # *(uint64 *) (out + 48) = x12
- movq %r14,48(%rdi)
- # x14 ^= *(uint64 *) (m + 56)
- xorq 56(%rsi),%rbx
- # *(uint64 *) (out + 56) = x14
- movq %rbx,56(%rdi)
- # bytes = bytes_backup
- movq 152(%rsp),%rdx
- # in8 = j8
- movq 88(%rsp),%rcx
- # in8 += 1
- add $1,%rcx
- # j8 = in8
- movq %rcx,88(%rsp)
- # unsigned>? unsigned<? bytes - 64
- cmp $64,%rdx
- # comment:fp stack unchanged by jump
- # goto bytesatleast65 if unsigned>
- ja ._bytesatleast65
- # comment:fp stack unchanged by jump
- # goto bytesatleast64 if !unsigned<
- jae ._bytesatleast64
- # m = out
- mov %rdi,%rsi
- # out = ctarget
- movq 128(%rsp),%rdi
- # i = bytes
- mov %rdx,%rcx
- # while (i) { *out++ = *m++; --i }
- rep movsb
- # comment:fp stack unchanged by fallthrough
-# bytesatleast64:
-._bytesatleast64:
- # x = x_backup
- movq 120(%rsp),%rdi
- # in8 = j8
- movq 88(%rsp),%rsi
- # *(uint64 *) (x + 32) = in8
- movq %rsi,32(%rdi)
- # r11 = r11_stack
- movq 0(%rsp),%r11
- # r12 = r12_stack
- movq 8(%rsp),%r12
- # r13 = r13_stack
- movq 16(%rsp),%r13
- # r14 = r14_stack
- movq 24(%rsp),%r14
- # r15 = r15_stack
- movq 32(%rsp),%r15
- # rbx = rbx_stack
- movq 40(%rsp),%rbx
- # rbp = rbp_stack
- movq 48(%rsp),%rbp
- # comment:fp stack unchanged by fallthrough
-# done:
-._done:
- # leave
- add %r11,%rsp
- mov %rdi,%rax
- mov %rsi,%rdx
- ret
-# bytesatleast65:
-._bytesatleast65:
- # bytes -= 64
- sub $64,%rdx
- # out += 64
- add $64,%rdi
- # m += 64
- add $64,%rsi
- # comment:fp stack unchanged by jump
- # goto bytesatleast1
- jmp ._bytesatleast1
-ENDPROC(salsa20_encrypt_bytes)
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
deleted file mode 100644
index b07d7d959806..000000000000
--- a/arch/x86/crypto/salsa20_glue.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Glue code for optimized assembly version of Salsa20.
- *
- * Copyright (c) 2007 Tan Swee Heng <thesweeheng@gmail.com>
- *
- * The assembly codes are public domain assembly codes written by Daniel. J.
- * Bernstein <djb@cr.yp.to>. The codes are modified to include indentation
- * and to remove extraneous comments and functions that are not needed.
- * - i586 version, renamed as salsa20-i586-asm_32.S
- * available from <http://cr.yp.to/snuffle/salsa20/x86-pm/salsa20.s>
- * - x86-64 version, renamed as salsa20-x86_64-asm_64.S
- * available from <http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s>
- *
- * Also modified to set up the initial state using the generic C code rather
- * than in assembly.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <asm/unaligned.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/salsa20.h>
-#include <linux/module.h>
-
-asmlinkage void salsa20_encrypt_bytes(u32 state[16], const u8 *src, u8 *dst,
- u32 bytes);
-
-static int salsa20_asm_crypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- const struct salsa20_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- u32 state[16];
- int err;
-
- err = skcipher_walk_virt(&walk, req, true);
-
- crypto_salsa20_init(state, ctx, walk.iv);
-
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
-
- if (nbytes < walk.total)
- nbytes = round_down(nbytes, walk.stride);
-
- salsa20_encrypt_bytes(state, walk.src.virt.addr,
- walk.dst.virt.addr, nbytes);
- err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
- }
-
- return err;
-}
-
-static struct skcipher_alg alg = {
- .base.cra_name = "salsa20",
- .base.cra_driver_name = "salsa20-asm",
- .base.cra_priority = 200,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct salsa20_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = SALSA20_MIN_KEY_SIZE,
- .max_keysize = SALSA20_MAX_KEY_SIZE,
- .ivsize = SALSA20_IV_SIZE,
- .chunksize = SALSA20_BLOCK_SIZE,
- .setkey = crypto_salsa20_setkey,
- .encrypt = salsa20_asm_crypt,
- .decrypt = salsa20_asm_crypt,
-};
-
-static int __init init(void)
-{
- return crypto_register_skcipher(&alg);
-}
-
-static void __exit fini(void)
-{
- crypto_unregister_skcipher(&alg);
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)");
-MODULE_ALIAS_CRYPTO("salsa20");
-MODULE_ALIAS_CRYPTO("salsa20-asm");
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 9af927e59d49..9de7f1e1dede 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -84,13 +84,13 @@ ENTRY(entry_SYSENTER_compat)
pushq %rdx /* pt_regs->dx */
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
- pushq $0 /* pt_regs->r8 = 0 */
+ pushq %r8 /* pt_regs->r8 */
xorl %r8d, %r8d /* nospec r8 */
- pushq $0 /* pt_regs->r9 = 0 */
+ pushq %r9 /* pt_regs->r9 */
xorl %r9d, %r9d /* nospec r9 */
- pushq $0 /* pt_regs->r10 = 0 */
+ pushq %r10 /* pt_regs->r10 */
xorl %r10d, %r10d /* nospec r10 */
- pushq $0 /* pt_regs->r11 = 0 */
+ pushq %r11 /* pt_regs->r11 */
xorl %r11d, %r11d /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx */
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index d6b27dab1b30..14a2f996e543 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -396,3 +396,4 @@
382 i386 pkey_free sys_pkey_free __ia32_sys_pkey_free
383 i386 statx sys_statx __ia32_sys_statx
384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl
+385 i386 io_pgetevents sys_io_pgetevents __ia32_compat_sys_io_pgetevents
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 4dfe42666d0c..cd36232ab62f 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -341,6 +341,7 @@
330 common pkey_alloc __x64_sys_pkey_alloc
331 common pkey_free __x64_sys_pkey_free
332 common statx __x64_sys_statx
+333 common io_pgetevents __x64_sys_io_pgetevents
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index d998a487c9b1..261802b1cc50 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -44,14 +44,14 @@ obj-y += $(vdso_img_objs)
targets += $(vdso_img_cfiles)
targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so)
-export CPPFLAGS_vdso.lds += -P -C
+CPPFLAGS_vdso.lds += -P -C
VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
-Wl,--no-undefined \
-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 \
$(DISABLE_LTO)
-$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
+$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
$(call if_changed,vdso)
HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi
@@ -100,11 +100,8 @@ VDSO_LDFLAGS_vdsox32.lds = -Wl,-m,elf32_x86_64 \
-Wl,-z,max-page-size=4096 \
-Wl,-z,common-page-size=4096
-# 64-bit objects to re-brand as x32
-vobjs64-for-x32 := $(filter-out $(vobjs-nox32),$(vobjs-y))
-
# x32-rebranded versions
-vobjx32s-y := $(vobjs64-for-x32:.o=-x32.o)
+vobjx32s-y := $(vobjs-y:.o=-x32.o)
# same thing, but in the output directory
vobjx32s := $(foreach F,$(vobjx32s-y),$(obj)/$F)
@@ -122,7 +119,7 @@ $(obj)/%.so: OBJCOPYFLAGS := -S
$(obj)/%.so: $(obj)/%.so.dbg
$(call if_changed,objcopy)
-$(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE
+$(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE
$(call if_changed,vdso)
CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
diff --git a/arch/x86/entry/vdso/vdso32/vdso-fakesections.c b/arch/x86/entry/vdso/vdso32/vdso-fakesections.c
deleted file mode 100644
index 541468e25265..000000000000
--- a/arch/x86/entry/vdso/vdso32/vdso-fakesections.c
+++ /dev/null
@@ -1 +0,0 @@
-#include "../vdso-fakesections.c"
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 70b7845434cb..7782cdbcd67d 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -107,7 +107,7 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size)
thread->cr2 = ptr;
thread->trap_nr = X86_TRAP_PF;
- memset(&info, 0, sizeof(info));
+ clear_siginfo(&info);
info.si_signo = SIGSEGV;
info.si_errno = 0;
info.si_code = SEGV_MAPERR;
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 786fd875de92..4b98101209a1 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -889,7 +889,7 @@ static void force_ibs_eilvt_setup(void)
if (!ibs_eilvt_valid())
goto out;
- pr_info("IBS: LVT offset %d assigned\n", offset);
+ pr_info("LVT offset %d assigned\n", offset);
return;
out:
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index f5cbbba99283..981ba5e8241b 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -19,6 +19,7 @@
#include <asm/cpufeature.h>
#include <asm/perf_event.h>
#include <asm/msr.h>
+#include <asm/smp.h>
#define NUM_COUNTERS_NB 4
#define NUM_COUNTERS_L2 4
@@ -399,26 +400,8 @@ static int amd_uncore_cpu_starting(unsigned int cpu)
}
if (amd_uncore_llc) {
- unsigned int apicid = cpu_data(cpu).apicid;
- unsigned int nshared, subleaf, prev_eax = 0;
-
uncore = *per_cpu_ptr(amd_uncore_llc, cpu);
- /*
- * Iterate over Cache Topology Definition leaves until no
- * more cache descriptions are available.
- */
- for (subleaf = 0; subleaf < 5; subleaf++) {
- cpuid_count(0x8000001d, subleaf, &eax, &ebx, &ecx, &edx);
-
- /* EAX[0:4] gives type of cache */
- if (!(eax & 0x1f))
- break;
-
- prev_eax = eax;
- }
- nshared = ((prev_eax >> 14) & 0xfff) + 1;
-
- uncore->id = apicid - (apicid % nshared);
+ uncore->id = per_cpu(cpu_llc_id, cpu);
uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc);
*per_cpu_ptr(amd_uncore_llc, cpu) = uncore;
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index a6006e7bb729..6e461fb1e0d4 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -27,6 +27,7 @@
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/device.h>
+#include <linux/nospec.h>
#include <asm/apic.h>
#include <asm/stacktrace.h>
@@ -304,17 +305,20 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
config = attr->config;
- cache_type = (config >> 0) & 0xff;
+ cache_type = (config >> 0) & 0xff;
if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
return -EINVAL;
+ cache_type = array_index_nospec(cache_type, PERF_COUNT_HW_CACHE_MAX);
cache_op = (config >> 8) & 0xff;
if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
return -EINVAL;
+ cache_op = array_index_nospec(cache_op, PERF_COUNT_HW_CACHE_OP_MAX);
cache_result = (config >> 16) & 0xff;
if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
return -EINVAL;
+ cache_result = array_index_nospec(cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX);
val = hw_cache_event_ids[cache_type][cache_op][cache_result];
@@ -421,6 +425,8 @@ int x86_setup_perfctr(struct perf_event *event)
if (attr->config >= x86_pmu.max_events)
return -EINVAL;
+ attr->config = array_index_nospec((unsigned long)attr->config, x86_pmu.max_events);
+
/*
* The generic map:
*/
@@ -2391,7 +2397,7 @@ static unsigned long get_segment_base(unsigned int segment)
#ifdef CONFIG_IA32_EMULATION
-#include <asm/compat.h>
+#include <linux/compat.h>
static inline int
perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *entry)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 607bf565a90c..707b2a96e516 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3339,7 +3339,8 @@ static void intel_pmu_cpu_starting(int cpu)
cpuc->lbr_sel = NULL;
- flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
+ if (x86_pmu.version > 1)
+ flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
if (!cpuc->shared_regs)
return;
@@ -3502,6 +3503,8 @@ static __initconst const struct x86_pmu core_pmu = {
.cpu_dying = intel_pmu_cpu_dying,
};
+static struct attribute *intel_pmu_attrs[];
+
static __initconst const struct x86_pmu intel_pmu = {
.name = "Intel",
.handle_irq = intel_pmu_handle_irq,
@@ -3533,6 +3536,8 @@ static __initconst const struct x86_pmu intel_pmu = {
.format_attrs = intel_arch3_formats_attr,
.events_sysfs_show = intel_event_sysfs_show,
+ .attrs = intel_pmu_attrs,
+
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
@@ -3911,8 +3916,6 @@ __init int intel_pmu_init(void)
x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
-
- x86_pmu.attrs = intel_pmu_attrs;
/*
* Quirk: v2 perfmon does not report fixed-purpose events, so
* assume at least 3 events, when not running in a hypervisor:
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 9aca448bb8e6..9f8084f18d58 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -92,6 +92,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
+#include <linux/nospec.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include "../perf_event.h"
@@ -302,6 +303,7 @@ static int cstate_pmu_event_init(struct perf_event *event)
} else if (event->pmu == &cstate_pkg_pmu) {
if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
return -EINVAL;
+ cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX);
if (!pkg_msr[cfg].attr)
return -EINVAL;
event->hw.event_base = pkg_msr[cfg].msr;
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 3b993942a0e4..8d016ce5b80d 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -1194,7 +1194,7 @@ static int pt_event_addr_filters_validate(struct list_head *filters)
filter->action == PERF_ADDR_FILTER_ACTION_START)
return -EOPNOTSUPP;
- if (!filter->inode) {
+ if (!filter->path.dentry) {
if (!valid_kernel_ip(filter->offset))
return -EINVAL;
@@ -1221,7 +1221,7 @@ static void pt_event_addr_filters_sync(struct perf_event *event)
return;
list_for_each_entry(filter, &head->list, entry) {
- if (filter->inode && !offs[range]) {
+ if (filter->path.dentry && !offs[range]) {
msr_a = msr_b = 0;
} else {
/* apply the offset */
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index a7956fc7ca1d..15b07379e72d 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -203,7 +203,7 @@ static void uncore_assign_hw_event(struct intel_uncore_box *box,
hwc->idx = idx;
hwc->last_tag = ++box->tags[idx];
- if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
+ if (uncore_pmc_fixed(hwc->idx)) {
hwc->event_base = uncore_fixed_ctr(box);
hwc->config_base = uncore_fixed_ctl(box);
return;
@@ -218,7 +218,9 @@ void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *e
u64 prev_count, new_count, delta;
int shift;
- if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
+ if (uncore_pmc_freerunning(event->hw.idx))
+ shift = 64 - uncore_freerunning_bits(box, event);
+ else if (uncore_pmc_fixed(event->hw.idx))
shift = 64 - uncore_fixed_ctr_bits(box);
else
shift = 64 - uncore_perf_ctr_bits(box);
@@ -449,15 +451,30 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
return ret ? -EINVAL : 0;
}
-static void uncore_pmu_event_start(struct perf_event *event, int flags)
+void uncore_pmu_event_start(struct perf_event *event, int flags)
{
struct intel_uncore_box *box = uncore_event_to_box(event);
int idx = event->hw.idx;
- if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+ if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
return;
- if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
+ /*
+ * Free running counter is read-only and always active.
+ * Use the current counter value as start point.
+ * There is no overflow interrupt for free running counter.
+ * Use hrtimer to periodically poll the counter to avoid overflow.
+ */
+ if (uncore_pmc_freerunning(event->hw.idx)) {
+ list_add_tail(&event->active_entry, &box->active_list);
+ local64_set(&event->hw.prev_count,
+ uncore_read_counter(box, event));
+ if (box->n_active++ == 0)
+ uncore_pmu_start_hrtimer(box);
+ return;
+ }
+
+ if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
return;
event->hw.state = 0;
@@ -474,11 +491,20 @@ static void uncore_pmu_event_start(struct perf_event *event, int flags)
}
}
-static void uncore_pmu_event_stop(struct perf_event *event, int flags)
+void uncore_pmu_event_stop(struct perf_event *event, int flags)
{
struct intel_uncore_box *box = uncore_event_to_box(event);
struct hw_perf_event *hwc = &event->hw;
+ /* Cannot disable free running counter which is read-only */
+ if (uncore_pmc_freerunning(hwc->idx)) {
+ list_del(&event->active_entry);
+ if (--box->n_active == 0)
+ uncore_pmu_cancel_hrtimer(box);
+ uncore_perf_event_update(box, event);
+ return;
+ }
+
if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
uncore_disable_event(box, event);
box->n_active--;
@@ -502,7 +528,7 @@ static void uncore_pmu_event_stop(struct perf_event *event, int flags)
}
}
-static int uncore_pmu_event_add(struct perf_event *event, int flags)
+int uncore_pmu_event_add(struct perf_event *event, int flags)
{
struct intel_uncore_box *box = uncore_event_to_box(event);
struct hw_perf_event *hwc = &event->hw;
@@ -512,6 +538,17 @@ static int uncore_pmu_event_add(struct perf_event *event, int flags)
if (!box)
return -ENODEV;
+ /*
+ * The free funning counter is assigned in event_init().
+ * The free running counter event and free running counter
+ * are 1:1 mapped. It doesn't need to be tracked in event_list.
+ */
+ if (uncore_pmc_freerunning(hwc->idx)) {
+ if (flags & PERF_EF_START)
+ uncore_pmu_event_start(event, 0);
+ return 0;
+ }
+
ret = n = uncore_collect_events(box, event, false);
if (ret < 0)
return ret;
@@ -563,13 +600,21 @@ static int uncore_pmu_event_add(struct perf_event *event, int flags)
return 0;
}
-static void uncore_pmu_event_del(struct perf_event *event, int flags)
+void uncore_pmu_event_del(struct perf_event *event, int flags)
{
struct intel_uncore_box *box = uncore_event_to_box(event);
int i;
uncore_pmu_event_stop(event, PERF_EF_UPDATE);
+ /*
+ * The event for free running counter is not tracked by event_list.
+ * It doesn't need to force event->hw.idx = -1 to reassign the counter.
+ * Because the event and the free running counter are 1:1 mapped.
+ */
+ if (uncore_pmc_freerunning(event->hw.idx))
+ return;
+
for (i = 0; i < box->n_events; i++) {
if (event == box->event_list[i]) {
uncore_put_event_constraint(box, event);
@@ -603,6 +648,10 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu,
struct intel_uncore_box *fake_box;
int ret = -EINVAL, n;
+ /* The free running counter is always active. */
+ if (uncore_pmc_freerunning(event->hw.idx))
+ return 0;
+
fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
if (!fake_box)
return -ENOMEM;
@@ -690,6 +739,17 @@ static int uncore_pmu_event_init(struct perf_event *event)
/* fixed counters have event field hardcoded to zero */
hwc->config = 0ULL;
+ } else if (is_freerunning_event(event)) {
+ if (!check_valid_freerunning_event(box, event))
+ return -EINVAL;
+ event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
+ /*
+ * The free running counter event and free running counter
+ * are always 1:1 mapped.
+ * The free running counter is always active.
+ * Assign the free running counter here.
+ */
+ event->hw.event_base = uncore_freerunning_counter(box, event);
} else {
hwc->config = event->attr.config &
(pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 414dc7e7c950..c9e1e0bef3c3 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -12,8 +12,13 @@
#define UNCORE_FIXED_EVENT 0xff
#define UNCORE_PMC_IDX_MAX_GENERIC 8
+#define UNCORE_PMC_IDX_MAX_FIXED 1
+#define UNCORE_PMC_IDX_MAX_FREERUNNING 1
#define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC
-#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1)
+#define UNCORE_PMC_IDX_FREERUNNING (UNCORE_PMC_IDX_FIXED + \
+ UNCORE_PMC_IDX_MAX_FIXED)
+#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FREERUNNING + \
+ UNCORE_PMC_IDX_MAX_FREERUNNING)
#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \
((dev << 24) | (func << 16) | (type << 8) | idx)
@@ -35,6 +40,7 @@ struct intel_uncore_ops;
struct intel_uncore_pmu;
struct intel_uncore_box;
struct uncore_event_desc;
+struct freerunning_counters;
struct intel_uncore_type {
const char *name;
@@ -42,6 +48,7 @@ struct intel_uncore_type {
int num_boxes;
int perf_ctr_bits;
int fixed_ctr_bits;
+ int num_freerunning_types;
unsigned perf_ctr;
unsigned event_ctl;
unsigned event_mask;
@@ -59,6 +66,7 @@ struct intel_uncore_type {
struct intel_uncore_pmu *pmus;
struct intel_uncore_ops *ops;
struct uncore_event_desc *event_descs;
+ struct freerunning_counters *freerunning;
const struct attribute_group *attr_groups[4];
struct pmu *pmu; /* for custom pmu ops */
};
@@ -129,6 +137,14 @@ struct uncore_event_desc {
const char *config;
};
+struct freerunning_counters {
+ unsigned int counter_base;
+ unsigned int counter_offset;
+ unsigned int box_offset;
+ unsigned int num_counters;
+ unsigned int bits;
+};
+
struct pci2phy_map {
struct list_head list;
int segment;
@@ -157,6 +173,16 @@ static ssize_t __uncore_##_var##_show(struct kobject *kobj, \
static struct kobj_attribute format_attr_##_var = \
__ATTR(_name, 0444, __uncore_##_var##_show, NULL)
+static inline bool uncore_pmc_fixed(int idx)
+{
+ return idx == UNCORE_PMC_IDX_FIXED;
+}
+
+static inline bool uncore_pmc_freerunning(int idx)
+{
+ return idx == UNCORE_PMC_IDX_FREERUNNING;
+}
+
static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box)
{
return box->pmu->type->box_ctl;
@@ -214,6 +240,60 @@ static inline unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box)
return box->pmu->type->fixed_ctr + uncore_msr_box_offset(box);
}
+
+/*
+ * In the uncore document, there is no event-code assigned to free running
+ * counters. Some events need to be defined to indicate the free running
+ * counters. The events are encoded as event-code + umask-code.
+ *
+ * The event-code for all free running counters is 0xff, which is the same as
+ * the fixed counters.
+ *
+ * The umask-code is used to distinguish a fixed counter and a free running
+ * counter, and different types of free running counters.
+ * - For fixed counters, the umask-code is 0x0X.
+ * X indicates the index of the fixed counter, which starts from 0.
+ * - For free running counters, the umask-code uses the rest of the space.
+ * It would bare the format of 0xXY.
+ * X stands for the type of free running counters, which starts from 1.
+ * Y stands for the index of free running counters of same type, which
+ * starts from 0.
+ *
+ * For example, there are three types of IIO free running counters on Skylake
+ * server, IO CLOCKS counters, BANDWIDTH counters and UTILIZATION counters.
+ * The event-code for all the free running counters is 0xff.
+ * 'ioclk' is the first counter of IO CLOCKS. IO CLOCKS is the first type,
+ * which umask-code starts from 0x10.
+ * So 'ioclk' is encoded as event=0xff,umask=0x10
+ * 'bw_in_port2' is the third counter of BANDWIDTH counters. BANDWIDTH is
+ * the second type, which umask-code starts from 0x20.
+ * So 'bw_in_port2' is encoded as event=0xff,umask=0x22
+ */
+static inline unsigned int uncore_freerunning_idx(u64 config)
+{
+ return ((config >> 8) & 0xf);
+}
+
+#define UNCORE_FREERUNNING_UMASK_START 0x10
+
+static inline unsigned int uncore_freerunning_type(u64 config)
+{
+ return ((((config >> 8) - UNCORE_FREERUNNING_UMASK_START) >> 4) & 0xf);
+}
+
+static inline
+unsigned int uncore_freerunning_counter(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ unsigned int type = uncore_freerunning_type(event->attr.config);
+ unsigned int idx = uncore_freerunning_idx(event->attr.config);
+ struct intel_uncore_pmu *pmu = box->pmu;
+
+ return pmu->type->freerunning[type].counter_base +
+ pmu->type->freerunning[type].counter_offset * idx +
+ pmu->type->freerunning[type].box_offset * pmu->pmu_idx;
+}
+
static inline
unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
{
@@ -276,11 +356,52 @@ static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box)
return box->pmu->type->fixed_ctr_bits;
}
+static inline
+unsigned int uncore_freerunning_bits(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ unsigned int type = uncore_freerunning_type(event->attr.config);
+
+ return box->pmu->type->freerunning[type].bits;
+}
+
+static inline int uncore_num_freerunning(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ unsigned int type = uncore_freerunning_type(event->attr.config);
+
+ return box->pmu->type->freerunning[type].num_counters;
+}
+
+static inline int uncore_num_freerunning_types(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ return box->pmu->type->num_freerunning_types;
+}
+
+static inline bool check_valid_freerunning_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ unsigned int type = uncore_freerunning_type(event->attr.config);
+ unsigned int idx = uncore_freerunning_idx(event->attr.config);
+
+ return (type < uncore_num_freerunning_types(box, event)) &&
+ (idx < uncore_num_freerunning(box, event));
+}
+
static inline int uncore_num_counters(struct intel_uncore_box *box)
{
return box->pmu->type->num_counters;
}
+static inline bool is_freerunning_event(struct perf_event *event)
+{
+ u64 cfg = event->attr.config;
+
+ return ((cfg & UNCORE_FIXED_EVENT) == UNCORE_FIXED_EVENT) &&
+ (((cfg >> 8) & 0xff) >= UNCORE_FREERUNNING_UMASK_START);
+}
+
static inline void uncore_disable_box(struct intel_uncore_box *box)
{
if (box->pmu->type->ops->disable_box)
@@ -346,6 +467,10 @@ struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu
u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event);
void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
+void uncore_pmu_event_start(struct perf_event *event, int flags);
+void uncore_pmu_event_stop(struct perf_event *event, int flags);
+int uncore_pmu_event_add(struct perf_event *event, int flags);
+void uncore_pmu_event_del(struct perf_event *event, int flags);
void uncore_pmu_event_read(struct perf_event *event);
void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event);
struct event_constraint *
diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c
index 93e7a8397cde..173e2674be6e 100644
--- a/arch/x86/events/intel/uncore_nhmex.c
+++ b/arch/x86/events/intel/uncore_nhmex.c
@@ -246,7 +246,7 @@ static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct p
{
struct hw_perf_event *hwc = &event->hw;
- if (hwc->idx >= UNCORE_PMC_IDX_FIXED)
+ if (hwc->idx == UNCORE_PMC_IDX_FIXED)
wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0);
else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0)
wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index aee5e8496be4..8527c3e1038b 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -285,6 +285,15 @@ static struct uncore_event_desc snb_uncore_imc_events[] = {
#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054
#define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE
+enum perf_snb_uncore_imc_freerunning_types {
+ SNB_PCI_UNCORE_IMC_DATA = 0,
+ SNB_PCI_UNCORE_IMC_FREERUNNING_TYPE_MAX,
+};
+
+static struct freerunning_counters snb_uncore_imc_freerunning[] = {
+ [SNB_PCI_UNCORE_IMC_DATA] = { SNB_UNCORE_PCI_IMC_DATA_READS_BASE, 0x4, 0x0, 2, 32 },
+};
+
static struct attribute *snb_uncore_imc_formats_attr[] = {
&format_attr_event.attr,
NULL,
@@ -341,9 +350,8 @@ static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf
}
/*
- * custom event_init() function because we define our own fixed, free
- * running counters, so we do not want to conflict with generic uncore
- * logic. Also simplifies processing
+ * Keep the custom event_init() function compatible with old event
+ * encoding for free running counters.
*/
static int snb_uncore_imc_event_init(struct perf_event *event)
{
@@ -405,11 +413,11 @@ static int snb_uncore_imc_event_init(struct perf_event *event)
switch (cfg) {
case SNB_UNCORE_PCI_IMC_DATA_READS:
base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE;
- idx = UNCORE_PMC_IDX_FIXED;
+ idx = UNCORE_PMC_IDX_FREERUNNING;
break;
case SNB_UNCORE_PCI_IMC_DATA_WRITES:
base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE;
- idx = UNCORE_PMC_IDX_FIXED + 1;
+ idx = UNCORE_PMC_IDX_FREERUNNING;
break;
default:
return -EINVAL;
@@ -430,75 +438,6 @@ static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_ev
return 0;
}
-static void snb_uncore_imc_event_start(struct perf_event *event, int flags)
-{
- struct intel_uncore_box *box = uncore_event_to_box(event);
- u64 count;
-
- if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
- return;
-
- event->hw.state = 0;
- box->n_active++;
-
- list_add_tail(&event->active_entry, &box->active_list);
-
- count = snb_uncore_imc_read_counter(box, event);
- local64_set(&event->hw.prev_count, count);
-
- if (box->n_active == 1)
- uncore_pmu_start_hrtimer(box);
-}
-
-static void snb_uncore_imc_event_stop(struct perf_event *event, int flags)
-{
- struct intel_uncore_box *box = uncore_event_to_box(event);
- struct hw_perf_event *hwc = &event->hw;
-
- if (!(hwc->state & PERF_HES_STOPPED)) {
- box->n_active--;
-
- WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
- hwc->state |= PERF_HES_STOPPED;
-
- list_del(&event->active_entry);
-
- if (box->n_active == 0)
- uncore_pmu_cancel_hrtimer(box);
- }
-
- if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
- /*
- * Drain the remaining delta count out of a event
- * that we are disabling:
- */
- uncore_perf_event_update(box, event);
- hwc->state |= PERF_HES_UPTODATE;
- }
-}
-
-static int snb_uncore_imc_event_add(struct perf_event *event, int flags)
-{
- struct intel_uncore_box *box = uncore_event_to_box(event);
- struct hw_perf_event *hwc = &event->hw;
-
- if (!box)
- return -ENODEV;
-
- hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
- if (!(flags & PERF_EF_START))
- hwc->state |= PERF_HES_ARCH;
-
- snb_uncore_imc_event_start(event, 0);
-
- return 0;
-}
-
-static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
-{
- snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);
-}
-
int snb_pci2phy_map_init(int devid)
{
struct pci_dev *dev = NULL;
@@ -530,10 +469,10 @@ int snb_pci2phy_map_init(int devid)
static struct pmu snb_uncore_imc_pmu = {
.task_ctx_nr = perf_invalid_context,
.event_init = snb_uncore_imc_event_init,
- .add = snb_uncore_imc_event_add,
- .del = snb_uncore_imc_event_del,
- .start = snb_uncore_imc_event_start,
- .stop = snb_uncore_imc_event_stop,
+ .add = uncore_pmu_event_add,
+ .del = uncore_pmu_event_del,
+ .start = uncore_pmu_event_start,
+ .stop = uncore_pmu_event_stop,
.read = uncore_pmu_event_read,
};
@@ -552,12 +491,10 @@ static struct intel_uncore_type snb_uncore_imc = {
.name = "imc",
.num_counters = 2,
.num_boxes = 1,
- .fixed_ctr_bits = 32,
- .fixed_ctr = SNB_UNCORE_PCI_IMC_CTR_BASE,
+ .num_freerunning_types = SNB_PCI_UNCORE_IMC_FREERUNNING_TYPE_MAX,
+ .freerunning = snb_uncore_imc_freerunning,
.event_descs = snb_uncore_imc_events,
.format_group = &snb_uncore_imc_format_group,
- .perf_ctr = SNB_UNCORE_PCI_IMC_DATA_READS_BASE,
- .event_mask = SNB_UNCORE_PCI_IMC_EVENT_MASK,
.ops = &snb_uncore_imc_ops,
.pmu = &snb_uncore_imc_pmu,
};
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index c98b943e58b4..87dc0263a2e1 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3028,10 +3028,27 @@ static struct intel_uncore_type bdx_uncore_cbox = {
.format_group = &hswep_uncore_cbox_format_group,
};
+static struct intel_uncore_type bdx_uncore_sbox = {
+ .name = "sbox",
+ .num_counters = 4,
+ .num_boxes = 4,
+ .perf_ctr_bits = 48,
+ .event_ctl = HSWEP_S0_MSR_PMON_CTL0,
+ .perf_ctr = HSWEP_S0_MSR_PMON_CTR0,
+ .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL,
+ .msr_offset = HSWEP_SBOX_MSR_OFFSET,
+ .ops = &hswep_uncore_sbox_msr_ops,
+ .format_group = &hswep_uncore_sbox_format_group,
+};
+
+#define BDX_MSR_UNCORE_SBOX 3
+
static struct intel_uncore_type *bdx_msr_uncores[] = {
&bdx_uncore_ubox,
&bdx_uncore_cbox,
&hswep_uncore_pcu,
+ &bdx_uncore_sbox,
NULL,
};
@@ -3043,10 +3060,25 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = {
void bdx_uncore_cpu_init(void)
{
+ int pkg = topology_phys_to_logical_pkg(0);
+
if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
uncore_msr_uncores = bdx_msr_uncores;
+ /* BDX-DE doesn't have SBOX */
+ if (boot_cpu_data.x86_model == 86) {
+ uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL;
+ /* Detect systems with no SBOXes */
+ } else if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) {
+ struct pci_dev *pdev;
+ u32 capid4;
+
+ pdev = uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3];
+ pci_read_config_dword(pdev, 0x94, &capid4);
+ if (((capid4 >> 6) & 0x3) == 0)
+ bdx_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL;
+ }
hswep_uncore_pcu.constraints = bdx_uncore_pcu_constraints;
}
@@ -3264,6 +3296,11 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f46),
.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 2),
},
+ { /* PCU.3 (for Capability registers) */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fc0),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ HSWEP_PCI_PCU_3),
+ },
{ /* end: all zeroes */ }
};
@@ -3485,6 +3522,87 @@ static struct intel_uncore_type skx_uncore_iio = {
.format_group = &skx_uncore_iio_format_group,
};
+enum perf_uncore_iio_freerunning_type_id {
+ SKX_IIO_MSR_IOCLK = 0,
+ SKX_IIO_MSR_BW = 1,
+ SKX_IIO_MSR_UTIL = 2,
+
+ SKX_IIO_FREERUNNING_TYPE_MAX,
+};
+
+
+static struct freerunning_counters skx_iio_freerunning[] = {
+ [SKX_IIO_MSR_IOCLK] = { 0xa45, 0x1, 0x20, 1, 36 },
+ [SKX_IIO_MSR_BW] = { 0xb00, 0x1, 0x10, 8, 36 },
+ [SKX_IIO_MSR_UTIL] = { 0xb08, 0x1, 0x10, 8, 36 },
+};
+
+static struct uncore_event_desc skx_uncore_iio_freerunning_events[] = {
+ /* Free-Running IO CLOCKS Counter */
+ INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"),
+ /* Free-Running IIO BANDWIDTH Counters */
+ INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port0, "event=0xff,umask=0x24"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port0.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port0.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port1, "event=0xff,umask=0x25"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port1.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port1.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port2, "event=0xff,umask=0x26"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port2.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port2.unit, "MiB"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port3, "event=0xff,umask=0x27"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port3.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_out_port3.unit, "MiB"),
+ /* Free-running IIO UTILIZATION Counters */
+ INTEL_UNCORE_EVENT_DESC(util_in_port0, "event=0xff,umask=0x30"),
+ INTEL_UNCORE_EVENT_DESC(util_out_port0, "event=0xff,umask=0x31"),
+ INTEL_UNCORE_EVENT_DESC(util_in_port1, "event=0xff,umask=0x32"),
+ INTEL_UNCORE_EVENT_DESC(util_out_port1, "event=0xff,umask=0x33"),
+ INTEL_UNCORE_EVENT_DESC(util_in_port2, "event=0xff,umask=0x34"),
+ INTEL_UNCORE_EVENT_DESC(util_out_port2, "event=0xff,umask=0x35"),
+ INTEL_UNCORE_EVENT_DESC(util_in_port3, "event=0xff,umask=0x36"),
+ INTEL_UNCORE_EVENT_DESC(util_out_port3, "event=0xff,umask=0x37"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops skx_uncore_iio_freerunning_ops = {
+ .read_counter = uncore_msr_read_counter,
+};
+
+static struct attribute *skx_uncore_iio_freerunning_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ NULL,
+};
+
+static const struct attribute_group skx_uncore_iio_freerunning_format_group = {
+ .name = "format",
+ .attrs = skx_uncore_iio_freerunning_formats_attr,
+};
+
+static struct intel_uncore_type skx_uncore_iio_free_running = {
+ .name = "iio_free_running",
+ .num_counters = 17,
+ .num_boxes = 6,
+ .num_freerunning_types = SKX_IIO_FREERUNNING_TYPE_MAX,
+ .freerunning = skx_iio_freerunning,
+ .ops = &skx_uncore_iio_freerunning_ops,
+ .event_descs = skx_uncore_iio_freerunning_events,
+ .format_group = &skx_uncore_iio_freerunning_format_group,
+};
+
static struct attribute *skx_uncore_formats_attr[] = {
&format_attr_event.attr,
&format_attr_umask.attr,
@@ -3558,6 +3676,7 @@ static struct intel_uncore_type *skx_msr_uncores[] = {
&skx_uncore_ubox,
&skx_uncore_chabox,
&skx_uncore_iio,
+ &skx_uncore_iio_free_running,
&skx_uncore_irp,
&skx_uncore_pcu,
NULL,
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index e7edf19e64c2..b4771a6ddbc1 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
+#include <linux/nospec.h>
#include <asm/intel-family.h>
enum perf_msr_id {
@@ -158,9 +159,6 @@ static int msr_event_init(struct perf_event *event)
if (event->attr.type != event->pmu->type)
return -ENOENT;
- if (cfg >= PERF_MSR_EVENT_MAX)
- return -EINVAL;
-
/* unsupported modes and filters */
if (event->attr.exclude_user ||
event->attr.exclude_kernel ||
@@ -171,6 +169,11 @@ static int msr_event_init(struct perf_event *event)
event->attr.sample_period) /* no sampling */
return -EINVAL;
+ if (cfg >= PERF_MSR_EVENT_MAX)
+ return -EINVAL;
+
+ cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX);
+
if (!msr[cfg].attr)
return -EINVAL;
diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
index 367a8203cfcf..b173d404e3df 100644
--- a/arch/x86/hyperv/Makefile
+++ b/arch/x86/hyperv/Makefile
@@ -1 +1,2 @@
-obj-y := hv_init.o mmu.o
+obj-y := hv_init.o mmu.o
+obj-$(CONFIG_X86_64) += hv_apic.o
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
new file mode 100644
index 000000000000..f68855499391
--- /dev/null
+++ b/arch/x86/hyperv/hv_apic.c
@@ -0,0 +1,256 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Hyper-V specific APIC code.
+ *
+ * Copyright (C) 2018, Microsoft, Inc.
+ *
+ * Author : K. Y. Srinivasan <kys@microsoft.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/clockchips.h>
+#include <linux/hyperv.h>
+#include <linux/slab.h>
+#include <linux/cpuhotplug.h>
+#include <asm/hypervisor.h>
+#include <asm/mshyperv.h>
+#include <asm/apic.h>
+
+static struct apic orig_apic;
+
+static u64 hv_apic_icr_read(void)
+{
+ u64 reg_val;
+
+ rdmsrl(HV_X64_MSR_ICR, reg_val);
+ return reg_val;
+}
+
+static void hv_apic_icr_write(u32 low, u32 id)
+{
+ u64 reg_val;
+
+ reg_val = SET_APIC_DEST_FIELD(id);
+ reg_val = reg_val << 32;
+ reg_val |= low;
+
+ wrmsrl(HV_X64_MSR_ICR, reg_val);
+}
+
+static u32 hv_apic_read(u32 reg)
+{
+ u32 reg_val, hi;
+
+ switch (reg) {
+ case APIC_EOI:
+ rdmsr(HV_X64_MSR_EOI, reg_val, hi);
+ return reg_val;
+ case APIC_TASKPRI:
+ rdmsr(HV_X64_MSR_TPR, reg_val, hi);
+ return reg_val;
+
+ default:
+ return native_apic_mem_read(reg);
+ }
+}
+
+static void hv_apic_write(u32 reg, u32 val)
+{
+ switch (reg) {
+ case APIC_EOI:
+ wrmsr(HV_X64_MSR_EOI, val, 0);
+ break;
+ case APIC_TASKPRI:
+ wrmsr(HV_X64_MSR_TPR, val, 0);
+ break;
+ default:
+ native_apic_mem_write(reg, val);
+ }
+}
+
+static void hv_apic_eoi_write(u32 reg, u32 val)
+{
+ wrmsr(HV_X64_MSR_EOI, val, 0);
+}
+
+/*
+ * IPI implementation on Hyper-V.
+ */
+static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
+{
+ struct ipi_arg_ex **arg;
+ struct ipi_arg_ex *ipi_arg;
+ unsigned long flags;
+ int nr_bank = 0;
+ int ret = 1;
+
+ local_irq_save(flags);
+ arg = (struct ipi_arg_ex **)this_cpu_ptr(hyperv_pcpu_input_arg);
+
+ ipi_arg = *arg;
+ if (unlikely(!ipi_arg))
+ goto ipi_mask_ex_done;
+
+ ipi_arg->vector = vector;
+ ipi_arg->reserved = 0;
+ ipi_arg->vp_set.valid_bank_mask = 0;
+
+ if (!cpumask_equal(mask, cpu_present_mask)) {
+ ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
+ }
+ if (!nr_bank)
+ ipi_arg->vp_set.format = HV_GENERIC_SET_ALL;
+
+ ret = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank,
+ ipi_arg, NULL);
+
+ipi_mask_ex_done:
+ local_irq_restore(flags);
+ return ((ret == 0) ? true : false);
+}
+
+static bool __send_ipi_mask(const struct cpumask *mask, int vector)
+{
+ int cur_cpu, vcpu;
+ struct ipi_arg_non_ex **arg;
+ struct ipi_arg_non_ex *ipi_arg;
+ int ret = 1;
+ unsigned long flags;
+
+ if (cpumask_empty(mask))
+ return true;
+
+ if (!hv_hypercall_pg)
+ return false;
+
+ if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+ return false;
+
+ if ((ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
+ return __send_ipi_mask_ex(mask, vector);
+
+ local_irq_save(flags);
+ arg = (struct ipi_arg_non_ex **)this_cpu_ptr(hyperv_pcpu_input_arg);
+
+ ipi_arg = *arg;
+ if (unlikely(!ipi_arg))
+ goto ipi_mask_done;
+
+ ipi_arg->vector = vector;
+ ipi_arg->reserved = 0;
+ ipi_arg->cpu_mask = 0;
+
+ for_each_cpu(cur_cpu, mask) {
+ vcpu = hv_cpu_number_to_vp_number(cur_cpu);
+ /*
+ * This particular version of the IPI hypercall can
+ * only target upto 64 CPUs.
+ */
+ if (vcpu >= 64)
+ goto ipi_mask_done;
+
+ __set_bit(vcpu, (unsigned long *)&ipi_arg->cpu_mask);
+ }
+
+ ret = hv_do_hypercall(HVCALL_SEND_IPI, ipi_arg, NULL);
+
+ipi_mask_done:
+ local_irq_restore(flags);
+ return ((ret == 0) ? true : false);
+}
+
+static bool __send_ipi_one(int cpu, int vector)
+{
+ struct cpumask mask = CPU_MASK_NONE;
+
+ cpumask_set_cpu(cpu, &mask);
+ return __send_ipi_mask(&mask, vector);
+}
+
+static void hv_send_ipi(int cpu, int vector)
+{
+ if (!__send_ipi_one(cpu, vector))
+ orig_apic.send_IPI(cpu, vector);
+}
+
+static void hv_send_ipi_mask(const struct cpumask *mask, int vector)
+{
+ if (!__send_ipi_mask(mask, vector))
+ orig_apic.send_IPI_mask(mask, vector);
+}
+
+static void hv_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
+{
+ unsigned int this_cpu = smp_processor_id();
+ struct cpumask new_mask;
+ const struct cpumask *local_mask;
+
+ cpumask_copy(&new_mask, mask);
+ cpumask_clear_cpu(this_cpu, &new_mask);
+ local_mask = &new_mask;
+ if (!__send_ipi_mask(local_mask, vector))
+ orig_apic.send_IPI_mask_allbutself(mask, vector);
+}
+
+static void hv_send_ipi_allbutself(int vector)
+{
+ hv_send_ipi_mask_allbutself(cpu_online_mask, vector);
+}
+
+static void hv_send_ipi_all(int vector)
+{
+ if (!__send_ipi_mask(cpu_online_mask, vector))
+ orig_apic.send_IPI_all(vector);
+}
+
+static void hv_send_ipi_self(int vector)
+{
+ if (!__send_ipi_one(smp_processor_id(), vector))
+ orig_apic.send_IPI_self(vector);
+}
+
+void __init hv_apic_init(void)
+{
+ if (ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) {
+ if ((ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
+ pr_info("Hyper-V: Using ext hypercalls for IPI\n");
+ else
+ pr_info("Hyper-V: Using IPI hypercalls\n");
+ /*
+ * Set the IPI entry points.
+ */
+ orig_apic = *apic;
+
+ apic->send_IPI = hv_send_ipi;
+ apic->send_IPI_mask = hv_send_ipi_mask;
+ apic->send_IPI_mask_allbutself = hv_send_ipi_mask_allbutself;
+ apic->send_IPI_allbutself = hv_send_ipi_allbutself;
+ apic->send_IPI_all = hv_send_ipi_all;
+ apic->send_IPI_self = hv_send_ipi_self;
+ }
+
+ if (ms_hyperv.hints & HV_X64_APIC_ACCESS_RECOMMENDED) {
+ pr_info("Hyper-V: Using MSR based APIC access\n");
+ apic_set_eoi_write(hv_apic_eoi_write);
+ apic->read = hv_apic_read;
+ apic->write = hv_apic_write;
+ apic->icr_write = hv_apic_icr_write;
+ apic->icr_read = hv_apic_icr_read;
+ }
+}
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index cfecc2272f2d..4c431e1c1eff 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -91,12 +91,19 @@ EXPORT_SYMBOL_GPL(hv_vp_index);
struct hv_vp_assist_page **hv_vp_assist_page;
EXPORT_SYMBOL_GPL(hv_vp_assist_page);
+void __percpu **hyperv_pcpu_input_arg;
+EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
+
u32 hv_max_vp_index;
static int hv_cpu_init(unsigned int cpu)
{
u64 msr_vp_index;
struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
+ void **input_arg;
+
+ input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
+ *input_arg = page_address(alloc_page(GFP_KERNEL));
hv_get_vp_index(msr_vp_index);
@@ -217,6 +224,16 @@ static int hv_cpu_die(unsigned int cpu)
{
struct hv_reenlightenment_control re_ctrl;
unsigned int new_cpu;
+ unsigned long flags;
+ void **input_arg;
+ void *input_pg = NULL;
+
+ local_irq_save(flags);
+ input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
+ input_pg = *input_arg;
+ *input_arg = NULL;
+ local_irq_restore(flags);
+ free_page((unsigned long)input_pg);
if (hv_vp_assist_page && hv_vp_assist_page[cpu])
wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0);
@@ -242,8 +259,9 @@ static int hv_cpu_die(unsigned int cpu)
*
* 1. Setup the hypercall page.
* 2. Register Hyper-V specific clocksource.
+ * 3. Setup Hyper-V specific APIC entry points.
*/
-void hyperv_init(void)
+void __init hyperv_init(void)
{
u64 guest_id, required_msrs;
union hv_x64_msr_hypercall_contents hypercall_msr;
@@ -259,6 +277,16 @@ void hyperv_init(void)
if ((ms_hyperv.features & required_msrs) != required_msrs)
return;
+ /*
+ * Allocate the per-CPU state for the hypercall input arg.
+ * If this allocation fails, we will not be able to setup
+ * (per-CPU) hypercall input page and thus this failure is
+ * fatal on Hyper-V.
+ */
+ hyperv_pcpu_input_arg = alloc_percpu(void *);
+
+ BUG_ON(hyperv_pcpu_input_arg == NULL);
+
/* Allocate percpu VP index */
hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
GFP_KERNEL);
@@ -296,7 +324,7 @@ void hyperv_init(void)
hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
- hyper_alloc_mmu();
+ hv_apic_init();
/*
* Register Hyper-V specific clocksource.
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index 56c9ebac946f..5f053d7d1bd9 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -25,20 +25,13 @@ struct hv_flush_pcpu {
struct hv_flush_pcpu_ex {
u64 address_space;
u64 flags;
- struct {
- u64 format;
- u64 valid_bank_mask;
- u64 bank_contents[];
- } hv_vp_set;
+ struct hv_vpset hv_vp_set;
u64 gva_list[];
};
/* Each gva in gva_list encodes up to 4096 pages to flush */
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
-static struct hv_flush_pcpu __percpu **pcpu_flush;
-
-static struct hv_flush_pcpu_ex __percpu **pcpu_flush_ex;
/*
* Fills in gva_list starting from offset. Returns the number of items added.
@@ -70,41 +63,6 @@ static inline int fill_gva_list(u64 gva_list[], int offset,
return gva_n - offset;
}
-/* Return the number of banks in the resulting vp_set */
-static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
- const struct cpumask *cpus)
-{
- int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
-
- /* valid_bank_mask can represent up to 64 banks */
- if (hv_max_vp_index / 64 >= 64)
- return 0;
-
- /*
- * Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex
- * structs are not cleared between calls, we risk flushing unneeded
- * vCPUs otherwise.
- */
- for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
- flush->hv_vp_set.bank_contents[vcpu_bank] = 0;
-
- /*
- * Some banks may end up being empty but this is acceptable.
- */
- for_each_cpu(cpu, cpus) {
- vcpu = hv_cpu_number_to_vp_number(cpu);
- vcpu_bank = vcpu / 64;
- vcpu_offset = vcpu % 64;
- __set_bit(vcpu_offset, (unsigned long *)
- &flush->hv_vp_set.bank_contents[vcpu_bank]);
- if (vcpu_bank >= nr_bank)
- nr_bank = vcpu_bank + 1;
- }
- flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);
-
- return nr_bank;
-}
-
static void hyperv_flush_tlb_others(const struct cpumask *cpus,
const struct flush_tlb_info *info)
{
@@ -116,7 +74,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
trace_hyperv_mmu_flush_tlb_others(cpus, info);
- if (!pcpu_flush || !hv_hypercall_pg)
+ if (!hv_hypercall_pg)
goto do_native;
if (cpumask_empty(cpus))
@@ -124,10 +82,8 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
local_irq_save(flags);
- flush_pcpu = this_cpu_ptr(pcpu_flush);
-
- if (unlikely(!*flush_pcpu))
- *flush_pcpu = page_address(alloc_page(GFP_ATOMIC));
+ flush_pcpu = (struct hv_flush_pcpu **)
+ this_cpu_ptr(hyperv_pcpu_input_arg);
flush = *flush_pcpu;
@@ -203,7 +159,7 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
trace_hyperv_mmu_flush_tlb_others(cpus, info);
- if (!pcpu_flush_ex || !hv_hypercall_pg)
+ if (!hv_hypercall_pg)
goto do_native;
if (cpumask_empty(cpus))
@@ -211,10 +167,8 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
local_irq_save(flags);
- flush_pcpu = this_cpu_ptr(pcpu_flush_ex);
-
- if (unlikely(!*flush_pcpu))
- *flush_pcpu = page_address(alloc_page(GFP_ATOMIC));
+ flush_pcpu = (struct hv_flush_pcpu_ex **)
+ this_cpu_ptr(hyperv_pcpu_input_arg);
flush = *flush_pcpu;
@@ -239,8 +193,8 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
flush->hv_vp_set.valid_bank_mask = 0;
if (!cpumask_equal(cpus, cpu_present_mask)) {
- flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K;
- nr_bank = cpumask_to_vp_set(flush, cpus);
+ flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus);
}
if (!nr_bank) {
@@ -296,14 +250,3 @@ void hyperv_setup_mmu_ops(void)
pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
}
}
-
-void hyper_alloc_mmu(void)
-{
- if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
- return;
-
- if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
- pcpu_flush = alloc_percpu(struct hv_flush_pcpu *);
- else
- pcpu_flush_ex = alloc_percpu(struct hv_flush_pcpu_ex *);
-}
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 386a6900e206..219faaec51df 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -136,7 +136,6 @@
#endif
#ifndef __ASSEMBLY__
-#ifndef __BPF__
/*
* This output constraint should be used for any inline asm which has a "call"
* instruction. Otherwise the asm may be inserted before the frame pointer
@@ -146,6 +145,5 @@
register unsigned long current_stack_pointer asm(_ASM_SP);
#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
#endif
-#endif
#endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/cacheinfo.h b/arch/x86/include/asm/cacheinfo.h
new file mode 100644
index 000000000000..e958e28f7ab5
--- /dev/null
+++ b/arch/x86/include/asm/cacheinfo.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_CACHEINFO_H
+#define _ASM_X86_CACHEINFO_H
+
+void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id);
+
+#endif /* _ASM_X86_CACHEINFO_H */
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index e1c8dab86670..fb97cf7c4137 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -17,7 +17,6 @@
typedef u32 compat_size_t;
typedef s32 compat_ssize_t;
-typedef s32 compat_time_t;
typedef s32 compat_clock_t;
typedef s32 compat_pid_t;
typedef u16 __compat_uid_t;
@@ -46,16 +45,6 @@ typedef u32 compat_u32;
typedef u64 __attribute__((aligned(4))) compat_u64;
typedef u32 compat_uptr_t;
-struct compat_timespec {
- compat_time_t tv_sec;
- s32 tv_nsec;
-};
-
-struct compat_timeval {
- compat_time_t tv_sec;
- s32 tv_usec;
-};
-
struct compat_stat {
compat_dev_t st_dev;
u16 __pad1;
@@ -145,10 +134,10 @@ struct compat_ipc64_perm {
struct compat_semid64_ds {
struct compat_ipc64_perm sem_perm;
- compat_time_t sem_otime;
- compat_ulong_t __unused1;
- compat_time_t sem_ctime;
- compat_ulong_t __unused2;
+ compat_ulong_t sem_otime;
+ compat_ulong_t sem_otime_high;
+ compat_ulong_t sem_ctime;
+ compat_ulong_t sem_ctime_high;
compat_ulong_t sem_nsems;
compat_ulong_t __unused3;
compat_ulong_t __unused4;
@@ -156,12 +145,12 @@ struct compat_semid64_ds {
struct compat_msqid64_ds {
struct compat_ipc64_perm msg_perm;
- compat_time_t msg_stime;
- compat_ulong_t __unused1;
- compat_time_t msg_rtime;
- compat_ulong_t __unused2;
- compat_time_t msg_ctime;
- compat_ulong_t __unused3;
+ compat_ulong_t msg_stime;
+ compat_ulong_t msg_stime_high;
+ compat_ulong_t msg_rtime;
+ compat_ulong_t msg_rtime_high;
+ compat_ulong_t msg_ctime;
+ compat_ulong_t msg_ctime_high;
compat_ulong_t msg_cbytes;
compat_ulong_t msg_qnum;
compat_ulong_t msg_qbytes;
@@ -174,12 +163,12 @@ struct compat_msqid64_ds {
struct compat_shmid64_ds {
struct compat_ipc64_perm shm_perm;
compat_size_t shm_segsz;
- compat_time_t shm_atime;
- compat_ulong_t __unused1;
- compat_time_t shm_dtime;
- compat_ulong_t __unused2;
- compat_time_t shm_ctime;
- compat_ulong_t __unused3;
+ compat_ulong_t shm_atime;
+ compat_ulong_t shm_atime_high;
+ compat_ulong_t shm_dtime;
+ compat_ulong_t shm_dtime_high;
+ compat_ulong_t shm_ctime;
+ compat_ulong_t shm_ctime_high;
compat_pid_t shm_cpid;
compat_pid_t shm_lpid;
compat_ulong_t shm_nattch;
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index b27da9602a6d..aced6c9290d6 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -140,6 +140,20 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
+#if defined(__clang__) && !defined(CC_HAVE_ASM_GOTO)
+
+/*
+ * Workaround for the sake of BPF compilation which utilizes kernel
+ * headers, but clang does not support ASM GOTO and fails the build.
+ */
+#ifndef __BPF_TRACING__
+#warning "Compiler lacks ASM_GOTO support. Add -D __BPF_TRACING__ to your compiler arguments"
+#endif
+
+#define static_cpu_has(bit) boot_cpu_has(bit)
+
+#else
+
/*
* Static testing of CPU features. Used the same as boot_cpu_has().
* These will statically patch the target code for additional
@@ -195,6 +209,7 @@ t_no:
boot_cpu_has(bit) : \
_static_cpu_has(bit) \
)
+#endif
#define cpu_has_bug(c, bit) cpu_has(c, (bit))
#define set_cpu_bug(c, bit) set_cpu_cap(c, (bit))
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index d554c11e01ff..fb00a2fca990 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -198,7 +198,6 @@
#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
-
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
@@ -207,13 +206,19 @@
#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
-
+#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */
-
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
+#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
+#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */
+#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
@@ -274,9 +279,10 @@
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
-#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
-#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
-#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */
+#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
+#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
@@ -320,6 +326,7 @@
#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
+#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */
/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */
@@ -333,6 +340,7 @@
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
/*
* BUG word(s)
@@ -362,5 +370,6 @@
#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
+#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 89ce4bfd241f..ce4d176b3d13 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -30,10 +30,7 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
return dma_ops;
}
-int arch_dma_supported(struct device *dev, u64 mask);
-#define arch_dma_supported arch_dma_supported
-
-bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp);
+bool arch_dma_alloc_attrs(struct device **dev);
#define arch_dma_alloc_attrs arch_dma_alloc_attrs
#endif
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 09ad88572746..c18ed65287d5 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -46,10 +46,24 @@ int ftrace_int3_handler(struct pt_regs *regs);
#endif /* CONFIG_FUNCTION_TRACER */
-#if !defined(__ASSEMBLY__) && !defined(COMPILE_OFFSETS)
+#ifndef __ASSEMBLY__
+
+#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
+static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
+{
+ /*
+ * Compare the symbol name with the system call name. Skip the
+ * "__x64_sys", "__ia32_sys" or simple "sys" prefix.
+ */
+ return !strcmp(sym + 3, name + 3) ||
+ (!strncmp(sym, "__x64_", 6) && !strcmp(sym + 9, name + 3)) ||
+ (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3));
+}
+
+#ifndef COMPILE_OFFSETS
#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_IA32_EMULATION)
-#include <asm/compat.h>
+#include <linux/compat.h>
/*
* Because ia32 syscalls do not map to x86_64 syscall numbers
@@ -67,6 +81,7 @@ static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
return false;
}
#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_IA32_EMULATION */
-#endif /* !__ASSEMBLY__ && !COMPILE_OFFSETS */
+#endif /* !COMPILE_OFFSETS */
+#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_FTRACE_H */
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 5ea2afd4c871..740a428acf1e 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -50,14 +50,6 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
#define inc_irq_stat(member) this_cpu_inc(irq_stat.member)
-#define local_softirq_pending() this_cpu_read(irq_stat.__softirq_pending)
-
-#define __ARCH_SET_SOFTIRQ_PENDING
-
-#define set_softirq_pending(x) \
- this_cpu_write(irq_stat.__softirq_pending, (x))
-#define or_softirq_pending(x) this_cpu_or(irq_stat.__softirq_pending, (x))
-
extern void ack_bad_irq(unsigned int irq);
extern u64 arch_irq_stat_cpu(unsigned int cpu);
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 416cb0e0c496..3bfa92c2793c 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -164,6 +164,11 @@
*/
#define HV_X64_DEPRECATING_AEOI_RECOMMENDED (1 << 9)
+/*
+ * Recommend using cluster IPI hypercalls.
+ */
+#define HV_X64_CLUSTER_IPI_RECOMMENDED (1 << 10)
+
/* Recommend using the newer ExProcessorMasks interface */
#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11)
@@ -329,12 +334,17 @@ struct hv_tsc_emulation_status {
#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
(~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
+#define HV_IPI_LOW_VECTOR 0x10
+#define HV_IPI_HIGH_VECTOR 0xff
+
/* Declare the various hypercall operations. */
#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
+#define HVCALL_SEND_IPI 0x000b
#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
+#define HVCALL_SEND_IPI_EX 0x0015
#define HVCALL_POST_MESSAGE 0x005c
#define HVCALL_SIGNAL_EVENT 0x005d
@@ -360,7 +370,7 @@ struct hv_tsc_emulation_status {
#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
enum HV_GENERIC_SET_FORMAT {
- HV_GENERIC_SET_SPARCE_4K,
+ HV_GENERIC_SET_SPARSE_4K,
HV_GENERIC_SET_ALL,
};
@@ -706,4 +716,22 @@ struct hv_enlightened_vmcs {
#define HV_STIMER_AUTOENABLE (1ULL << 3)
#define HV_STIMER_SINT(config) (__u8)(((config) >> 16) & 0x0F)
+struct ipi_arg_non_ex {
+ u32 vector;
+ u32 reserved;
+ u64 cpu_mask;
+};
+
+struct hv_vpset {
+ u64 format;
+ u64 valid_bank_mask;
+ u64 bank_contents[];
+};
+
+struct ipi_arg_ex {
+ u32 vector;
+ u32 reserved;
+ struct hv_vpset vp_set;
+};
+
#endif
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index b3e32b010ab1..c2c01f84df75 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -208,4 +208,22 @@ static inline int insn_offset_immediate(struct insn *insn)
return insn_offset_displacement(insn) + insn->displacement.nbytes;
}
+#define POP_SS_OPCODE 0x1f
+#define MOV_SREG_OPCODE 0x8e
+
+/*
+ * Intel SDM Vol.3A 6.8.3 states;
+ * "Any single-step trap that would be delivered following the MOV to SS
+ * instruction or POP to SS instruction (because EFLAGS.TF is 1) is
+ * suppressed."
+ * This function returns true if @insn is MOV SS or POP SS. On these
+ * instructions, single stepping is suppressed.
+ */
+static inline int insn_masking_exception(struct insn *insn)
+{
+ return insn->opcode.bytes[0] == POP_SS_OPCODE ||
+ (insn->opcode.bytes[0] == MOV_SREG_OPCODE &&
+ X86_MODRM_REG(insn->modrm.bytes[0]) == 2);
+}
+
#endif /* _ASM_X86_INSN_H */
diff --git a/arch/x86/include/asm/intel_mid_vrtc.h b/arch/x86/include/asm/intel_mid_vrtc.h
index 35555016b1be..0b44b1abe4d9 100644
--- a/arch/x86/include/asm/intel_mid_vrtc.h
+++ b/arch/x86/include/asm/intel_mid_vrtc.h
@@ -4,7 +4,7 @@
extern unsigned char vrtc_cmos_read(unsigned char reg);
extern void vrtc_cmos_write(unsigned char val, unsigned char reg);
-extern void vrtc_get_time(struct timespec *now);
-extern int vrtc_set_mmss(const struct timespec *now);
+extern void vrtc_get_time(struct timespec64 *now);
+extern int vrtc_set_mmss(const struct timespec64 *now);
#endif
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index f6e5b9375d8c..6de64840dd22 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -94,10 +94,10 @@ build_mmio_write(__writel, "l", unsigned int, "r", )
#ifdef CONFIG_X86_64
-build_mmio_read(readq, "q", unsigned long, "=r", :"memory")
-build_mmio_read(__readq, "q", unsigned long, "=r", )
-build_mmio_write(writeq, "q", unsigned long, "r", :"memory")
-build_mmio_write(__writeq, "q", unsigned long, "r", )
+build_mmio_read(readq, "q", u64, "=r", :"memory")
+build_mmio_read(__readq, "q", u64, "=r", )
+build_mmio_write(writeq, "q", u64, "r", :"memory")
+build_mmio_write(__writeq, "q", u64, "r", )
#define readq_relaxed(a) __readq(a)
#define writeq_relaxed(v, a) __writeq(v, a)
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 404c5fdff859..548d90bbf919 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -34,11 +34,6 @@
* (0x80 is the syscall vector, 0x30-0x3f are for ISA)
*/
#define FIRST_EXTERNAL_VECTOR 0x20
-/*
- * We start allocating at 0x21 to spread out vectors evenly between
- * priority levels. (0x80 is the syscall vector)
- */
-#define VECTOR_OFFSET_START 1
/*
* Reserve the lowest usable vector (and hence lowest priority) 0x20 for
@@ -119,8 +114,6 @@
#define FIRST_SYSTEM_VECTOR NR_VECTORS
#endif
-#define FPU_IRQ 13
-
/*
* Size the maximum number of interrupts.
*
diff --git a/arch/x86/include/asm/jailhouse_para.h b/arch/x86/include/asm/jailhouse_para.h
index b885a961a150..a34897aef2c2 100644
--- a/arch/x86/include/asm/jailhouse_para.h
+++ b/arch/x86/include/asm/jailhouse_para.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL2.0 */
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Jailhouse paravirt detection
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 949c977bc4c9..f4b2588865e9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -924,7 +924,7 @@ struct kvm_x86_ops {
int (*hardware_setup)(void); /* __init */
void (*hardware_unsetup)(void); /* __exit */
bool (*cpu_has_accelerated_tpr)(void);
- bool (*cpu_has_high_real_mode_segbase)(void);
+ bool (*has_emulated_msr)(int index);
void (*cpuid_update)(struct kvm_vcpu *vcpu);
struct kvm *(*vm_alloc)(void);
@@ -1013,6 +1013,7 @@ struct kvm_x86_ops {
bool (*has_wbinvd_exit)(void);
+ u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu);
void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h
index 1775a32f7ea6..97198001e567 100644
--- a/arch/x86/include/asm/mc146818rtc.h
+++ b/arch/x86/include/asm/mc146818rtc.h
@@ -95,8 +95,8 @@ static inline unsigned char current_lock_cmos_reg(void)
unsigned char rtc_cmos_read(unsigned char addr);
void rtc_cmos_write(unsigned char val, unsigned char addr);
-extern int mach_set_rtc_mmss(const struct timespec *now);
-extern void mach_get_cmos_time(struct timespec *now);
+extern int mach_set_rtc_mmss(const struct timespec64 *now);
+extern void mach_get_cmos_time(struct timespec64 *now);
#define RTC_IRQ 8
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 57e3785d0d26..bbc796eb0a3b 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -193,7 +193,7 @@ static inline int init_new_context(struct task_struct *tsk,
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
- /* pkey 0 is the default and always allocated */
+ /* pkey 0 is the default and allocated implicitly */
mm->context.pkey_allocation_map = 0x1;
/* -1 means unallocated or invalid */
mm->context.execute_only_pkey = -1;
@@ -288,21 +288,6 @@ static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
mpx_notify_unmap(mm, vma, start, end);
}
-#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-static inline int vma_pkey(struct vm_area_struct *vma)
-{
- unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 |
- VM_PKEY_BIT2 | VM_PKEY_BIT3;
-
- return (vma->vm_flags & vma_pkey_mask) >> VM_PKEY_SHIFT;
-}
-#else
-static inline int vma_pkey(struct vm_area_struct *vma)
-{
- return 0;
-}
-#endif
-
/*
* We only want to enforce protection keys on the current process
* because we effectively have no access to PKRU for other
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index b90e79610cf7..997192131b7b 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -122,6 +122,7 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {}
#if IS_ENABLED(CONFIG_HYPERV)
extern struct clocksource *hyperv_cs;
extern void *hv_hypercall_pg;
+extern void __percpu **hyperv_pcpu_input_arg;
static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
{
@@ -258,9 +259,41 @@ static inline int hv_cpu_number_to_vp_number(int cpu_number)
return hv_vp_index[cpu_number];
}
-void hyperv_init(void);
+static inline int cpumask_to_vpset(struct hv_vpset *vpset,
+ const struct cpumask *cpus)
+{
+ int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
+
+ /* valid_bank_mask can represent up to 64 banks */
+ if (hv_max_vp_index / 64 >= 64)
+ return 0;
+
+ /*
+ * Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex
+ * structs are not cleared between calls, we risk flushing unneeded
+ * vCPUs otherwise.
+ */
+ for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
+ vpset->bank_contents[vcpu_bank] = 0;
+
+ /*
+ * Some banks may end up being empty but this is acceptable.
+ */
+ for_each_cpu(cpu, cpus) {
+ vcpu = hv_cpu_number_to_vp_number(cpu);
+ vcpu_bank = vcpu / 64;
+ vcpu_offset = vcpu % 64;
+ __set_bit(vcpu_offset, (unsigned long *)
+ &vpset->bank_contents[vcpu_bank]);
+ if (vcpu_bank >= nr_bank)
+ nr_bank = vcpu_bank + 1;
+ }
+ vpset->valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);
+ return nr_bank;
+}
+
+void __init hyperv_init(void);
void hyperv_setup_mmu_ops(void);
-void hyper_alloc_mmu(void);
void hyperv_report_panic(struct pt_regs *regs, long err);
bool hv_is_hyperv_initialized(void);
void hyperv_cleanup(void);
@@ -269,6 +302,13 @@ void hyperv_reenlightenment_intr(struct pt_regs *regs);
void set_hv_tscchange_cb(void (*cb)(void));
void clear_hv_tscchange_cb(void);
void hyperv_stop_tsc_emulation(void);
+
+#ifdef CONFIG_X86_64
+void hv_apic_init(void);
+#else
+static inline void hv_apic_init(void) {}
+#endif
+
#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
static inline bool hv_is_hyperv_initialized(void) { return false; }
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 53d5b1b9255e..68b2c3150de1 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -42,6 +42,8 @@
#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
+#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
+#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
@@ -60,14 +62,19 @@
#define NHM_C3_AUTO_DEMOTE (1UL << 25)
#define NHM_C1_AUTO_DEMOTE (1UL << 26)
#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25)
-#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
-#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
+#define SNB_C3_AUTO_UNDEMOTE (1UL << 27)
+#define SNB_C1_AUTO_UNDEMOTE (1UL << 28)
#define MSR_MTRRcap 0x000000fe
#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
+#define ARCH_CAP_SSB_NO (1 << 4) /*
+ * Not susceptible to Speculative Store Bypass
+ * attack, so no Speculative Store Bypass
+ * control required.
+ */
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
@@ -340,6 +347,8 @@
#define MSR_AMD64_SEV_ENABLED_BIT 0
#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
+#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
+
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index f928ad9b143f..f6f6c63da62f 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -217,6 +217,14 @@ enum spectre_v2_mitigation {
SPECTRE_V2_IBRS,
};
+/* The Speculative Store Bypass disable variants */
+enum ssb_mitigation {
+ SPEC_STORE_BYPASS_NONE,
+ SPEC_STORE_BYPASS_DISABLE,
+ SPEC_STORE_BYPASS_PRCTL,
+ SPEC_STORE_BYPASS_SECCOMP,
+};
+
extern char __indirect_thunk_start[];
extern char __indirect_thunk_end[];
@@ -241,22 +249,27 @@ static inline void vmexit_fill_RSB(void)
#endif
}
-#define alternative_msr_write(_msr, _val, _feature) \
- asm volatile(ALTERNATIVE("", \
- "movl %[msr], %%ecx\n\t" \
- "movl %[val], %%eax\n\t" \
- "movl $0, %%edx\n\t" \
- "wrmsr", \
- _feature) \
- : : [msr] "i" (_msr), [val] "i" (_val) \
- : "eax", "ecx", "edx", "memory")
+static __always_inline
+void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
+{
+ asm volatile(ALTERNATIVE("", "wrmsr", %c[feature])
+ : : "c" (msr),
+ "a" ((u32)val),
+ "d" ((u32)(val >> 32)),
+ [feature] "i" (feature)
+ : "memory");
+}
static inline void indirect_branch_prediction_barrier(void)
{
- alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,
- X86_FEATURE_USE_IBPB);
+ u64 val = PRED_CMD_IBPB;
+
+ alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB);
}
+/* The Intel SPEC CTRL MSR base value cache */
+extern u64 x86_spec_ctrl_base;
+
/*
* With retpoline, we must use IBRS to restrict branch prediction
* before calling into firmware.
@@ -265,14 +278,18 @@ static inline void indirect_branch_prediction_barrier(void)
*/
#define firmware_restrict_branch_speculation_start() \
do { \
+ u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \
+ \
preempt_disable(); \
- alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
X86_FEATURE_USE_IBRS_FW); \
} while (0)
#define firmware_restrict_branch_speculation_end() \
do { \
- alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \
+ u64 val = x86_spec_ctrl_base; \
+ \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
X86_FEATURE_USE_IBRS_FW); \
preempt_enable(); \
} while (0)
@@ -291,16 +308,20 @@ do { \
* lfence
* jmp spec_trap
* do_rop:
- * mov %rax,(%rsp)
+ * mov %rax,(%rsp) for x86_64
+ * mov %edx,(%esp) for x86_32
* retq
*
* Without retpolines configured:
*
- * jmp *%rax
+ * jmp *%rax for x86_64
+ * jmp *%edx for x86_32
*/
#ifdef CONFIG_RETPOLINE
-# define RETPOLINE_RAX_BPF_JIT_SIZE 17
-# define RETPOLINE_RAX_BPF_JIT() \
+# ifdef CONFIG_X86_64
+# define RETPOLINE_RAX_BPF_JIT_SIZE 17
+# define RETPOLINE_RAX_BPF_JIT() \
+do { \
EMIT1_off32(0xE8, 7); /* callq do_rop */ \
/* spec_trap: */ \
EMIT2(0xF3, 0x90); /* pause */ \
@@ -308,11 +329,30 @@ do { \
EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
/* do_rop: */ \
EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \
- EMIT1(0xC3); /* retq */
-#else
-# define RETPOLINE_RAX_BPF_JIT_SIZE 2
-# define RETPOLINE_RAX_BPF_JIT() \
- EMIT2(0xFF, 0xE0); /* jmp *%rax */
+ EMIT1(0xC3); /* retq */ \
+} while (0)
+# else /* !CONFIG_X86_64 */
+# define RETPOLINE_EDX_BPF_JIT() \
+do { \
+ EMIT1_off32(0xE8, 7); /* call do_rop */ \
+ /* spec_trap: */ \
+ EMIT2(0xF3, 0x90); /* pause */ \
+ EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
+ EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
+ /* do_rop: */ \
+ EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \
+ EMIT1(0xC3); /* ret */ \
+} while (0)
+# endif
+#else /* !CONFIG_RETPOLINE */
+# ifdef CONFIG_X86_64
+# define RETPOLINE_RAX_BPF_JIT_SIZE 2
+# define RETPOLINE_RAX_BPF_JIT() \
+ EMIT2(0xFF, 0xE0); /* jmp *%rax */
+# else /* !CONFIG_X86_64 */
+# define RETPOLINE_EDX_BPF_JIT() \
+ EMIT2(0xFF, 0xE2) /* jmp *%edx */
+# endif
#endif
#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 2c5a966dc222..6afac386a434 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -53,7 +53,7 @@
#define __PHYSICAL_MASK_SHIFT 52
#ifdef CONFIG_X86_5LEVEL
-#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled ? 56 : 47)
+#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled() ? 56 : 47)
#else
#define __VIRTUAL_MASK_SHIFT 47
#endif
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 9be2bf13825b..d49bbf4bb5c8 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -574,14 +574,14 @@ static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd)
}
#define set_pgd(pgdp, pgdval) do { \
- if (pgtable_l5_enabled) \
+ if (pgtable_l5_enabled()) \
__set_pgd(pgdp, pgdval); \
else \
set_p4d((p4d_t *)(pgdp), (p4d_t) { (pgdval).pgd }); \
} while (0)
#define pgd_clear(pgdp) do { \
- if (pgtable_l5_enabled) \
+ if (pgtable_l5_enabled()) \
set_pgd(pgdp, __pgd(0)); \
} while (0)
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index d32175e30259..662963681ea6 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -117,9 +117,6 @@ void native_restore_msi_irqs(struct pci_dev *dev);
#define native_setup_msi_irqs NULL
#define native_teardown_msi_irq NULL
#endif
-
-#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
-
#endif /* __KERNEL__ */
#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index 263c142a6a6c..ada6410fd2ec 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -167,7 +167,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
#if CONFIG_PGTABLE_LEVELS > 4
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
{
- if (!pgtable_l5_enabled)
+ if (!pgtable_l5_enabled())
return;
paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
@@ -193,7 +193,7 @@ extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d);
static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
unsigned long address)
{
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
___p4d_free_tlb(tlb, p4d);
}
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5f49b4ff0c24..99ecde23c3ec 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -65,7 +65,7 @@ extern pmdval_t early_pmd_flags;
#ifndef __PAGETABLE_P4D_FOLDED
#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd)
-#define pgd_clear(pgd) (pgtable_l5_enabled ? native_pgd_clear(pgd) : 0)
+#define pgd_clear(pgd) (pgtable_l5_enabled() ? native_pgd_clear(pgd) : 0)
#endif
#ifndef set_p4d
@@ -601,6 +601,11 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
#define canon_pgprot(p) __pgprot(massage_pgprot(p))
+static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
+{
+ return canon_pgprot(prot);
+}
+
static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
enum page_cache_mode pcm,
enum page_cache_mode new_pcm)
@@ -876,7 +881,7 @@ static inline unsigned long p4d_index(unsigned long address)
#if CONFIG_PGTABLE_LEVELS > 4
static inline int pgd_present(pgd_t pgd)
{
- if (!pgtable_l5_enabled)
+ if (!pgtable_l5_enabled())
return 1;
return pgd_flags(pgd) & _PAGE_PRESENT;
}
@@ -893,9 +898,9 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
#define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd))
/* to find an entry in a page-table-directory. */
-static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
+static __always_inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
- if (!pgtable_l5_enabled)
+ if (!pgtable_l5_enabled())
return (p4d_t *)pgd;
return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address);
}
@@ -904,7 +909,7 @@ static inline int pgd_bad(pgd_t pgd)
{
unsigned long ignore_flags = _PAGE_USER;
- if (!pgtable_l5_enabled)
+ if (!pgtable_l5_enabled())
return 0;
if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
@@ -915,7 +920,7 @@ static inline int pgd_bad(pgd_t pgd)
static inline int pgd_none(pgd_t pgd)
{
- if (!pgtable_l5_enabled)
+ if (!pgtable_l5_enabled())
return 0;
/*
* There is no need to do a workaround for the KNL stray
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index e3225e83db7d..d9a001a4a872 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -15,7 +15,7 @@
# include <asm/pgtable-2level_types.h>
#endif
-#define pgtable_l5_enabled 0
+#define pgtable_l5_enabled() 0
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 877bc27718ae..3c5385f9a88f 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -220,7 +220,7 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
{
pgd_t pgd;
- if (pgtable_l5_enabled || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) {
+ if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) {
*p4dp = p4d;
return;
}
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index d5c21a382475..054765ab2da2 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -22,12 +22,23 @@ typedef struct { pteval_t pte; } pte_t;
#ifdef CONFIG_X86_5LEVEL
extern unsigned int __pgtable_l5_enabled;
-#ifndef pgtable_l5_enabled
-#define pgtable_l5_enabled cpu_feature_enabled(X86_FEATURE_LA57)
-#endif
+
+#ifdef USE_EARLY_PGTABLE_L5
+/*
+ * cpu_feature_enabled() is not available in early boot code.
+ * Use variable instead.
+ */
+static inline bool pgtable_l5_enabled(void)
+{
+ return __pgtable_l5_enabled;
+}
#else
-#define pgtable_l5_enabled 0
-#endif
+#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57)
+#endif /* USE_EARLY_PGTABLE_L5 */
+
+#else
+#define pgtable_l5_enabled() 0
+#endif /* CONFIG_X86_5LEVEL */
extern unsigned int pgdir_shift;
extern unsigned int ptrs_per_p4d;
@@ -102,21 +113,21 @@ extern unsigned int ptrs_per_p4d;
#define LDT_PGD_ENTRY_L4 -3UL
#define LDT_PGD_ENTRY_L5 -112UL
-#define LDT_PGD_ENTRY (pgtable_l5_enabled ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4)
+#define LDT_PGD_ENTRY (pgtable_l5_enabled() ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4)
#define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
-#define __VMALLOC_BASE_L4 0xffffc90000000000
-#define __VMALLOC_BASE_L5 0xffa0000000000000
+#define __VMALLOC_BASE_L4 0xffffc90000000000UL
+#define __VMALLOC_BASE_L5 0xffa0000000000000UL
#define VMALLOC_SIZE_TB_L4 32UL
#define VMALLOC_SIZE_TB_L5 12800UL
-#define __VMEMMAP_BASE_L4 0xffffea0000000000
-#define __VMEMMAP_BASE_L5 0xffd4000000000000
+#define __VMEMMAP_BASE_L4 0xffffea0000000000UL
+#define __VMEMMAP_BASE_L5 0xffd4000000000000UL
#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
# define VMALLOC_START vmalloc_base
-# define VMALLOC_SIZE_TB (pgtable_l5_enabled ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4)
+# define VMALLOC_SIZE_TB (pgtable_l5_enabled() ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4)
# define VMEMMAP_START vmemmap_base
#else
# define VMALLOC_START __VMALLOC_BASE_L4
diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h
index a0ba1ffda0df..19b137f1b3be 100644
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -2,11 +2,18 @@
#ifndef _ASM_X86_PKEYS_H
#define _ASM_X86_PKEYS_H
+#define ARCH_DEFAULT_PKEY 0
+
#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1)
extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
unsigned long init_val);
+static inline bool arch_pkeys_enabled(void)
+{
+ return boot_cpu_has(X86_FEATURE_OSPKE);
+}
+
/*
* Try to dedicate one of the protection keys to be used as an
* execute-only protection key.
@@ -15,7 +22,7 @@ extern int __execute_only_pkey(struct mm_struct *mm);
static inline int execute_only_pkey(struct mm_struct *mm)
{
if (!boot_cpu_has(X86_FEATURE_OSPKE))
- return 0;
+ return ARCH_DEFAULT_PKEY;
return __execute_only_pkey(mm);
}
@@ -49,13 +56,21 @@ bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
{
/*
* "Allocated" pkeys are those that have been returned
- * from pkey_alloc(). pkey 0 is special, and never
- * returned from pkey_alloc().
+ * from pkey_alloc() or pkey 0 which is allocated
+ * implicitly when the mm is created.
*/
- if (pkey <= 0)
+ if (pkey < 0)
return false;
if (pkey >= arch_max_pkey())
return false;
+ /*
+ * The exec-only pkey is set in the allocation map, but
+ * is not available to any of the user interfaces like
+ * mprotect_pkey().
+ */
+ if (pkey == mm->context.execute_only_pkey)
+ return false;
+
return mm_pkey_allocation_map(mm) & (1U << pkey);
}
@@ -106,4 +121,12 @@ extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
unsigned long init_val);
extern void copy_init_pkru_to_fpregs(void);
+static inline int vma_pkey(struct vm_area_struct *vma)
+{
+ unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 |
+ VM_PKEY_BIT2 | VM_PKEY_BIT3;
+
+ return (vma->vm_flags & vma_pkey_mask) >> VM_PKEY_SHIFT;
+}
+
#endif /*_ASM_X86_PKEYS_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 4fa4206029e3..e28add6b791f 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -186,15 +186,6 @@ extern void identify_boot_cpu(void);
extern void identify_secondary_cpu(struct cpuinfo_x86 *);
extern void print_cpu_info(struct cpuinfo_x86 *);
void print_cpu_msr(struct cpuinfo_x86 *);
-extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
-extern u32 get_scattered_cpuid_leaf(unsigned int level,
- unsigned int sub_leaf,
- enum cpuid_regs_idx reg);
-extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
-extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
-
-extern void detect_extended_topology(struct cpuinfo_x86 *c);
-extern void detect_ht(struct cpuinfo_x86 *c);
#ifdef CONFIG_X86_32
extern int have_cpuid_p(void);
@@ -749,13 +740,11 @@ enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
extern void enable_sep_cpu(void);
extern int sysenter_setup(void);
-extern void early_trap_init(void);
void early_trap_pf_init(void);
/* Defined in head.S */
extern struct desc_ptr early_gdt_descr;
-extern void cpu_set_gdt(int);
extern void switch_to_new_gdt(int);
extern void load_direct_gdt(int);
extern void load_fixmap_gdt(int);
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index a7471dcd2205..b6033680d458 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -12,7 +12,7 @@ void pvclock_set_flags(u8 flags);
unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
struct pvclock_vcpu_time_info *vcpu,
- struct timespec *ts);
+ struct timespec64 *ts);
void pvclock_resume(void);
void pvclock_touch_watchdogs(void);
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index 5e16b5d40d32..3e70bed8a978 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -7,6 +7,14 @@
#include <asm-generic/qspinlock_types.h>
#include <asm/paravirt.h>
+#define _Q_PENDING_LOOPS (1 << 9)
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_init_lock_hash(void);
+extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
+
#define queued_spin_unlock queued_spin_unlock
/**
* queued_spin_unlock - release a queued spinlock
@@ -16,15 +24,9 @@
*/
static inline void native_queued_spin_unlock(struct qspinlock *lock)
{
- smp_store_release((u8 *)lock, 0);
+ smp_store_release(&lock->locked, 0);
}
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
-extern void __pv_init_lock_hash(void);
-extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
-extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
-
static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
{
pv_queued_spin_lock_slowpath(lock, val);
@@ -40,11 +42,6 @@ static inline bool vcpu_is_preempted(long cpu)
{
return pv_vcpu_is_preempted(cpu);
}
-#else
-static inline void queued_spin_unlock(struct qspinlock *lock)
-{
- native_queued_spin_unlock(lock);
-}
#endif
#ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index 923307ea11c7..9ef5ee03d2d7 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -22,8 +22,7 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
*
* void __pv_queued_spin_unlock(struct qspinlock *lock)
* {
- * struct __qspinlock *l = (void *)lock;
- * u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
+ * u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0);
*
* if (likely(lockval == _Q_LOCKED_VAL))
* return;
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index f75bff8f9d82..547c4fe50711 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -171,7 +171,6 @@ static inline int wbinvd_on_all_cpus(void)
wbinvd();
return 0;
}
-#define smp_num_siblings 1
#endif /* CONFIG_SMP */
extern unsigned disabled_cpus;
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index 4617a2bf123c..199218719a86 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -27,8 +27,8 @@
# endif
#else /* CONFIG_X86_32 */
# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */
-# define MAX_PHYSADDR_BITS (pgtable_l5_enabled ? 52 : 44)
-# define MAX_PHYSMEM_BITS (pgtable_l5_enabled ? 52 : 46)
+# define MAX_PHYSADDR_BITS (pgtable_l5_enabled() ? 52 : 44)
+# define MAX_PHYSMEM_BITS (pgtable_l5_enabled() ? 52 : 46)
#endif
#endif /* CONFIG_SPARSEMEM */
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
new file mode 100644
index 000000000000..ae7c2c5cd7f0
--- /dev/null
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SPECCTRL_H_
+#define _ASM_X86_SPECCTRL_H_
+
+#include <linux/thread_info.h>
+#include <asm/nospec-branch.h>
+
+/*
+ * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR
+ * the guest has, while on VMEXIT we restore the host view. This
+ * would be easier if SPEC_CTRL were architecturally maskable or
+ * shadowable for guests but this is not (currently) the case.
+ * Takes the guest view of SPEC_CTRL MSR as a parameter and also
+ * the guest's version of VIRT_SPEC_CTRL, if emulated.
+ */
+extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest);
+
+/**
+ * x86_spec_ctrl_set_guest - Set speculation control registers for the guest
+ * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL
+ * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL
+ * (may get translated to MSR_AMD64_LS_CFG bits)
+ *
+ * Avoids writing to the MSR if the content/bits are the same
+ */
+static inline
+void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
+{
+ x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true);
+}
+
+/**
+ * x86_spec_ctrl_restore_host - Restore host speculation control registers
+ * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL
+ * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL
+ * (may get translated to MSR_AMD64_LS_CFG bits)
+ *
+ * Avoids writing to the MSR if the content/bits are the same
+ */
+static inline
+void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
+{
+ x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false);
+}
+
+/* AMD specific Speculative Store Bypass MSR data */
+extern u64 x86_amd_ls_cfg_base;
+extern u64 x86_amd_ls_cfg_ssbd_mask;
+
+static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
+{
+ BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
+ return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
+}
+
+static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl)
+{
+ BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
+ return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
+}
+
+static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
+{
+ return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
+}
+
+#ifdef CONFIG_SMP
+extern void speculative_store_bypass_ht_init(void);
+#else
+static inline void speculative_store_bypass_ht_init(void) { }
+#endif
+
+extern void speculative_store_bypass_update(unsigned long tif);
+
+static inline void speculative_store_bypass_update_current(void)
+{
+ speculative_store_bypass_update(current_thread_info()->flags);
+}
+
+#endif
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 133d9425fced..b6dc698f992a 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -111,4 +111,6 @@ static inline unsigned long caller_frame_pointer(void)
return (unsigned long)frame;
}
+void show_opcodes(u8 *rip, const char *loglvl);
+void show_ip(struct pt_regs *regs, const char *loglvl);
#endif /* _ASM_X86_STACKTRACE_H */
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 533f74c300c2..d33f92b9fa22 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -116,7 +116,8 @@ int strcmp(const char *cs, const char *ct);
#endif
#define __HAVE_ARCH_MEMCPY_MCSAFE 1
-__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt);
+__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src,
+ size_t cnt);
DECLARE_STATIC_KEY_FALSE(mcsafe_key);
/**
@@ -131,14 +132,15 @@ DECLARE_STATIC_KEY_FALSE(mcsafe_key);
* actually do machine check recovery. Everyone else can just
* use memcpy().
*
- * Return 0 for success, -EFAULT for fail
+ * Return 0 for success, or number of bytes not copied if there was an
+ * exception.
*/
-static __always_inline __must_check int
+static __always_inline __must_check unsigned long
memcpy_mcsafe(void *dst, const void *src, size_t cnt)
{
#ifdef CONFIG_X86_MCE
if (static_branch_unlikely(&mcsafe_key))
- return memcpy_mcsafe_unrolled(dst, src, cnt);
+ return __memcpy_mcsafe(dst, src, cnt);
else
#endif
memcpy(dst, src, cnt);
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a5d9521bb2cb..2ff2a30a264f 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -79,6 +79,7 @@ struct thread_info {
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
+#define TIF_SSBD 5 /* Reduced data speculation */
#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
@@ -105,6 +106,7 @@ struct thread_info {
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
+#define _TIF_SSBD (1 << TIF_SSBD)
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
@@ -144,7 +146,7 @@ struct thread_info {
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
- (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
+ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD)
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 62546b3a398e..62acb613114b 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -47,6 +47,17 @@ copy_user_generic(void *to, const void *from, unsigned len)
}
static __always_inline __must_check unsigned long
+copy_to_user_mcsafe(void *to, const void *from, unsigned len)
+{
+ unsigned long ret;
+
+ __uaccess_begin();
+ ret = memcpy_mcsafe(to, from, len);
+ __uaccess_end();
+ return ret;
+}
+
+static __always_inline __must_check unsigned long
raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
{
int ret = 0;
@@ -194,4 +205,7 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
unsigned long
copy_user_handle_tail(char *to, char *from, unsigned len);
+unsigned long
+mcsafe_handle_tail(char *to, char *from, unsigned len);
+
#endif /* _ASM_X86_UACCESS_64_H */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index ce8b4da07e35..2d27236c16a3 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -170,7 +170,7 @@ struct x86_cpuinit_ops {
void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
};
-struct timespec;
+struct timespec64;
/**
* struct x86_legacy_devices - legacy x86 devices
@@ -264,8 +264,8 @@ struct x86_hyper_runtime {
struct x86_platform_ops {
unsigned long (*calibrate_cpu)(void);
unsigned long (*calibrate_tsc)(void);
- void (*get_wallclock)(struct timespec *ts);
- int (*set_wallclock)(const struct timespec *ts);
+ void (*get_wallclock)(struct timespec64 *ts);
+ int (*set_wallclock)(const struct timespec64 *ts);
void (*iommu_shutdown)(void);
bool (*is_untracked_pat_range)(u64 start, u64 end);
void (*nmi_init)(void);
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 4c851ebb3ceb..0ede697c3961 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -29,7 +29,7 @@
#define KVM_FEATURE_PV_TLB_FLUSH 9
#define KVM_FEATURE_ASYNC_PF_VMEXIT 10
-#define KVM_HINTS_DEDICATED 0
+#define KVM_HINTS_REALTIME 0
/* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored.
diff --git a/arch/x86/include/uapi/asm/msgbuf.h b/arch/x86/include/uapi/asm/msgbuf.h
index 809134c644a6..90ab9a795b49 100644
--- a/arch/x86/include/uapi/asm/msgbuf.h
+++ b/arch/x86/include/uapi/asm/msgbuf.h
@@ -1 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_X64_MSGBUF_H
+#define __ASM_X64_MSGBUF_H
+
+#if !defined(__x86_64__) || !defined(__ILP32__)
#include <asm-generic/msgbuf.h>
+#else
+/*
+ * The msqid64_ds structure for x86 architecture with x32 ABI.
+ *
+ * On x86-32 and x86-64 we can just use the generic definition, but
+ * x32 uses the same binary layout as x86_64, which is differnet
+ * from other 32-bit architectures.
+ */
+
+struct msqid64_ds {
+ struct ipc64_perm msg_perm;
+ __kernel_time_t msg_stime; /* last msgsnd time */
+ __kernel_time_t msg_rtime; /* last msgrcv time */
+ __kernel_time_t msg_ctime; /* last change time */
+ __kernel_ulong_t msg_cbytes; /* current number of bytes on queue */
+ __kernel_ulong_t msg_qnum; /* number of messages in queue */
+ __kernel_ulong_t msg_qbytes; /* max number of bytes on queue */
+ __kernel_pid_t msg_lspid; /* pid of last msgsnd */
+ __kernel_pid_t msg_lrpid; /* last receive pid */
+ __kernel_ulong_t __unused4;
+ __kernel_ulong_t __unused5;
+};
+
+#endif
+
+#endif /* __ASM_GENERIC_MSGBUF_H */
diff --git a/arch/x86/include/uapi/asm/sembuf.h b/arch/x86/include/uapi/asm/sembuf.h
index cabd7476bd6c..89de6cd9f0a7 100644
--- a/arch/x86/include/uapi/asm/sembuf.h
+++ b/arch/x86/include/uapi/asm/sembuf.h
@@ -8,15 +8,24 @@
* between kernel and user space.
*
* Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
* - 2 miscellaneous 32-bit values
+ *
+ * x86_64 and x32 incorrectly added padding here, so the structures
+ * are still incompatible with the padding on x86.
*/
struct semid64_ds {
struct ipc64_perm sem_perm; /* permissions .. see ipc.h */
+#ifdef __i386__
+ unsigned long sem_otime; /* last semop time */
+ unsigned long sem_otime_high;
+ unsigned long sem_ctime; /* last change time */
+ unsigned long sem_ctime_high;
+#else
__kernel_time_t sem_otime; /* last semop time */
__kernel_ulong_t __unused1;
__kernel_time_t sem_ctime; /* last change time */
__kernel_ulong_t __unused2;
+#endif
__kernel_ulong_t sem_nsems; /* no. of semaphores in array */
__kernel_ulong_t __unused3;
__kernel_ulong_t __unused4;
diff --git a/arch/x86/include/uapi/asm/shmbuf.h b/arch/x86/include/uapi/asm/shmbuf.h
index 83c05fc2de38..644421f3823b 100644
--- a/arch/x86/include/uapi/asm/shmbuf.h
+++ b/arch/x86/include/uapi/asm/shmbuf.h
@@ -1 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_X86_SHMBUF_H
+#define __ASM_X86_SHMBUF_H
+
+#if !defined(__x86_64__) || !defined(__ILP32__)
#include <asm-generic/shmbuf.h>
+#else
+/*
+ * The shmid64_ds structure for x86 architecture with x32 ABI.
+ *
+ * On x86-32 and x86-64 we can just use the generic definition, but
+ * x32 uses the same binary layout as x86_64, which is differnet
+ * from other 32-bit architectures.
+ */
+
+struct shmid64_ds {
+ struct ipc64_perm shm_perm; /* operation perms */
+ size_t shm_segsz; /* size of segment (bytes) */
+ __kernel_time_t shm_atime; /* last attach time */
+ __kernel_time_t shm_dtime; /* last detach time */
+ __kernel_time_t shm_ctime; /* last change time */
+ __kernel_pid_t shm_cpid; /* pid of creator */
+ __kernel_pid_t shm_lpid; /* pid of last operator */
+ __kernel_ulong_t shm_nattch; /* no. of current attaches */
+ __kernel_ulong_t __unused4;
+ __kernel_ulong_t __unused5;
+};
+
+struct shminfo64 {
+ __kernel_ulong_t shmmax;
+ __kernel_ulong_t shmmin;
+ __kernel_ulong_t shmmni;
+ __kernel_ulong_t shmseg;
+ __kernel_ulong_t shmall;
+ __kernel_ulong_t __unused1;
+ __kernel_ulong_t __unused2;
+ __kernel_ulong_t __unused3;
+ __kernel_ulong_t __unused4;
+};
+
+#endif
+
+#endif /* __ASM_X86_SHMBUF_H */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index dde444f932c1..3b20607d581b 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -215,6 +215,10 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
apic_id = processor->local_apic_id;
enabled = processor->lapic_flags & ACPI_MADT_ENABLED;
+ /* Ignore invalid ID */
+ if (apic_id == 0xffffffff)
+ return 0;
+
/*
* We need to register disabled CPU as well to permit
* counting disabled CPUs. This allows us to size
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index c88e0b127810..b481b95bd8f6 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -14,8 +14,11 @@
#include <asm/amd_nb.h>
#define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450
+#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0
#define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463
#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464
+#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb
+#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
/* Protect the PCI config register pairs used for SMN and DF indirect access. */
static DEFINE_MUTEX(smn_mutex);
@@ -24,6 +27,7 @@ static u32 *flush_words;
static const struct pci_device_id amd_root_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_ROOT) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) },
{}
};
@@ -39,6 +43,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
{}
};
@@ -51,6 +56,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
{}
};
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 8b04234e010b..7685444a106b 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -116,6 +116,7 @@ static void init_x2apic_ldr(void)
goto update;
}
cmsk = cluster_hotplug_mask;
+ cmsk->clusterid = cluster;
cluster_hotplug_mask = NULL;
update:
this_cpu_write(cluster_masks, cmsk);
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index dfcbe6924eaf..5d0de79fdab0 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1715,19 +1715,6 @@ static int proc_apm_show(struct seq_file *m, void *v)
return 0;
}
-static int proc_apm_open(struct inode *inode, struct file *file)
-{
- return single_open(file, proc_apm_show, NULL);
-}
-
-static const struct file_operations apm_file_ops = {
- .owner = THIS_MODULE,
- .open = proc_apm_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int apm(void *unused)
{
unsigned short bx;
@@ -2360,7 +2347,7 @@ static int __init apm_init(void)
set_desc_base(&gdt[APM_DS >> 3],
(unsigned long)__va((unsigned long)apm_info.bios.dseg << 4));
- proc_create("apm", 0, NULL, &apm_file_ops);
+ proc_create_single("apm", 0, NULL, proc_apm_show);
kapmd_task = kthread_create(apm, NULL, "kapmd");
if (IS_ERR(kapmd_task)) {
@@ -2446,7 +2433,7 @@ MODULE_PARM_DESC(idle_threshold,
"System idle percentage above which to make APM BIOS idle calls");
module_param(idle_period, int, 0444);
MODULE_PARM_DESC(idle_period,
- "Period (in sec/100) over which to caculate the idle percentage");
+ "Period (in sec/100) over which to calculate the idle percentage");
module_param(smp, bool, 0444);
MODULE_PARM_DESC(smp,
"Set this to enable APM use on an SMP platform. Use with caution on older systems");
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index a66229f51b12..7a40196967cb 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -17,7 +17,7 @@ KCOV_INSTRUMENT_perf_event.o := n
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_common.o := $(nostackp)
-obj-y := intel_cacheinfo.o scattered.o topology.o
+obj-y := cacheinfo.o scattered.o topology.o
obj-y += common.o
obj-y += rdrand.o
obj-y += match.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 12bc0a1139da..082d7875cef8 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -9,7 +9,9 @@
#include <linux/random.h>
#include <asm/processor.h>
#include <asm/apic.h>
+#include <asm/cacheinfo.h>
#include <asm/cpu.h>
+#include <asm/spec-ctrl.h>
#include <asm/smp.h>
#include <asm/pci-direct.h>
#include <asm/delay.h>
@@ -297,7 +299,6 @@ static int nearby_node(int apicid)
}
#endif
-#ifdef CONFIG_SMP
/*
* Fix up cpu_core_id for pre-F17h systems to be in the
* [0 .. cores_per_node - 1] range. Not really needed but
@@ -327,6 +328,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
/* get information required for multi-node processors */
if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+ int err;
u32 eax, ebx, ecx, edx;
cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
@@ -345,21 +347,15 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
}
/*
- * We may have multiple LLCs if L3 caches exist, so check if we
- * have an L3 cache by looking at the L3 cache CPUID leaf.
+ * In case leaf B is available, use it to derive
+ * topology information.
*/
- if (cpuid_edx(0x80000006)) {
- if (c->x86 == 0x17) {
- /*
- * LLC is at the core complex level.
- * Core complex id is ApicId[3].
- */
- per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
- } else {
- /* LLC is at the node level. */
- per_cpu(cpu_llc_id, cpu) = node_id;
- }
- }
+ err = detect_extended_topology(c);
+ if (!err)
+ c->x86_coreid_bits = get_count_order(c->x86_max_cores);
+
+ cacheinfo_amd_init_llc_id(c, cpu, node_id);
+
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;
@@ -375,7 +371,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
legacy_fixup_core_id(c);
}
}
-#endif
/*
* On a AMD dual core setup the lower bits of the APIC id distinguish the cores.
@@ -383,7 +378,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
*/
static void amd_detect_cmp(struct cpuinfo_x86 *c)
{
-#ifdef CONFIG_SMP
unsigned bits;
int cpu = smp_processor_id();
@@ -394,17 +388,11 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
c->phys_proc_id = c->initial_apicid >> bits;
/* use socket ID also for last level cache */
per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
- amd_get_topology(c);
-#endif
}
u16 amd_get_nb_id(int cpu)
{
- u16 id = 0;
-#ifdef CONFIG_SMP
- id = per_cpu(cpu_llc_id, cpu);
-#endif
- return id;
+ return per_cpu(cpu_llc_id, cpu);
}
EXPORT_SYMBOL_GPL(amd_get_nb_id);
@@ -554,6 +542,26 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
rdmsrl(MSR_FAM10H_NODE_ID, value);
nodes_per_socket = ((value >> 3) & 7) + 1;
}
+
+ if (c->x86 >= 0x15 && c->x86 <= 0x17) {
+ unsigned int bit;
+
+ switch (c->x86) {
+ case 0x15: bit = 54; break;
+ case 0x16: bit = 33; break;
+ case 0x17: bit = 10; break;
+ default: return;
+ }
+ /*
+ * Try to cache the base value so further operations can
+ * avoid RMW. If that faults, do not enable SSBD.
+ */
+ if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) {
+ setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD);
+ setup_force_cpu_cap(X86_FEATURE_SSBD);
+ x86_amd_ls_cfg_ssbd_mask = 1ULL << bit;
+ }
+ }
}
static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
@@ -791,6 +799,7 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
static void init_amd_zn(struct cpuinfo_x86 *c)
{
+ set_cpu_cap(c, X86_FEATURE_ZEN);
/*
* Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
* all up to and including B1.
@@ -842,6 +851,7 @@ static void init_amd(struct cpuinfo_x86 *c)
/* Multi core CPU? */
if (c->extended_cpuid_level >= 0x80000008) {
amd_detect_cmp(c);
+ amd_get_topology(c);
srat_detect_node(c);
}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index bfca937bdcc3..7416fc206b4a 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -12,8 +12,10 @@
#include <linux/utsname.h>
#include <linux/cpu.h>
#include <linux/module.h>
+#include <linux/nospec.h>
+#include <linux/prctl.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
#include <asm/cmdline.h>
#include <asm/bugs.h>
#include <asm/processor.h>
@@ -27,6 +29,27 @@
#include <asm/intel-family.h>
static void __init spectre_v2_select_mitigation(void);
+static void __init ssb_select_mitigation(void);
+
+/*
+ * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
+ * writes to SPEC_CTRL contain whatever reserved bits have been set.
+ */
+u64 __ro_after_init x86_spec_ctrl_base;
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
+
+/*
+ * The vendor and possibly platform specific bits which can be modified in
+ * x86_spec_ctrl_base.
+ */
+static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
+
+/*
+ * AMD specific MSR info for Speculative Store Bypass control.
+ * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu().
+ */
+u64 __ro_after_init x86_amd_ls_cfg_base;
+u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask;
void __init check_bugs(void)
{
@@ -37,9 +60,27 @@ void __init check_bugs(void)
print_cpu_info(&boot_cpu_data);
}
+ /*
+ * Read the SPEC_CTRL MSR to account for reserved bits which may
+ * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
+ * init code as it is not enumerated and depends on the family.
+ */
+ if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+ rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+
+ /* Allow STIBP in MSR_SPEC_CTRL if supported */
+ if (boot_cpu_has(X86_FEATURE_STIBP))
+ x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
+
/* Select the proper spectre mitigation before patching alternatives */
spectre_v2_select_mitigation();
+ /*
+ * Select proper mitigation for any exposure to the Speculative Store
+ * Bypass vulnerability.
+ */
+ ssb_select_mitigation();
+
#ifdef CONFIG_X86_32
/*
* Check whether we are able to run this kernel safely on SMP.
@@ -93,7 +134,76 @@ static const char *spectre_v2_strings[] = {
#undef pr_fmt
#define pr_fmt(fmt) "Spectre V2 : " fmt
-static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
+ SPECTRE_V2_NONE;
+
+void
+x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+{
+ u64 msrval, guestval, hostval = x86_spec_ctrl_base;
+ struct thread_info *ti = current_thread_info();
+
+ /* Is MSR_SPEC_CTRL implemented ? */
+ if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
+ /*
+ * Restrict guest_spec_ctrl to supported values. Clear the
+ * modifiable bits in the host base value and or the
+ * modifiable bits from the guest value.
+ */
+ guestval = hostval & ~x86_spec_ctrl_mask;
+ guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
+
+ /* SSBD controlled in MSR_SPEC_CTRL */
+ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
+ hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
+
+ if (hostval != guestval) {
+ msrval = setguest ? guestval : hostval;
+ wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
+ }
+ }
+
+ /*
+ * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update
+ * MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported.
+ */
+ if (!static_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
+ !static_cpu_has(X86_FEATURE_VIRT_SSBD))
+ return;
+
+ /*
+ * If the host has SSBD mitigation enabled, force it in the host's
+ * virtual MSR value. If its not permanently enabled, evaluate
+ * current's TIF_SSBD thread flag.
+ */
+ if (static_cpu_has(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE))
+ hostval = SPEC_CTRL_SSBD;
+ else
+ hostval = ssbd_tif_to_spec_ctrl(ti->flags);
+
+ /* Sanitize the guest value */
+ guestval = guest_virt_spec_ctrl & SPEC_CTRL_SSBD;
+
+ if (hostval != guestval) {
+ unsigned long tif;
+
+ tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) :
+ ssbd_spec_ctrl_to_tif(hostval);
+
+ speculative_store_bypass_update(tif);
+ }
+}
+EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl);
+
+static void x86_amd_ssb_disable(void)
+{
+ u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask;
+
+ if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
+ wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, SPEC_CTRL_SSBD);
+ else if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
+ wrmsrl(MSR_AMD64_LS_CFG, msrval);
+}
#ifdef RETPOLINE
static bool spectre_v2_bad_module;
@@ -312,32 +422,289 @@ retpoline_auto:
}
#undef pr_fmt
+#define pr_fmt(fmt) "Speculative Store Bypass: " fmt
+
+static enum ssb_mitigation ssb_mode __ro_after_init = SPEC_STORE_BYPASS_NONE;
+
+/* The kernel command line selection */
+enum ssb_mitigation_cmd {
+ SPEC_STORE_BYPASS_CMD_NONE,
+ SPEC_STORE_BYPASS_CMD_AUTO,
+ SPEC_STORE_BYPASS_CMD_ON,
+ SPEC_STORE_BYPASS_CMD_PRCTL,
+ SPEC_STORE_BYPASS_CMD_SECCOMP,
+};
+
+static const char *ssb_strings[] = {
+ [SPEC_STORE_BYPASS_NONE] = "Vulnerable",
+ [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled",
+ [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl",
+ [SPEC_STORE_BYPASS_SECCOMP] = "Mitigation: Speculative Store Bypass disabled via prctl and seccomp",
+};
+
+static const struct {
+ const char *option;
+ enum ssb_mitigation_cmd cmd;
+} ssb_mitigation_options[] = {
+ { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
+ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */
+ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
+ { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */
+ { "seccomp", SPEC_STORE_BYPASS_CMD_SECCOMP }, /* Disable Speculative Store Bypass via prctl and seccomp */
+};
+
+static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
+{
+ enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO;
+ char arg[20];
+ int ret, i;
+
+ if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) {
+ return SPEC_STORE_BYPASS_CMD_NONE;
+ } else {
+ ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable",
+ arg, sizeof(arg));
+ if (ret < 0)
+ return SPEC_STORE_BYPASS_CMD_AUTO;
+
+ for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) {
+ if (!match_option(arg, ret, ssb_mitigation_options[i].option))
+ continue;
+
+ cmd = ssb_mitigation_options[i].cmd;
+ break;
+ }
+
+ if (i >= ARRAY_SIZE(ssb_mitigation_options)) {
+ pr_err("unknown option (%s). Switching to AUTO select\n", arg);
+ return SPEC_STORE_BYPASS_CMD_AUTO;
+ }
+ }
+
+ return cmd;
+}
+
+static enum ssb_mitigation __init __ssb_select_mitigation(void)
+{
+ enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE;
+ enum ssb_mitigation_cmd cmd;
+
+ if (!boot_cpu_has(X86_FEATURE_SSBD))
+ return mode;
+
+ cmd = ssb_parse_cmdline();
+ if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) &&
+ (cmd == SPEC_STORE_BYPASS_CMD_NONE ||
+ cmd == SPEC_STORE_BYPASS_CMD_AUTO))
+ return mode;
+
+ switch (cmd) {
+ case SPEC_STORE_BYPASS_CMD_AUTO:
+ case SPEC_STORE_BYPASS_CMD_SECCOMP:
+ /*
+ * Choose prctl+seccomp as the default mode if seccomp is
+ * enabled.
+ */
+ if (IS_ENABLED(CONFIG_SECCOMP))
+ mode = SPEC_STORE_BYPASS_SECCOMP;
+ else
+ mode = SPEC_STORE_BYPASS_PRCTL;
+ break;
+ case SPEC_STORE_BYPASS_CMD_ON:
+ mode = SPEC_STORE_BYPASS_DISABLE;
+ break;
+ case SPEC_STORE_BYPASS_CMD_PRCTL:
+ mode = SPEC_STORE_BYPASS_PRCTL;
+ break;
+ case SPEC_STORE_BYPASS_CMD_NONE:
+ break;
+ }
+
+ /*
+ * We have three CPU feature flags that are in play here:
+ * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
+ * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
+ * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation
+ */
+ if (mode == SPEC_STORE_BYPASS_DISABLE) {
+ setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
+ /*
+ * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses
+ * a completely different MSR and bit dependent on family.
+ */
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_INTEL:
+ x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
+ x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
+ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ break;
+ case X86_VENDOR_AMD:
+ x86_amd_ssb_disable();
+ break;
+ }
+ }
+
+ return mode;
+}
+
+static void ssb_select_mitigation(void)
+{
+ ssb_mode = __ssb_select_mitigation();
+
+ if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+ pr_info("%s\n", ssb_strings[ssb_mode]);
+}
+
+#undef pr_fmt
+#define pr_fmt(fmt) "Speculation prctl: " fmt
+
+static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
+ bool update;
+
+ if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
+ ssb_mode != SPEC_STORE_BYPASS_SECCOMP)
+ return -ENXIO;
+
+ switch (ctrl) {
+ case PR_SPEC_ENABLE:
+ /* If speculation is force disabled, enable is not allowed */
+ if (task_spec_ssb_force_disable(task))
+ return -EPERM;
+ task_clear_spec_ssb_disable(task);
+ update = test_and_clear_tsk_thread_flag(task, TIF_SSBD);
+ break;
+ case PR_SPEC_DISABLE:
+ task_set_spec_ssb_disable(task);
+ update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
+ break;
+ case PR_SPEC_FORCE_DISABLE:
+ task_set_spec_ssb_disable(task);
+ task_set_spec_ssb_force_disable(task);
+ update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ /*
+ * If being set on non-current task, delay setting the CPU
+ * mitigation until it is next scheduled.
+ */
+ if (task == current && update)
+ speculative_store_bypass_update_current();
+
+ return 0;
+}
+
+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
+ unsigned long ctrl)
+{
+ switch (which) {
+ case PR_SPEC_STORE_BYPASS:
+ return ssb_prctl_set(task, ctrl);
+ default:
+ return -ENODEV;
+ }
+}
+
+#ifdef CONFIG_SECCOMP
+void arch_seccomp_spec_mitigate(struct task_struct *task)
+{
+ if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
+ ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
+}
+#endif
+
+static int ssb_prctl_get(struct task_struct *task)
+{
+ switch (ssb_mode) {
+ case SPEC_STORE_BYPASS_DISABLE:
+ return PR_SPEC_DISABLE;
+ case SPEC_STORE_BYPASS_SECCOMP:
+ case SPEC_STORE_BYPASS_PRCTL:
+ if (task_spec_ssb_force_disable(task))
+ return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
+ if (task_spec_ssb_disable(task))
+ return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+ return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+ default:
+ if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+ return PR_SPEC_ENABLE;
+ return PR_SPEC_NOT_AFFECTED;
+ }
+}
+
+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
+{
+ switch (which) {
+ case PR_SPEC_STORE_BYPASS:
+ return ssb_prctl_get(task);
+ default:
+ return -ENODEV;
+ }
+}
+
+void x86_spec_ctrl_setup_ap(void)
+{
+ if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+
+ if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
+ x86_amd_ssb_disable();
+}
#ifdef CONFIG_SYSFS
-ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+
+static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
+ char *buf, unsigned int bug)
{
- if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+ if (!boot_cpu_has_bug(bug))
return sprintf(buf, "Not affected\n");
- if (boot_cpu_has(X86_FEATURE_PTI))
- return sprintf(buf, "Mitigation: PTI\n");
+
+ switch (bug) {
+ case X86_BUG_CPU_MELTDOWN:
+ if (boot_cpu_has(X86_FEATURE_PTI))
+ return sprintf(buf, "Mitigation: PTI\n");
+
+ break;
+
+ case X86_BUG_SPECTRE_V1:
+ return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+
+ case X86_BUG_SPECTRE_V2:
+ return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+ boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+ spectre_v2_module_string());
+
+ case X86_BUG_SPEC_STORE_BYPASS:
+ return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
+
+ default:
+ break;
+ }
+
return sprintf(buf, "Vulnerable\n");
}
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_CPU_MELTDOWN);
+}
+
ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
{
- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
- return sprintf(buf, "Not affected\n");
- return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+ return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V1);
}
ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
{
- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
- return sprintf(buf, "Not affected\n");
+ return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2);
+}
- return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
- boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
- boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
- spectre_v2_module_string());
+ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS);
}
#endif
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 54d04d574148..38354c66df81 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -20,6 +20,8 @@
#include <asm/amd_nb.h>
#include <asm/smp.h>
+#include "cpu.h"
+
#define LVL_1_INST 1
#define LVL_1_DATA 2
#define LVL_2 3
@@ -637,6 +639,45 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c)
return i;
}
+void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
+{
+ /*
+ * We may have multiple LLCs if L3 caches exist, so check if we
+ * have an L3 cache by looking at the L3 cache CPUID leaf.
+ */
+ if (!cpuid_edx(0x80000006))
+ return;
+
+ if (c->x86 < 0x17) {
+ /* LLC is at the node level. */
+ per_cpu(cpu_llc_id, cpu) = node_id;
+ } else if (c->x86 == 0x17 &&
+ c->x86_model >= 0 && c->x86_model <= 0x1F) {
+ /*
+ * LLC is at the core complex level.
+ * Core complex ID is ApicId[3] for these processors.
+ */
+ per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
+ } else {
+ /*
+ * LLC ID is calculated from the number of threads sharing the
+ * cache.
+ * */
+ u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
+ u32 llc_index = find_num_cache_leaves(c) - 1;
+
+ cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
+ if (eax)
+ num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
+
+ if (num_sharing_cache) {
+ int bits = get_count_order(num_sharing_cache) - 1;
+
+ per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
+ }
+ }
+}
+
void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
@@ -650,7 +691,7 @@ void init_amd_cacheinfo(struct cpuinfo_x86 *c)
}
}
-unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
+void init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
/* Cache sizes */
unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
@@ -802,7 +843,8 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
- return l2;
+ if (!l2)
+ cpu_detect_cache_sizes(c);
}
static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index e5ec0f11c0de..14433ff5b828 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -18,6 +18,13 @@
#define RNG_ENABLED (1 << 3)
#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */
+#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000
+#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000
+#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000
+#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001
+#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002
+#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020
+
static void init_c3(struct cpuinfo_x86 *c)
{
u32 lo, hi;
@@ -112,6 +119,31 @@ static void early_init_centaur(struct cpuinfo_x86 *c)
}
}
+static void centaur_detect_vmx_virtcap(struct cpuinfo_x86 *c)
+{
+ u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2;
+
+ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
+ msr_ctl = vmx_msr_high | vmx_msr_low;
+
+ if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)
+ set_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
+ if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI)
+ set_cpu_cap(c, X86_FEATURE_VNMI);
+ if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) {
+ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
+ vmx_msr_low, vmx_msr_high);
+ msr_ctl2 = vmx_msr_high | vmx_msr_low;
+ if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) &&
+ (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW))
+ set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
+ if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT)
+ set_cpu_cap(c, X86_FEATURE_EPT);
+ if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID)
+ set_cpu_cap(c, X86_FEATURE_VPID);
+ }
+}
+
static void init_centaur(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_32
@@ -128,6 +160,24 @@ static void init_centaur(struct cpuinfo_x86 *c)
clear_cpu_cap(c, 0*32+31);
#endif
early_init_centaur(c);
+ init_intel_cacheinfo(c);
+ detect_num_cpu_cores(c);
+#ifdef CONFIG_X86_32
+ detect_ht(c);
+#endif
+
+ if (c->cpuid_level > 9) {
+ unsigned int eax = cpuid_eax(10);
+
+ /*
+ * Check for version and the number of counters
+ * Version(eax[7:0]) can't be 0;
+ * Counters(eax[15:8]) should be greater than 1;
+ */
+ if ((eax & 0xff) && (((eax >> 8) & 0xff) > 1))
+ set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
+ }
+
switch (c->x86) {
#ifdef CONFIG_X86_32
case 5:
@@ -199,6 +249,9 @@ static void init_centaur(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_64
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
#endif
+
+ if (cpu_has(c, X86_FEATURE_VMX))
+ centaur_detect_vmx_virtcap(c);
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8a5b185735e1..95c8e507580d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -66,6 +66,13 @@ cpumask_var_t cpu_callin_mask;
/* representing cpus for which sibling maps can be computed */
cpumask_var_t cpu_sibling_setup_mask;
+/* Number of siblings per CPU package */
+int smp_num_siblings = 1;
+EXPORT_SYMBOL(smp_num_siblings);
+
+/* Last level cache ID of each logical CPU */
+DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
+
/* correctly size the local cpu masks */
void __init setup_cpu_local_masks(void)
{
@@ -577,6 +584,19 @@ static void get_model_name(struct cpuinfo_x86 *c)
*(s + 1) = '\0';
}
+void detect_num_cpu_cores(struct cpuinfo_x86 *c)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ c->x86_max_cores = 1;
+ if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
+ return;
+
+ cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
+ if (eax & 0x1f)
+ c->x86_max_cores = (eax >> 26) + 1;
+}
+
void cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, ebx, ecx, edx, l2size;
@@ -757,17 +777,32 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
* and they also have a different bit for STIBP support. Also,
* a hypervisor might have set the individual AMD bits even on
* Intel CPUs, for finer-grained selection of what's available.
- *
- * We use the AMD bits in 0x8000_0008 EBX as the generic hardware
- * features, which are visible in /proc/cpuinfo and used by the
- * kernel. So set those accordingly from the Intel bits.
*/
if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
set_cpu_cap(c, X86_FEATURE_IBRS);
set_cpu_cap(c, X86_FEATURE_IBPB);
+ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
}
+
if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
set_cpu_cap(c, X86_FEATURE_STIBP);
+
+ if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD) ||
+ cpu_has(c, X86_FEATURE_VIRT_SSBD))
+ set_cpu_cap(c, X86_FEATURE_SSBD);
+
+ if (cpu_has(c, X86_FEATURE_AMD_IBRS)) {
+ set_cpu_cap(c, X86_FEATURE_IBRS);
+ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
+ }
+
+ if (cpu_has(c, X86_FEATURE_AMD_IBPB))
+ set_cpu_cap(c, X86_FEATURE_IBPB);
+
+ if (cpu_has(c, X86_FEATURE_AMD_STIBP)) {
+ set_cpu_cap(c, X86_FEATURE_STIBP);
+ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
+ }
}
void get_cpu_cap(struct cpuinfo_x86 *c)
@@ -848,6 +883,11 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_power = edx;
}
+ if (c->extended_cpuid_level >= 0x80000008) {
+ cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
+ c->x86_capability[CPUID_8000_0008_EBX] = ebx;
+ }
+
if (c->extended_cpuid_level >= 0x8000000a)
c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
@@ -871,7 +911,6 @@ static void get_cpu_address_sizes(struct cpuinfo_x86 *c)
c->x86_virt_bits = (eax >> 8) & 0xff;
c->x86_phys_bits = eax & 0xff;
- c->x86_capability[CPUID_8000_0008_EBX] = ebx;
}
#ifdef CONFIG_X86_32
else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
@@ -923,21 +962,47 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
{}
};
-static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c)
+/* Only list CPUs which speculate but are non susceptible to SSB */
+static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
+ { X86_VENDOR_AMD, 0x12, },
+ { X86_VENDOR_AMD, 0x11, },
+ { X86_VENDOR_AMD, 0x10, },
+ { X86_VENDOR_AMD, 0xf, },
+ {}
+};
+
+static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
u64 ia32_cap = 0;
- if (x86_match_cpu(cpu_no_meltdown))
- return false;
+ if (x86_match_cpu(cpu_no_speculation))
+ return;
+
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+ if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
+ !(ia32_cap & ARCH_CAP_SSB_NO))
+ setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
+
+ if (x86_match_cpu(cpu_no_meltdown))
+ return;
+
/* Rogue Data Cache Load? No! */
if (ia32_cap & ARCH_CAP_RDCL_NO)
- return false;
+ return;
- return true;
+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
}
/*
@@ -988,12 +1053,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
- if (!x86_match_cpu(cpu_no_speculation)) {
- if (cpu_vulnerable_to_meltdown(c))
- setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
- setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
- setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
- }
+ cpu_set_bug_bits(c);
fpu__init_system(c);
@@ -1004,6 +1064,21 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
*/
setup_clear_cpu_cap(X86_FEATURE_PCID);
#endif
+
+ /*
+ * Later in the boot process pgtable_l5_enabled() relies on
+ * cpu_feature_enabled(X86_FEATURE_LA57). If 5-level paging is not
+ * enabled by this point we need to clear the feature bit to avoid
+ * false-positives at the later stage.
+ *
+ * pgtable_l5_enabled() can be false here for several reasons:
+ * - 5-level paging is disabled compile-time;
+ * - it's 32-bit kernel;
+ * - machine doesn't support 5-level paging;
+ * - user specified 'no5lvl' in kernel command line.
+ */
+ if (!pgtable_l5_enabled())
+ setup_clear_cpu_cap(X86_FEATURE_LA57);
}
void __init early_cpu_init(void)
@@ -1355,6 +1430,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
#endif
mtrr_ap_init();
validate_apic_and_package_id(c);
+ x86_spec_ctrl_setup_ap();
}
static __init int setup_noclflush(char *arg)
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index e806b11a99af..38216f678fc3 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -47,7 +47,19 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
+extern u32 get_scattered_cpuid_leaf(unsigned int level,
+ unsigned int sub_leaf,
+ enum cpuid_regs_idx reg);
+extern void init_intel_cacheinfo(struct cpuinfo_x86 *c);
+extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
+
+extern void detect_num_cpu_cores(struct cpuinfo_x86 *c);
+extern int detect_extended_topology(struct cpuinfo_x86 *c);
+extern void detect_ht(struct cpuinfo_x86 *c);
unsigned int aperfmperf_get_khz(int cpu);
+extern void x86_spec_ctrl_setup_ap(void);
+
#endif /* ARCH_X86_CPU_H */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b9693b80fc21..eb75564f2d25 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -188,7 +188,10 @@ static void early_init_intel(struct cpuinfo_x86 *c)
setup_clear_cpu_cap(X86_FEATURE_IBPB);
setup_clear_cpu_cap(X86_FEATURE_STIBP);
setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
+ setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL);
setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
+ setup_clear_cpu_cap(X86_FEATURE_SSBD);
+ setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD);
}
/*
@@ -453,24 +456,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
#endif
}
-/*
- * find out the number of processor cores on the die
- */
-static int intel_num_cpu_cores(struct cpuinfo_x86 *c)
-{
- unsigned int eax, ebx, ecx, edx;
-
- if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
- return 1;
-
- /* Intel has a non-standard dependency on %ecx for this CPUID level. */
- cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
- if (eax & 0x1f)
- return (eax >> 26) + 1;
- else
- return 1;
-}
-
static void detect_vmx_virtcap(struct cpuinfo_x86 *c)
{
/* Intel VMX MSR indicated features */
@@ -653,8 +638,6 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
static void init_intel(struct cpuinfo_x86 *c)
{
- unsigned int l2 = 0;
-
early_init_intel(c);
intel_workarounds(c);
@@ -671,19 +654,13 @@ static void init_intel(struct cpuinfo_x86 *c)
* let's use the legacy cpuid vector 0x1 and 0x4 for topology
* detection.
*/
- c->x86_max_cores = intel_num_cpu_cores(c);
+ detect_num_cpu_cores(c);
#ifdef CONFIG_X86_32
detect_ht(c);
#endif
}
- l2 = init_intel_cacheinfo(c);
-
- /* Detect legacy cache sizes if init_intel_cacheinfo did not */
- if (l2 == 0) {
- cpu_detect_cache_sizes(c);
- l2 = c->x86_cache_size;
- }
+ init_intel_cacheinfo(c);
if (c->cpuid_level > 9) {
unsigned eax = cpuid_eax(10);
@@ -696,7 +673,8 @@ static void init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
if (boot_cpu_has(X86_FEATURE_DS)) {
- unsigned int l1;
+ unsigned int l1, l2;
+
rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
if (!(l1 & (1<<11)))
set_cpu_cap(c, X86_FEATURE_BTS);
@@ -724,6 +702,7 @@ static void init_intel(struct cpuinfo_x86 *c)
* Dixon is NOT a Celeron.
*/
if (c->x86 == 6) {
+ unsigned int l2 = c->x86_cache_size;
char *p = NULL;
switch (c->x86_model) {
@@ -835,6 +814,9 @@ static const struct _tlb_table intel_tlb_table[] = {
{ 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" },
{ 0x61, TLB_INST_4K, 48, " TLB_INST 4 KByte pages, full associative" },
{ 0x63, TLB_DATA_1G, 4, " TLB_DATA 1 GByte pages, 4-way set associative" },
+ { 0x6b, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 8-way associative" },
+ { 0x6c, TLB_DATA_2M_4M, 128, " TLB_DATA 2 MByte or 4 MByte pages, 8-way associative" },
+ { 0x6d, TLB_DATA_1G, 16, " TLB_DATA 1 GByte pages, fully associative" },
{ 0x76, TLB_INST_2M_4M, 8, " TLB_INST 2-MByte or 4-MByte pages, fully associative" },
{ 0xb0, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 4-way set associative" },
{ 0xb1, TLB_INST_2M_4M, 4, " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" },
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 589b948e6e01..24bfa63e86cf 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -33,8 +33,8 @@
#include <asm/intel_rdt_sched.h>
#include "intel_rdt.h"
-#define MAX_MBA_BW 100u
#define MBA_IS_LINEAR 0x4
+#define MBA_MAX_MBPS U32_MAX
/* Mutex to protect rdtgroup access. */
DEFINE_MUTEX(rdtgroup_mutex);
@@ -178,7 +178,7 @@ struct rdt_resource rdt_resources_all[] = {
.msr_update = mba_wrmsr,
.cache_level = 3,
.parse_ctrlval = parse_bw,
- .format_str = "%d=%*d",
+ .format_str = "%d=%*u",
.fflags = RFTYPE_RES_MB,
},
};
@@ -230,6 +230,14 @@ static inline void cache_alloc_hsw_probe(void)
rdt_alloc_capable = true;
}
+bool is_mba_sc(struct rdt_resource *r)
+{
+ if (!r)
+ return rdt_resources_all[RDT_RESOURCE_MBA].membw.mba_sc;
+
+ return r->membw.mba_sc;
+}
+
/*
* rdt_get_mb_table() - get a mapping of bandwidth(b/w) percentage values
* exposed to user interface and the h/w understandable delay values.
@@ -341,7 +349,7 @@ static int get_cache_id(int cpu, int level)
* that can be written to QOS_MSRs.
* There are currently no SKUs which support non linear delay values.
*/
-static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
+u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
{
if (r->membw.delay_linear)
return MAX_MBA_BW - bw;
@@ -431,25 +439,40 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
return NULL;
}
+void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm)
+{
+ int i;
+
+ /*
+ * Initialize the Control MSRs to having no control.
+ * For Cache Allocation: Set all bits in cbm
+ * For Memory Allocation: Set b/w requested to 100%
+ * and the bandwidth in MBps to U32_MAX
+ */
+ for (i = 0; i < r->num_closid; i++, dc++, dm++) {
+ *dc = r->default_ctrl;
+ *dm = MBA_MAX_MBPS;
+ }
+}
+
static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
{
struct msr_param m;
- u32 *dc;
- int i;
+ u32 *dc, *dm;
dc = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL);
if (!dc)
return -ENOMEM;
- d->ctrl_val = dc;
+ dm = kmalloc_array(r->num_closid, sizeof(*d->mbps_val), GFP_KERNEL);
+ if (!dm) {
+ kfree(dc);
+ return -ENOMEM;
+ }
- /*
- * Initialize the Control MSRs to having no control.
- * For Cache Allocation: Set all bits in cbm
- * For Memory Allocation: Set b/w requested to 100
- */
- for (i = 0; i < r->num_closid; i++, dc++)
- *dc = r->default_ctrl;
+ d->ctrl_val = dc;
+ d->mbps_val = dm;
+ setup_default_ctrlval(r, dc, dm);
m.low = 0;
m.high = r->num_closid;
@@ -588,6 +611,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
}
kfree(d->ctrl_val);
+ kfree(d->mbps_val);
kfree(d->rmid_busy_llc);
kfree(d->mbm_total);
kfree(d->mbm_local);
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 3fd7a70ee04a..39752825e376 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -28,6 +28,7 @@
#define MBM_CNTR_WIDTH 24
#define MBM_OVERFLOW_INTERVAL 1000
+#define MAX_MBA_BW 100u
#define RMID_VAL_ERROR BIT_ULL(63)
#define RMID_VAL_UNAVAIL BIT_ULL(62)
@@ -180,10 +181,20 @@ struct rftype {
* struct mbm_state - status for each MBM counter in each domain
* @chunks: Total data moved (multiply by rdt_group.mon_scale to get bytes)
* @prev_msr Value of IA32_QM_CTR for this RMID last time we read it
+ * @chunks_bw Total local data moved. Used for bandwidth calculation
+ * @prev_bw_msr:Value of previous IA32_QM_CTR for bandwidth counting
+ * @prev_bw The most recent bandwidth in MBps
+ * @delta_bw Difference between the current and previous bandwidth
+ * @delta_comp Indicates whether to compute the delta_bw
*/
struct mbm_state {
u64 chunks;
u64 prev_msr;
+ u64 chunks_bw;
+ u64 prev_bw_msr;
+ u32 prev_bw;
+ u32 delta_bw;
+ bool delta_comp;
};
/**
@@ -202,6 +213,7 @@ struct mbm_state {
* @cqm_work_cpu:
* worker cpu for CQM h/w counters
* @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID)
+ * @mbps_val: When mba_sc is enabled, this holds the bandwidth in MBps
* @new_ctrl: new ctrl value to be loaded
* @have_new_ctrl: did user provide new_ctrl for this domain
*/
@@ -217,6 +229,7 @@ struct rdt_domain {
int mbm_work_cpu;
int cqm_work_cpu;
u32 *ctrl_val;
+ u32 *mbps_val;
u32 new_ctrl;
bool have_new_ctrl;
};
@@ -259,6 +272,7 @@ struct rdt_cache {
* @min_bw: Minimum memory bandwidth percentage user can request
* @bw_gran: Granularity at which the memory bandwidth is allocated
* @delay_linear: True if memory B/W delay is in linear scale
+ * @mba_sc: True if MBA software controller(mba_sc) is enabled
* @mb_map: Mapping of memory B/W percentage to memory B/W delay
*/
struct rdt_membw {
@@ -266,6 +280,7 @@ struct rdt_membw {
u32 min_bw;
u32 bw_gran;
u32 delay_linear;
+ bool mba_sc;
u32 *mb_map;
};
@@ -445,6 +460,9 @@ void mon_event_read(struct rmid_read *rr, struct rdt_domain *d,
void mbm_setup_overflow_handler(struct rdt_domain *dom,
unsigned long delay_ms);
void mbm_handle_overflow(struct work_struct *work);
+bool is_mba_sc(struct rdt_resource *r);
+void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm);
+u32 delay_bw_map(unsigned long bw, struct rdt_resource *r);
void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
void cqm_handle_limbo(struct work_struct *work);
bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index 23e1d5c249c6..116d57b248d3 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -53,7 +53,8 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
return false;
}
- if (bw < r->membw.min_bw || bw > r->default_ctrl) {
+ if ((bw < r->membw.min_bw || bw > r->default_ctrl) &&
+ !is_mba_sc(r)) {
rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw,
r->membw.min_bw, r->default_ctrl);
return false;
@@ -179,6 +180,8 @@ static int update_domains(struct rdt_resource *r, int closid)
struct msr_param msr_param;
cpumask_var_t cpu_mask;
struct rdt_domain *d;
+ bool mba_sc;
+ u32 *dc;
int cpu;
if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
@@ -188,13 +191,20 @@ static int update_domains(struct rdt_resource *r, int closid)
msr_param.high = msr_param.low + 1;
msr_param.res = r;
+ mba_sc = is_mba_sc(r);
list_for_each_entry(d, &r->domains, list) {
- if (d->have_new_ctrl && d->new_ctrl != d->ctrl_val[closid]) {
+ dc = !mba_sc ? d->ctrl_val : d->mbps_val;
+ if (d->have_new_ctrl && d->new_ctrl != dc[closid]) {
cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
- d->ctrl_val[closid] = d->new_ctrl;
+ dc[closid] = d->new_ctrl;
}
}
- if (cpumask_empty(cpu_mask))
+
+ /*
+ * Avoid writing the control msr with control values when
+ * MBA software controller is enabled
+ */
+ if (cpumask_empty(cpu_mask) || mba_sc)
goto done;
cpu = get_cpu();
/* Update CBM on this cpu if it's in cpu_mask. */
@@ -282,13 +292,17 @@ static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid)
{
struct rdt_domain *dom;
bool sep = false;
+ u32 ctrl_val;
seq_printf(s, "%*s:", max_name_width, r->name);
list_for_each_entry(dom, &r->domains, list) {
if (sep)
seq_puts(s, ";");
+
+ ctrl_val = (!is_mba_sc(r) ? dom->ctrl_val[closid] :
+ dom->mbps_val[closid]);
seq_printf(s, r->format_str, dom->id, max_data_width,
- dom->ctrl_val[closid]);
+ ctrl_val);
sep = true;
}
seq_puts(s, "\n");
diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c b/arch/x86/kernel/cpu/intel_rdt_monitor.c
index 681450eee428..b0f3aed76b75 100644
--- a/arch/x86/kernel/cpu/intel_rdt_monitor.c
+++ b/arch/x86/kernel/cpu/intel_rdt_monitor.c
@@ -225,10 +225,18 @@ void free_rmid(u32 rmid)
list_add_tail(&entry->list, &rmid_free_lru);
}
+static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr)
+{
+ u64 shift = 64 - MBM_CNTR_WIDTH, chunks;
+
+ chunks = (cur_msr << shift) - (prev_msr << shift);
+ return chunks >>= shift;
+}
+
static int __mon_event_count(u32 rmid, struct rmid_read *rr)
{
- u64 chunks, shift, tval;
struct mbm_state *m;
+ u64 chunks, tval;
tval = __rmid_read(rmid, rr->evtid);
if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) {
@@ -254,14 +262,12 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
}
if (rr->first) {
- m->prev_msr = tval;
- m->chunks = 0;
+ memset(m, 0, sizeof(struct mbm_state));
+ m->prev_bw_msr = m->prev_msr = tval;
return 0;
}
- shift = 64 - MBM_CNTR_WIDTH;
- chunks = (tval << shift) - (m->prev_msr << shift);
- chunks >>= shift;
+ chunks = mbm_overflow_count(m->prev_msr, tval);
m->chunks += chunks;
m->prev_msr = tval;
@@ -270,6 +276,32 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
}
/*
+ * Supporting function to calculate the memory bandwidth
+ * and delta bandwidth in MBps.
+ */
+static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
+{
+ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
+ struct mbm_state *m = &rr->d->mbm_local[rmid];
+ u64 tval, cur_bw, chunks;
+
+ tval = __rmid_read(rmid, rr->evtid);
+ if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+ return;
+
+ chunks = mbm_overflow_count(m->prev_bw_msr, tval);
+ m->chunks_bw += chunks;
+ m->chunks = m->chunks_bw;
+ cur_bw = (chunks * r->mon_scale) >> 20;
+
+ if (m->delta_comp)
+ m->delta_bw = abs(cur_bw - m->prev_bw);
+ m->delta_comp = false;
+ m->prev_bw = cur_bw;
+ m->prev_bw_msr = tval;
+}
+
+/*
* This is called via IPI to read the CQM/MBM counters
* on a domain.
*/
@@ -297,6 +329,118 @@ void mon_event_count(void *info)
}
}
+/*
+ * Feedback loop for MBA software controller (mba_sc)
+ *
+ * mba_sc is a feedback loop where we periodically read MBM counters and
+ * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so
+ * that:
+ *
+ * current bandwdith(cur_bw) < user specified bandwidth(user_bw)
+ *
+ * This uses the MBM counters to measure the bandwidth and MBA throttle
+ * MSRs to control the bandwidth for a particular rdtgrp. It builds on the
+ * fact that resctrl rdtgroups have both monitoring and control.
+ *
+ * The frequency of the checks is 1s and we just tag along the MBM overflow
+ * timer. Having 1s interval makes the calculation of bandwidth simpler.
+ *
+ * Although MBA's goal is to restrict the bandwidth to a maximum, there may
+ * be a need to increase the bandwidth to avoid uncecessarily restricting
+ * the L2 <-> L3 traffic.
+ *
+ * Since MBA controls the L2 external bandwidth where as MBM measures the
+ * L3 external bandwidth the following sequence could lead to such a
+ * situation.
+ *
+ * Consider an rdtgroup which had high L3 <-> memory traffic in initial
+ * phases -> mba_sc kicks in and reduced bandwidth percentage values -> but
+ * after some time rdtgroup has mostly L2 <-> L3 traffic.
+ *
+ * In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its
+ * throttle MSRs already have low percentage values. To avoid
+ * unnecessarily restricting such rdtgroups, we also increase the bandwidth.
+ */
+static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
+{
+ u32 closid, rmid, cur_msr, cur_msr_val, new_msr_val;
+ struct mbm_state *pmbm_data, *cmbm_data;
+ u32 cur_bw, delta_bw, user_bw;
+ struct rdt_resource *r_mba;
+ struct rdt_domain *dom_mba;
+ struct list_head *head;
+ struct rdtgroup *entry;
+
+ r_mba = &rdt_resources_all[RDT_RESOURCE_MBA];
+ closid = rgrp->closid;
+ rmid = rgrp->mon.rmid;
+ pmbm_data = &dom_mbm->mbm_local[rmid];
+
+ dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
+ if (!dom_mba) {
+ pr_warn_once("Failure to get domain for MBA update\n");
+ return;
+ }
+
+ cur_bw = pmbm_data->prev_bw;
+ user_bw = dom_mba->mbps_val[closid];
+ delta_bw = pmbm_data->delta_bw;
+ cur_msr_val = dom_mba->ctrl_val[closid];
+
+ /*
+ * For Ctrl groups read data from child monitor groups.
+ */
+ head = &rgrp->mon.crdtgrp_list;
+ list_for_each_entry(entry, head, mon.crdtgrp_list) {
+ cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
+ cur_bw += cmbm_data->prev_bw;
+ delta_bw += cmbm_data->delta_bw;
+ }
+
+ /*
+ * Scale up/down the bandwidth linearly for the ctrl group. The
+ * bandwidth step is the bandwidth granularity specified by the
+ * hardware.
+ *
+ * The delta_bw is used when increasing the bandwidth so that we
+ * dont alternately increase and decrease the control values
+ * continuously.
+ *
+ * For ex: consider cur_bw = 90MBps, user_bw = 100MBps and if
+ * bandwidth step is 20MBps(> user_bw - cur_bw), we would keep
+ * switching between 90 and 110 continuously if we only check
+ * cur_bw < user_bw.
+ */
+ if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
+ new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
+ } else if (cur_msr_val < MAX_MBA_BW &&
+ (user_bw > (cur_bw + delta_bw))) {
+ new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
+ } else {
+ return;
+ }
+
+ cur_msr = r_mba->msr_base + closid;
+ wrmsrl(cur_msr, delay_bw_map(new_msr_val, r_mba));
+ dom_mba->ctrl_val[closid] = new_msr_val;
+
+ /*
+ * Delta values are updated dynamically package wise for each
+ * rdtgrp everytime the throttle MSR changes value.
+ *
+ * This is because (1)the increase in bandwidth is not perfectly
+ * linear and only "approximately" linear even when the hardware
+ * says it is linear.(2)Also since MBA is a core specific
+ * mechanism, the delta values vary based on number of cores used
+ * by the rdtgrp.
+ */
+ pmbm_data->delta_comp = true;
+ list_for_each_entry(entry, head, mon.crdtgrp_list) {
+ cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
+ cmbm_data->delta_comp = true;
+ }
+}
+
static void mbm_update(struct rdt_domain *d, int rmid)
{
struct rmid_read rr;
@@ -314,7 +458,16 @@ static void mbm_update(struct rdt_domain *d, int rmid)
}
if (is_mbm_local_enabled()) {
rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
- __mon_event_count(rmid, &rr);
+
+ /*
+ * Call the MBA software controller only for the
+ * control groups and when user has enabled
+ * the software controller explicitly.
+ */
+ if (!is_mba_sc(NULL))
+ __mon_event_count(rmid, &rr);
+ else
+ mbm_bw_count(rmid, &rr);
}
}
@@ -385,6 +538,9 @@ void mbm_handle_overflow(struct work_struct *work)
head = &prgrp->mon.crdtgrp_list;
list_for_each_entry(crgrp, head, mon.crdtgrp_list)
mbm_update(d, crgrp->mon.rmid);
+
+ if (is_mba_sc(NULL))
+ update_mba_bw(prgrp, d);
}
schedule_delayed_work_on(cpu, &d->mbm_over, delay);
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index fca759d272a1..749856a2e736 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -1005,6 +1005,11 @@ static void l2_qos_cfg_update(void *arg)
wrmsrl(IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
}
+static inline bool is_mba_linear(void)
+{
+ return rdt_resources_all[RDT_RESOURCE_MBA].membw.delay_linear;
+}
+
static int set_cache_qos_cfg(int level, bool enable)
{
void (*update)(void *arg);
@@ -1041,6 +1046,28 @@ static int set_cache_qos_cfg(int level, bool enable)
return 0;
}
+/*
+ * Enable or disable the MBA software controller
+ * which helps user specify bandwidth in MBps.
+ * MBA software controller is supported only if
+ * MBM is supported and MBA is in linear scale.
+ */
+static int set_mba_sc(bool mba_sc)
+{
+ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA];
+ struct rdt_domain *d;
+
+ if (!is_mbm_enabled() || !is_mba_linear() ||
+ mba_sc == is_mba_sc(r))
+ return -EINVAL;
+
+ r->membw.mba_sc = mba_sc;
+ list_for_each_entry(d, &r->domains, list)
+ setup_default_ctrlval(r, d->ctrl_val, d->mbps_val);
+
+ return 0;
+}
+
static int cdp_enable(int level, int data_type, int code_type)
{
struct rdt_resource *r_ldata = &rdt_resources_all[data_type];
@@ -1123,6 +1150,10 @@ static int parse_rdtgroupfs_options(char *data)
ret = cdpl2_enable();
if (ret)
goto out;
+ } else if (!strcmp(token, "mba_MBps")) {
+ ret = set_mba_sc(true);
+ if (ret)
+ goto out;
} else {
ret = -EINVAL;
goto out;
@@ -1445,6 +1476,8 @@ static void rdt_kill_sb(struct super_block *sb)
cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
+ set_mba_sc(false);
+
/*Put everything back to default values. */
for_each_alloc_enabled_rdt_resource(r)
reset_all_ctrls(r);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 475cb4f5f14f..c805a06e14c3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -48,7 +48,7 @@ static struct dentry *dfs_inj;
static u8 n_banks;
-#define MAX_FLAG_OPT_SIZE 3
+#define MAX_FLAG_OPT_SIZE 4
#define NBCFG 0x44
enum injection_type {
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 42cf2880d0ed..cd76380af79f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1727,6 +1727,21 @@ static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
}
}
+static void mce_centaur_feature_init(struct cpuinfo_x86 *c)
+{
+ struct mca_config *cfg = &mca_cfg;
+
+ /*
+ * All newer Centaur CPUs support MCE broadcasting. Enable
+ * synchronization with a one second timeout.
+ */
+ if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) ||
+ c->x86 > 6) {
+ if (cfg->monarch_timeout < 0)
+ cfg->monarch_timeout = USEC_PER_SEC;
+ }
+}
+
static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
{
switch (c->x86_vendor) {
@@ -1739,6 +1754,9 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
mce_amd_feature_init(c);
break;
}
+ case X86_VENDOR_CENTAUR:
+ mce_centaur_feature_init(c);
+ break;
default:
break;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f7666eef4a87..f591b01930db 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -94,6 +94,11 @@ static struct smca_bank_name smca_names[] = {
[SMCA_SMU] = { "smu", "System Management Unit" },
};
+static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
+{
+ [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 }
+};
+
const char *smca_get_name(enum smca_bank_types t)
{
if (t >= N_SMCA_BANK_TYPES)
@@ -431,8 +436,7 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
wrmsr(MSR_CU_DEF_ERR, low, high);
}
-static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
- unsigned int block)
+static u32 smca_get_block_address(unsigned int bank, unsigned int block)
{
u32 low, high;
u32 addr = 0;
@@ -443,24 +447,30 @@ static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
if (!block)
return MSR_AMD64_SMCA_MCx_MISC(bank);
+ /* Check our cache first: */
+ if (smca_bank_addrs[bank][block] != -1)
+ return smca_bank_addrs[bank][block];
+
/*
* For SMCA enabled processors, BLKPTR field of the first MISC register
* (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
*/
- if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
- return addr;
+ if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+ goto out;
if (!(low & MCI_CONFIG_MCAX))
- return addr;
+ goto out;
- if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
+ if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
(low & MASK_BLKPTR_LO))
- return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+ addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+out:
+ smca_bank_addrs[bank][block] = addr;
return addr;
}
-static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high,
+static u32 get_block_address(u32 current_addr, u32 low, u32 high,
unsigned int bank, unsigned int block)
{
u32 addr = 0, offset = 0;
@@ -468,20 +478,8 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
return addr;
- /* Get address from already initialized block. */
- if (per_cpu(threshold_banks, cpu)) {
- struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank];
-
- if (bankp && bankp->blocks) {
- struct threshold_block *blockp = &bankp->blocks[block];
-
- if (blockp)
- return blockp->address;
- }
- }
-
if (mce_flags.smca)
- return smca_get_block_address(cpu, bank, block);
+ return smca_get_block_address(bank, block);
/* Fall back to method we used for older processors: */
switch (block) {
@@ -559,7 +557,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
smca_configure(bank, cpu);
for (block = 0; block < NR_BLOCKS; ++block) {
- address = get_block_address(cpu, address, low, high, bank, block);
+ address = get_block_address(address, low, high, bank, block);
if (!address)
break;
@@ -1176,7 +1174,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
if (err)
goto out_free;
recurse:
- address = get_block_address(cpu, address, low, high, bank, ++block);
+ address = get_block_address(address, low, high, bank, ++block);
if (!address)
return 0;
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 10c4fc2c91f8..77e201301528 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -564,14 +564,12 @@ static int __reload_late(void *info)
apply_microcode_local(&err);
spin_unlock(&update_lock);
+ /* siblings return UCODE_OK because their engine got updated already */
if (err > UCODE_NFOUND) {
pr_warn("Error reloading microcode on CPU %d\n", cpu);
- return -1;
- /* siblings return UCODE_OK because their engine got updated already */
+ ret = -1;
} else if (err == UCODE_UPDATED || err == UCODE_OK) {
ret = 1;
- } else {
- return ret;
}
/*
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 32b8e5724f96..1c2cfa0644aa 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -485,7 +485,6 @@ static void show_saved_mc(void)
*/
static void save_mc_for_early(u8 *mc, unsigned int size)
{
-#ifdef CONFIG_HOTPLUG_CPU
/* Synchronization during CPU hotplug. */
static DEFINE_MUTEX(x86_cpu_microcode_mutex);
@@ -495,7 +494,6 @@ static void save_mc_for_early(u8 *mc, unsigned int size)
show_saved_mc();
mutex_unlock(&x86_cpu_microcode_mutex);
-#endif
}
static bool load_builtin_intel_microcode(struct cpio_data *cp)
diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile
index ad9e5ed81181..2ad9107ee980 100644
--- a/arch/x86/kernel/cpu/mtrr/Makefile
+++ b/arch/x86/kernel/cpu/mtrr/Makefile
@@ -1,3 +1,3 @@
-obj-y := main.o if.o generic.o cleanup.o
+obj-y := mtrr.o if.o generic.o cleanup.o
obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
index 7468de429087..9a19c800fe40 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
@@ -46,6 +46,7 @@
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/syscore_ops.h>
+#include <linux/rcupdate.h>
#include <asm/cpufeature.h>
#include <asm/e820/api.h>
@@ -100,7 +101,7 @@ static int have_wrcomb(void)
if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
dev->revision <= 5) {
- pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
+ pr_info("Serverworks LE rev < 6 detected. Write-combining disabled.\n");
pci_dev_put(dev);
return 0;
}
@@ -110,7 +111,7 @@ static int have_wrcomb(void)
*/
if (dev->vendor == PCI_VENDOR_ID_INTEL &&
dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
- pr_info("mtrr: Intel 450NX MMC detected. Write-combining disabled.\n");
+ pr_info("Intel 450NX MMC detected. Write-combining disabled.\n");
pci_dev_put(dev);
return 0;
}
@@ -312,24 +313,24 @@ int mtrr_add_page(unsigned long base, unsigned long size,
return error;
if (type >= MTRR_NUM_TYPES) {
- pr_warn("mtrr: type: %u invalid\n", type);
+ pr_warn("type: %u invalid\n", type);
return -EINVAL;
}
/* If the type is WC, check that this processor supports it */
if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
- pr_warn("mtrr: your processor doesn't support write-combining\n");
+ pr_warn("your processor doesn't support write-combining\n");
return -ENOSYS;
}
if (!size) {
- pr_warn("mtrr: zero sized request\n");
+ pr_warn("zero sized request\n");
return -EINVAL;
}
if ((base | (base + size - 1)) >>
(boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
- pr_warn("mtrr: base or size exceeds the MTRR width\n");
+ pr_warn("base or size exceeds the MTRR width\n");
return -EINVAL;
}
@@ -360,8 +361,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
} else if (types_compatible(type, ltype))
continue;
}
- pr_warn("mtrr: 0x%lx000,0x%lx000 overlaps existing"
- " 0x%lx000,0x%lx000\n", base, size, lbase,
+ pr_warn("0x%lx000,0x%lx000 overlaps existing 0x%lx000,0x%lx000\n", base, size, lbase,
lsize);
goto out;
}
@@ -369,7 +369,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
if (ltype != type) {
if (types_compatible(type, ltype))
continue;
- pr_warn("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
+ pr_warn("type mismatch for %lx000,%lx000 old: %s new: %s\n",
base, size, mtrr_attrib_to_str(ltype),
mtrr_attrib_to_str(type));
goto out;
@@ -395,7 +395,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
}
}
} else {
- pr_info("mtrr: no more MTRRs available\n");
+ pr_info("no more MTRRs available\n");
}
error = i;
out:
@@ -407,8 +407,8 @@ int mtrr_add_page(unsigned long base, unsigned long size,
static int mtrr_check(unsigned long base, unsigned long size)
{
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
- pr_warn("mtrr: size and base must be multiples of 4 kiB\n");
- pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base);
+ pr_warn("size and base must be multiples of 4 kiB\n");
+ pr_debug("size: 0x%lx base: 0x%lx\n", size, base);
dump_stack();
return -1;
}
@@ -499,22 +499,22 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
}
}
if (reg < 0) {
- pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n",
+ pr_debug("no MTRR for %lx000,%lx000 found\n",
base, size);
goto out;
}
}
if (reg >= max) {
- pr_warn("mtrr: register: %d too big\n", reg);
+ pr_warn("register: %d too big\n", reg);
goto out;
}
mtrr_if->get(reg, &lbase, &lsize, &ltype);
if (lsize < 1) {
- pr_warn("mtrr: MTRR %d not used\n", reg);
+ pr_warn("MTRR %d not used\n", reg);
goto out;
}
if (mtrr_usage_table[reg] < 1) {
- pr_warn("mtrr: reg: %d has count=0\n", reg);
+ pr_warn("reg: %d has count=0\n", reg);
goto out;
}
if (--mtrr_usage_table[reg] < 1)
@@ -775,7 +775,7 @@ void __init mtrr_bp_init(void)
}
if (!mtrr_enabled()) {
- pr_info("MTRR: Disabled\n");
+ pr_info("Disabled\n");
/*
* PAT initialization relies on MTRR's rendezvous handler.
@@ -793,6 +793,9 @@ void mtrr_ap_init(void)
if (!use_intel() || mtrr_aps_delayed_init)
return;
+
+ rcu_cpu_starting(smp_processor_id());
+
/*
* Ideally we should hold mtrr_mutex here to avoid mtrr entries
* changed, but this routine will be called in cpu boot time,
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index b099024d339c..81c0afb39d0a 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -27,7 +27,7 @@
* exists, use it for populating initial_apicid and cpu topology
* detection.
*/
-void detect_extended_topology(struct cpuinfo_x86 *c)
+int detect_extended_topology(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
unsigned int eax, ebx, ecx, edx, sub_index;
@@ -36,7 +36,7 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
static bool printed;
if (c->cpuid_level < 0xb)
- return;
+ return -1;
cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
@@ -44,7 +44,7 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
* check if the cpuid leaf 0xb is actually implemented.
*/
if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
- return;
+ return -1;
set_cpu_cap(c, X86_FEATURE_XTOPOLOGY);
@@ -95,6 +95,6 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
c->cpu_core_id);
printed = 1;
}
- return;
#endif
+ return 0;
}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 18fa9d74c182..666a284116ac 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -22,11 +22,14 @@
#include <asm/stacktrace.h>
#include <asm/unwind.h>
+#define OPCODE_BUFSIZE 64
+
int panic_on_unrecovered_nmi;
int panic_on_io_nmi;
-static unsigned int code_bytes = 64;
static int die_counter;
+static struct pt_regs exec_summary_regs;
+
bool in_task_stack(unsigned long *stack, struct task_struct *task,
struct stack_info *info)
{
@@ -69,9 +72,62 @@ static void printk_stack_address(unsigned long address, int reliable,
printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
}
+/*
+ * There are a couple of reasons for the 2/3rd prologue, courtesy of Linus:
+ *
+ * In case where we don't have the exact kernel image (which, if we did, we can
+ * simply disassemble and navigate to the RIP), the purpose of the bigger
+ * prologue is to have more context and to be able to correlate the code from
+ * the different toolchains better.
+ *
+ * In addition, it helps in recreating the register allocation of the failing
+ * kernel and thus make sense of the register dump.
+ *
+ * What is more, the additional complication of a variable length insn arch like
+ * x86 warrants having longer byte sequence before rIP so that the disassembler
+ * can "sync" up properly and find instruction boundaries when decoding the
+ * opcode bytes.
+ *
+ * Thus, the 2/3rds prologue and 64 byte OPCODE_BUFSIZE is just a random
+ * guesstimate in attempt to achieve all of the above.
+ */
+void show_opcodes(u8 *rip, const char *loglvl)
+{
+ unsigned int code_prologue = OPCODE_BUFSIZE * 2 / 3;
+ u8 opcodes[OPCODE_BUFSIZE];
+ u8 *ip;
+ int i;
+
+ printk("%sCode: ", loglvl);
+
+ ip = (u8 *)rip - code_prologue;
+ if (probe_kernel_read(opcodes, ip, OPCODE_BUFSIZE)) {
+ pr_cont("Bad RIP value.\n");
+ return;
+ }
+
+ for (i = 0; i < OPCODE_BUFSIZE; i++, ip++) {
+ if (ip == rip)
+ pr_cont("<%02x> ", opcodes[i]);
+ else
+ pr_cont("%02x ", opcodes[i]);
+ }
+ pr_cont("\n");
+}
+
+void show_ip(struct pt_regs *regs, const char *loglvl)
+{
+#ifdef CONFIG_X86_32
+ printk("%sEIP: %pS\n", loglvl, (void *)regs->ip);
+#else
+ printk("%sRIP: %04x:%pS\n", loglvl, (int)regs->cs, (void *)regs->ip);
+#endif
+ show_opcodes((u8 *)regs->ip, loglvl);
+}
+
void show_iret_regs(struct pt_regs *regs)
{
- printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
+ show_ip(regs, KERN_DEFAULT);
printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
regs->sp, regs->flags);
}
@@ -267,7 +323,6 @@ unsigned long oops_begin(void)
bust_spinlocks(1);
return flags;
}
-EXPORT_SYMBOL_GPL(oops_begin);
NOKPROBE_SYMBOL(oops_begin);
void __noreturn rewind_stack_do_exit(int signr);
@@ -287,6 +342,9 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
raw_local_irq_restore(flags);
oops_exit();
+ /* Executive summary in case the oops scrolled away */
+ __show_regs(&exec_summary_regs, true);
+
if (!signr)
return;
if (in_interrupt())
@@ -305,10 +363,10 @@ NOKPROBE_SYMBOL(oops_end);
int __die(const char *str, struct pt_regs *regs, long err)
{
-#ifdef CONFIG_X86_32
- unsigned short ss;
- unsigned long sp;
-#endif
+ /* Save the regs of the first oops for the executive summary later. */
+ if (!die_counter)
+ exec_summary_regs = *regs;
+
printk(KERN_DEFAULT
"%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter,
IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
@@ -318,26 +376,13 @@ int __die(const char *str, struct pt_regs *regs, long err)
IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
(boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
+ show_regs(regs);
+ print_modules();
+
if (notify_die(DIE_OOPS, str, regs, err,
current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
return 1;
- print_modules();
- show_regs(regs);
-#ifdef CONFIG_X86_32
- if (user_mode(regs)) {
- sp = regs->sp;
- ss = regs->ss;
- } else {
- sp = kernel_stack_pointer(regs);
- savesegment(ss, ss);
- }
- printk(KERN_EMERG "EIP: %pS SS:ESP: %04x:%08lx\n",
- (void *)regs->ip, ss, sp);
-#else
- /* Executive summary in case the oops scrolled away */
- printk(KERN_ALERT "RIP: %pS RSP: %016lx\n", (void *)regs->ip, regs->sp);
-#endif
return 0;
}
NOKPROBE_SYMBOL(__die);
@@ -356,30 +401,9 @@ void die(const char *str, struct pt_regs *regs, long err)
oops_end(flags, regs, sig);
}
-static int __init code_bytes_setup(char *s)
-{
- ssize_t ret;
- unsigned long val;
-
- if (!s)
- return -EINVAL;
-
- ret = kstrtoul(s, 0, &val);
- if (ret)
- return ret;
-
- code_bytes = val;
- if (code_bytes > 8192)
- code_bytes = 8192;
-
- return 1;
-}
-__setup("code_bytes=", code_bytes_setup);
-
void show_regs(struct pt_regs *regs)
{
bool all = true;
- int i;
show_regs_print_info(KERN_DEFAULT);
@@ -389,36 +413,8 @@ void show_regs(struct pt_regs *regs)
__show_regs(regs, all);
/*
- * When in-kernel, we also print out the stack and code at the
- * time of the fault..
+ * When in-kernel, we also print out the stack at the time of the fault..
*/
- if (!user_mode(regs)) {
- unsigned int code_prologue = code_bytes * 43 / 64;
- unsigned int code_len = code_bytes;
- unsigned char c;
- u8 *ip;
-
+ if (!user_mode(regs))
show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
-
- printk(KERN_DEFAULT "Code: ");
-
- ip = (u8 *)regs->ip - code_prologue;
- if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
- /* try starting at IP */
- ip = (u8 *)regs->ip;
- code_len = code_len - code_prologue + 1;
- }
- for (i = 0; i < code_len; i++, ip++) {
- if (ip < (u8 *)PAGE_OFFSET ||
- probe_kernel_address(ip, c)) {
- pr_cont(" Bad RIP value.");
- break;
- }
- if (ip == (u8 *)regs->ip)
- pr_cont("<%02x> ", c);
- else
- pr_cont("%02x ", c);
- }
- }
- pr_cont("\n");
}
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 6a2cb1442e05..d1f25c831447 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -155,7 +155,8 @@ static void __init __e820__range_add(struct e820_table *table, u64 start, u64 si
int x = table->nr_entries;
if (x >= ARRAY_SIZE(table->entries)) {
- pr_err("e820: too many entries; ignoring [mem %#010llx-%#010llx]\n", start, start + size - 1);
+ pr_err("too many entries; ignoring [mem %#010llx-%#010llx]\n",
+ start, start + size - 1);
return;
}
@@ -190,9 +191,10 @@ void __init e820__print_table(char *who)
int i;
for (i = 0; i < e820_table->nr_entries; i++) {
- pr_info("%s: [mem %#018Lx-%#018Lx] ", who,
- e820_table->entries[i].addr,
- e820_table->entries[i].addr + e820_table->entries[i].size - 1);
+ pr_info("%s: [mem %#018Lx-%#018Lx] ",
+ who,
+ e820_table->entries[i].addr,
+ e820_table->entries[i].addr + e820_table->entries[i].size - 1);
e820_print_type(e820_table->entries[i].type);
pr_cont("\n");
@@ -574,7 +576,7 @@ void __init e820__update_table_print(void)
if (e820__update_table(e820_table))
return;
- pr_info("e820: modified physical RAM map:\n");
+ pr_info("modified physical RAM map:\n");
e820__print_table("modified");
}
@@ -636,9 +638,8 @@ __init void e820__setup_pci_gap(void)
if (!found) {
#ifdef CONFIG_X86_64
gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
- pr_err(
- "e820: Cannot find an available gap in the 32-bit address range\n"
- "e820: PCI devices with unassigned 32-bit BARs may not work!\n");
+ pr_err("Cannot find an available gap in the 32-bit address range\n");
+ pr_err("PCI devices with unassigned 32-bit BARs may not work!\n");
#else
gapstart = 0x10000000;
#endif
@@ -649,7 +650,8 @@ __init void e820__setup_pci_gap(void)
*/
pci_mem_start = gapstart;
- pr_info("e820: [mem %#010lx-%#010lx] available for PCI devices\n", gapstart, gapstart + gapsize - 1);
+ pr_info("[mem %#010lx-%#010lx] available for PCI devices\n",
+ gapstart, gapstart + gapsize - 1);
}
/*
@@ -711,7 +713,7 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len)
memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));
early_memunmap(sdata, data_len);
- pr_info("e820: extended physical RAM map:\n");
+ pr_info("extended physical RAM map:\n");
e820__print_table("extended");
}
@@ -780,7 +782,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align)
addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
if (addr) {
e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
- pr_info("e820: update e820_table_kexec for e820__memblock_alloc_reserved()\n");
+ pr_info("update e820_table_kexec for e820__memblock_alloc_reserved()\n");
e820__update_table_kexec();
}
@@ -830,8 +832,8 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, enum e820_type
if (last_pfn > max_arch_pfn)
last_pfn = max_arch_pfn;
- pr_info("e820: last_pfn = %#lx max_arch_pfn = %#lx\n",
- last_pfn, max_arch_pfn);
+ pr_info("last_pfn = %#lx max_arch_pfn = %#lx\n",
+ last_pfn, max_arch_pfn);
return last_pfn;
}
@@ -1005,7 +1007,7 @@ void __init e820__finish_early_params(void)
if (e820__update_table(e820_table) < 0)
early_panic("Invalid user supplied memory map");
- pr_info("e820: user-defined physical RAM map:\n");
+ pr_info("user-defined physical RAM map:\n");
e820__print_table("user");
}
}
@@ -1238,7 +1240,7 @@ void __init e820__memory_setup(void)
memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec));
memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));
- pr_info("e820: BIOS-provided physical RAM map:\n");
+ pr_info("BIOS-provided physical RAM map:\n");
e820__print_table(who);
}
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index bae0d32e327b..da5d8ac60062 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -28,8 +28,6 @@
#include <asm/irq_remapping.h>
#include <asm/early_ioremap.h>
-#define dev_err(msg) pr_err("pci 0000:%02x:%02x.%d: %s", bus, slot, func, msg)
-
static void __init fix_hypertransport_config(int num, int slot, int func)
{
u32 htcfg;
@@ -617,7 +615,8 @@ static void __init apple_airport_reset(int bus, int slot, int func)
pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL);
if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) {
- dev_err("Cannot power up Apple AirPort card\n");
+ pr_err("pci 0000:%02x:%02x.%d: Cannot power up Apple AirPort card\n",
+ bus, slot, func);
return;
}
}
@@ -628,7 +627,8 @@ static void __init apple_airport_reset(int bus, int slot, int func)
mmio = early_ioremap(addr, BCM4331_MMIO_SIZE);
if (!mmio) {
- dev_err("Cannot iomap Apple AirPort card\n");
+ pr_err("pci 0000:%02x:%02x.%d: Cannot iomap Apple AirPort card\n",
+ bus, slot, func);
return;
}
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 0c408f8c4ed4..a21d6ace648e 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -6,6 +6,10 @@
*/
#define DISABLE_BRANCH_PROFILING
+
+/* cpu_feature_enabled() cannot be used this early */
+#define USE_EARLY_PGTABLE_L5
+
#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/types.h>
@@ -32,11 +36,6 @@
#include <asm/microcode.h>
#include <asm/kasan.h>
-#ifdef CONFIG_X86_5LEVEL
-#undef pgtable_l5_enabled
-#define pgtable_l5_enabled __pgtable_l5_enabled
-#endif
-
/*
* Manage page tables very early on.
*/
@@ -45,8 +44,7 @@ static unsigned int __initdata next_early_pgt;
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
#ifdef CONFIG_X86_5LEVEL
-unsigned int __pgtable_l5_enabled __ro_after_init;
-EXPORT_SYMBOL(__pgtable_l5_enabled);
+unsigned int __pgtable_l5_enabled __initdata;
unsigned int pgdir_shift __ro_after_init = 39;
EXPORT_SYMBOL(pgdir_shift);
unsigned int ptrs_per_p4d __ro_after_init = 1;
@@ -82,13 +80,14 @@ static unsigned int __head *fixup_int(void *ptr, unsigned long physaddr)
static bool __head check_la57_support(unsigned long physaddr)
{
- if (native_cpuid_eax(0) < 7)
- return false;
-
- if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+ /*
+ * 5-level paging is detected and enabled at kernel decomression
+ * stage. Only check if it has been enabled there.
+ */
+ if (!(native_read_cr4() & X86_CR4_LA57))
return false;
- *fixup_int(&pgtable_l5_enabled, physaddr) = 1;
+ *fixup_int(&__pgtable_l5_enabled, physaddr) = 1;
*fixup_int(&pgdir_shift, physaddr) = 48;
*fixup_int(&ptrs_per_p4d, physaddr) = 512;
*fixup_long(&page_offset_base, physaddr) = __PAGE_OFFSET_BASE_L5;
@@ -104,6 +103,12 @@ static bool __head check_la57_support(unsigned long physaddr)
}
#endif
+/* Code in __startup_64() can be relocated during execution, but the compiler
+ * doesn't have to generate PC-relative relocations when accessing globals from
+ * that function. Clang actually does not generate them, which leads to
+ * boot-time crashes. To work around this problem, every global pointer must
+ * be adjusted using fixup_pointer().
+ */
unsigned long __head __startup_64(unsigned long physaddr,
struct boot_params *bp)
{
@@ -113,6 +118,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
p4dval_t *p4d;
pudval_t *pud;
pmdval_t *pmd, pmd_entry;
+ pteval_t *mask_ptr;
bool la57;
int i;
unsigned int *next_pgt_ptr;
@@ -196,7 +202,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
/* Filter out unsupported __PAGE_KERNEL_* bits: */
- pmd_entry &= __supported_pte_mask;
+ mask_ptr = fixup_pointer(&__supported_pte_mask, physaddr);
+ pmd_entry &= *mask_ptr;
pmd_entry += sme_get_me_mask();
pmd_entry += physaddr;
@@ -273,7 +280,7 @@ again:
* critical -- __PAGE_OFFSET would point us back into the dynamic
* range and we might end up looping forever...
*/
- if (!pgtable_l5_enabled)
+ if (!pgtable_l5_enabled())
p4d_p = pgd_p;
else if (pgd)
p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 8ce4212e2b8d..b6be34ee88e9 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -975,8 +975,7 @@ int __init hpet_enable(void)
cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
hpet_writel(cfg, HPET_CFG);
if (cfg)
- pr_warn("HPET: Unrecognized bits %#x set in global cfg\n",
- cfg);
+ pr_warn("Unrecognized bits %#x set in global cfg\n", cfg);
for (i = 0; i <= last; ++i) {
cfg = hpet_readl(HPET_Tn_CFG(i));
@@ -988,7 +987,7 @@ int __init hpet_enable(void)
| HPET_TN_64BIT_CAP | HPET_TN_32BIT | HPET_TN_ROUTE
| HPET_TN_FSB | HPET_TN_FSB_CAP);
if (cfg)
- pr_warn("HPET: Unrecognized bits %#x set in cfg#%u\n",
+ pr_warn("Unrecognized bits %#x set in cfg#%u\n",
cfg, i);
}
hpet_print_config();
diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c
index fa183a131edc..108c48d0d40e 100644
--- a/arch/x86/kernel/jailhouse.c
+++ b/arch/x86/kernel/jailhouse.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL2.0
+// SPDX-License-Identifier: GPL-2.0
/*
* Jailhouse paravirt_ops implementation
*
@@ -37,7 +37,7 @@ static uint32_t __init jailhouse_detect(void)
return jailhouse_cpuid_base();
}
-static void jailhouse_get_wallclock(struct timespec *now)
+static void jailhouse_get_wallclock(struct timespec64 *now)
{
memset(now, 0, sizeof(*now));
}
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 3182908b7e6c..7326078eaa7a 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -398,11 +398,10 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
* little bit simple
*/
efi_map_sz = efi_get_runtime_map_size();
- efi_map_sz = ALIGN(efi_map_sz, 16);
params_cmdline_sz = sizeof(struct boot_params) + cmdline_len +
MAX_ELFCOREHDR_STR_LEN;
params_cmdline_sz = ALIGN(params_cmdline_sz, 16);
- kbuf.bufsz = params_cmdline_sz + efi_map_sz +
+ kbuf.bufsz = params_cmdline_sz + ALIGN(efi_map_sz, 16) +
sizeof(struct setup_data) +
sizeof(struct efi_setup_data);
@@ -410,7 +409,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
if (!params)
return ERR_PTR(-ENOMEM);
efi_map_offset = params_cmdline_sz;
- efi_setup_data_offset = efi_map_offset + efi_map_sz;
+ efi_setup_data_offset = efi_map_offset + ALIGN(efi_map_sz, 16);
/* Copy setup header onto bootparams. Documentation/x86/boot.txt */
setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset;
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 0715f827607c..6f4d42377fe5 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -370,6 +370,10 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
return 0;
+ /* We should not singlestep on the exception masking instructions */
+ if (insn_masking_exception(insn))
+ return 0;
+
#ifdef CONFIG_X86_64
/* Only x86_64 has RIP relative instructions */
if (insn_rip_relative(insn)) {
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 7867417cfaff..5b2300b818af 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -457,7 +457,7 @@ static void __init sev_map_percpu_data(void)
static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
{
native_smp_prepare_cpus(max_cpus);
- if (kvm_para_has_hint(KVM_HINTS_DEDICATED))
+ if (kvm_para_has_hint(KVM_HINTS_REALTIME))
static_branch_disable(&virt_spin_lock_key);
}
@@ -553,7 +553,7 @@ static void __init kvm_guest_init(void)
}
if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
- !kvm_para_has_hint(KVM_HINTS_DEDICATED) &&
+ !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
@@ -649,7 +649,7 @@ static __init int kvm_setup_pv_tlb_flush(void)
int cpu;
if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
- !kvm_para_has_hint(KVM_HINTS_DEDICATED) &&
+ !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
for_each_possible_cpu(cpu) {
zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
@@ -745,7 +745,7 @@ void __init kvm_spinlock_init(void)
if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
return;
- if (kvm_para_has_hint(KVM_HINTS_DEDICATED))
+ if (kvm_para_has_hint(KVM_HINTS_REALTIME))
return;
__pv_init_lock_hash();
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 8b26c9e01cc4..bf8d1eb7fca3 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -53,7 +53,7 @@ static struct pvclock_wall_clock *wall_clock;
* have elapsed since the hypervisor wrote the data. So we try to account for
* that with system time
*/
-static void kvm_get_wallclock(struct timespec *now)
+static void kvm_get_wallclock(struct timespec64 *now)
{
struct pvclock_vcpu_time_info *vcpu_time;
int low, high;
@@ -72,7 +72,7 @@ static void kvm_get_wallclock(struct timespec *now)
put_cpu();
}
-static int kvm_set_wallclock(const struct timespec *now)
+static int kvm_set_wallclock(const struct timespec64 *now)
{
return -ENODEV;
}
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index d41d896481b8..c9b14020f4dd 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -166,7 +166,7 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
*/
pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL);
/* Filter out unsuppored __PAGE_KERNEL* bits: */
- pgprot_val(pte_prot) |= __supported_pte_mask;
+ pgprot_val(pte_prot) &= __supported_pte_mask;
pte = pfn_pte(pfn, pte_prot);
set_pte_at(mm, va, ptep, pte);
pte_unmap_unlock(ptep, ptl);
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 60cdec6628b0..d1ab07ec8c9a 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -57,12 +57,17 @@ static void load_segments(void)
static void machine_kexec_free_page_tables(struct kimage *image)
{
free_page((unsigned long)image->arch.pgd);
+ image->arch.pgd = NULL;
#ifdef CONFIG_X86_PAE
free_page((unsigned long)image->arch.pmd0);
+ image->arch.pmd0 = NULL;
free_page((unsigned long)image->arch.pmd1);
+ image->arch.pmd1 = NULL;
#endif
free_page((unsigned long)image->arch.pte0);
+ image->arch.pte0 = NULL;
free_page((unsigned long)image->arch.pte1);
+ image->arch.pte1 = NULL;
}
static int machine_kexec_alloc_page_tables(struct kimage *image)
@@ -79,7 +84,6 @@ static int machine_kexec_alloc_page_tables(struct kimage *image)
!image->arch.pmd0 || !image->arch.pmd1 ||
#endif
!image->arch.pte0 || !image->arch.pte1) {
- machine_kexec_free_page_tables(image);
return -ENOMEM;
}
return 0;
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index a5e55d832d0a..4c8acdfdc5a7 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -39,9 +39,13 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
static void free_transition_pgtable(struct kimage *image)
{
free_page((unsigned long)image->arch.p4d);
+ image->arch.p4d = NULL;
free_page((unsigned long)image->arch.pud);
+ image->arch.pud = NULL;
free_page((unsigned long)image->arch.pmd);
+ image->arch.pmd = NULL;
free_page((unsigned long)image->arch.pte);
+ image->arch.pte = NULL;
}
static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
@@ -91,7 +95,6 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC));
return 0;
err:
- free_transition_pgtable(image);
return result;
}
@@ -351,7 +354,8 @@ void arch_crash_save_vmcoreinfo(void)
{
VMCOREINFO_NUMBER(phys_base);
VMCOREINFO_SYMBOL(init_top_pgt);
- VMCOREINFO_NUMBER(pgtable_l5_enabled);
+ vmcoreinfo_append_str("NUMBER(pgtable_l5_enabled)=%d\n",
+ pgtable_l5_enabled());
#ifdef CONFIG_NUMA
VMCOREINFO_SYMBOL(node_data);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 77625b60a510..ab5d9dd668d2 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -15,13 +15,11 @@
#include <asm/x86_init.h>
#include <asm/iommu_table.h>
-static int forbid_dac __read_mostly;
+static bool disable_dac_quirk __read_mostly;
const struct dma_map_ops *dma_ops = &dma_direct_ops;
EXPORT_SYMBOL(dma_ops);
-static int iommu_sac_force __read_mostly;
-
#ifdef CONFIG_IOMMU_DEBUG
int panic_on_overflow __read_mostly = 1;
int force_iommu __read_mostly = 1;
@@ -55,9 +53,6 @@ struct device x86_dma_fallback_dev = {
};
EXPORT_SYMBOL(x86_dma_fallback_dev);
-/* Number of entries preallocated for DMA-API debugging */
-#define PREALLOC_DMA_DEBUG_ENTRIES 65536
-
void __init pci_iommu_alloc(void)
{
struct iommu_table_entry *p;
@@ -76,7 +71,7 @@ void __init pci_iommu_alloc(void)
}
}
-bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
+bool arch_dma_alloc_attrs(struct device **dev)
{
if (!*dev)
*dev = &x86_dma_fallback_dev;
@@ -125,13 +120,13 @@ static __init int iommu_setup(char *p)
if (!strncmp(p, "nomerge", 7))
iommu_merge = 0;
if (!strncmp(p, "forcesac", 8))
- iommu_sac_force = 1;
+ pr_warn("forcesac option ignored.\n");
if (!strncmp(p, "allowdac", 8))
- forbid_dac = 0;
+ pr_warn("allowdac option ignored.\n");
if (!strncmp(p, "nodac", 5))
- forbid_dac = 1;
+ pr_warn("nodac option ignored.\n");
if (!strncmp(p, "usedac", 6)) {
- forbid_dac = -1;
+ disable_dac_quirk = true;
return 1;
}
#ifdef CONFIG_SWIOTLB
@@ -156,40 +151,9 @@ static __init int iommu_setup(char *p)
}
early_param("iommu", iommu_setup);
-int arch_dma_supported(struct device *dev, u64 mask)
-{
-#ifdef CONFIG_PCI
- if (mask > 0xffffffff && forbid_dac > 0) {
- dev_info(dev, "PCI: Disallowing DAC for device\n");
- return 0;
- }
-#endif
-
- /* Tell the device to use SAC when IOMMU force is on. This
- allows the driver to use cheaper accesses in some cases.
-
- Problem with this is that if we overflow the IOMMU area and
- return DAC as fallback address the device may not handle it
- correctly.
-
- As a special case some controllers have a 39bit address
- mode that is as efficient as 32bit (aic79xx). Don't force
- SAC for these. Assume all masks <= 40 bits are of this
- type. Normally this doesn't make any difference, but gives
- more gentle handling of IOMMU overflow. */
- if (iommu_sac_force && (mask >= DMA_BIT_MASK(40))) {
- dev_info(dev, "Force SAC with mask %Lx\n", mask);
- return 0;
- }
-
- return 1;
-}
-EXPORT_SYMBOL(arch_dma_supported);
-
static int __init pci_iommu_init(void)
{
struct iommu_table_entry *p;
- dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
#ifdef CONFIG_PCI
dma_debug_add_bus(&pci_bus_type);
@@ -209,11 +173,17 @@ rootfs_initcall(pci_iommu_init);
#ifdef CONFIG_PCI
/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
+static int via_no_dac_cb(struct pci_dev *pdev, void *data)
+{
+ pdev->dev.dma_32bit_limit = true;
+ return 0;
+}
+
static void via_no_dac(struct pci_dev *dev)
{
- if (forbid_dac == 0) {
+ if (!disable_dac_quirk) {
dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n");
- forbid_dac = 1;
+ pci_walk_bus(dev->subordinate, via_no_dac_cb, NULL);
}
}
DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID,
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
deleted file mode 100644
index ac7ea3a8242f..000000000000
--- a/arch/x86/kernel/pci-nommu.c
+++ /dev/null
@@ -1,90 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Fallback functions when the main IOMMU code is not compiled in. This
- code is roughly equivalent to i386. */
-#include <linux/dma-direct.h>
-#include <linux/scatterlist.h>
-#include <linux/string.h>
-#include <linux/gfp.h>
-#include <linux/pci.h>
-#include <linux/mm.h>
-
-#include <asm/processor.h>
-#include <asm/iommu.h>
-#include <asm/dma.h>
-
-#define NOMMU_MAPPING_ERROR 0
-
-static int
-check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
-{
- if (hwdev && !dma_capable(hwdev, bus, size)) {
- if (*hwdev->dma_mask >= DMA_BIT_MASK(32))
- printk(KERN_ERR
- "nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
- name, (long long)bus, size,
- (long long)*hwdev->dma_mask);
- return 0;
- }
- return 1;
-}
-
-static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- dma_addr_t bus = phys_to_dma(dev, page_to_phys(page)) + offset;
- WARN_ON(size == 0);
- if (!check_addr("map_single", dev, bus, size))
- return NOMMU_MAPPING_ERROR;
- return bus;
-}
-
-/* Map a set of buffers described by scatterlist in streaming
- * mode for DMA. This is the scatter-gather version of the
- * above pci_map_single interface. Here the scatter gather list
- * elements are each tagged with the appropriate dma address
- * and length. They are obtained via sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- * DMA address/length pairs than there are SG table elements.
- * (for example via virtual mapping capabilities)
- * The routine returns the number of addr/length pairs actually
- * used, at most nents.
- *
- * Device ownership issues as mentioned above for pci_map_single are
- * the same here.
- */
-static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
- int nents, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct scatterlist *s;
- int i;
-
- WARN_ON(nents == 0 || sg[0].length == 0);
-
- for_each_sg(sg, s, nents, i) {
- BUG_ON(!sg_page(s));
- s->dma_address = sg_phys(s);
- if (!check_addr("map_sg", hwdev, s->dma_address, s->length))
- return 0;
- s->dma_length = s->length;
- }
- return nents;
-}
-
-static int nommu_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- return dma_addr == NOMMU_MAPPING_ERROR;
-}
-
-const struct dma_map_ops nommu_dma_ops = {
- .alloc = dma_generic_alloc_coherent,
- .free = dma_generic_free_coherent,
- .map_sg = nommu_map_sg,
- .map_page = nommu_map_page,
- .is_phys = 1,
- .mapping_error = nommu_mapping_error,
- .dma_supported = x86_dma_supported,
-};
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index e47b2dbbdef3..c06c4c16c6b6 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -151,17 +151,19 @@ void perf_get_regs_user(struct perf_regs *regs_user,
regs_user_copy->sp = user_regs->sp;
regs_user_copy->cs = user_regs->cs;
regs_user_copy->ss = user_regs->ss;
-
/*
- * Most system calls don't save these registers, don't report them.
+ * Store user space frame-pointer value on sample
+ * to facilitate stack unwinding for cases when
+ * user space executable code has such support
+ * enabled at compile time:
*/
+ regs_user_copy->bp = user_regs->bp;
+
regs_user_copy->bx = -1;
- regs_user_copy->bp = -1;
regs_user_copy->r12 = -1;
regs_user_copy->r13 = -1;
regs_user_copy->r14 = -1;
regs_user_copy->r15 = -1;
-
/*
* For this to be at all useful, we need a reasonable guess for
* the ABI. Be careful: we're in NMI context, and we're
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 03408b942adb..30ca2d1a9231 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -38,6 +38,7 @@
#include <asm/switch_to.h>
#include <asm/desc.h>
#include <asm/prctl.h>
+#include <asm/spec-ctrl.h>
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -278,6 +279,148 @@ static inline void switch_to_bitmap(struct tss_struct *tss,
}
}
+#ifdef CONFIG_SMP
+
+struct ssb_state {
+ struct ssb_state *shared_state;
+ raw_spinlock_t lock;
+ unsigned int disable_state;
+ unsigned long local_state;
+};
+
+#define LSTATE_SSB 0
+
+static DEFINE_PER_CPU(struct ssb_state, ssb_state);
+
+void speculative_store_bypass_ht_init(void)
+{
+ struct ssb_state *st = this_cpu_ptr(&ssb_state);
+ unsigned int this_cpu = smp_processor_id();
+ unsigned int cpu;
+
+ st->local_state = 0;
+
+ /*
+ * Shared state setup happens once on the first bringup
+ * of the CPU. It's not destroyed on CPU hotunplug.
+ */
+ if (st->shared_state)
+ return;
+
+ raw_spin_lock_init(&st->lock);
+
+ /*
+ * Go over HT siblings and check whether one of them has set up the
+ * shared state pointer already.
+ */
+ for_each_cpu(cpu, topology_sibling_cpumask(this_cpu)) {
+ if (cpu == this_cpu)
+ continue;
+
+ if (!per_cpu(ssb_state, cpu).shared_state)
+ continue;
+
+ /* Link it to the state of the sibling: */
+ st->shared_state = per_cpu(ssb_state, cpu).shared_state;
+ return;
+ }
+
+ /*
+ * First HT sibling to come up on the core. Link shared state of
+ * the first HT sibling to itself. The siblings on the same core
+ * which come up later will see the shared state pointer and link
+ * themself to the state of this CPU.
+ */
+ st->shared_state = st;
+}
+
+/*
+ * Logic is: First HT sibling enables SSBD for both siblings in the core
+ * and last sibling to disable it, disables it for the whole core. This how
+ * MSR_SPEC_CTRL works in "hardware":
+ *
+ * CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL
+ */
+static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
+{
+ struct ssb_state *st = this_cpu_ptr(&ssb_state);
+ u64 msr = x86_amd_ls_cfg_base;
+
+ if (!static_cpu_has(X86_FEATURE_ZEN)) {
+ msr |= ssbd_tif_to_amd_ls_cfg(tifn);
+ wrmsrl(MSR_AMD64_LS_CFG, msr);
+ return;
+ }
+
+ if (tifn & _TIF_SSBD) {
+ /*
+ * Since this can race with prctl(), block reentry on the
+ * same CPU.
+ */
+ if (__test_and_set_bit(LSTATE_SSB, &st->local_state))
+ return;
+
+ msr |= x86_amd_ls_cfg_ssbd_mask;
+
+ raw_spin_lock(&st->shared_state->lock);
+ /* First sibling enables SSBD: */
+ if (!st->shared_state->disable_state)
+ wrmsrl(MSR_AMD64_LS_CFG, msr);
+ st->shared_state->disable_state++;
+ raw_spin_unlock(&st->shared_state->lock);
+ } else {
+ if (!__test_and_clear_bit(LSTATE_SSB, &st->local_state))
+ return;
+
+ raw_spin_lock(&st->shared_state->lock);
+ st->shared_state->disable_state--;
+ if (!st->shared_state->disable_state)
+ wrmsrl(MSR_AMD64_LS_CFG, msr);
+ raw_spin_unlock(&st->shared_state->lock);
+ }
+}
+#else
+static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
+{
+ u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn);
+
+ wrmsrl(MSR_AMD64_LS_CFG, msr);
+}
+#endif
+
+static __always_inline void amd_set_ssb_virt_state(unsigned long tifn)
+{
+ /*
+ * SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL,
+ * so ssbd_tif_to_spec_ctrl() just works.
+ */
+ wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));
+}
+
+static __always_inline void intel_set_ssb_state(unsigned long tifn)
+{
+ u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
+
+ wrmsrl(MSR_IA32_SPEC_CTRL, msr);
+}
+
+static __always_inline void __speculative_store_bypass_update(unsigned long tifn)
+{
+ if (static_cpu_has(X86_FEATURE_VIRT_SSBD))
+ amd_set_ssb_virt_state(tifn);
+ else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD))
+ amd_set_core_ssb_state(tifn);
+ else
+ intel_set_ssb_state(tifn);
+}
+
+void speculative_store_bypass_update(unsigned long tif)
+{
+ preempt_disable();
+ __speculative_store_bypass_update(tif);
+ preempt_enable();
+}
+
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss)
{
@@ -309,6 +452,9 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
if ((tifp ^ tifn) & _TIF_NOCPUID)
set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
+
+ if ((tifp ^ tifn) & _TIF_SSBD)
+ __speculative_store_bypass_update(tifn);
}
/*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 5224c6099184..0ae659de21eb 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -76,16 +76,14 @@ void __show_regs(struct pt_regs *regs, int all)
savesegment(gs, gs);
}
- printk(KERN_DEFAULT "EIP: %pS\n", (void *)regs->ip);
- printk(KERN_DEFAULT "EFLAGS: %08lx CPU: %d\n", regs->flags,
- raw_smp_processor_id());
+ show_ip(regs, KERN_DEFAULT);
printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
regs->ax, regs->bx, regs->cx, regs->dx);
printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
regs->si, regs->di, regs->bp, sp);
- printk(KERN_DEFAULT " DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
- (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);
+ printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n",
+ (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, regs->flags);
if (!all)
return;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 4b100fe0f508..12bb445fb98d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -542,6 +542,7 @@ void set_personality_64bit(void)
clear_thread_flag(TIF_X32);
/* Pretend that this comes from a 64bit execve */
task_pt_regs(current)->orig_ax = __NR_execve;
+ current_thread_info()->status &= ~TS_COMPAT;
/* Ensure the corresponding mm is not marked. */
if (current->mm)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index ed5c4cdf0a34..e2ee403865eb 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1377,7 +1377,6 @@ static void fill_sigtrap_info(struct task_struct *tsk,
tsk->thread.trap_nr = X86_TRAP_DB;
tsk->thread.error_code = error_code;
- memset(info, 0, sizeof(*info));
info->si_signo = SIGTRAP;
info->si_code = si_code;
info->si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
@@ -1395,6 +1394,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
{
struct siginfo info;
+ clear_siginfo(&info);
fill_sigtrap_info(tsk, regs, error_code, si_code, &info);
/* Send us the fake SIGTRAP */
force_sig_info(SIGTRAP, &info, tsk);
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 761f6af6efa5..637982efecd8 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -123,28 +123,35 @@ u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
struct pvclock_vcpu_time_info *vcpu_time,
- struct timespec *ts)
+ struct timespec64 *ts)
{
u32 version;
u64 delta;
- struct timespec now;
+ struct timespec64 now;
/* get wallclock at system boot */
do {
version = wall_clock->version;
rmb(); /* fetch version before time */
+ /*
+ * Note: wall_clock->sec is a u32 value, so it can
+ * only store dates between 1970 and 2106. To allow
+ * times beyond that, we need to create a new hypercall
+ * interface with an extended pvclock_wall_clock structure
+ * like ARM has.
+ */
now.tv_sec = wall_clock->sec;
now.tv_nsec = wall_clock->nsec;
rmb(); /* fetch time before checking version */
} while ((wall_clock->version & 1) || (version != wall_clock->version));
delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */
- delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
+ delta += now.tv_sec * NSEC_PER_SEC + now.tv_nsec;
now.tv_nsec = do_div(delta, NSEC_PER_SEC);
now.tv_sec = delta;
- set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
+ set_normalized_timespec64(ts, now.tv_sec, now.tv_nsec);
}
void pvclock_set_pvti_cpu0_va(struct pvclock_vsyscall_time_info *pvti)
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index f7b82ed7b5b5..586f718b8e95 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -39,7 +39,7 @@ EXPORT_SYMBOL(rtc_lock);
* jump to the next second precisely 500 ms later. Check the Motorola
* MC146818A or Dallas DS12887 data sheet for details.
*/
-int mach_set_rtc_mmss(const struct timespec *now)
+int mach_set_rtc_mmss(const struct timespec64 *now)
{
unsigned long long nowtime = now->tv_sec;
struct rtc_time tm;
@@ -60,7 +60,7 @@ int mach_set_rtc_mmss(const struct timespec *now)
return retval;
}
-void mach_get_cmos_time(struct timespec *now)
+void mach_get_cmos_time(struct timespec64 *now)
{
unsigned int status, year, mon, day, hour, min, sec, century = 0;
unsigned long flags;
@@ -118,7 +118,7 @@ void mach_get_cmos_time(struct timespec *now)
} else
year += CMOS_YEARS_OFFS;
- now->tv_sec = mktime(year, mon, day, hour, min, sec);
+ now->tv_sec = mktime64(year, mon, day, hour, min, sec);
now->tv_nsec = 0;
}
@@ -145,13 +145,13 @@ void rtc_cmos_write(unsigned char val, unsigned char addr)
}
EXPORT_SYMBOL(rtc_cmos_write);
-int update_persistent_clock(struct timespec now)
+int update_persistent_clock64(struct timespec64 now)
{
return x86_platform.set_wallclock(&now);
}
/* not static: needed by APM */
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
{
x86_platform.get_wallclock(ts);
}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 6285697b6e56..2f86d883dd95 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -50,6 +50,7 @@
#include <linux/init_ohci1394_dma.h>
#include <linux/kvm_para.h>
#include <linux/dma-contiguous.h>
+#include <xen/xen.h>
#include <linux/errno.h>
#include <linux/kernel.h>
@@ -534,6 +535,11 @@ static void __init reserve_crashkernel(void)
high = true;
}
+ if (xen_pv_domain()) {
+ pr_info("Ignoring crashkernel for a Xen PV domain\n");
+ return;
+ }
+
/* 0 means: find the address automatically */
if (crash_base <= 0) {
/*
@@ -1306,11 +1312,3 @@ static int __init register_kernel_offset_dumper(void)
return 0;
}
__initcall(register_kernel_offset_dumper);
-
-void arch_show_smap(struct seq_file *m, struct vm_area_struct *vma)
-{
- if (!boot_cpu_has(X86_FEATURE_OSPKE))
- return;
-
- seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
-}
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index 14c057f29979..9ccbf0576cd0 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -29,7 +29,7 @@ static inline void signal_compat_build_tests(void)
BUILD_BUG_ON(NSIGFPE != 15);
BUILD_BUG_ON(NSIGSEGV != 7);
BUILD_BUG_ON(NSIGBUS != 5);
- BUILD_BUG_ON(NSIGTRAP != 4);
+ BUILD_BUG_ON(NSIGTRAP != 5);
BUILD_BUG_ON(NSIGCHLD != 6);
BUILD_BUG_ON(NSIGSYS != 1);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ff99e2b6fc54..c2f7d1d2a5c3 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -77,13 +77,9 @@
#include <asm/i8259.h>
#include <asm/misc.h>
#include <asm/qspinlock.h>
-
-/* Number of siblings per CPU package */
-int smp_num_siblings = 1;
-EXPORT_SYMBOL(smp_num_siblings);
-
-/* Last level cache ID of each logical CPU */
-DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
+#include <asm/intel-family.h>
+#include <asm/cpu_device_id.h>
+#include <asm/spec-ctrl.h>
/* representing HT siblings of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
@@ -242,6 +238,8 @@ static void notrace start_secondary(void *unused)
*/
check_tsc_sync_target();
+ speculative_store_bypass_ht_init();
+
/*
* Lock vector_lock, set CPU online and bring the vector
* allocator online. Online must be set with vector_lock held
@@ -390,15 +388,47 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
return false;
}
+/*
+ * Define snc_cpu[] for SNC (Sub-NUMA Cluster) CPUs.
+ *
+ * These are Intel CPUs that enumerate an LLC that is shared by
+ * multiple NUMA nodes. The LLC on these systems is shared for
+ * off-package data access but private to the NUMA node (half
+ * of the package) for on-package access.
+ *
+ * CPUID (the source of the information about the LLC) can only
+ * enumerate the cache as being shared *or* unshared, but not
+ * this particular configuration. The CPU in this case enumerates
+ * the cache to be shared across the entire package (spanning both
+ * NUMA nodes).
+ */
+
+static const struct x86_cpu_id snc_cpu[] = {
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_SKYLAKE_X },
+ {}
+};
+
static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
- if (per_cpu(cpu_llc_id, cpu1) != BAD_APICID &&
- per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2))
- return topology_sane(c, o, "llc");
+ /* Do not match if we do not have a valid APICID for cpu: */
+ if (per_cpu(cpu_llc_id, cpu1) == BAD_APICID)
+ return false;
- return false;
+ /* Do not match if LLC id does not match: */
+ if (per_cpu(cpu_llc_id, cpu1) != per_cpu(cpu_llc_id, cpu2))
+ return false;
+
+ /*
+ * Allow the SNC topology without warning. Return of false
+ * means 'c' does not share the LLC of 'o'. This will be
+ * reflected to userspace.
+ */
+ if (!topology_same_node(c, o) && x86_match_cpu(snc_cpu))
+ return false;
+
+ return topology_sane(c, o, "llc");
}
/*
@@ -456,7 +486,8 @@ static struct sched_domain_topology_level x86_topology[] = {
/*
* Set if a package/die has multiple NUMA nodes inside.
- * AMD Magny-Cours and Intel Cluster-on-Die have this.
+ * AMD Magny-Cours, Intel Cluster-on-Die, and Intel
+ * Sub-NUMA Clustering have this.
*/
static bool x86_has_numa_in_package;
@@ -1257,6 +1288,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
set_mtrr_aps_delayed_init();
smp_quirk_init_udelay();
+
+ speculative_store_bypass_ht_init();
}
void arch_enable_nonboot_cpus_begin(void)
@@ -1536,6 +1569,8 @@ static inline void mwait_play_dead(void)
void *mwait_ptr;
int i;
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ return;
if (!this_cpu_has(X86_FEATURE_MWAIT))
return;
if (!this_cpu_has(X86_FEATURE_CLFLUSH))
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index a3f15ed545b5..6a78d4b36a79 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/compat.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
@@ -19,7 +20,6 @@
#include <linux/elf.h>
#include <asm/elf.h>
-#include <asm/compat.h>
#include <asm/ia32.h>
#include <asm/syscalls.h>
#include <asm/mpx.h>
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 03f3d7695dac..a535dd64de63 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -299,6 +299,7 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
NOTIFY_STOP) {
cond_local_irq_enable(regs);
+ clear_siginfo(&info);
do_trap(trapnr, signr, str, regs, error_code,
fill_trap_info(regs, signr, trapnr, &info));
}
@@ -854,6 +855,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
task->thread.trap_nr = trapnr;
task->thread.error_code = error_code;
+ clear_siginfo(&info);
info.si_signo = SIGFPE;
info.si_errno = 0;
info.si_addr = (void __user *)uprobe_get_trap_addr(regs);
@@ -929,6 +931,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
local_irq_enable();
+ clear_siginfo(&info);
info.si_signo = SIGILL;
info.si_errno = 0;
info.si_code = ILL_BADSTK;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index ef32297ff17e..74392d9d51e0 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -317,7 +317,7 @@ static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2)
hpet2 -= hpet1;
tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
do_div(tmp, 1000000);
- do_div(deltatsc, tmp);
+ deltatsc = div64_u64(deltatsc, tmp);
return (unsigned long) deltatsc;
}
@@ -1067,6 +1067,7 @@ static struct clocksource clocksource_tsc_early = {
.resume = tsc_resume,
.mark_unstable = tsc_cs_mark_unstable,
.tick_stable = tsc_cs_tick_stable,
+ .list = LIST_HEAD_INIT(clocksource_tsc_early.list),
};
/*
@@ -1086,6 +1087,7 @@ static struct clocksource clocksource_tsc = {
.resume = tsc_resume,
.mark_unstable = tsc_cs_mark_unstable,
.tick_stable = tsc_cs_tick_stable,
+ .list = LIST_HEAD_INIT(clocksource_tsc.list),
};
void mark_tsc_unstable(char *reason)
@@ -1098,13 +1100,9 @@ void mark_tsc_unstable(char *reason)
clear_sched_clock_stable();
disable_sched_clock_irqtime();
pr_info("Marking TSC unstable due to %s\n", reason);
- /* Change only the rating, when not registered */
- if (clocksource_tsc.mult) {
- clocksource_mark_unstable(&clocksource_tsc);
- } else {
- clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
- clocksource_tsc.rating = 0;
- }
+
+ clocksource_mark_unstable(&clocksource_tsc_early);
+ clocksource_mark_unstable(&clocksource_tsc);
}
EXPORT_SYMBOL_GPL(mark_tsc_unstable);
@@ -1244,7 +1242,7 @@ static void tsc_refine_calibration_work(struct work_struct *work)
/* Don't bother refining TSC on unstable systems */
if (tsc_unstable)
- return;
+ goto unreg;
/*
* Since the work is started early in boot, we may be
@@ -1297,11 +1295,12 @@ static void tsc_refine_calibration_work(struct work_struct *work)
out:
if (tsc_unstable)
- return;
+ goto unreg;
if (boot_cpu_has(X86_FEATURE_ART))
art_related_clocksource = &clocksource_tsc;
clocksource_register_khz(&clocksource_tsc, tsc_khz);
+unreg:
clocksource_unregister(&clocksource_tsc_early);
}
@@ -1311,8 +1310,8 @@ static int __init init_tsc_clocksource(void)
if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz)
return 0;
- if (check_tsc_unstable())
- return 0;
+ if (tsc_unstable)
+ goto unreg;
if (tsc_clocksource_reliable)
clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
@@ -1328,6 +1327,7 @@ static int __init init_tsc_clocksource(void)
if (boot_cpu_has(X86_FEATURE_ART))
art_related_clocksource = &clocksource_tsc;
clocksource_register_khz(&clocksource_tsc, tsc_khz);
+unreg:
clocksource_unregister(&clocksource_tsc_early);
return 0;
}
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index f44ce0fb3583..ff20b35e98dd 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -278,6 +278,7 @@ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs)
tsk->thread.error_code = X86_PF_USER | X86_PF_WRITE;
tsk->thread.trap_nr = X86_TRAP_PF;
+ clear_siginfo(&info);
info.si_signo = SIGSEGV;
info.si_errno = 0;
info.si_code = SEGV_MAPERR;
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 85c7ef23d99f..58d8d800875d 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -299,6 +299,10 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool
if (is_prefix_bad(insn))
return -ENOTSUPP;
+ /* We should not singlestep on the exception masking instructions */
+ if (insn_masking_exception(insn))
+ return -ENOTSUPP;
+
if (x86_64)
good_insns = good_insns_64;
else
@@ -1079,8 +1083,8 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs
return orig_ret_vaddr;
if (nleft != rasize) {
- pr_err("uprobe: return address clobbered: pid=%d, %%sp=%#lx, "
- "%%ip=%#lx\n", current->pid, regs->sp, regs->ip);
+ pr_err("return address clobbered: pid=%d, %%sp=%#lx, %%ip=%#lx\n",
+ current->pid, regs->sp, regs->ip);
force_sig_info(SIGSEGV, SEND_SIG_FORCED, current);
}
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 795f3a80e576..5e1458f609a1 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -117,11 +117,11 @@ SECTIONS
#ifdef CONFIG_X86_64
. = ALIGN(PAGE_SIZE);
- VMLINUX_SYMBOL(__entry_trampoline_start) = .;
+ __entry_trampoline_start = .;
_entry_trampoline = .;
*(.entry_trampoline)
. = ALIGN(PAGE_SIZE);
- VMLINUX_SYMBOL(__entry_trampoline_end) = .;
+ __entry_trampoline_end = .;
ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
#endif
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 82055b90a8b3..92bf2f2e7cdd 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -379,7 +379,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 0x80000008.ebx */
const u32 kvm_cpuid_8000_0008_ebx_x86_features =
- F(IBPB) | F(IBRS);
+ F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD);
/* cpuid 0xC0000001.edx */
const u32 kvm_cpuid_C000_0001_edx_x86_features =
@@ -408,7 +408,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
- F(ARCH_CAPABILITIES);
+ F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES);
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
@@ -495,6 +495,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ecx &= ~F(PKU);
entry->edx &= kvm_cpuid_7_0_edx_x86_features;
cpuid_mask(&entry->edx, CPUID_7_EDX);
+ /*
+ * We emulate ARCH_CAPABILITIES in software even
+ * if the host doesn't support it.
+ */
+ entry->edx |= F(ARCH_CAPABILITIES);
} else {
entry->ebx = 0;
entry->ecx = 0;
@@ -647,13 +652,20 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
g_phys_as = phys_as;
entry->eax = g_phys_as | (virt_as << 8);
entry->edx = 0;
- /* IBRS and IBPB aren't necessarily present in hardware cpuid */
- if (boot_cpu_has(X86_FEATURE_IBPB))
- entry->ebx |= F(IBPB);
- if (boot_cpu_has(X86_FEATURE_IBRS))
- entry->ebx |= F(IBRS);
+ /*
+ * IBRS, IBPB and VIRT_SSBD aren't necessarily present in
+ * hardware cpuid
+ */
+ if (boot_cpu_has(X86_FEATURE_AMD_IBPB))
+ entry->ebx |= F(AMD_IBPB);
+ if (boot_cpu_has(X86_FEATURE_AMD_IBRS))
+ entry->ebx |= F(AMD_IBRS);
+ if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
+ entry->ebx |= F(VIRT_SSBD);
entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
+ if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
+ entry->ebx |= F(VIRT_SSBD);
break;
}
case 0x80000019:
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 98618e397342..46ff64da44ca 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1260,12 +1260,16 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
}
}
-static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
+static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
{
- struct kvm_run *run = vcpu->run;
+ kvm_hv_hypercall_set_result(vcpu, result);
+ ++vcpu->stat.hypercalls;
+ return kvm_skip_emulated_instruction(vcpu);
+}
- kvm_hv_hypercall_set_result(vcpu, run->hyperv.u.hcall.result);
- return 1;
+static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
+{
+ return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
}
static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
@@ -1296,8 +1300,10 @@ static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
if (param & ~KVM_HYPERV_CONN_ID_MASK)
return HV_STATUS_INVALID_HYPERCALL_INPUT;
- /* conn_to_evt is protected by vcpu->kvm->srcu */
+ /* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */
+ rcu_read_lock();
eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param);
+ rcu_read_unlock();
if (!eventfd)
return HV_STATUS_INVALID_PORT_ID;
@@ -1348,7 +1354,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
/* Hypercall continuation is not supported yet */
if (rep_cnt || rep_idx) {
ret = HV_STATUS_INVALID_HYPERCALL_CODE;
- goto set_result;
+ goto out;
}
switch (code) {
@@ -1379,9 +1385,8 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
break;
}
-set_result:
- kvm_hv_hypercall_set_result(vcpu, ret);
- return 1;
+out:
+ return kvm_hv_hypercall_complete(vcpu, ret);
}
void kvm_hv_init_vm(struct kvm *kvm)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 70dcb5548022..3773c4625114 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1463,23 +1463,6 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
local_irq_restore(flags);
}
-static void start_sw_period(struct kvm_lapic *apic)
-{
- if (!apic->lapic_timer.period)
- return;
-
- if (apic_lvtt_oneshot(apic) &&
- ktime_after(ktime_get(),
- apic->lapic_timer.target_expiration)) {
- apic_timer_expired(apic);
- return;
- }
-
- hrtimer_start(&apic->lapic_timer.timer,
- apic->lapic_timer.target_expiration,
- HRTIMER_MODE_ABS_PINNED);
-}
-
static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
{
ktime_t now, remaining;
@@ -1539,11 +1522,43 @@ static bool set_target_expiration(struct kvm_lapic *apic)
static void advance_periodic_target_expiration(struct kvm_lapic *apic)
{
- apic->lapic_timer.tscdeadline +=
- nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
+ ktime_t now = ktime_get();
+ u64 tscl = rdtsc();
+ ktime_t delta;
+
+ /*
+ * Synchronize both deadlines to the same time source or
+ * differences in the periods (caused by differences in the
+ * underlying clocks or numerical approximation errors) will
+ * cause the two to drift apart over time as the errors
+ * accumulate.
+ */
apic->lapic_timer.target_expiration =
ktime_add_ns(apic->lapic_timer.target_expiration,
apic->lapic_timer.period);
+ delta = ktime_sub(apic->lapic_timer.target_expiration, now);
+ apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
+ nsec_to_cycles(apic->vcpu, delta);
+}
+
+static void start_sw_period(struct kvm_lapic *apic)
+{
+ if (!apic->lapic_timer.period)
+ return;
+
+ if (ktime_after(ktime_get(),
+ apic->lapic_timer.target_expiration)) {
+ apic_timer_expired(apic);
+
+ if (apic_lvtt_oneshot(apic))
+ return;
+
+ advance_periodic_target_expiration(apic);
+ }
+
+ hrtimer_start(&apic->lapic_timer.timer,
+ apic->lapic_timer.target_expiration,
+ HRTIMER_MODE_ABS_PINNED);
}
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8494dbae41b9..d634f0332c0f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3007,6 +3007,7 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *
{
siginfo_t info;
+ clear_siginfo(&info);
info.si_signo = SIGBUS;
info.si_errno = 0;
info.si_code = BUS_MCEERR_AR;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b58787daf9f8..26110c202b19 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -49,7 +49,7 @@
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
#include <asm/virtext.h>
#include "trace.h"
@@ -213,6 +213,12 @@ struct vcpu_svm {
} host;
u64 spec_ctrl;
+ /*
+ * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be
+ * translated into the appropriate L2_CFG bits on the host to
+ * perform speculative control.
+ */
+ u64 virt_spec_ctrl;
u32 *msrpm;
@@ -1423,12 +1429,23 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
seg->base = 0;
}
+static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (is_guest_mode(vcpu))
+ return svm->nested.hsave->control.tsc_offset;
+
+ return vcpu->arch.tsc_offset;
+}
+
static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
struct vcpu_svm *svm = to_svm(vcpu);
u64 g_tsc_offset = 0;
if (is_guest_mode(vcpu)) {
+ /* Write L1's TSC offset. */
g_tsc_offset = svm->vmcb->control.tsc_offset -
svm->nested.hsave->control.tsc_offset;
svm->nested.hsave->control.tsc_offset = offset;
@@ -2049,6 +2066,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.microcode_version = 0x01000065;
svm->spec_ctrl = 0;
+ svm->virt_spec_ctrl = 0;
if (!init_event) {
svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
@@ -3322,6 +3340,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
/* Restore the original control entries */
copy_vmcb_control_area(vmcb, hsave);
+ svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset;
kvm_clear_exception_queue(&svm->vcpu);
kvm_clear_interrupt_queue(&svm->vcpu);
@@ -3482,10 +3501,12 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
/* We don't want to see VMMCALLs from a nested guest */
clr_intercept(svm, INTERCEPT_VMMCALL);
+ svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
+ svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
+
svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
svm->vmcb->control.int_state = nested_vmcb->control.int_state;
- svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
@@ -4035,12 +4056,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
struct vcpu_svm *svm = to_svm(vcpu);
switch (msr_info->index) {
- case MSR_IA32_TSC: {
- msr_info->data = svm->vmcb->control.tsc_offset +
- kvm_scale_tsc(vcpu, rdtsc());
-
- break;
- }
case MSR_STAR:
msr_info->data = svm->vmcb->save.star;
break;
@@ -4100,11 +4115,18 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
+ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS))
return 1;
msr_info->data = svm->spec_ctrl;
break;
+ case MSR_AMD64_VIRT_SPEC_CTRL:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
+ return 1;
+
+ msr_info->data = svm->virt_spec_ctrl;
+ break;
case MSR_F15H_IC_CFG: {
int family, model;
@@ -4193,12 +4215,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
svm->vmcb->save.g_pat = data;
mark_dirty(svm->vmcb, VMCB_NPT);
break;
- case MSR_IA32_TSC:
- kvm_write_tsc(vcpu, msr);
- break;
case MSR_IA32_SPEC_CTRL:
if (!msr->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
+ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS))
return 1;
/* The STIBP bit doesn't fault even if it's not advertised */
@@ -4225,7 +4244,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
break;
case MSR_IA32_PRED_CMD:
if (!msr->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_IBPB))
+ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB))
return 1;
if (data & ~PRED_CMD_IBPB)
@@ -4239,6 +4258,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
break;
set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
break;
+ case MSR_AMD64_VIRT_SPEC_CTRL:
+ if (!msr->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
+ return 1;
+
+ if (data & ~SPEC_CTRL_SSBD)
+ return 1;
+
+ svm->virt_spec_ctrl = data;
+ break;
case MSR_STAR:
svm->vmcb->save.star = data;
break;
@@ -5265,9 +5294,8 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
}
if (!ret && svm) {
- trace_kvm_pi_irte_update(svm->vcpu.vcpu_id,
- host_irq, e->gsi,
- vcpu_info.vector,
+ trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id,
+ e->gsi, vcpu_info.vector,
vcpu_info.pi_desc_addr, set);
}
@@ -5553,8 +5581,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
* is no need to worry about the conditional branch over the wrmsr
* being speculatively taken.
*/
- if (svm->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+ x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
asm volatile (
"push %%" _ASM_BP "; \n\t"
@@ -5648,6 +5675,18 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
#endif
);
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
+
+#ifdef CONFIG_X86_64
+ wrmsrl(MSR_GS_BASE, svm->host.gs_base);
+#else
+ loadsegment(fs, svm->host.fs);
+#ifndef CONFIG_X86_32_LAZY_GS
+ loadsegment(gs, svm->host.gs);
+#endif
+#endif
+
/*
* We do not use IBRS in the kernel. If this vCPU has used the
* SPEC_CTRL MSR it may have left it on; save the value and
@@ -5666,20 +5705,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
- if (svm->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
-
- /* Eliminate branch target predictions from guest mode */
- vmexit_fill_RSB();
-
-#ifdef CONFIG_X86_64
- wrmsrl(MSR_GS_BASE, svm->host.gs_base);
-#else
- loadsegment(fs, svm->host.fs);
-#ifndef CONFIG_X86_32_LAZY_GS
- loadsegment(gs, svm->host.gs);
-#endif
-#endif
+ x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
reload_tss(vcpu);
@@ -5782,7 +5808,7 @@ static bool svm_cpu_has_accelerated_tpr(void)
return false;
}
-static bool svm_has_high_real_mode_segbase(void)
+static bool svm_has_emulated_msr(int index)
{
return true;
}
@@ -7008,7 +7034,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.hardware_enable = svm_hardware_enable,
.hardware_disable = svm_hardware_disable,
.cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
- .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase,
+ .has_emulated_msr = svm_has_emulated_msr,
.vcpu_create = svm_create_vcpu,
.vcpu_free = svm_free_vcpu,
@@ -7102,6 +7128,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.has_wbinvd_exit = svm_has_wbinvd_exit,
+ .read_l1_tsc_offset = svm_read_l1_tsc_offset,
.write_tsc_offset = svm_write_tsc_offset,
.set_tdp_cr3 = set_tdp_cr3,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index aafcc9881e88..40aa29204baf 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -51,7 +51,7 @@
#include <asm/apic.h>
#include <asm/irq_remapping.h>
#include <asm/mmu_context.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
#include <asm/mshyperv.h>
#include "trace.h"
@@ -1494,6 +1494,12 @@ static inline bool cpu_has_vmx_vmfunc(void)
SECONDARY_EXEC_ENABLE_VMFUNC;
}
+static bool vmx_umip_emulated(void)
+{
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
+ SECONDARY_EXEC_DESC;
+}
+
static inline bool report_flexpriority(void)
{
return flexpriority_enabled;
@@ -2880,18 +2886,15 @@ static void setup_msrs(struct vcpu_vmx *vmx)
vmx_update_msr_bitmap(&vmx->vcpu);
}
-/*
- * reads and returns guest's timestamp counter "register"
- * guest_tsc = (host_tsc * tsc multiplier) >> 48 + tsc_offset
- * -- Intel TSC Scaling for Virtualization White Paper, sec 1.3
- */
-static u64 guest_read_tsc(struct kvm_vcpu *vcpu)
+static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
{
- u64 host_tsc, tsc_offset;
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- host_tsc = rdtsc();
- tsc_offset = vmcs_read64(TSC_OFFSET);
- return kvm_scale_tsc(vcpu, host_tsc) + tsc_offset;
+ if (is_guest_mode(vcpu) &&
+ (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING))
+ return vcpu->arch.tsc_offset - vmcs12->tsc_offset;
+
+ return vcpu->arch.tsc_offset;
}
/*
@@ -3524,12 +3527,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
#endif
case MSR_EFER:
return kvm_get_msr_common(vcpu, msr_info);
- case MSR_IA32_TSC:
- msr_info->data = guest_read_tsc(vcpu);
- break;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
return 1;
@@ -3646,17 +3645,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
vmcs_write64(GUEST_BNDCFGS, data);
break;
- case MSR_IA32_TSC:
- kvm_write_tsc(vcpu, msr_info);
- break;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
return 1;
/* The STIBP bit doesn't fault even if it's not advertised */
- if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
+ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
return 1;
vmx->spec_ctrl = data;
@@ -3682,7 +3677,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_PRED_CMD:
if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) &&
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
return 1;
@@ -4553,12 +4547,6 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
__vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa);
}
-static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu)
-{
- if (enable_ept)
- vmx_flush_tlb(vcpu, true);
-}
-
static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
{
ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
@@ -4776,14 +4764,16 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
else
hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
- if ((cr4 & X86_CR4_UMIP) && !boot_cpu_has(X86_FEATURE_UMIP)) {
- vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
- SECONDARY_EXEC_DESC);
- hw_cr4 &= ~X86_CR4_UMIP;
- } else if (!is_guest_mode(vcpu) ||
- !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
- vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+ if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
+ if (cr4 & X86_CR4_UMIP) {
+ vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
SECONDARY_EXEC_DESC);
+ hw_cr4 &= ~X86_CR4_UMIP;
+ } else if (!is_guest_mode(vcpu) ||
+ !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
+ vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+ SECONDARY_EXEC_DESC);
+ }
if (cr4 & X86_CR4_VMXE) {
/*
@@ -9287,7 +9277,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
} else {
sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
- vmx_flush_tlb_ept_only(vcpu);
+ vmx_flush_tlb(vcpu, true);
}
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
@@ -9315,7 +9305,7 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
!nested_cpu_has2(get_vmcs12(&vmx->vcpu),
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
vmcs_write64(APIC_ACCESS_ADDR, hpa);
- vmx_flush_tlb_ept_only(vcpu);
+ vmx_flush_tlb(vcpu, true);
}
}
@@ -9495,9 +9485,21 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
}
STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
-static bool vmx_has_high_real_mode_segbase(void)
+static bool vmx_has_emulated_msr(int index)
{
- return enable_unrestricted_guest || emulate_invalid_guest_state;
+ switch (index) {
+ case MSR_IA32_SMBASE:
+ /*
+ * We cannot do SMM unless we can run the guest in big
+ * real mode.
+ */
+ return enable_unrestricted_guest || emulate_invalid_guest_state;
+ case MSR_AMD64_VIRT_SPEC_CTRL:
+ /* This is AMD only. */
+ return false;
+ default:
+ return true;
+ }
}
static bool vmx_mpx_supported(void)
@@ -9512,12 +9514,6 @@ static bool vmx_xsaves_supported(void)
SECONDARY_EXEC_XSAVES;
}
-static bool vmx_umip_emulated(void)
-{
- return vmcs_config.cpu_based_2nd_exec_ctrl &
- SECONDARY_EXEC_DESC;
-}
-
static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
{
u32 exit_intr_info;
@@ -9735,8 +9731,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
* is no need to worry about the conditional branch over the wrmsr
* being speculatively taken.
*/
- if (vmx->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+ x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
vmx->__launched = vmx->loaded_vmcs->launched;
@@ -9884,8 +9879,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
- if (vmx->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();
@@ -10608,6 +10602,16 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
return true;
}
+static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
+ !page_address_valid(vcpu, vmcs12->apic_access_addr))
+ return -EINVAL;
+ else
+ return 0;
+}
+
static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
@@ -11176,11 +11180,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
}
- if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
- vmcs_write64(TSC_OFFSET,
- vcpu->arch.tsc_offset + vmcs12->tsc_offset);
- else
- vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
+ vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
+
if (kvm_has_tsc_control)
decache_tsc_multiplier(vmx);
@@ -11222,7 +11223,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
}
} else if (nested_cpu_has2(vmcs12,
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
- vmx_flush_tlb_ept_only(vcpu);
+ vmx_flush_tlb(vcpu, true);
}
/*
@@ -11299,6 +11300,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+ if (nested_vmx_check_apic_access_controls(vcpu, vmcs12))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
if (nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
@@ -11420,6 +11424,7 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
u32 msr_entry_idx;
u32 exit_qual;
+ int r;
enter_guest_mode(vcpu);
@@ -11429,26 +11434,21 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
vmx_segment_cache_clear(vmx);
- if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) {
- leave_guest_mode(vcpu);
- vmx_switch_vmcs(vcpu, &vmx->vmcs01);
- nested_vmx_entry_failure(vcpu, vmcs12,
- EXIT_REASON_INVALID_STATE, exit_qual);
- return 1;
- }
+ if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
+ vcpu->arch.tsc_offset += vmcs12->tsc_offset;
+
+ r = EXIT_REASON_INVALID_STATE;
+ if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual))
+ goto fail;
nested_get_vmcs12_pages(vcpu, vmcs12);
+ r = EXIT_REASON_MSR_LOAD_FAIL;
msr_entry_idx = nested_vmx_load_msr(vcpu,
vmcs12->vm_entry_msr_load_addr,
vmcs12->vm_entry_msr_load_count);
- if (msr_entry_idx) {
- leave_guest_mode(vcpu);
- vmx_switch_vmcs(vcpu, &vmx->vmcs01);
- nested_vmx_entry_failure(vcpu, vmcs12,
- EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx);
- return 1;
- }
+ if (msr_entry_idx)
+ goto fail;
/*
* Note no nested_vmx_succeed or nested_vmx_fail here. At this point
@@ -11457,6 +11457,14 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
* the success flag) when L2 exits (see nested_vmx_vmexit()).
*/
return 0;
+
+fail:
+ if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
+ vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
+ leave_guest_mode(vcpu);
+ vmx_switch_vmcs(vcpu, &vmx->vmcs01);
+ nested_vmx_entry_failure(vcpu, vmcs12, r, exit_qual);
+ return 1;
}
/*
@@ -12028,6 +12036,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
leave_guest_mode(vcpu);
+ if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
+ vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
+
if (likely(!vmx->fail)) {
if (exit_reason == -1)
sync_vmcs12(vcpu, vmcs12);
@@ -12065,7 +12076,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
} else if (!nested_cpu_has_ept(vmcs12) &&
nested_cpu_has2(vmcs12,
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
- vmx_flush_tlb_ept_only(vcpu);
+ vmx_flush_tlb(vcpu, true);
}
/* This is needed for same reason as it was needed in prepare_vmcs02 */
@@ -12224,10 +12235,16 @@ static inline int u64_shl_div_u64(u64 a, unsigned int shift,
static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- u64 tscl = rdtsc();
- u64 guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
- u64 delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
+ struct vcpu_vmx *vmx;
+ u64 tscl, guest_tscl, delta_tsc;
+
+ if (kvm_mwait_in_guest(vcpu->kvm))
+ return -EOPNOTSUPP;
+
+ vmx = to_vmx(vcpu);
+ tscl = rdtsc();
+ guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
+ delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
/* Convert to host delta tsc if tsc scaling is enabled */
if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
@@ -12533,7 +12550,7 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
vcpu_info.vector = irq.vector;
- trace_kvm_pi_irte_update(vcpu->vcpu_id, host_irq, e->gsi,
+ trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
vcpu_info.vector, vcpu_info.pi_desc_addr, set);
if (set)
@@ -12622,7 +12639,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.hardware_enable = hardware_enable,
.hardware_disable = hardware_disable,
.cpu_has_accelerated_tpr = report_flexpriority,
- .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase,
+ .has_emulated_msr = vmx_has_emulated_msr,
.vm_init = vmx_vm_init,
.vm_alloc = vmx_vm_alloc,
@@ -12712,6 +12729,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
+ .read_l1_tsc_offset = vmx_read_l1_tsc_offset,
.write_tsc_offset = vmx_write_tsc_offset,
.set_tdp_cr3 = vmx_set_cr3,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b2ff74b12ec4..71e7cda6d014 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -114,7 +114,7 @@ module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
static bool __read_mostly report_ignored_msrs = true;
module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
-unsigned int min_timer_period_us = 500;
+unsigned int min_timer_period_us = 200;
module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
static bool __read_mostly kvmclock_periodic_sync = true;
@@ -843,7 +843,10 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4);
int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
#ifdef CONFIG_X86_64
- cr3 &= ~CR3_PCID_INVD;
+ bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
+
+ if (pcid_enabled)
+ cr3 &= ~CR3_PCID_INVD;
#endif
if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
@@ -1058,6 +1061,7 @@ static u32 emulated_msrs[] = {
MSR_SMI_COUNT,
MSR_PLATFORM_INFO,
MSR_MISC_FEATURES_ENABLES,
+ MSR_AMD64_VIRT_SPEC_CTRL,
};
static unsigned num_emulated_msrs;
@@ -1490,7 +1494,7 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
{
- u64 curr_offset = vcpu->arch.tsc_offset;
+ u64 curr_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
}
@@ -1532,7 +1536,9 @@ static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
{
- return vcpu->arch.tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
+ u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
+
+ return tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
}
EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
@@ -2362,6 +2368,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
vcpu->arch.smbase = data;
break;
+ case MSR_IA32_TSC:
+ kvm_write_tsc(vcpu, msr_info);
+ break;
case MSR_SMI_COUNT:
if (!msr_info->host_initiated)
return 1;
@@ -2605,6 +2614,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_UCODE_REV:
msr_info->data = vcpu->arch.microcode_version;
break;
+ case MSR_IA32_TSC:
+ msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset;
+ break;
case MSR_MTRRcap:
case 0x200 ... 0x2ff:
return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
@@ -2819,7 +2831,8 @@ out:
static inline bool kvm_can_mwait_in_guest(void)
{
return boot_cpu_has(X86_FEATURE_MWAIT) &&
- !boot_cpu_has_bug(X86_BUG_MONITOR);
+ !boot_cpu_has_bug(X86_BUG_MONITOR) &&
+ boot_cpu_has(X86_FEATURE_ARAT);
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -2894,7 +2907,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
* fringe case that is not enabled except via specific settings
* of the module parameters.
*/
- r = kvm_x86_ops->cpu_has_high_real_mode_segbase();
+ r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE);
break;
case KVM_CAP_VAPIC:
r = !kvm_x86_ops->cpu_has_accelerated_tpr();
@@ -4594,14 +4607,8 @@ static void kvm_init_msr_list(void)
num_msrs_to_save = j;
for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
- switch (emulated_msrs[i]) {
- case MSR_IA32_SMBASE:
- if (!kvm_x86_ops->cpu_has_high_real_mode_segbase())
- continue;
- break;
- default:
- break;
- }
+ if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i]))
+ continue;
if (j < i)
emulated_msrs[j] = emulated_msrs[i];
@@ -6662,9 +6669,7 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
- int op_64_bit, r;
-
- r = kvm_skip_emulated_instruction(vcpu);
+ int op_64_bit;
if (kvm_hv_hypercall_enabled(vcpu->kvm))
return kvm_hv_hypercall(vcpu);
@@ -6712,8 +6717,9 @@ out:
if (!op_64_bit)
ret = (u32)ret;
kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
+
++vcpu->stat.hypercalls;
- return r;
+ return kvm_skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
@@ -7970,6 +7976,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
struct msr_data apic_base_msr;
int mmu_reset_needed = 0;
+ int cpuid_update_needed = 0;
int pending_vec, max_bits, idx;
struct desc_ptr dt;
int ret = -EINVAL;
@@ -8008,8 +8015,10 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
vcpu->arch.cr0 = sregs->cr0;
mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
+ cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) &
+ (X86_CR4_OSXSAVE | X86_CR4_PKE));
kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
- if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE))
+ if (cpuid_update_needed)
kvm_update_cpuid(vcpu);
idx = srcu_read_lock(&vcpu->kvm->srcu);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 7d35ce672989..c9492f764902 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -302,13 +302,6 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
__rem; \
})
-#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0)
-#define KVM_X86_DISABLE_EXITS_HTL (1 << 1)
-#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
-#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \
- KVM_X86_DISABLE_EXITS_HTL | \
- KVM_X86_DISABLE_EXITS_PAUSE)
-
static inline bool kvm_mwait_in_guest(struct kvm *kvm)
{
return kvm->arch.mwait_in_guest;
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 9a53a06e5a3e..c3b527a9f95d 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -184,11 +184,11 @@ ENDPROC(memcpy_orig)
#ifndef CONFIG_UML
/*
- * memcpy_mcsafe_unrolled - memory copy with machine check exception handling
+ * __memcpy_mcsafe - memory copy with machine check exception handling
* Note that we only catch machine checks when reading the source addresses.
* Writes to target are posted and don't generate machine checks.
*/
-ENTRY(memcpy_mcsafe_unrolled)
+ENTRY(__memcpy_mcsafe)
cmpl $8, %edx
/* Less than 8 bytes? Go to byte copy loop */
jb .L_no_whole_words
@@ -204,58 +204,29 @@ ENTRY(memcpy_mcsafe_unrolled)
subl $8, %ecx
negl %ecx
subl %ecx, %edx
-.L_copy_leading_bytes:
+.L_read_leading_bytes:
movb (%rsi), %al
+.L_write_leading_bytes:
movb %al, (%rdi)
incq %rsi
incq %rdi
decl %ecx
- jnz .L_copy_leading_bytes
+ jnz .L_read_leading_bytes
.L_8byte_aligned:
- /* Figure out how many whole cache lines (64-bytes) to copy */
- movl %edx, %ecx
- andl $63, %edx
- shrl $6, %ecx
- jz .L_no_whole_cache_lines
-
- /* Loop copying whole cache lines */
-.L_cache_w0: movq (%rsi), %r8
-.L_cache_w1: movq 1*8(%rsi), %r9
-.L_cache_w2: movq 2*8(%rsi), %r10
-.L_cache_w3: movq 3*8(%rsi), %r11
- movq %r8, (%rdi)
- movq %r9, 1*8(%rdi)
- movq %r10, 2*8(%rdi)
- movq %r11, 3*8(%rdi)
-.L_cache_w4: movq 4*8(%rsi), %r8
-.L_cache_w5: movq 5*8(%rsi), %r9
-.L_cache_w6: movq 6*8(%rsi), %r10
-.L_cache_w7: movq 7*8(%rsi), %r11
- movq %r8, 4*8(%rdi)
- movq %r9, 5*8(%rdi)
- movq %r10, 6*8(%rdi)
- movq %r11, 7*8(%rdi)
- leaq 64(%rsi), %rsi
- leaq 64(%rdi), %rdi
- decl %ecx
- jnz .L_cache_w0
-
- /* Are there any trailing 8-byte words? */
-.L_no_whole_cache_lines:
movl %edx, %ecx
andl $7, %edx
shrl $3, %ecx
jz .L_no_whole_words
- /* Copy trailing words */
-.L_copy_trailing_words:
+.L_read_words:
movq (%rsi), %r8
- mov %r8, (%rdi)
- leaq 8(%rsi), %rsi
- leaq 8(%rdi), %rdi
+.L_write_words:
+ movq %r8, (%rdi)
+ addq $8, %rsi
+ addq $8, %rdi
decl %ecx
- jnz .L_copy_trailing_words
+ jnz .L_read_words
/* Any trailing bytes? */
.L_no_whole_words:
@@ -264,38 +235,53 @@ ENTRY(memcpy_mcsafe_unrolled)
/* Copy trailing bytes */
movl %edx, %ecx
-.L_copy_trailing_bytes:
+.L_read_trailing_bytes:
movb (%rsi), %al
+.L_write_trailing_bytes:
movb %al, (%rdi)
incq %rsi
incq %rdi
decl %ecx
- jnz .L_copy_trailing_bytes
+ jnz .L_read_trailing_bytes
/* Copy successful. Return zero */
.L_done_memcpy_trap:
xorq %rax, %rax
ret
-ENDPROC(memcpy_mcsafe_unrolled)
-EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
+ENDPROC(__memcpy_mcsafe)
+EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
.section .fixup, "ax"
- /* Return -EFAULT for any failure */
-.L_memcpy_mcsafe_fail:
- mov $-EFAULT, %rax
+ /*
+ * Return number of bytes not copied for any failure. Note that
+ * there is no "tail" handling since the source buffer is 8-byte
+ * aligned and poison is cacheline aligned.
+ */
+.E_read_words:
+ shll $3, %ecx
+.E_leading_bytes:
+ addl %edx, %ecx
+.E_trailing_bytes:
+ mov %ecx, %eax
ret
+ /*
+ * For write fault handling, given the destination is unaligned,
+ * we handle faults on multi-byte writes with a byte-by-byte
+ * copy up to the write-protected page.
+ */
+.E_write_words:
+ shll $3, %ecx
+ addl %edx, %ecx
+ movl %ecx, %edx
+ jmp mcsafe_handle_tail
+
.previous
- _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
- _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
- _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
- _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
- _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
- _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
- _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
- _ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail)
- _ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail)
- _ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail)
- _ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail)
+ _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
+ _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
+ _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
+ _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
+ _ASM_EXTABLE(.L_write_words, .E_write_words)
+ _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
#endif
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 75d3776123cc..9c5606d88f61 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -23,13 +23,13 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
asm volatile(
" testq %[size8],%[size8]\n"
" jz 4f\n"
- "0: movq %[zero],(%[dst])\n"
- " addq %[eight],%[dst]\n"
+ "0: movq $0,(%[dst])\n"
+ " addq $8,%[dst]\n"
" decl %%ecx ; jnz 0b\n"
"4: movq %[size1],%%rcx\n"
" testl %%ecx,%%ecx\n"
" jz 2f\n"
- "1: movb %b[zero],(%[dst])\n"
+ "1: movb $0,(%[dst])\n"
" incq %[dst]\n"
" decl %%ecx ; jnz 1b\n"
"2:\n"
@@ -40,8 +40,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
_ASM_EXTABLE(0b,3b)
_ASM_EXTABLE(1b,2b)
: [size8] "=&c"(size), [dst] "=&D" (__d0)
- : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr),
- [zero] "r" (0UL), [eight] "r" (8UL));
+ : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr));
clac();
return size;
}
@@ -75,6 +74,27 @@ copy_user_handle_tail(char *to, char *from, unsigned len)
return len;
}
+/*
+ * Similar to copy_user_handle_tail, probe for the write fault point,
+ * but reuse __memcpy_mcsafe in case a new read error is encountered.
+ * clac() is handled in _copy_to_iter_mcsafe().
+ */
+__visible unsigned long
+mcsafe_handle_tail(char *to, char *from, unsigned len)
+{
+ for (; len; --len, to++, from++) {
+ /*
+ * Call the assembly routine back directly since
+ * memcpy_mcsafe() may silently fallback to memcpy.
+ */
+ unsigned long rem = __memcpy_mcsafe(to, from, 1);
+
+ if (rem)
+ break;
+ }
+ return len;
+}
+
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
* clean_cache_range - write back a cache range with CLWB
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 62a7e9f65dec..2f3c9196b834 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -18,6 +18,7 @@
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
+#include <linux/highmem.h>
#include <asm/pgtable.h>
@@ -334,16 +335,16 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
pgprotval_t eff_in, unsigned long P)
{
int i;
- pte_t *start;
+ pte_t *pte;
pgprotval_t prot, eff;
- start = (pte_t *)pmd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PTE; i++) {
- prot = pte_flags(*start);
- eff = effective_prot(eff_in, prot);
st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
+ pte = pte_offset_map(&addr, st->current_address);
+ prot = pte_flags(*pte);
+ eff = effective_prot(eff_in, prot);
note_page(m, st, __pgprot(prot), eff, 5);
- start++;
+ pte_unmap(pte);
}
}
#ifdef CONFIG_KASAN
@@ -359,7 +360,7 @@ static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
void *pt)
{
if (__pa(pt) == __pa(kasan_zero_pmd) ||
- (pgtable_l5_enabled && __pa(pt) == __pa(kasan_zero_p4d)) ||
+ (pgtable_l5_enabled() && __pa(pt) == __pa(kasan_zero_p4d)) ||
__pa(pt) == __pa(kasan_zero_pud)) {
pgprotval_t prot = pte_flags(kasan_zero_pte[0]);
note_page(m, st, __pgprot(prot), 0, 5);
@@ -475,8 +476,8 @@ static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
}
}
-#define pgd_large(a) (pgtable_l5_enabled ? pgd_large(a) : p4d_large(__p4d(pgd_val(a))))
-#define pgd_none(a) (pgtable_l5_enabled ? pgd_none(a) : p4d_none(__p4d(pgd_val(a))))
+#define pgd_large(a) (pgtable_l5_enabled() ? pgd_large(a) : p4d_large(__p4d(pgd_val(a))))
+#define pgd_none(a) (pgtable_l5_enabled() ? pgd_none(a) : p4d_none(__p4d(pgd_val(a))))
static inline bool is_hypervisor_range(int idx)
{
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 73bd8c95ac71..9a84a0d08727 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -209,6 +209,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
unsigned lsb = 0;
siginfo_t info;
+ clear_siginfo(&info);
info.si_signo = si_signo;
info.si_errno = 0;
info.si_code = si_code;
@@ -439,7 +440,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (pgd_none(*pgd_k))
return -1;
- if (pgtable_l5_enabled) {
+ if (pgtable_l5_enabled()) {
if (pgd_none(*pgd)) {
set_pgd(pgd, *pgd_k);
arch_flush_lazy_mmu_mode();
@@ -454,7 +455,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (p4d_none(*p4d_k))
return -1;
- if (p4d_none(*p4d) && !pgtable_l5_enabled) {
+ if (p4d_none(*p4d) && !pgtable_l5_enabled()) {
set_p4d(p4d, *p4d_k);
arch_flush_lazy_mmu_mode();
} else {
@@ -828,6 +829,8 @@ static inline void
show_signal_msg(struct pt_regs *regs, unsigned long error_code,
unsigned long address, struct task_struct *tsk)
{
+ const char *loglvl = task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG;
+
if (!unhandled_signal(tsk, SIGSEGV))
return;
@@ -835,13 +838,14 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
return;
printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx",
- task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
- tsk->comm, task_pid_nr(tsk), address,
+ loglvl, tsk->comm, task_pid_nr(tsk), address,
(void *)regs->ip, (void *)regs->sp, error_code);
print_vma_addr(KERN_CONT " in ", regs->ip);
printk(KERN_CONT "\n");
+
+ show_opcodes((u8 *)regs->ip, loglvl);
}
static void
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index a2f0c7e20fb0..fe7a12599d8e 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -123,7 +123,7 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
result = ident_p4d_init(info, p4d, addr, next);
if (result)
return result;
- if (pgtable_l5_enabled) {
+ if (pgtable_l5_enabled()) {
set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag));
} else {
/*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0a400606dea0..17383f9677fa 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -180,7 +180,7 @@ static void sync_global_pgds_l4(unsigned long start, unsigned long end)
*/
void sync_global_pgds(unsigned long start, unsigned long end)
{
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
sync_global_pgds_l5(start, end);
else
sync_global_pgds_l4(start, end);
@@ -643,7 +643,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
unsigned long vaddr = (unsigned long)__va(paddr);
int i = p4d_index(vaddr);
- if (!pgtable_l5_enabled)
+ if (!pgtable_l5_enabled())
return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask);
for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
@@ -723,7 +723,7 @@ kernel_physical_mapping_init(unsigned long paddr_start,
page_size_mask);
spin_lock(&init_mm.page_table_lock);
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
pgd_populate(&init_mm, pgd, p4d);
else
p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d);
@@ -1100,7 +1100,7 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
* 5-level case we should free them. This code will have to change
* to adapt for boot-time switching between 4 and 5 level page tables.
*/
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
free_pud_table(pud_base, p4d);
}
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 980dbebd0ca7..e3e77527f8df 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -2,10 +2,8 @@
#define DISABLE_BRANCH_PROFILING
#define pr_fmt(fmt) "kasan: " fmt
-#ifdef CONFIG_X86_5LEVEL
-/* Too early to use cpu_feature_enabled() */
-#define pgtable_l5_enabled __pgtable_l5_enabled
-#endif
+/* cpu_feature_enabled() cannot be used this early */
+#define USE_EARLY_PGTABLE_L5
#include <linux/bootmem.h>
#include <linux/kasan.h>
@@ -182,7 +180,7 @@ static void __init clear_pgds(unsigned long start,
* With folded p4d, pgd_clear() is nop, use p4d_clear()
* instead.
*/
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
pgd_clear(pgd);
else
p4d_clear(p4d_offset(pgd, start));
@@ -197,7 +195,7 @@ static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
{
unsigned long p4d;
- if (!pgtable_l5_enabled)
+ if (!pgtable_l5_enabled())
return (p4d_t *)pgd;
p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
@@ -284,7 +282,7 @@ void __init kasan_early_init(void)
for (i = 0; i < PTRS_PER_PUD; i++)
kasan_zero_pud[i] = __pud(pud_val);
- for (i = 0; pgtable_l5_enabled && i < PTRS_PER_P4D; i++)
+ for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++)
kasan_zero_p4d[i] = __p4d(p4d_val);
kasan_map_early_shadow(early_top_pgt);
@@ -315,7 +313,7 @@ void __init kasan_init(void)
* bunch of things like kernel code, modules, EFI mapping, etc.
* We need to take extra steps to not overwrite them.
*/
- if (pgtable_l5_enabled) {
+ if (pgtable_l5_enabled()) {
void *ptr;
ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 615cc03ced84..61db77b0eda9 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -78,7 +78,7 @@ void __init kernel_randomize_memory(void)
struct rnd_state rand_state;
unsigned long remain_entropy;
- vaddr_start = pgtable_l5_enabled ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4;
+ vaddr_start = pgtable_l5_enabled() ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4;
vaddr = vaddr_start;
/*
@@ -124,7 +124,7 @@ void __init kernel_randomize_memory(void)
*/
entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i);
prandom_bytes_state(&rand_state, &rand, sizeof(rand));
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
entropy = (rand % (entropy + 1)) & P4D_MASK;
else
entropy = (rand % (entropy + 1)) & PUD_MASK;
@@ -136,7 +136,7 @@ void __init kernel_randomize_memory(void)
* randomization alignment.
*/
vaddr += get_padding(&kaslr_regions[i]);
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
vaddr = round_up(vaddr + 1, P4D_SIZE);
else
vaddr = round_up(vaddr + 1, PUD_SIZE);
@@ -212,7 +212,7 @@ void __meminit init_trampoline(void)
return;
}
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
init_trampoline_p4d();
else
init_trampoline_pud();
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 25504d5aa816..fa150855647c 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -136,13 +136,13 @@ static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
/* whine about and ignore invalid blks */
if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
- pr_warning("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
- nid, start, end - 1);
+ pr_warn("Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
+ nid, start, end - 1);
return 0;
}
if (mi->nr_blks >= NR_NODE_MEMBLKS) {
- pr_err("NUMA: too many memblk ranges\n");
+ pr_err("too many memblk ranges\n");
return -EINVAL;
}
@@ -267,14 +267,14 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
*/
if (bi->end > bj->start && bi->start < bj->end) {
if (bi->nid != bj->nid) {
- pr_err("NUMA: node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n",
+ pr_err("node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n",
bi->nid, bi->start, bi->end - 1,
bj->nid, bj->start, bj->end - 1);
return -EINVAL;
}
- pr_warning("NUMA: Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n",
- bi->nid, bi->start, bi->end - 1,
- bj->start, bj->end - 1);
+ pr_warn("Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n",
+ bi->nid, bi->start, bi->end - 1,
+ bj->start, bj->end - 1);
}
/*
@@ -364,7 +364,7 @@ static int __init numa_alloc_distance(void)
phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
size, PAGE_SIZE);
if (!phys) {
- pr_warning("NUMA: Warning: can't allocate distance table!\n");
+ pr_warn("Warning: can't allocate distance table!\n");
/* don't retry until explicitly reset */
numa_distance = (void *)1LU;
return -ENOMEM;
@@ -410,14 +410,14 @@ void __init numa_set_distance(int from, int to, int distance)
if (from >= numa_distance_cnt || to >= numa_distance_cnt ||
from < 0 || to < 0) {
- pr_warn_once("NUMA: Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
- from, to, distance);
+ pr_warn_once("Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
+ from, to, distance);
return;
}
if ((u8)distance != distance ||
(from == to && distance != LOCAL_DISTANCE)) {
- pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
+ pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
from, to, distance);
return;
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 0f3d50f4c48c..3bded76e8d5c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -93,6 +93,18 @@ void arch_report_meminfo(struct seq_file *m)
static inline void split_page_count(int level) { }
#endif
+static inline int
+within(unsigned long addr, unsigned long start, unsigned long end)
+{
+ return addr >= start && addr < end;
+}
+
+static inline int
+within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
+{
+ return addr >= start && addr <= end;
+}
+
#ifdef CONFIG_X86_64
static inline unsigned long highmap_start_pfn(void)
@@ -106,20 +118,25 @@ static inline unsigned long highmap_end_pfn(void)
return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT;
}
-#endif
-
-static inline int
-within(unsigned long addr, unsigned long start, unsigned long end)
+static bool __cpa_pfn_in_highmap(unsigned long pfn)
{
- return addr >= start && addr < end;
+ /*
+ * Kernel text has an alias mapping at a high address, known
+ * here as "highmap".
+ */
+ return within_inclusive(pfn, highmap_start_pfn(), highmap_end_pfn());
}
-static inline int
-within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
+#else
+
+static bool __cpa_pfn_in_highmap(unsigned long pfn)
{
- return addr >= start && addr <= end;
+ /* There is no highmap on 32-bit */
+ return false;
}
+#endif
+
/*
* Flushing functions
*/
@@ -172,7 +189,7 @@ static void __cpa_flush_all(void *arg)
static void cpa_flush_all(unsigned long cache)
{
- BUG_ON(irqs_disabled());
+ BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
on_each_cpu(__cpa_flush_all, (void *) cache, 1);
}
@@ -236,7 +253,7 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache,
unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
#endif
- BUG_ON(irqs_disabled());
+ BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1);
@@ -1183,6 +1200,10 @@ static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
cpa->numpages = 1;
cpa->pfn = __pa(vaddr) >> PAGE_SHIFT;
return 0;
+
+ } else if (__cpa_pfn_in_highmap(cpa->pfn)) {
+ /* Faults in the highmap are OK, so do not warn: */
+ return -EFAULT;
} else {
WARN(1, KERN_WARNING "CPA: called for zero pte. "
"vaddr = %lx cpa->vaddr = %lx\n", vaddr,
@@ -1335,8 +1356,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
* to touch the high mapped kernel as well:
*/
if (!within(vaddr, (unsigned long)_text, _brk_end) &&
- within_inclusive(cpa->pfn, highmap_start_pfn(),
- highmap_end_pfn())) {
+ __cpa_pfn_in_highmap(cpa->pfn)) {
unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
__START_KERNEL_map - phys_base;
alias_cpa = *cpa;
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index d7bc0eea20a5..6e98e0a7c923 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -94,26 +94,27 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
*/
if (pkey != -1)
return pkey;
- /*
- * Look for a protection-key-drive execute-only mapping
- * which is now being given permissions that are not
- * execute-only. Move it back to the default pkey.
- */
- if (vma_is_pkey_exec_only(vma) &&
- (prot & (PROT_READ|PROT_WRITE))) {
- return 0;
- }
+
/*
* The mapping is execute-only. Go try to get the
* execute-only protection key. If we fail to do that,
* fall through as if we do not have execute-only
- * support.
+ * support in this mm.
*/
if (prot == PROT_EXEC) {
pkey = execute_only_pkey(vma->vm_mm);
if (pkey > 0)
return pkey;
+ } else if (vma_is_pkey_exec_only(vma)) {
+ /*
+ * Protections are *not* PROT_EXEC, but the mapping
+ * is using the exec-only pkey. This mapping was
+ * PROT_EXEC and will no longer be. Move back to
+ * the default pkey.
+ */
+ return ARCH_DEFAULT_PKEY;
}
+
/*
* This is a vanilla, non-pkey mprotect (or we failed to
* setup execute-only), inherit the pkey from the VMA we
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index f1fd52f449e0..4d418e705878 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -421,6 +421,16 @@ static inline bool pti_kernel_image_global_ok(void)
if (boot_cpu_has(X86_FEATURE_K8))
return false;
+ /*
+ * RANDSTRUCT derives its hardening benefits from the
+ * attacker's lack of knowledge about the layout of kernel
+ * data structures. Keep the kernel image non-global in
+ * cases where RANDSTRUCT is in use to help keep the layout a
+ * secret.
+ */
+ if (IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT))
+ return false;
+
return true;
}
@@ -430,12 +440,24 @@ static inline bool pti_kernel_image_global_ok(void)
*/
void pti_clone_kernel_text(void)
{
+ /*
+ * rodata is part of the kernel image and is normally
+ * readable on the filesystem or on the web. But, do not
+ * clone the areas past rodata, they might contain secrets.
+ */
unsigned long start = PFN_ALIGN(_text);
- unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
+ unsigned long end = (unsigned long)__end_rodata_hpage_align;
if (!pti_kernel_image_global_ok())
return;
+ pr_debug("mapping partial kernel image into user address space\n");
+
+ /*
+ * Note that this will undo _some_ of the work that
+ * pti_set_kernel_image_nonglobal() did to clear the
+ * global bit.
+ */
pti_clone_pmds(start, end, _PAGE_RW);
}
@@ -458,8 +480,6 @@ void pti_set_kernel_image_nonglobal(void)
if (pti_kernel_image_global_ok())
return;
- pr_debug("set kernel image non-global\n");
-
set_memory_nonglobal(start, (end - start) >> PAGE_SHIFT);
}
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index e055d1a06699..6eb1f34c3c85 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -157,7 +157,7 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
unsigned long sp = current_stack_pointer;
pgd_t *pgd = pgd_offset(mm, sp);
- if (pgtable_l5_enabled) {
+ if (pgtable_l5_enabled()) {
if (unlikely(pgd_none(*pgd))) {
pgd_t *pgd_ref = pgd_offset_k(sp);
diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile
index fefb4b619598..59e123da580c 100644
--- a/arch/x86/net/Makefile
+++ b/arch/x86/net/Makefile
@@ -1,6 +1,9 @@
#
# Arch-specific network modules
#
-OBJECT_FILES_NON_STANDARD_bpf_jit.o += y
-obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
+ifeq ($(CONFIG_X86_32),y)
+ obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o
+else
+ obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
+endif
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
deleted file mode 100644
index b33093f84528..000000000000
--- a/arch/x86/net/bpf_jit.S
+++ /dev/null
@@ -1,154 +0,0 @@
-/* bpf_jit.S : BPF JIT helper functions
- *
- * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-#include <linux/linkage.h>
-#include <asm/frame.h>
-
-/*
- * Calling convention :
- * rbx : skb pointer (callee saved)
- * esi : offset of byte(s) to fetch in skb (can be scratched)
- * r10 : copy of skb->data
- * r9d : hlen = skb->len - skb->data_len
- */
-#define SKBDATA %r10
-#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */
-
-#define FUNC(name) \
- .globl name; \
- .type name, @function; \
- name:
-
-FUNC(sk_load_word)
- test %esi,%esi
- js bpf_slow_path_word_neg
-
-FUNC(sk_load_word_positive_offset)
- mov %r9d,%eax # hlen
- sub %esi,%eax # hlen - offset
- cmp $3,%eax
- jle bpf_slow_path_word
- mov (SKBDATA,%rsi),%eax
- bswap %eax /* ntohl() */
- ret
-
-FUNC(sk_load_half)
- test %esi,%esi
- js bpf_slow_path_half_neg
-
-FUNC(sk_load_half_positive_offset)
- mov %r9d,%eax
- sub %esi,%eax # hlen - offset
- cmp $1,%eax
- jle bpf_slow_path_half
- movzwl (SKBDATA,%rsi),%eax
- rol $8,%ax # ntohs()
- ret
-
-FUNC(sk_load_byte)
- test %esi,%esi
- js bpf_slow_path_byte_neg
-
-FUNC(sk_load_byte_positive_offset)
- cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */
- jle bpf_slow_path_byte
- movzbl (SKBDATA,%rsi),%eax
- ret
-
-/* rsi contains offset and can be scratched */
-#define bpf_slow_path_common(LEN) \
- lea 32(%rbp), %rdx;\
- FRAME_BEGIN; \
- mov %rbx, %rdi; /* arg1 == skb */ \
- push %r9; \
- push SKBDATA; \
-/* rsi already has offset */ \
- mov $LEN,%ecx; /* len */ \
- call skb_copy_bits; \
- test %eax,%eax; \
- pop SKBDATA; \
- pop %r9; \
- FRAME_END
-
-
-bpf_slow_path_word:
- bpf_slow_path_common(4)
- js bpf_error
- mov 32(%rbp),%eax
- bswap %eax
- ret
-
-bpf_slow_path_half:
- bpf_slow_path_common(2)
- js bpf_error
- mov 32(%rbp),%ax
- rol $8,%ax
- movzwl %ax,%eax
- ret
-
-bpf_slow_path_byte:
- bpf_slow_path_common(1)
- js bpf_error
- movzbl 32(%rbp),%eax
- ret
-
-#define sk_negative_common(SIZE) \
- FRAME_BEGIN; \
- mov %rbx, %rdi; /* arg1 == skb */ \
- push %r9; \
- push SKBDATA; \
-/* rsi already has offset */ \
- mov $SIZE,%edx; /* size */ \
- call bpf_internal_load_pointer_neg_helper; \
- test %rax,%rax; \
- pop SKBDATA; \
- pop %r9; \
- FRAME_END; \
- jz bpf_error
-
-bpf_slow_path_word_neg:
- cmp SKF_MAX_NEG_OFF, %esi /* test range */
- jl bpf_error /* offset lower -> error */
-
-FUNC(sk_load_word_negative_offset)
- sk_negative_common(4)
- mov (%rax), %eax
- bswap %eax
- ret
-
-bpf_slow_path_half_neg:
- cmp SKF_MAX_NEG_OFF, %esi
- jl bpf_error
-
-FUNC(sk_load_half_negative_offset)
- sk_negative_common(2)
- mov (%rax),%ax
- rol $8,%ax
- movzwl %ax,%eax
- ret
-
-bpf_slow_path_byte_neg:
- cmp SKF_MAX_NEG_OFF, %esi
- jl bpf_error
-
-FUNC(sk_load_byte_negative_offset)
- sk_negative_common(1)
- movzbl (%rax), %eax
- ret
-
-bpf_error:
-# force a return 0 from jit handler
- xor %eax,%eax
- mov (%rbp),%rbx
- mov 8(%rbp),%r13
- mov 16(%rbp),%r14
- mov 24(%rbp),%r15
- add $40, %rbp
- leaveq
- ret
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b725154182cc..8fca446aaef6 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1,4 +1,5 @@
-/* bpf_jit_comp.c : BPF JIT compiler
+/*
+ * bpf_jit_comp.c: BPF JIT compiler
*
* Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
* Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
@@ -16,15 +17,6 @@
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
-/*
- * assembly code in arch/x86/net/bpf_jit.S
- */
-extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
-extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
-extern u8 sk_load_byte_positive_offset[];
-extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
-extern u8 sk_load_byte_negative_offset[];
-
static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
if (len == 1)
@@ -45,14 +37,15 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
+
#define EMIT1_off32(b1, off) \
- do {EMIT1(b1); EMIT(off, 4); } while (0)
+ do { EMIT1(b1); EMIT(off, 4); } while (0)
#define EMIT2_off32(b1, b2, off) \
- do {EMIT2(b1, b2); EMIT(off, 4); } while (0)
+ do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
#define EMIT3_off32(b1, b2, b3, off) \
- do {EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
+ do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
#define EMIT4_off32(b1, b2, b3, b4, off) \
- do {EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
+ do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
static bool is_imm8(int value)
{
@@ -70,9 +63,10 @@ static bool is_uimm32(u64 value)
}
/* mov dst, src */
-#define EMIT_mov(DST, SRC) \
- do {if (DST != SRC) \
- EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \
+#define EMIT_mov(DST, SRC) \
+ do { \
+ if (DST != SRC) \
+ EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \
} while (0)
static int bpf_size_to_x86_bytes(int bpf_size)
@@ -89,7 +83,8 @@ static int bpf_size_to_x86_bytes(int bpf_size)
return 0;
}
-/* list of x86 cond jumps opcodes (. + s8)
+/*
+ * List of x86 cond jumps opcodes (. + s8)
* Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
*/
#define X86_JB 0x72
@@ -103,38 +98,37 @@ static int bpf_size_to_x86_bytes(int bpf_size)
#define X86_JLE 0x7E
#define X86_JG 0x7F
-#define CHOOSE_LOAD_FUNC(K, func) \
- ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
-
-/* pick a register outside of BPF range for JIT internal work */
+/* Pick a register outside of BPF range for JIT internal work */
#define AUX_REG (MAX_BPF_JIT_REG + 1)
-/* The following table maps BPF registers to x64 registers.
+/*
+ * The following table maps BPF registers to x86-64 registers.
*
- * x64 register r12 is unused, since if used as base address
+ * x86-64 register R12 is unused, since if used as base address
* register in load/store instructions, it always needs an
* extra byte of encoding and is callee saved.
*
- * r9 caches skb->len - skb->data_len
- * r10 caches skb->data, and used for blinding (if enabled)
+ * Also x86-64 register R9 is unused. x86-64 register R10 is
+ * used for blinding (if enabled).
*/
static const int reg2hex[] = {
- [BPF_REG_0] = 0, /* rax */
- [BPF_REG_1] = 7, /* rdi */
- [BPF_REG_2] = 6, /* rsi */
- [BPF_REG_3] = 2, /* rdx */
- [BPF_REG_4] = 1, /* rcx */
- [BPF_REG_5] = 0, /* r8 */
- [BPF_REG_6] = 3, /* rbx callee saved */
- [BPF_REG_7] = 5, /* r13 callee saved */
- [BPF_REG_8] = 6, /* r14 callee saved */
- [BPF_REG_9] = 7, /* r15 callee saved */
- [BPF_REG_FP] = 5, /* rbp readonly */
- [BPF_REG_AX] = 2, /* r10 temp register */
- [AUX_REG] = 3, /* r11 temp register */
+ [BPF_REG_0] = 0, /* RAX */
+ [BPF_REG_1] = 7, /* RDI */
+ [BPF_REG_2] = 6, /* RSI */
+ [BPF_REG_3] = 2, /* RDX */
+ [BPF_REG_4] = 1, /* RCX */
+ [BPF_REG_5] = 0, /* R8 */
+ [BPF_REG_6] = 3, /* RBX callee saved */
+ [BPF_REG_7] = 5, /* R13 callee saved */
+ [BPF_REG_8] = 6, /* R14 callee saved */
+ [BPF_REG_9] = 7, /* R15 callee saved */
+ [BPF_REG_FP] = 5, /* RBP readonly */
+ [BPF_REG_AX] = 2, /* R10 temp register */
+ [AUX_REG] = 3, /* R11 temp register */
};
-/* is_ereg() == true if BPF register 'reg' maps to x64 r8..r15
+/*
+ * is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15
* which need extra byte of encoding.
* rax,rcx,...,rbp have simpler encoding
*/
@@ -153,7 +147,7 @@ static bool is_axreg(u32 reg)
return reg == BPF_REG_0;
}
-/* add modifiers if 'reg' maps to x64 registers r8..r15 */
+/* Add modifiers if 'reg' maps to x86-64 registers R8..R15 */
static u8 add_1mod(u8 byte, u32 reg)
{
if (is_ereg(reg))
@@ -170,13 +164,13 @@ static u8 add_2mod(u8 byte, u32 r1, u32 r2)
return byte;
}
-/* encode 'dst_reg' register into x64 opcode 'byte' */
+/* Encode 'dst_reg' register into x86-64 opcode 'byte' */
static u8 add_1reg(u8 byte, u32 dst_reg)
{
return byte + reg2hex[dst_reg];
}
-/* encode 'dst_reg' and 'src_reg' registers into x64 opcode 'byte' */
+/* Encode 'dst_reg' and 'src_reg' registers into x86-64 opcode 'byte' */
static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
{
return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3);
@@ -184,27 +178,24 @@ static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
static void jit_fill_hole(void *area, unsigned int size)
{
- /* fill whole space with int3 instructions */
+ /* Fill whole space with INT3 instructions */
memset(area, 0xcc, size);
}
struct jit_context {
- int cleanup_addr; /* epilogue code offset */
- bool seen_ld_abs;
- bool seen_ax_reg;
+ int cleanup_addr; /* Epilogue code offset */
};
-/* maximum number of bytes emitted while JITing one eBPF insn */
+/* Maximum number of bytes emitted while JITing one eBPF insn */
#define BPF_MAX_INSN_SIZE 128
#define BPF_INSN_SAFETY 64
-#define AUX_STACK_SPACE \
- (32 /* space for rbx, r13, r14, r15 */ + \
- 8 /* space for skb_copy_bits() buffer */)
+#define AUX_STACK_SPACE 40 /* Space for RBX, R13, R14, R15, tailcnt */
-#define PROLOGUE_SIZE 37
+#define PROLOGUE_SIZE 37
-/* emit x64 prologue code for BPF program and check it's size.
+/*
+ * Emit x86-64 prologue code for BPF program and check its size.
* bpf_tail_call helper will skip it while jumping into another program
*/
static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
@@ -212,8 +203,11 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
u8 *prog = *pprog;
int cnt = 0;
- EMIT1(0x55); /* push rbp */
- EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */
+ /* push rbp */
+ EMIT1(0x55);
+
+ /* mov rbp,rsp */
+ EMIT3(0x48, 0x89, 0xE5);
/* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */
EMIT3_off32(0x48, 0x81, 0xEC,
@@ -222,19 +216,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
/* sub rbp, AUX_STACK_SPACE */
EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE);
- /* all classic BPF filters use R6(rbx) save it */
-
/* mov qword ptr [rbp+0],rbx */
EMIT4(0x48, 0x89, 0x5D, 0);
-
- /* bpf_convert_filter() maps classic BPF register X to R7 and uses R8
- * as temporary, so all tcpdump filters need to spill/fill R7(r13) and
- * R8(r14). R9(r15) spill could be made conditional, but there is only
- * one 'bpf_error' return path out of helper functions inside bpf_jit.S
- * The overhead of extra spill is negligible for any filter other
- * than synthetic ones. Therefore not worth adding complexity.
- */
-
/* mov qword ptr [rbp+8],r13 */
EMIT4(0x4C, 0x89, 0x6D, 8);
/* mov qword ptr [rbp+16],r14 */
@@ -243,9 +226,10 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
EMIT4(0x4C, 0x89, 0x7D, 24);
if (!ebpf_from_cbpf) {
- /* Clear the tail call counter (tail_call_cnt): for eBPF tail
+ /*
+ * Clear the tail call counter (tail_call_cnt): for eBPF tail
* calls we need to reset the counter to 0. It's done in two
- * instructions, resetting rax register to 0, and moving it
+ * instructions, resetting RAX register to 0, and moving it
* to the counter location.
*/
@@ -260,7 +244,9 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
*pprog = prog;
}
-/* generate the following code:
+/*
+ * Generate the following code:
+ *
* ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
* if (index >= array->map.max_entries)
* goto out;
@@ -278,23 +264,26 @@ static void emit_bpf_tail_call(u8 **pprog)
int label1, label2, label3;
int cnt = 0;
- /* rdi - pointer to ctx
+ /*
+ * rdi - pointer to ctx
* rsi - pointer to bpf_array
* rdx - index in bpf_array
*/
- /* if (index >= array->map.max_entries)
- * goto out;
+ /*
+ * if (index >= array->map.max_entries)
+ * goto out;
*/
EMIT2(0x89, 0xD2); /* mov edx, edx */
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */
+#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */
EMIT2(X86_JBE, OFFSET1); /* jbe out */
label1 = cnt;
- /* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
- * goto out;
+ /*
+ * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+ * goto out;
*/
EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
@@ -308,8 +297,9 @@ static void emit_bpf_tail_call(u8 **pprog)
EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
offsetof(struct bpf_array, ptrs));
- /* if (prog == NULL)
- * goto out;
+ /*
+ * if (prog == NULL)
+ * goto out;
*/
EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */
#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE)
@@ -321,7 +311,8 @@ static void emit_bpf_tail_call(u8 **pprog)
offsetof(struct bpf_prog, bpf_func));
EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE); /* add rax, prologue_size */
- /* now we're ready to jump into next BPF program
+ /*
+ * Wow we're ready to jump into next BPF program
* rdi == ctx (1st arg)
* rax == prog->bpf_func + prologue_size
*/
@@ -334,26 +325,6 @@ static void emit_bpf_tail_call(u8 **pprog)
*pprog = prog;
}
-
-static void emit_load_skb_data_hlen(u8 **pprog)
-{
- u8 *prog = *pprog;
- int cnt = 0;
-
- /* r9d = skb->len - skb->data_len (headlen)
- * r10 = skb->data
- */
- /* mov %r9d, off32(%rdi) */
- EMIT3_off32(0x44, 0x8b, 0x8f, offsetof(struct sk_buff, len));
-
- /* sub %r9d, off32(%rdi) */
- EMIT3_off32(0x44, 0x2b, 0x8f, offsetof(struct sk_buff, data_len));
-
- /* mov %r10, off32(%rdi) */
- EMIT3_off32(0x4c, 0x8b, 0x97, offsetof(struct sk_buff, data));
- *pprog = prog;
-}
-
static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
u32 dst_reg, const u32 imm32)
{
@@ -361,7 +332,8 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
u8 b1, b2, b3;
int cnt = 0;
- /* optimization: if imm32 is positive, use 'mov %eax, imm32'
+ /*
+ * Optimization: if imm32 is positive, use 'mov %eax, imm32'
* (which zero-extends imm32) to save 2 bytes.
*/
if (sign_propagate && (s32)imm32 < 0) {
@@ -373,7 +345,8 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
goto done;
}
- /* optimization: if imm32 is zero, use 'xor %eax, %eax'
+ /*
+ * Optimization: if imm32 is zero, use 'xor %eax, %eax'
* to save 3 bytes.
*/
if (imm32 == 0) {
@@ -400,7 +373,8 @@ static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
int cnt = 0;
if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
- /* For emitting plain u32, where sign bit must not be
+ /*
+ * For emitting plain u32, where sign bit must not be
* propagated LLVM tends to load imm64 over mov32
* directly, so save couple of bytes by just doing
* 'mov %eax, imm32' instead.
@@ -439,8 +413,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
{
struct bpf_insn *insn = bpf_prog->insnsi;
int insn_cnt = bpf_prog->len;
- bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0);
- bool seen_ax_reg = ctx->seen_ax_reg | (oldproglen == 0);
bool seen_exit = false;
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
int i, cnt = 0;
@@ -450,9 +422,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
emit_prologue(&prog, bpf_prog->aux->stack_depth,
bpf_prog_was_classic(bpf_prog));
- if (seen_ld_abs)
- emit_load_skb_data_hlen(&prog);
-
for (i = 0; i < insn_cnt; i++, insn++) {
const s32 imm32 = insn->imm;
u32 dst_reg = insn->dst_reg;
@@ -460,13 +429,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
u8 b2 = 0, b3 = 0;
s64 jmp_offset;
u8 jmp_cond;
- bool reload_skb_data;
int ilen;
u8 *func;
- if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
- ctx->seen_ax_reg = seen_ax_reg = true;
-
switch (insn->code) {
/* ALU */
case BPF_ALU | BPF_ADD | BPF_X:
@@ -525,7 +490,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
else if (is_ereg(dst_reg))
EMIT1(add_1mod(0x40, dst_reg));
- /* b3 holds 'normal' opcode, b2 short form only valid
+ /*
+ * b3 holds 'normal' opcode, b2 short form only valid
* in case dst is eax/rax.
*/
switch (BPF_OP(insn->code)) {
@@ -593,7 +559,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
/* mov rax, dst_reg */
EMIT_mov(BPF_REG_0, dst_reg);
- /* xor edx, edx
+ /*
+ * xor edx, edx
* equivalent to 'xor rdx, rdx', but one byte less
*/
EMIT2(0x31, 0xd2);
@@ -655,7 +622,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
}
break;
}
- /* shifts */
+ /* Shifts */
case BPF_ALU | BPF_LSH | BPF_K:
case BPF_ALU | BPF_RSH | BPF_K:
case BPF_ALU | BPF_ARSH | BPF_K:
@@ -686,7 +653,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_ALU64 | BPF_RSH | BPF_X:
case BPF_ALU64 | BPF_ARSH | BPF_X:
- /* check for bad case when dst_reg == rcx */
+ /* Check for bad case when dst_reg == rcx */
if (dst_reg == BPF_REG_4) {
/* mov r11, dst_reg */
EMIT_mov(AUX_REG, dst_reg);
@@ -724,13 +691,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_ALU | BPF_END | BPF_FROM_BE:
switch (imm32) {
case 16:
- /* emit 'ror %ax, 8' to swap lower 2 bytes */
+ /* Emit 'ror %ax, 8' to swap lower 2 bytes */
EMIT1(0x66);
if (is_ereg(dst_reg))
EMIT1(0x41);
EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8);
- /* emit 'movzwl eax, ax' */
+ /* Emit 'movzwl eax, ax' */
if (is_ereg(dst_reg))
EMIT3(0x45, 0x0F, 0xB7);
else
@@ -738,7 +705,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
break;
case 32:
- /* emit 'bswap eax' to swap lower 4 bytes */
+ /* Emit 'bswap eax' to swap lower 4 bytes */
if (is_ereg(dst_reg))
EMIT2(0x41, 0x0F);
else
@@ -746,7 +713,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
EMIT1(add_1reg(0xC8, dst_reg));
break;
case 64:
- /* emit 'bswap rax' to swap 8 bytes */
+ /* Emit 'bswap rax' to swap 8 bytes */
EMIT3(add_1mod(0x48, dst_reg), 0x0F,
add_1reg(0xC8, dst_reg));
break;
@@ -756,7 +723,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_ALU | BPF_END | BPF_FROM_LE:
switch (imm32) {
case 16:
- /* emit 'movzwl eax, ax' to zero extend 16-bit
+ /*
+ * Emit 'movzwl eax, ax' to zero extend 16-bit
* into 64 bit
*/
if (is_ereg(dst_reg))
@@ -766,7 +734,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
break;
case 32:
- /* emit 'mov eax, eax' to clear upper 32-bits */
+ /* Emit 'mov eax, eax' to clear upper 32-bits */
if (is_ereg(dst_reg))
EMIT1(0x45);
EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg));
@@ -809,9 +777,9 @@ st: if (is_imm8(insn->off))
/* STX: *(u8*)(dst_reg + off) = src_reg */
case BPF_STX | BPF_MEM | BPF_B:
- /* emit 'mov byte ptr [rax + off], al' */
+ /* Emit 'mov byte ptr [rax + off], al' */
if (is_ereg(dst_reg) || is_ereg(src_reg) ||
- /* have to add extra byte for x86 SIL, DIL regs */
+ /* We have to add extra byte for x86 SIL, DIL regs */
src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
else
@@ -840,25 +808,26 @@ stx: if (is_imm8(insn->off))
/* LDX: dst_reg = *(u8*)(src_reg + off) */
case BPF_LDX | BPF_MEM | BPF_B:
- /* emit 'movzx rax, byte ptr [rax + off]' */
+ /* Emit 'movzx rax, byte ptr [rax + off]' */
EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
goto ldx;
case BPF_LDX | BPF_MEM | BPF_H:
- /* emit 'movzx rax, word ptr [rax + off]' */
+ /* Emit 'movzx rax, word ptr [rax + off]' */
EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
goto ldx;
case BPF_LDX | BPF_MEM | BPF_W:
- /* emit 'mov eax, dword ptr [rax+0x14]' */
+ /* Emit 'mov eax, dword ptr [rax+0x14]' */
if (is_ereg(dst_reg) || is_ereg(src_reg))
EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
else
EMIT1(0x8B);
goto ldx;
case BPF_LDX | BPF_MEM | BPF_DW:
- /* emit 'mov rax, qword ptr [rax+0x14]' */
+ /* Emit 'mov rax, qword ptr [rax+0x14]' */
EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
-ldx: /* if insn->off == 0 we can save one extra byte, but
- * special case of x86 r13 which always needs an offset
+ldx: /*
+ * If insn->off == 0 we can save one extra byte, but
+ * special case of x86 R13 which always needs an offset
* is not worth the hassle
*/
if (is_imm8(insn->off))
@@ -870,7 +839,7 @@ ldx: /* if insn->off == 0 we can save one extra byte, but
/* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
case BPF_STX | BPF_XADD | BPF_W:
- /* emit 'lock add dword ptr [rax + off], eax' */
+ /* Emit 'lock add dword ptr [rax + off], eax' */
if (is_ereg(dst_reg) || is_ereg(src_reg))
EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01);
else
@@ -889,35 +858,12 @@ xadd: if (is_imm8(insn->off))
case BPF_JMP | BPF_CALL:
func = (u8 *) __bpf_call_base + imm32;
jmp_offset = func - (image + addrs[i]);
- if (seen_ld_abs) {
- reload_skb_data = bpf_helper_changes_pkt_data(func);
- if (reload_skb_data) {
- EMIT1(0x57); /* push %rdi */
- jmp_offset += 22; /* pop, mov, sub, mov */
- } else {
- EMIT2(0x41, 0x52); /* push %r10 */
- EMIT2(0x41, 0x51); /* push %r9 */
- /* need to adjust jmp offset, since
- * pop %r9, pop %r10 take 4 bytes after call insn
- */
- jmp_offset += 4;
- }
- }
if (!imm32 || !is_simm32(jmp_offset)) {
- pr_err("unsupported bpf func %d addr %p image %p\n",
+ pr_err("unsupported BPF func %d addr %p image %p\n",
imm32, func, image);
return -EINVAL;
}
EMIT1_off32(0xE8, jmp_offset);
- if (seen_ld_abs) {
- if (reload_skb_data) {
- EMIT1(0x5F); /* pop %rdi */
- emit_load_skb_data_hlen(&prog);
- } else {
- EMIT2(0x41, 0x59); /* pop %r9 */
- EMIT2(0x41, 0x5A); /* pop %r10 */
- }
- }
break;
case BPF_JMP | BPF_TAIL_CALL:
@@ -970,7 +916,7 @@ xadd: if (is_imm8(insn->off))
else
EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32);
-emit_cond_jmp: /* convert BPF opcode to x86 */
+emit_cond_jmp: /* Convert BPF opcode to x86 */
switch (BPF_OP(insn->code)) {
case BPF_JEQ:
jmp_cond = X86_JE;
@@ -996,22 +942,22 @@ emit_cond_jmp: /* convert BPF opcode to x86 */
jmp_cond = X86_JBE;
break;
case BPF_JSGT:
- /* signed '>', GT in x86 */
+ /* Signed '>', GT in x86 */
jmp_cond = X86_JG;
break;
case BPF_JSLT:
- /* signed '<', LT in x86 */
+ /* Signed '<', LT in x86 */
jmp_cond = X86_JL;
break;
case BPF_JSGE:
- /* signed '>=', GE in x86 */
+ /* Signed '>=', GE in x86 */
jmp_cond = X86_JGE;
break;
case BPF_JSLE:
- /* signed '<=', LE in x86 */
+ /* Signed '<=', LE in x86 */
jmp_cond = X86_JLE;
break;
- default: /* to silence gcc warning */
+ default: /* to silence GCC warning */
return -EFAULT;
}
jmp_offset = addrs[i + insn->off] - addrs[i];
@@ -1027,9 +973,19 @@ emit_cond_jmp: /* convert BPF opcode to x86 */
break;
case BPF_JMP | BPF_JA:
- jmp_offset = addrs[i + insn->off] - addrs[i];
+ if (insn->off == -1)
+ /* -1 jmp instructions will always jump
+ * backwards two bytes. Explicitly handling
+ * this case avoids wasting too many passes
+ * when there are long sequences of replaced
+ * dead code.
+ */
+ jmp_offset = -2;
+ else
+ jmp_offset = addrs[i + insn->off] - addrs[i];
+
if (!jmp_offset)
- /* optimize out nop jumps */
+ /* Optimize out nop jumps */
break;
emit_jmp:
if (is_imm8(jmp_offset)) {
@@ -1042,66 +998,13 @@ emit_jmp:
}
break;
- case BPF_LD | BPF_IND | BPF_W:
- func = sk_load_word;
- goto common_load;
- case BPF_LD | BPF_ABS | BPF_W:
- func = CHOOSE_LOAD_FUNC(imm32, sk_load_word);
-common_load:
- ctx->seen_ld_abs = seen_ld_abs = true;
- jmp_offset = func - (image + addrs[i]);
- if (!func || !is_simm32(jmp_offset)) {
- pr_err("unsupported bpf func %d addr %p image %p\n",
- imm32, func, image);
- return -EINVAL;
- }
- if (BPF_MODE(insn->code) == BPF_ABS) {
- /* mov %esi, imm32 */
- EMIT1_off32(0xBE, imm32);
- } else {
- /* mov %rsi, src_reg */
- EMIT_mov(BPF_REG_2, src_reg);
- if (imm32) {
- if (is_imm8(imm32))
- /* add %esi, imm8 */
- EMIT3(0x83, 0xC6, imm32);
- else
- /* add %esi, imm32 */
- EMIT2_off32(0x81, 0xC6, imm32);
- }
- }
- /* skb pointer is in R6 (%rbx), it will be copied into
- * %rdi if skb_copy_bits() call is necessary.
- * sk_load_* helpers also use %r10 and %r9d.
- * See bpf_jit.S
- */
- if (seen_ax_reg)
- /* r10 = skb->data, mov %r10, off32(%rbx) */
- EMIT3_off32(0x4c, 0x8b, 0x93,
- offsetof(struct sk_buff, data));
- EMIT1_off32(0xE8, jmp_offset); /* call */
- break;
-
- case BPF_LD | BPF_IND | BPF_H:
- func = sk_load_half;
- goto common_load;
- case BPF_LD | BPF_ABS | BPF_H:
- func = CHOOSE_LOAD_FUNC(imm32, sk_load_half);
- goto common_load;
- case BPF_LD | BPF_IND | BPF_B:
- func = sk_load_byte;
- goto common_load;
- case BPF_LD | BPF_ABS | BPF_B:
- func = CHOOSE_LOAD_FUNC(imm32, sk_load_byte);
- goto common_load;
-
case BPF_JMP | BPF_EXIT:
if (seen_exit) {
jmp_offset = ctx->cleanup_addr - addrs[i];
goto emit_jmp;
}
seen_exit = true;
- /* update cleanup_addr */
+ /* Update cleanup_addr */
ctx->cleanup_addr = proglen;
/* mov rbx, qword ptr [rbp+0] */
EMIT4(0x48, 0x8B, 0x5D, 0);
@@ -1119,10 +1022,11 @@ common_load:
break;
default:
- /* By design x64 JIT should support all BPF instructions
+ /*
+ * By design x86-64 JIT should support all BPF instructions.
* This error will be seen if new instruction was added
- * to interpreter, but not to JIT
- * or if there is junk in bpf_prog
+ * to the interpreter, but not to the JIT, or if there is
+ * junk in bpf_prog.
*/
pr_err("bpf_jit: unknown opcode %02x\n", insn->code);
return -EINVAL;
@@ -1174,7 +1078,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
return orig_prog;
tmp = bpf_jit_blind_constants(prog);
- /* If blinding was requested and we failed during blinding,
+ /*
+ * If blinding was requested and we failed during blinding,
* we must fall back to the interpreter.
*/
if (IS_ERR(tmp))
@@ -1208,8 +1113,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
goto out_addrs;
}
- /* Before first pass, make a rough estimation of addrs[]
- * each bpf instruction is translated to less than 64 bytes
+ /*
+ * Before first pass, make a rough estimation of addrs[]
+ * each BPF instruction is translated to less than 64 bytes
*/
for (proglen = 0, i = 0; i < prog->len; i++) {
proglen += 64;
@@ -1218,14 +1124,16 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
ctx.cleanup_addr = proglen;
skip_init_addrs:
- /* JITed image shrinks with every pass and the loop iterates
- * until the image stops shrinking. Very large bpf programs
+ /*
+ * JITed image shrinks with every pass and the loop iterates
+ * until the image stops shrinking. Very large BPF programs
* may converge on the last pass. In such case do one more
- * pass to emit the final image
+ * pass to emit the final image.
*/
for (pass = 0; pass < 20 || image; pass++) {
proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
if (proglen <= 0) {
+out_image:
image = NULL;
if (header)
bpf_jit_binary_free(header);
@@ -1236,8 +1144,7 @@ skip_init_addrs:
if (proglen != oldproglen) {
pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
proglen, oldproglen);
- prog = orig_prog;
- goto out_addrs;
+ goto out_image;
}
break;
}
@@ -1273,7 +1180,7 @@ skip_init_addrs:
prog = orig_prog;
}
- if (!prog->is_func || extra_pass) {
+ if (!image || !prog->is_func || extra_pass) {
out_addrs:
kfree(addrs);
kfree(jit_data);
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
new file mode 100644
index 000000000000..0cc04e30adc1
--- /dev/null
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -0,0 +1,2419 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Just-In-Time compiler for eBPF filters on IA32 (32bit x86)
+ *
+ * Author: Wang YanQing (udknight@gmail.com)
+ * The code based on code and ideas from:
+ * Eric Dumazet (eric.dumazet@gmail.com)
+ * and from:
+ * Shubham Bansal <illusionist.neo@gmail.com>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include <linux/if_vlan.h>
+#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
+#include <asm/nospec-branch.h>
+#include <linux/bpf.h>
+
+/*
+ * eBPF prog stack layout:
+ *
+ * high
+ * original ESP => +-----+
+ * | | callee saved registers
+ * +-----+
+ * | ... | eBPF JIT scratch space
+ * BPF_FP,IA32_EBP => +-----+
+ * | ... | eBPF prog stack
+ * +-----+
+ * |RSVD | JIT scratchpad
+ * current ESP => +-----+
+ * | |
+ * | ... | Function call stack
+ * | |
+ * +-----+
+ * low
+ *
+ * The callee saved registers:
+ *
+ * high
+ * original ESP => +------------------+ \
+ * | ebp | |
+ * current EBP => +------------------+ } callee saved registers
+ * | ebx,esi,edi | |
+ * +------------------+ /
+ * low
+ */
+
+static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
+{
+ if (len == 1)
+ *ptr = bytes;
+ else if (len == 2)
+ *(u16 *)ptr = bytes;
+ else {
+ *(u32 *)ptr = bytes;
+ barrier();
+ }
+ return ptr + len;
+}
+
+#define EMIT(bytes, len) \
+ do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)
+
+#define EMIT1(b1) EMIT(b1, 1)
+#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2)
+#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
+#define EMIT4(b1, b2, b3, b4) \
+ EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
+
+#define EMIT1_off32(b1, off) \
+ do { EMIT1(b1); EMIT(off, 4); } while (0)
+#define EMIT2_off32(b1, b2, off) \
+ do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
+#define EMIT3_off32(b1, b2, b3, off) \
+ do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
+#define EMIT4_off32(b1, b2, b3, b4, off) \
+ do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
+
+#define jmp_label(label, jmp_insn_len) (label - cnt - jmp_insn_len)
+
+static bool is_imm8(int value)
+{
+ return value <= 127 && value >= -128;
+}
+
+static bool is_simm32(s64 value)
+{
+ return value == (s64) (s32) value;
+}
+
+#define STACK_OFFSET(k) (k)
+#define TCALL_CNT (MAX_BPF_JIT_REG + 0) /* Tail Call Count */
+
+#define IA32_EAX (0x0)
+#define IA32_EBX (0x3)
+#define IA32_ECX (0x1)
+#define IA32_EDX (0x2)
+#define IA32_ESI (0x6)
+#define IA32_EDI (0x7)
+#define IA32_EBP (0x5)
+#define IA32_ESP (0x4)
+
+/*
+ * List of x86 cond jumps opcodes (. + s8)
+ * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
+ */
+#define IA32_JB 0x72
+#define IA32_JAE 0x73
+#define IA32_JE 0x74
+#define IA32_JNE 0x75
+#define IA32_JBE 0x76
+#define IA32_JA 0x77
+#define IA32_JL 0x7C
+#define IA32_JGE 0x7D
+#define IA32_JLE 0x7E
+#define IA32_JG 0x7F
+
+/*
+ * Map eBPF registers to IA32 32bit registers or stack scratch space.
+ *
+ * 1. All the registers, R0-R10, are mapped to scratch space on stack.
+ * 2. We need two 64 bit temp registers to do complex operations on eBPF
+ * registers.
+ * 3. For performance reason, the BPF_REG_AX for blinding constant, is
+ * mapped to real hardware register pair, IA32_ESI and IA32_EDI.
+ *
+ * As the eBPF registers are all 64 bit registers and IA32 has only 32 bit
+ * registers, we have to map each eBPF registers with two IA32 32 bit regs
+ * or scratch memory space and we have to build eBPF 64 bit register from those.
+ *
+ * We use IA32_EAX, IA32_EDX, IA32_ECX, IA32_EBX as temporary registers.
+ */
+static const u8 bpf2ia32[][2] = {
+ /* Return value from in-kernel function, and exit value from eBPF */
+ [BPF_REG_0] = {STACK_OFFSET(0), STACK_OFFSET(4)},
+
+ /* The arguments from eBPF program to in-kernel function */
+ /* Stored on stack scratch space */
+ [BPF_REG_1] = {STACK_OFFSET(8), STACK_OFFSET(12)},
+ [BPF_REG_2] = {STACK_OFFSET(16), STACK_OFFSET(20)},
+ [BPF_REG_3] = {STACK_OFFSET(24), STACK_OFFSET(28)},
+ [BPF_REG_4] = {STACK_OFFSET(32), STACK_OFFSET(36)},
+ [BPF_REG_5] = {STACK_OFFSET(40), STACK_OFFSET(44)},
+
+ /* Callee saved registers that in-kernel function will preserve */
+ /* Stored on stack scratch space */
+ [BPF_REG_6] = {STACK_OFFSET(48), STACK_OFFSET(52)},
+ [BPF_REG_7] = {STACK_OFFSET(56), STACK_OFFSET(60)},
+ [BPF_REG_8] = {STACK_OFFSET(64), STACK_OFFSET(68)},
+ [BPF_REG_9] = {STACK_OFFSET(72), STACK_OFFSET(76)},
+
+ /* Read only Frame Pointer to access Stack */
+ [BPF_REG_FP] = {STACK_OFFSET(80), STACK_OFFSET(84)},
+
+ /* Temporary register for blinding constants. */
+ [BPF_REG_AX] = {IA32_ESI, IA32_EDI},
+
+ /* Tail call count. Stored on stack scratch space. */
+ [TCALL_CNT] = {STACK_OFFSET(88), STACK_OFFSET(92)},
+};
+
+#define dst_lo dst[0]
+#define dst_hi dst[1]
+#define src_lo src[0]
+#define src_hi src[1]
+
+#define STACK_ALIGNMENT 8
+/*
+ * Stack space for BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4,
+ * BPF_REG_5, BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9,
+ * BPF_REG_FP, BPF_REG_AX and Tail call counts.
+ */
+#define SCRATCH_SIZE 96
+
+/* Total stack size used in JITed code */
+#define _STACK_SIZE (stack_depth + SCRATCH_SIZE)
+
+#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
+
+/* Get the offset of eBPF REGISTERs stored on scratch space. */
+#define STACK_VAR(off) (off)
+
+/* Encode 'dst_reg' register into IA32 opcode 'byte' */
+static u8 add_1reg(u8 byte, u32 dst_reg)
+{
+ return byte + dst_reg;
+}
+
+/* Encode 'dst_reg' and 'src_reg' registers into IA32 opcode 'byte' */
+static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
+{
+ return byte + dst_reg + (src_reg << 3);
+}
+
+static void jit_fill_hole(void *area, unsigned int size)
+{
+ /* Fill whole space with int3 instructions */
+ memset(area, 0xcc, size);
+}
+
+static inline void emit_ia32_mov_i(const u8 dst, const u32 val, bool dstk,
+ u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (dstk) {
+ if (val == 0) {
+ /* xor eax,eax */
+ EMIT2(0x33, add_2reg(0xC0, IA32_EAX, IA32_EAX));
+ /* mov dword ptr [ebp+off],eax */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst));
+ } else {
+ EMIT3_off32(0xC7, add_1reg(0x40, IA32_EBP),
+ STACK_VAR(dst), val);
+ }
+ } else {
+ if (val == 0)
+ EMIT2(0x33, add_2reg(0xC0, dst, dst));
+ else
+ EMIT2_off32(0xC7, add_1reg(0xC0, dst),
+ val);
+ }
+ *pprog = prog;
+}
+
+/* dst = imm (4 bytes)*/
+static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk,
+ bool sstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u8 sreg = sstk ? IA32_EAX : src;
+
+ if (sstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
+ if (dstk)
+ /* mov dword ptr [ebp+off],eax */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, sreg), STACK_VAR(dst));
+ else
+ /* mov dst,sreg */
+ EMIT2(0x89, add_2reg(0xC0, dst, sreg));
+
+ *pprog = prog;
+}
+
+/* dst = src */
+static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[],
+ const u8 src[], bool dstk,
+ bool sstk, u8 **pprog)
+{
+ emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog);
+ if (is64)
+ /* complete 8 byte move */
+ emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog);
+ else
+ /* zero out high 4 bytes */
+ emit_ia32_mov_i(dst_hi, 0, dstk, pprog);
+}
+
+/* Sign extended move */
+static inline void emit_ia32_mov_i64(const bool is64, const u8 dst[],
+ const u32 val, bool dstk, u8 **pprog)
+{
+ u32 hi = 0;
+
+ if (is64 && (val & (1<<31)))
+ hi = (u32)~0;
+ emit_ia32_mov_i(dst_lo, val, dstk, pprog);
+ emit_ia32_mov_i(dst_hi, hi, dstk, pprog);
+}
+
+/*
+ * ALU operation (32 bit)
+ * dst = dst * src
+ */
+static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk,
+ bool sstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u8 sreg = sstk ? IA32_ECX : src;
+
+ if (sstk)
+ /* mov ecx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
+
+ if (dstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
+ else
+ /* mov eax,dst */
+ EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));
+
+
+ EMIT2(0xF7, add_1reg(0xE0, sreg));
+
+ if (dstk)
+ /* mov dword ptr [ebp+off],eax */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst));
+ else
+ /* mov dst,eax */
+ EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
+
+ *pprog = prog;
+}
+
+static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
+ bool dstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+ if (dstk && val != 64) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+ switch (val) {
+ case 16:
+ /*
+ * Emit 'movzwl eax,ax' to zero extend 16-bit
+ * into 64 bit
+ */
+ EMIT2(0x0F, 0xB7);
+ EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+ break;
+ case 32:
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+ break;
+ case 64:
+ /* nop */
+ break;
+ }
+
+ if (dstk && val != 64) {
+ /* mov dword ptr [ebp+off],dreg_lo */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+ STACK_VAR(dst_lo));
+ /* mov dword ptr [ebp+off],dreg_hi */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+ STACK_VAR(dst_hi));
+ }
+ *pprog = prog;
+}
+
+static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
+ bool dstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+ switch (val) {
+ case 16:
+ /* Emit 'ror %ax, 8' to swap lower 2 bytes */
+ EMIT1(0x66);
+ EMIT3(0xC1, add_1reg(0xC8, dreg_lo), 8);
+
+ EMIT2(0x0F, 0xB7);
+ EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
+
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+ break;
+ case 32:
+ /* Emit 'bswap eax' to swap lower 4 bytes */
+ EMIT1(0x0F);
+ EMIT1(add_1reg(0xC8, dreg_lo));
+
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+ break;
+ case 64:
+ /* Emit 'bswap eax' to swap lower 4 bytes */
+ EMIT1(0x0F);
+ EMIT1(add_1reg(0xC8, dreg_lo));
+
+ /* Emit 'bswap edx' to swap lower 4 bytes */
+ EMIT1(0x0F);
+ EMIT1(add_1reg(0xC8, dreg_hi));
+
+ /* mov ecx,dreg_hi */
+ EMIT2(0x89, add_2reg(0xC0, IA32_ECX, dreg_hi));
+ /* mov dreg_hi,dreg_lo */
+ EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
+ /* mov dreg_lo,ecx */
+ EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX));
+
+ break;
+ }
+ if (dstk) {
+ /* mov dword ptr [ebp+off],dreg_lo */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+ STACK_VAR(dst_lo));
+ /* mov dword ptr [ebp+off],dreg_hi */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+ STACK_VAR(dst_hi));
+ }
+ *pprog = prog;
+}
+
+/*
+ * ALU operation (32 bit)
+ * dst = dst (div|mod) src
+ */
+static inline void emit_ia32_div_mod_r(const u8 op, const u8 dst, const u8 src,
+ bool dstk, bool sstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (sstk)
+ /* mov ecx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
+ STACK_VAR(src));
+ else if (src != IA32_ECX)
+ /* mov ecx,src */
+ EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));
+
+ if (dstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst));
+ else
+ /* mov eax,dst */
+ EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));
+
+ /* xor edx,edx */
+ EMIT2(0x31, add_2reg(0xC0, IA32_EDX, IA32_EDX));
+ /* div ecx */
+ EMIT2(0xF7, add_1reg(0xF0, IA32_ECX));
+
+ if (op == BPF_MOD) {
+ if (dstk)
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst));
+ else
+ EMIT2(0x89, add_2reg(0xC0, dst, IA32_EDX));
+ } else {
+ if (dstk)
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst));
+ else
+ EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
+ }
+ *pprog = prog;
+}
+
+/*
+ * ALU operation (32 bit)
+ * dst = dst (shift) src
+ */
+static inline void emit_ia32_shift_r(const u8 op, const u8 dst, const u8 src,
+ bool dstk, bool sstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u8 dreg = dstk ? IA32_EAX : dst;
+ u8 b2;
+
+ if (dstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
+
+ if (sstk)
+ /* mov ecx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
+ else if (src != IA32_ECX)
+ /* mov ecx,src */
+ EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));
+
+ switch (op) {
+ case BPF_LSH:
+ b2 = 0xE0; break;
+ case BPF_RSH:
+ b2 = 0xE8; break;
+ case BPF_ARSH:
+ b2 = 0xF8; break;
+ default:
+ return;
+ }
+ EMIT2(0xD3, add_1reg(b2, dreg));
+
+ if (dstk)
+ /* mov dword ptr [ebp+off],dreg */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), STACK_VAR(dst));
+ *pprog = prog;
+}
+
+/*
+ * ALU operation (32 bit)
+ * dst = dst (op) src
+ */
+static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op,
+ const u8 dst, const u8 src, bool dstk,
+ bool sstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u8 sreg = sstk ? IA32_EAX : src;
+ u8 dreg = dstk ? IA32_EDX : dst;
+
+ if (sstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
+
+ if (dstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(dst));
+
+ switch (BPF_OP(op)) {
+ /* dst = dst + src */
+ case BPF_ADD:
+ if (hi && is64)
+ EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
+ else
+ EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
+ break;
+ /* dst = dst - src */
+ case BPF_SUB:
+ if (hi && is64)
+ EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
+ else
+ EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
+ break;
+ /* dst = dst | src */
+ case BPF_OR:
+ EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
+ break;
+ /* dst = dst & src */
+ case BPF_AND:
+ EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
+ break;
+ /* dst = dst ^ src */
+ case BPF_XOR:
+ EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
+ break;
+ }
+
+ if (dstk)
+ /* mov dword ptr [ebp+off],dreg */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
+ STACK_VAR(dst));
+ *pprog = prog;
+}
+
+/* ALU operation (64 bit) */
+static inline void emit_ia32_alu_r64(const bool is64, const u8 op,
+ const u8 dst[], const u8 src[],
+ bool dstk, bool sstk,
+ u8 **pprog)
+{
+ u8 *prog = *pprog;
+
+ emit_ia32_alu_r(is64, false, op, dst_lo, src_lo, dstk, sstk, &prog);
+ if (is64)
+ emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk,
+ &prog);
+ else
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ *pprog = prog;
+}
+
+/*
+ * ALU operation (32 bit)
+ * dst = dst (op) val
+ */
+static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op,
+ const u8 dst, const s32 val, bool dstk,
+ u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u8 dreg = dstk ? IA32_EAX : dst;
+ u8 sreg = IA32_EDX;
+
+ if (dstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
+
+ if (!is_imm8(val))
+ /* mov edx,imm32*/
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EDX), val);
+
+ switch (op) {
+ /* dst = dst + val */
+ case BPF_ADD:
+ if (hi && is64) {
+ if (is_imm8(val))
+ EMIT3(0x83, add_1reg(0xD0, dreg), val);
+ else
+ EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
+ } else {
+ if (is_imm8(val))
+ EMIT3(0x83, add_1reg(0xC0, dreg), val);
+ else
+ EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
+ }
+ break;
+ /* dst = dst - val */
+ case BPF_SUB:
+ if (hi && is64) {
+ if (is_imm8(val))
+ EMIT3(0x83, add_1reg(0xD8, dreg), val);
+ else
+ EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
+ } else {
+ if (is_imm8(val))
+ EMIT3(0x83, add_1reg(0xE8, dreg), val);
+ else
+ EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
+ }
+ break;
+ /* dst = dst | val */
+ case BPF_OR:
+ if (is_imm8(val))
+ EMIT3(0x83, add_1reg(0xC8, dreg), val);
+ else
+ EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
+ break;
+ /* dst = dst & val */
+ case BPF_AND:
+ if (is_imm8(val))
+ EMIT3(0x83, add_1reg(0xE0, dreg), val);
+ else
+ EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
+ break;
+ /* dst = dst ^ val */
+ case BPF_XOR:
+ if (is_imm8(val))
+ EMIT3(0x83, add_1reg(0xF0, dreg), val);
+ else
+ EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
+ break;
+ case BPF_NEG:
+ EMIT2(0xF7, add_1reg(0xD8, dreg));
+ break;
+ }
+
+ if (dstk)
+ /* mov dword ptr [ebp+off],dreg */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
+ STACK_VAR(dst));
+ *pprog = prog;
+}
+
+/* ALU operation (64 bit) */
+static inline void emit_ia32_alu_i64(const bool is64, const u8 op,
+ const u8 dst[], const u32 val,
+ bool dstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ u32 hi = 0;
+
+ if (is64 && (val & (1<<31)))
+ hi = (u32)~0;
+
+ emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog);
+ if (is64)
+ emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog);
+ else
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+
+ *pprog = prog;
+}
+
+/* dst = ~dst (64 bit) */
+static inline void emit_ia32_neg64(const u8 dst[], bool dstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+
+ /* xor ecx,ecx */
+ EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX));
+ /* sub dreg_lo,ecx */
+ EMIT2(0x2B, add_2reg(0xC0, dreg_lo, IA32_ECX));
+ /* mov dreg_lo,ecx */
+ EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX));
+
+ /* xor ecx,ecx */
+ EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX));
+ /* sbb dreg_hi,ecx */
+ EMIT2(0x19, add_2reg(0xC0, dreg_hi, IA32_ECX));
+ /* mov dreg_hi,ecx */
+ EMIT2(0x89, add_2reg(0xC0, dreg_hi, IA32_ECX));
+
+ if (dstk) {
+ /* mov dword ptr [ebp+off],dreg_lo */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+ STACK_VAR(dst_lo));
+ /* mov dword ptr [ebp+off],dreg_hi */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+ STACK_VAR(dst_hi));
+ }
+ *pprog = prog;
+}
+
+/* dst = dst << src */
+static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[],
+ bool dstk, bool sstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ static int jmp_label1 = -1;
+ static int jmp_label2 = -1;
+ static int jmp_label3 = -1;
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+
+ if (sstk)
+ /* mov ecx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
+ STACK_VAR(src_lo));
+ else
+ /* mov ecx,src_lo */
+ EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
+
+ /* cmp ecx,32 */
+ EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
+ /* Jumps when >= 32 */
+ if (is_imm8(jmp_label(jmp_label1, 2)))
+ EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
+ else
+ EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
+
+ /* < 32 */
+ /* shl dreg_hi,cl */
+ EMIT2(0xD3, add_1reg(0xE0, dreg_hi));
+ /* mov ebx,dreg_lo */
+ EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX));
+ /* shl dreg_lo,cl */
+ EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
+
+ /* IA32_ECX = -IA32_ECX + 32 */
+ /* neg ecx */
+ EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
+ /* add ecx,32 */
+ EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
+
+ /* shr ebx,cl */
+ EMIT2(0xD3, add_1reg(0xE8, IA32_EBX));
+ /* or dreg_hi,ebx */
+ EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX));
+
+ /* goto out; */
+ if (is_imm8(jmp_label(jmp_label3, 2)))
+ EMIT2(0xEB, jmp_label(jmp_label3, 2));
+ else
+ EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
+
+ /* >= 32 */
+ if (jmp_label1 == -1)
+ jmp_label1 = cnt;
+
+ /* cmp ecx,64 */
+ EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
+ /* Jumps when >= 64 */
+ if (is_imm8(jmp_label(jmp_label2, 2)))
+ EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
+ else
+ EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
+
+ /* >= 32 && < 64 */
+ /* sub ecx,32 */
+ EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
+ /* shl dreg_lo,cl */
+ EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
+ /* mov dreg_hi,dreg_lo */
+ EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
+
+ /* xor dreg_lo,dreg_lo */
+ EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
+
+ /* goto out; */
+ if (is_imm8(jmp_label(jmp_label3, 2)))
+ EMIT2(0xEB, jmp_label(jmp_label3, 2));
+ else
+ EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
+
+ /* >= 64 */
+ if (jmp_label2 == -1)
+ jmp_label2 = cnt;
+ /* xor dreg_lo,dreg_lo */
+ EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+
+ if (jmp_label3 == -1)
+ jmp_label3 = cnt;
+
+ if (dstk) {
+ /* mov dword ptr [ebp+off],dreg_lo */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+ STACK_VAR(dst_lo));
+ /* mov dword ptr [ebp+off],dreg_hi */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+ STACK_VAR(dst_hi));
+ }
+ /* out: */
+ *pprog = prog;
+}
+
+/* dst = dst >> src (signed)*/
+static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[],
+ bool dstk, bool sstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ static int jmp_label1 = -1;
+ static int jmp_label2 = -1;
+ static int jmp_label3 = -1;
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+
+ if (sstk)
+ /* mov ecx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
+ STACK_VAR(src_lo));
+ else
+ /* mov ecx,src_lo */
+ EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
+
+ /* cmp ecx,32 */
+ EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
+ /* Jumps when >= 32 */
+ if (is_imm8(jmp_label(jmp_label1, 2)))
+ EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
+ else
+ EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
+
+ /* < 32 */
+ /* lshr dreg_lo,cl */
+ EMIT2(0xD3, add_1reg(0xE8, dreg_lo));
+ /* mov ebx,dreg_hi */
+ EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
+ /* ashr dreg_hi,cl */
+ EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
+
+ /* IA32_ECX = -IA32_ECX + 32 */
+ /* neg ecx */
+ EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
+ /* add ecx,32 */
+ EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
+
+ /* shl ebx,cl */
+ EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
+ /* or dreg_lo,ebx */
+ EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
+
+ /* goto out; */
+ if (is_imm8(jmp_label(jmp_label3, 2)))
+ EMIT2(0xEB, jmp_label(jmp_label3, 2));
+ else
+ EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
+
+ /* >= 32 */
+ if (jmp_label1 == -1)
+ jmp_label1 = cnt;
+
+ /* cmp ecx,64 */
+ EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
+ /* Jumps when >= 64 */
+ if (is_imm8(jmp_label(jmp_label2, 2)))
+ EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
+ else
+ EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
+
+ /* >= 32 && < 64 */
+ /* sub ecx,32 */
+ EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
+ /* ashr dreg_hi,cl */
+ EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
+ /* mov dreg_lo,dreg_hi */
+ EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
+
+ /* ashr dreg_hi,imm8 */
+ EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
+
+ /* goto out; */
+ if (is_imm8(jmp_label(jmp_label3, 2)))
+ EMIT2(0xEB, jmp_label(jmp_label3, 2));
+ else
+ EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
+
+ /* >= 64 */
+ if (jmp_label2 == -1)
+ jmp_label2 = cnt;
+ /* ashr dreg_hi,imm8 */
+ EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
+ /* mov dreg_lo,dreg_hi */
+ EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
+
+ if (jmp_label3 == -1)
+ jmp_label3 = cnt;
+
+ if (dstk) {
+ /* mov dword ptr [ebp+off],dreg_lo */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+ STACK_VAR(dst_lo));
+ /* mov dword ptr [ebp+off],dreg_hi */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+ STACK_VAR(dst_hi));
+ }
+ /* out: */
+ *pprog = prog;
+}
+
+/* dst = dst >> src */
+static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk,
+ bool sstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ static int jmp_label1 = -1;
+ static int jmp_label2 = -1;
+ static int jmp_label3 = -1;
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+
+ if (sstk)
+ /* mov ecx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
+ STACK_VAR(src_lo));
+ else
+ /* mov ecx,src_lo */
+ EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
+
+ /* cmp ecx,32 */
+ EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
+ /* Jumps when >= 32 */
+ if (is_imm8(jmp_label(jmp_label1, 2)))
+ EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
+ else
+ EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
+
+ /* < 32 */
+ /* lshr dreg_lo,cl */
+ EMIT2(0xD3, add_1reg(0xE8, dreg_lo));
+ /* mov ebx,dreg_hi */
+ EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
+ /* shr dreg_hi,cl */
+ EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
+
+ /* IA32_ECX = -IA32_ECX + 32 */
+ /* neg ecx */
+ EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
+ /* add ecx,32 */
+ EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
+
+ /* shl ebx,cl */
+ EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
+ /* or dreg_lo,ebx */
+ EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
+
+ /* goto out; */
+ if (is_imm8(jmp_label(jmp_label3, 2)))
+ EMIT2(0xEB, jmp_label(jmp_label3, 2));
+ else
+ EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
+
+ /* >= 32 */
+ if (jmp_label1 == -1)
+ jmp_label1 = cnt;
+ /* cmp ecx,64 */
+ EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
+ /* Jumps when >= 64 */
+ if (is_imm8(jmp_label(jmp_label2, 2)))
+ EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
+ else
+ EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
+
+ /* >= 32 && < 64 */
+ /* sub ecx,32 */
+ EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
+ /* shr dreg_hi,cl */
+ EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
+ /* mov dreg_lo,dreg_hi */
+ EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+
+ /* goto out; */
+ if (is_imm8(jmp_label(jmp_label3, 2)))
+ EMIT2(0xEB, jmp_label(jmp_label3, 2));
+ else
+ EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
+
+ /* >= 64 */
+ if (jmp_label2 == -1)
+ jmp_label2 = cnt;
+ /* xor dreg_lo,dreg_lo */
+ EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+
+ if (jmp_label3 == -1)
+ jmp_label3 = cnt;
+
+ if (dstk) {
+ /* mov dword ptr [ebp+off],dreg_lo */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+ STACK_VAR(dst_lo));
+ /* mov dword ptr [ebp+off],dreg_hi */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+ STACK_VAR(dst_hi));
+ }
+ /* out: */
+ *pprog = prog;
+}
+
+/* dst = dst << val */
+static inline void emit_ia32_lsh_i64(const u8 dst[], const u32 val,
+ bool dstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+ /* Do LSH operation */
+ if (val < 32) {
+ /* shl dreg_hi,imm8 */
+ EMIT3(0xC1, add_1reg(0xE0, dreg_hi), val);
+ /* mov ebx,dreg_lo */
+ EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX));
+ /* shl dreg_lo,imm8 */
+ EMIT3(0xC1, add_1reg(0xE0, dreg_lo), val);
+
+ /* IA32_ECX = 32 - val */
+ /* mov ecx,val */
+ EMIT2(0xB1, val);
+ /* movzx ecx,ecx */
+ EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
+ /* neg ecx */
+ EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
+ /* add ecx,32 */
+ EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
+
+ /* shr ebx,cl */
+ EMIT2(0xD3, add_1reg(0xE8, IA32_EBX));
+ /* or dreg_hi,ebx */
+ EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX));
+ } else if (val >= 32 && val < 64) {
+ u32 value = val - 32;
+
+ /* shl dreg_lo,imm8 */
+ EMIT3(0xC1, add_1reg(0xE0, dreg_lo), value);
+ /* mov dreg_hi,dreg_lo */
+ EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
+ /* xor dreg_lo,dreg_lo */
+ EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
+ } else {
+ /* xor dreg_lo,dreg_lo */
+ EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+ }
+
+ if (dstk) {
+ /* mov dword ptr [ebp+off],dreg_lo */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+ STACK_VAR(dst_lo));
+ /* mov dword ptr [ebp+off],dreg_hi */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+ STACK_VAR(dst_hi));
+ }
+ *pprog = prog;
+}
+
+/* dst = dst >> val */
+static inline void emit_ia32_rsh_i64(const u8 dst[], const u32 val,
+ bool dstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+
+ /* Do RSH operation */
+ if (val < 32) {
+ /* shr dreg_lo,imm8 */
+ EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val);
+ /* mov ebx,dreg_hi */
+ EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
+ /* shr dreg_hi,imm8 */
+ EMIT3(0xC1, add_1reg(0xE8, dreg_hi), val);
+
+ /* IA32_ECX = 32 - val */
+ /* mov ecx,val */
+ EMIT2(0xB1, val);
+ /* movzx ecx,ecx */
+ EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
+ /* neg ecx */
+ EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
+ /* add ecx,32 */
+ EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
+
+ /* shl ebx,cl */
+ EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
+ /* or dreg_lo,ebx */
+ EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
+ } else if (val >= 32 && val < 64) {
+ u32 value = val - 32;
+
+ /* shr dreg_hi,imm8 */
+ EMIT3(0xC1, add_1reg(0xE8, dreg_hi), value);
+ /* mov dreg_lo,dreg_hi */
+ EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+ } else {
+ /* xor dreg_lo,dreg_lo */
+ EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+ }
+
+ if (dstk) {
+ /* mov dword ptr [ebp+off],dreg_lo */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+ STACK_VAR(dst_lo));
+ /* mov dword ptr [ebp+off],dreg_hi */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+ STACK_VAR(dst_hi));
+ }
+ *pprog = prog;
+}
+
+/* dst = dst >> val (signed) */
+static inline void emit_ia32_arsh_i64(const u8 dst[], const u32 val,
+ bool dstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+ /* Do RSH operation */
+ if (val < 32) {
+ /* shr dreg_lo,imm8 */
+ EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val);
+ /* mov ebx,dreg_hi */
+ EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
+ /* ashr dreg_hi,imm8 */
+ EMIT3(0xC1, add_1reg(0xF8, dreg_hi), val);
+
+ /* IA32_ECX = 32 - val */
+ /* mov ecx,val */
+ EMIT2(0xB1, val);
+ /* movzx ecx,ecx */
+ EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
+ /* neg ecx */
+ EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
+ /* add ecx,32 */
+ EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
+
+ /* shl ebx,cl */
+ EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
+ /* or dreg_lo,ebx */
+ EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
+ } else if (val >= 32 && val < 64) {
+ u32 value = val - 32;
+
+ /* ashr dreg_hi,imm8 */
+ EMIT3(0xC1, add_1reg(0xF8, dreg_hi), value);
+ /* mov dreg_lo,dreg_hi */
+ EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
+
+ /* ashr dreg_hi,imm8 */
+ EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
+ } else {
+ /* ashr dreg_hi,imm8 */
+ EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
+ /* mov dreg_lo,dreg_hi */
+ EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
+ }
+
+ if (dstk) {
+ /* mov dword ptr [ebp+off],dreg_lo */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+ STACK_VAR(dst_lo));
+ /* mov dword ptr [ebp+off],dreg_hi */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+ STACK_VAR(dst_hi));
+ }
+ *pprog = prog;
+}
+
+static inline void emit_ia32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
+ bool sstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (dstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_hi));
+ else
+ /* mov eax,dst_hi */
+ EMIT2(0x8B, add_2reg(0xC0, dst_hi, IA32_EAX));
+
+ if (sstk)
+ /* mul dword ptr [ebp+off] */
+ EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
+ else
+ /* mul src_lo */
+ EMIT2(0xF7, add_1reg(0xE0, src_lo));
+
+ /* mov ecx,eax */
+ EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));
+
+ if (dstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ else
+ /* mov eax,dst_lo */
+ EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
+
+ if (sstk)
+ /* mul dword ptr [ebp+off] */
+ EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_hi));
+ else
+ /* mul src_hi */
+ EMIT2(0xF7, add_1reg(0xE0, src_hi));
+
+ /* add eax,eax */
+ EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));
+
+ if (dstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ else
+ /* mov eax,dst_lo */
+ EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
+
+ if (sstk)
+ /* mul dword ptr [ebp+off] */
+ EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
+ else
+ /* mul src_lo */
+ EMIT2(0xF7, add_1reg(0xE0, src_lo));
+
+ /* add ecx,edx */
+ EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));
+
+ if (dstk) {
+ /* mov dword ptr [ebp+off],eax */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ /* mov dword ptr [ebp+off],ecx */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
+ STACK_VAR(dst_hi));
+ } else {
+ /* mov dst_lo,eax */
+ EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
+ /* mov dst_hi,ecx */
+ EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
+ }
+
+ *pprog = prog;
+}
+
+static inline void emit_ia32_mul_i64(const u8 dst[], const u32 val,
+ bool dstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u32 hi;
+
+ hi = val & (1<<31) ? (u32)~0 : 0;
+ /* movl eax,imm32 */
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
+ if (dstk)
+ /* mul dword ptr [ebp+off] */
+ EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_hi));
+ else
+ /* mul dst_hi */
+ EMIT2(0xF7, add_1reg(0xE0, dst_hi));
+
+ /* mov ecx,eax */
+ EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));
+
+ /* movl eax,imm32 */
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), hi);
+ if (dstk)
+ /* mul dword ptr [ebp+off] */
+ EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
+ else
+ /* mul dst_lo */
+ EMIT2(0xF7, add_1reg(0xE0, dst_lo));
+ /* add ecx,eax */
+ EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));
+
+ /* movl eax,imm32 */
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
+ if (dstk)
+ /* mul dword ptr [ebp+off] */
+ EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
+ else
+ /* mul dst_lo */
+ EMIT2(0xF7, add_1reg(0xE0, dst_lo));
+
+ /* add ecx,edx */
+ EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));
+
+ if (dstk) {
+ /* mov dword ptr [ebp+off],eax */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ /* mov dword ptr [ebp+off],ecx */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
+ STACK_VAR(dst_hi));
+ } else {
+ /* mov dword ptr [ebp+off],eax */
+ EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
+ /* mov dword ptr [ebp+off],ecx */
+ EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
+ }
+
+ *pprog = prog;
+}
+
+static int bpf_size_to_x86_bytes(int bpf_size)
+{
+ if (bpf_size == BPF_W)
+ return 4;
+ else if (bpf_size == BPF_H)
+ return 2;
+ else if (bpf_size == BPF_B)
+ return 1;
+ else if (bpf_size == BPF_DW)
+ return 4; /* imm32 */
+ else
+ return 0;
+}
+
+struct jit_context {
+ int cleanup_addr; /* Epilogue code offset */
+};
+
+/* Maximum number of bytes emitted while JITing one eBPF insn */
+#define BPF_MAX_INSN_SIZE 128
+#define BPF_INSN_SAFETY 64
+
+#define PROLOGUE_SIZE 35
+
+/*
+ * Emit prologue code for BPF program and check it's size.
+ * bpf_tail_call helper will skip it while jumping into another program.
+ */
+static void emit_prologue(u8 **pprog, u32 stack_depth)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ const u8 *r1 = bpf2ia32[BPF_REG_1];
+ const u8 fplo = bpf2ia32[BPF_REG_FP][0];
+ const u8 fphi = bpf2ia32[BPF_REG_FP][1];
+ const u8 *tcc = bpf2ia32[TCALL_CNT];
+
+ /* push ebp */
+ EMIT1(0x55);
+ /* mov ebp,esp */
+ EMIT2(0x89, 0xE5);
+ /* push edi */
+ EMIT1(0x57);
+ /* push esi */
+ EMIT1(0x56);
+ /* push ebx */
+ EMIT1(0x53);
+
+ /* sub esp,STACK_SIZE */
+ EMIT2_off32(0x81, 0xEC, STACK_SIZE);
+ /* sub ebp,SCRATCH_SIZE+4+12*/
+ EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 16);
+ /* xor ebx,ebx */
+ EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX));
+
+ /* Set up BPF prog stack base register */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBP), STACK_VAR(fplo));
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(fphi));
+
+ /* Move BPF_CTX (EAX) to BPF_REG_R1 */
+ /* mov dword ptr [ebp+off],eax */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(r1[1]));
+
+ /* Initialize Tail Count */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[0]));
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
+
+ BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
+ *pprog = prog;
+}
+
+/* Emit epilogue code for BPF program */
+static void emit_epilogue(u8 **pprog, u32 stack_depth)
+{
+ u8 *prog = *pprog;
+ const u8 *r0 = bpf2ia32[BPF_REG_0];
+ int cnt = 0;
+
+ /* mov eax,dword ptr [ebp+off]*/
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r0[0]));
+ /* mov edx,dword ptr [ebp+off]*/
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1]));
+
+ /* add ebp,SCRATCH_SIZE+4+12*/
+ EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 16);
+
+ /* mov ebx,dword ptr [ebp-12]*/
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12);
+ /* mov esi,dword ptr [ebp-8]*/
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ESI), -8);
+ /* mov edi,dword ptr [ebp-4]*/
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDI), -4);
+
+ EMIT1(0xC9); /* leave */
+ EMIT1(0xC3); /* ret */
+ *pprog = prog;
+}
+
+/*
+ * Generate the following code:
+ * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
+ * if (index >= array->map.max_entries)
+ * goto out;
+ * if (++tail_call_cnt > MAX_TAIL_CALL_CNT)
+ * goto out;
+ * prog = array->ptrs[index];
+ * if (prog == NULL)
+ * goto out;
+ * goto *(prog->bpf_func + prologue_size);
+ * out:
+ */
+static void emit_bpf_tail_call(u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ const u8 *r1 = bpf2ia32[BPF_REG_1];
+ const u8 *r2 = bpf2ia32[BPF_REG_2];
+ const u8 *r3 = bpf2ia32[BPF_REG_3];
+ const u8 *tcc = bpf2ia32[TCALL_CNT];
+ u32 lo, hi;
+ static int jmp_label1 = -1;
+
+ /*
+ * if (index >= array->map.max_entries)
+ * goto out;
+ */
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r2[0]));
+ /* mov edx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r3[0]));
+
+ /* cmp dword ptr [eax+off],edx */
+ EMIT3(0x39, add_2reg(0x40, IA32_EAX, IA32_EDX),
+ offsetof(struct bpf_array, map.max_entries));
+ /* jbe out */
+ EMIT2(IA32_JBE, jmp_label(jmp_label1, 2));
+
+ /*
+ * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+ * goto out;
+ */
+ lo = (u32)MAX_TAIL_CALL_CNT;
+ hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
+
+ /* cmp edx,hi */
+ EMIT3(0x83, add_1reg(0xF8, IA32_EBX), hi);
+ EMIT2(IA32_JNE, 3);
+ /* cmp ecx,lo */
+ EMIT3(0x83, add_1reg(0xF8, IA32_ECX), lo);
+
+ /* ja out */
+ EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
+
+ /* add eax,0x1 */
+ EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 0x01);
+ /* adc ebx,0x0 */
+ EMIT3(0x83, add_1reg(0xD0, IA32_EBX), 0x00);
+
+ /* mov dword ptr [ebp+off],eax */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
+ /* mov dword ptr [ebp+off],edx */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
+
+ /* prog = array->ptrs[index]; */
+ /* mov edx, [eax + edx * 4 + offsetof(...)] */
+ EMIT3_off32(0x8B, 0x94, 0x90, offsetof(struct bpf_array, ptrs));
+
+ /*
+ * if (prog == NULL)
+ * goto out;
+ */
+ /* test edx,edx */
+ EMIT2(0x85, add_2reg(0xC0, IA32_EDX, IA32_EDX));
+ /* je out */
+ EMIT2(IA32_JE, jmp_label(jmp_label1, 2));
+
+ /* goto *(prog->bpf_func + prologue_size); */
+ /* mov edx, dword ptr [edx + 32] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EDX, IA32_EDX),
+ offsetof(struct bpf_prog, bpf_func));
+ /* add edx,prologue_size */
+ EMIT3(0x83, add_1reg(0xC0, IA32_EDX), PROLOGUE_SIZE);
+
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
+
+ /*
+ * Now we're ready to jump into next BPF program:
+ * eax == ctx (1st arg)
+ * edx == prog->bpf_func + prologue_size
+ */
+ RETPOLINE_EDX_BPF_JIT();
+
+ if (jmp_label1 == -1)
+ jmp_label1 = cnt;
+
+ /* out: */
+ *pprog = prog;
+}
+
+/* Push the scratch stack register on top of the stack. */
+static inline void emit_push_r64(const u8 src[], u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ /* mov ecx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_hi));
+ /* push ecx */
+ EMIT1(0x51);
+
+ /* mov ecx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo));
+ /* push ecx */
+ EMIT1(0x51);
+
+ *pprog = prog;
+}
+
+static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
+ int oldproglen, struct jit_context *ctx)
+{
+ struct bpf_insn *insn = bpf_prog->insnsi;
+ int insn_cnt = bpf_prog->len;
+ bool seen_exit = false;
+ u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
+ int i, cnt = 0;
+ int proglen = 0;
+ u8 *prog = temp;
+
+ emit_prologue(&prog, bpf_prog->aux->stack_depth);
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ const s32 imm32 = insn->imm;
+ const bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+ const bool dstk = insn->dst_reg == BPF_REG_AX ? false : true;
+ const bool sstk = insn->src_reg == BPF_REG_AX ? false : true;
+ const u8 code = insn->code;
+ const u8 *dst = bpf2ia32[insn->dst_reg];
+ const u8 *src = bpf2ia32[insn->src_reg];
+ const u8 *r0 = bpf2ia32[BPF_REG_0];
+ s64 jmp_offset;
+ u8 jmp_cond;
+ int ilen;
+ u8 *func;
+
+ switch (code) {
+ /* ALU operations */
+ /* dst = src */
+ case BPF_ALU | BPF_MOV | BPF_K:
+ case BPF_ALU | BPF_MOV | BPF_X:
+ case BPF_ALU64 | BPF_MOV | BPF_K:
+ case BPF_ALU64 | BPF_MOV | BPF_X:
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ emit_ia32_mov_r64(is64, dst, src, dstk,
+ sstk, &prog);
+ break;
+ case BPF_K:
+ /* Sign-extend immediate value to dst reg */
+ emit_ia32_mov_i64(is64, dst, imm32,
+ dstk, &prog);
+ break;
+ }
+ break;
+ /* dst = dst + src/imm */
+ /* dst = dst - src/imm */
+ /* dst = dst | src/imm */
+ /* dst = dst & src/imm */
+ /* dst = dst ^ src/imm */
+ /* dst = dst * src/imm */
+ /* dst = dst << src */
+ /* dst = dst >> src */
+ case BPF_ALU | BPF_ADD | BPF_K:
+ case BPF_ALU | BPF_ADD | BPF_X:
+ case BPF_ALU | BPF_SUB | BPF_K:
+ case BPF_ALU | BPF_SUB | BPF_X:
+ case BPF_ALU | BPF_OR | BPF_K:
+ case BPF_ALU | BPF_OR | BPF_X:
+ case BPF_ALU | BPF_AND | BPF_K:
+ case BPF_ALU | BPF_AND | BPF_X:
+ case BPF_ALU | BPF_XOR | BPF_K:
+ case BPF_ALU | BPF_XOR | BPF_X:
+ case BPF_ALU64 | BPF_ADD | BPF_K:
+ case BPF_ALU64 | BPF_ADD | BPF_X:
+ case BPF_ALU64 | BPF_SUB | BPF_K:
+ case BPF_ALU64 | BPF_SUB | BPF_X:
+ case BPF_ALU64 | BPF_OR | BPF_K:
+ case BPF_ALU64 | BPF_OR | BPF_X:
+ case BPF_ALU64 | BPF_AND | BPF_K:
+ case BPF_ALU64 | BPF_AND | BPF_X:
+ case BPF_ALU64 | BPF_XOR | BPF_K:
+ case BPF_ALU64 | BPF_XOR | BPF_X:
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ emit_ia32_alu_r64(is64, BPF_OP(code), dst,
+ src, dstk, sstk, &prog);
+ break;
+ case BPF_K:
+ emit_ia32_alu_i64(is64, BPF_OP(code), dst,
+ imm32, dstk, &prog);
+ break;
+ }
+ break;
+ case BPF_ALU | BPF_MUL | BPF_K:
+ case BPF_ALU | BPF_MUL | BPF_X:
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ emit_ia32_mul_r(dst_lo, src_lo, dstk,
+ sstk, &prog);
+ break;
+ case BPF_K:
+ /* mov ecx,imm32*/
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
+ imm32);
+ emit_ia32_mul_r(dst_lo, IA32_ECX, dstk,
+ false, &prog);
+ break;
+ }
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ break;
+ case BPF_ALU | BPF_LSH | BPF_X:
+ case BPF_ALU | BPF_RSH | BPF_X:
+ case BPF_ALU | BPF_ARSH | BPF_K:
+ case BPF_ALU | BPF_ARSH | BPF_X:
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ emit_ia32_shift_r(BPF_OP(code), dst_lo, src_lo,
+ dstk, sstk, &prog);
+ break;
+ case BPF_K:
+ /* mov ecx,imm32*/
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
+ imm32);
+ emit_ia32_shift_r(BPF_OP(code), dst_lo,
+ IA32_ECX, dstk, false,
+ &prog);
+ break;
+ }
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ break;
+ /* dst = dst / src(imm) */
+ /* dst = dst % src(imm) */
+ case BPF_ALU | BPF_DIV | BPF_K:
+ case BPF_ALU | BPF_DIV | BPF_X:
+ case BPF_ALU | BPF_MOD | BPF_K:
+ case BPF_ALU | BPF_MOD | BPF_X:
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
+ src_lo, dstk, sstk, &prog);
+ break;
+ case BPF_K:
+ /* mov ecx,imm32*/
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
+ imm32);
+ emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
+ IA32_ECX, dstk, false,
+ &prog);
+ break;
+ }
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ break;
+ case BPF_ALU64 | BPF_DIV | BPF_K:
+ case BPF_ALU64 | BPF_DIV | BPF_X:
+ case BPF_ALU64 | BPF_MOD | BPF_K:
+ case BPF_ALU64 | BPF_MOD | BPF_X:
+ goto notyet;
+ /* dst = dst >> imm */
+ /* dst = dst << imm */
+ case BPF_ALU | BPF_RSH | BPF_K:
+ case BPF_ALU | BPF_LSH | BPF_K:
+ if (unlikely(imm32 > 31))
+ return -EINVAL;
+ /* mov ecx,imm32*/
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
+ emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk,
+ false, &prog);
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ break;
+ /* dst = dst << imm */
+ case BPF_ALU64 | BPF_LSH | BPF_K:
+ if (unlikely(imm32 > 63))
+ return -EINVAL;
+ emit_ia32_lsh_i64(dst, imm32, dstk, &prog);
+ break;
+ /* dst = dst >> imm */
+ case BPF_ALU64 | BPF_RSH | BPF_K:
+ if (unlikely(imm32 > 63))
+ return -EINVAL;
+ emit_ia32_rsh_i64(dst, imm32, dstk, &prog);
+ break;
+ /* dst = dst << src */
+ case BPF_ALU64 | BPF_LSH | BPF_X:
+ emit_ia32_lsh_r64(dst, src, dstk, sstk, &prog);
+ break;
+ /* dst = dst >> src */
+ case BPF_ALU64 | BPF_RSH | BPF_X:
+ emit_ia32_rsh_r64(dst, src, dstk, sstk, &prog);
+ break;
+ /* dst = dst >> src (signed) */
+ case BPF_ALU64 | BPF_ARSH | BPF_X:
+ emit_ia32_arsh_r64(dst, src, dstk, sstk, &prog);
+ break;
+ /* dst = dst >> imm (signed) */
+ case BPF_ALU64 | BPF_ARSH | BPF_K:
+ if (unlikely(imm32 > 63))
+ return -EINVAL;
+ emit_ia32_arsh_i64(dst, imm32, dstk, &prog);
+ break;
+ /* dst = ~dst */
+ case BPF_ALU | BPF_NEG:
+ emit_ia32_alu_i(is64, false, BPF_OP(code),
+ dst_lo, 0, dstk, &prog);
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ break;
+ /* dst = ~dst (64 bit) */
+ case BPF_ALU64 | BPF_NEG:
+ emit_ia32_neg64(dst, dstk, &prog);
+ break;
+ /* dst = dst * src/imm */
+ case BPF_ALU64 | BPF_MUL | BPF_X:
+ case BPF_ALU64 | BPF_MUL | BPF_K:
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ emit_ia32_mul_r64(dst, src, dstk, sstk, &prog);
+ break;
+ case BPF_K:
+ emit_ia32_mul_i64(dst, imm32, dstk, &prog);
+ break;
+ }
+ break;
+ /* dst = htole(dst) */
+ case BPF_ALU | BPF_END | BPF_FROM_LE:
+ emit_ia32_to_le_r64(dst, imm32, dstk, &prog);
+ break;
+ /* dst = htobe(dst) */
+ case BPF_ALU | BPF_END | BPF_FROM_BE:
+ emit_ia32_to_be_r64(dst, imm32, dstk, &prog);
+ break;
+ /* dst = imm64 */
+ case BPF_LD | BPF_IMM | BPF_DW: {
+ s32 hi, lo = imm32;
+
+ hi = insn[1].imm;
+ emit_ia32_mov_i(dst_lo, lo, dstk, &prog);
+ emit_ia32_mov_i(dst_hi, hi, dstk, &prog);
+ insn++;
+ i++;
+ break;
+ }
+ /* ST: *(u8*)(dst_reg + off) = imm */
+ case BPF_ST | BPF_MEM | BPF_H:
+ case BPF_ST | BPF_MEM | BPF_B:
+ case BPF_ST | BPF_MEM | BPF_W:
+ case BPF_ST | BPF_MEM | BPF_DW:
+ if (dstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ else
+ /* mov eax,dst_lo */
+ EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
+
+ switch (BPF_SIZE(code)) {
+ case BPF_B:
+ EMIT(0xC6, 1); break;
+ case BPF_H:
+ EMIT2(0x66, 0xC7); break;
+ case BPF_W:
+ case BPF_DW:
+ EMIT(0xC7, 1); break;
+ }
+
+ if (is_imm8(insn->off))
+ EMIT2(add_1reg(0x40, IA32_EAX), insn->off);
+ else
+ EMIT1_off32(add_1reg(0x80, IA32_EAX),
+ insn->off);
+ EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(code)));
+
+ if (BPF_SIZE(code) == BPF_DW) {
+ u32 hi;
+
+ hi = imm32 & (1<<31) ? (u32)~0 : 0;
+ EMIT2_off32(0xC7, add_1reg(0x80, IA32_EAX),
+ insn->off + 4);
+ EMIT(hi, 4);
+ }
+ break;
+
+ /* STX: *(u8*)(dst_reg + off) = src_reg */
+ case BPF_STX | BPF_MEM | BPF_B:
+ case BPF_STX | BPF_MEM | BPF_H:
+ case BPF_STX | BPF_MEM | BPF_W:
+ case BPF_STX | BPF_MEM | BPF_DW:
+ if (dstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ else
+ /* mov eax,dst_lo */
+ EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
+
+ if (sstk)
+ /* mov edx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(src_lo));
+ else
+ /* mov edx,src_lo */
+ EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EDX));
+
+ switch (BPF_SIZE(code)) {
+ case BPF_B:
+ EMIT(0x88, 1); break;
+ case BPF_H:
+ EMIT2(0x66, 0x89); break;
+ case BPF_W:
+ case BPF_DW:
+ EMIT(0x89, 1); break;
+ }
+
+ if (is_imm8(insn->off))
+ EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
+ insn->off);
+ else
+ EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
+ insn->off);
+
+ if (BPF_SIZE(code) == BPF_DW) {
+ if (sstk)
+ /* mov edi,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP,
+ IA32_EDX),
+ STACK_VAR(src_hi));
+ else
+ /* mov edi,src_hi */
+ EMIT2(0x8B, add_2reg(0xC0, src_hi,
+ IA32_EDX));
+ EMIT1(0x89);
+ if (is_imm8(insn->off + 4)) {
+ EMIT2(add_2reg(0x40, IA32_EAX,
+ IA32_EDX),
+ insn->off + 4);
+ } else {
+ EMIT1(add_2reg(0x80, IA32_EAX,
+ IA32_EDX));
+ EMIT(insn->off + 4, 4);
+ }
+ }
+ break;
+
+ /* LDX: dst_reg = *(u8*)(src_reg + off) */
+ case BPF_LDX | BPF_MEM | BPF_B:
+ case BPF_LDX | BPF_MEM | BPF_H:
+ case BPF_LDX | BPF_MEM | BPF_W:
+ case BPF_LDX | BPF_MEM | BPF_DW:
+ if (sstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(src_lo));
+ else
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EAX));
+
+ switch (BPF_SIZE(code)) {
+ case BPF_B:
+ EMIT2(0x0F, 0xB6); break;
+ case BPF_H:
+ EMIT2(0x0F, 0xB7); break;
+ case BPF_W:
+ case BPF_DW:
+ EMIT(0x8B, 1); break;
+ }
+
+ if (is_imm8(insn->off))
+ EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
+ insn->off);
+ else
+ EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
+ insn->off);
+
+ if (dstk)
+ /* mov dword ptr [ebp+off],edx */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_lo));
+ else
+ /* mov dst_lo,edx */
+ EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EDX));
+ switch (BPF_SIZE(code)) {
+ case BPF_B:
+ case BPF_H:
+ case BPF_W:
+ if (dstk) {
+ EMIT3(0xC7, add_1reg(0x40, IA32_EBP),
+ STACK_VAR(dst_hi));
+ EMIT(0x0, 4);
+ } else {
+ EMIT3(0xC7, add_1reg(0xC0, dst_hi), 0);
+ }
+ break;
+ case BPF_DW:
+ EMIT2_off32(0x8B,
+ add_2reg(0x80, IA32_EAX, IA32_EDX),
+ insn->off + 4);
+ if (dstk)
+ EMIT3(0x89,
+ add_2reg(0x40, IA32_EBP,
+ IA32_EDX),
+ STACK_VAR(dst_hi));
+ else
+ EMIT2(0x89,
+ add_2reg(0xC0, dst_hi, IA32_EDX));
+ break;
+ default:
+ break;
+ }
+ break;
+ /* call */
+ case BPF_JMP | BPF_CALL:
+ {
+ const u8 *r1 = bpf2ia32[BPF_REG_1];
+ const u8 *r2 = bpf2ia32[BPF_REG_2];
+ const u8 *r3 = bpf2ia32[BPF_REG_3];
+ const u8 *r4 = bpf2ia32[BPF_REG_4];
+ const u8 *r5 = bpf2ia32[BPF_REG_5];
+
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+ goto notyet;
+
+ func = (u8 *) __bpf_call_base + imm32;
+ jmp_offset = func - (image + addrs[i]);
+
+ if (!imm32 || !is_simm32(jmp_offset)) {
+ pr_err("unsupported BPF func %d addr %p image %p\n",
+ imm32, func, image);
+ return -EINVAL;
+ }
+
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(r1[0]));
+ /* mov edx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(r1[1]));
+
+ emit_push_r64(r5, &prog);
+ emit_push_r64(r4, &prog);
+ emit_push_r64(r3, &prog);
+ emit_push_r64(r2, &prog);
+
+ EMIT1_off32(0xE8, jmp_offset + 9);
+
+ /* mov dword ptr [ebp+off],eax */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(r0[0]));
+ /* mov dword ptr [ebp+off],edx */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(r0[1]));
+
+ /* add esp,32 */
+ EMIT3(0x83, add_1reg(0xC0, IA32_ESP), 32);
+ break;
+ }
+ case BPF_JMP | BPF_TAIL_CALL:
+ emit_bpf_tail_call(&prog);
+ break;
+
+ /* cond jump */
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP | BPF_JNE | BPF_X:
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JLT | BPF_X:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JLE | BPF_X:
+ case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_X:
+ case BPF_JMP | BPF_JSGE | BPF_X: {
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+ u8 sreg_lo = sstk ? IA32_ECX : src_lo;
+ u8 sreg_hi = sstk ? IA32_EBX : src_hi;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+
+ if (sstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
+ STACK_VAR(src_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX),
+ STACK_VAR(src_hi));
+ }
+
+ /* cmp dreg_hi,sreg_hi */
+ EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
+ EMIT2(IA32_JNE, 2);
+ /* cmp dreg_lo,sreg_lo */
+ EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
+ goto emit_cond_jmp;
+ }
+ case BPF_JMP | BPF_JSET | BPF_X: {
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+ u8 sreg_lo = sstk ? IA32_ECX : src_lo;
+ u8 sreg_hi = sstk ? IA32_EBX : src_hi;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+
+ if (sstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
+ STACK_VAR(src_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX),
+ STACK_VAR(src_hi));
+ }
+ /* and dreg_lo,sreg_lo */
+ EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
+ /* and dreg_hi,sreg_hi */
+ EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
+ /* or dreg_lo,dreg_hi */
+ EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
+ goto emit_cond_jmp;
+ }
+ case BPF_JMP | BPF_JSET | BPF_K: {
+ u32 hi;
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+ u8 sreg_lo = IA32_ECX;
+ u8 sreg_hi = IA32_EBX;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+ hi = imm32 & (1<<31) ? (u32)~0 : 0;
+
+ /* mov ecx,imm32 */
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
+ /* mov ebx,imm32 */
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
+
+ /* and dreg_lo,sreg_lo */
+ EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
+ /* and dreg_hi,sreg_hi */
+ EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
+ /* or dreg_lo,dreg_hi */
+ EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
+ goto emit_cond_jmp;
+ }
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JNE | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_K:
+ case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_K:
+ case BPF_JMP | BPF_JSGE | BPF_K: {
+ u32 hi;
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+ u8 sreg_lo = IA32_ECX;
+ u8 sreg_hi = IA32_EBX;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+
+ hi = imm32 & (1<<31) ? (u32)~0 : 0;
+ /* mov ecx,imm32 */
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
+ /* mov ebx,imm32 */
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
+
+ /* cmp dreg_hi,sreg_hi */
+ EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
+ EMIT2(IA32_JNE, 2);
+ /* cmp dreg_lo,sreg_lo */
+ EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
+
+emit_cond_jmp: /* Convert BPF opcode to x86 */
+ switch (BPF_OP(code)) {
+ case BPF_JEQ:
+ jmp_cond = IA32_JE;
+ break;
+ case BPF_JSET:
+ case BPF_JNE:
+ jmp_cond = IA32_JNE;
+ break;
+ case BPF_JGT:
+ /* GT is unsigned '>', JA in x86 */
+ jmp_cond = IA32_JA;
+ break;
+ case BPF_JLT:
+ /* LT is unsigned '<', JB in x86 */
+ jmp_cond = IA32_JB;
+ break;
+ case BPF_JGE:
+ /* GE is unsigned '>=', JAE in x86 */
+ jmp_cond = IA32_JAE;
+ break;
+ case BPF_JLE:
+ /* LE is unsigned '<=', JBE in x86 */
+ jmp_cond = IA32_JBE;
+ break;
+ case BPF_JSGT:
+ /* Signed '>', GT in x86 */
+ jmp_cond = IA32_JG;
+ break;
+ case BPF_JSLT:
+ /* Signed '<', LT in x86 */
+ jmp_cond = IA32_JL;
+ break;
+ case BPF_JSGE:
+ /* Signed '>=', GE in x86 */
+ jmp_cond = IA32_JGE;
+ break;
+ case BPF_JSLE:
+ /* Signed '<=', LE in x86 */
+ jmp_cond = IA32_JLE;
+ break;
+ default: /* to silence GCC warning */
+ return -EFAULT;
+ }
+ jmp_offset = addrs[i + insn->off] - addrs[i];
+ if (is_imm8(jmp_offset)) {
+ EMIT2(jmp_cond, jmp_offset);
+ } else if (is_simm32(jmp_offset)) {
+ EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
+ } else {
+ pr_err("cond_jmp gen bug %llx\n", jmp_offset);
+ return -EFAULT;
+ }
+
+ break;
+ }
+ case BPF_JMP | BPF_JA:
+ if (insn->off == -1)
+ /* -1 jmp instructions will always jump
+ * backwards two bytes. Explicitly handling
+ * this case avoids wasting too many passes
+ * when there are long sequences of replaced
+ * dead code.
+ */
+ jmp_offset = -2;
+ else
+ jmp_offset = addrs[i + insn->off] - addrs[i];
+
+ if (!jmp_offset)
+ /* Optimize out nop jumps */
+ break;
+emit_jmp:
+ if (is_imm8(jmp_offset)) {
+ EMIT2(0xEB, jmp_offset);
+ } else if (is_simm32(jmp_offset)) {
+ EMIT1_off32(0xE9, jmp_offset);
+ } else {
+ pr_err("jmp gen bug %llx\n", jmp_offset);
+ return -EFAULT;
+ }
+ break;
+ /* STX XADD: lock *(u32 *)(dst + off) += src */
+ case BPF_STX | BPF_XADD | BPF_W:
+ /* STX XADD: lock *(u64 *)(dst + off) += src */
+ case BPF_STX | BPF_XADD | BPF_DW:
+ goto notyet;
+ case BPF_JMP | BPF_EXIT:
+ if (seen_exit) {
+ jmp_offset = ctx->cleanup_addr - addrs[i];
+ goto emit_jmp;
+ }
+ seen_exit = true;
+ /* Update cleanup_addr */
+ ctx->cleanup_addr = proglen;
+ emit_epilogue(&prog, bpf_prog->aux->stack_depth);
+ break;
+notyet:
+ pr_info_once("*** NOT YET: opcode %02x ***\n", code);
+ return -EFAULT;
+ default:
+ /*
+ * This error will be seen if new instruction was added
+ * to interpreter, but not to JIT or if there is junk in
+ * bpf_prog
+ */
+ pr_err("bpf_jit: unknown opcode %02x\n", code);
+ return -EINVAL;
+ }
+
+ ilen = prog - temp;
+ if (ilen > BPF_MAX_INSN_SIZE) {
+ pr_err("bpf_jit: fatal insn size error\n");
+ return -EFAULT;
+ }
+
+ if (image) {
+ if (unlikely(proglen + ilen > oldproglen)) {
+ pr_err("bpf_jit: fatal error\n");
+ return -EFAULT;
+ }
+ memcpy(image + proglen, temp, ilen);
+ }
+ proglen += ilen;
+ addrs[i] = proglen;
+ prog = temp;
+ }
+ return proglen;
+}
+
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+{
+ struct bpf_binary_header *header = NULL;
+ struct bpf_prog *tmp, *orig_prog = prog;
+ int proglen, oldproglen = 0;
+ struct jit_context ctx = {};
+ bool tmp_blinded = false;
+ u8 *image = NULL;
+ int *addrs;
+ int pass;
+ int i;
+
+ if (!prog->jit_requested)
+ return orig_prog;
+
+ tmp = bpf_jit_blind_constants(prog);
+ /*
+ * If blinding was requested and we failed during blinding,
+ * we must fall back to the interpreter.
+ */
+ if (IS_ERR(tmp))
+ return orig_prog;
+ if (tmp != prog) {
+ tmp_blinded = true;
+ prog = tmp;
+ }
+
+ addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL);
+ if (!addrs) {
+ prog = orig_prog;
+ goto out;
+ }
+
+ /*
+ * Before first pass, make a rough estimation of addrs[]
+ * each BPF instruction is translated to less than 64 bytes
+ */
+ for (proglen = 0, i = 0; i < prog->len; i++) {
+ proglen += 64;
+ addrs[i] = proglen;
+ }
+ ctx.cleanup_addr = proglen;
+
+ /*
+ * JITed image shrinks with every pass and the loop iterates
+ * until the image stops shrinking. Very large BPF programs
+ * may converge on the last pass. In such case do one more
+ * pass to emit the final image.
+ */
+ for (pass = 0; pass < 20 || image; pass++) {
+ proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
+ if (proglen <= 0) {
+out_image:
+ image = NULL;
+ if (header)
+ bpf_jit_binary_free(header);
+ prog = orig_prog;
+ goto out_addrs;
+ }
+ if (image) {
+ if (proglen != oldproglen) {
+ pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
+ proglen, oldproglen);
+ goto out_image;
+ }
+ break;
+ }
+ if (proglen == oldproglen) {
+ header = bpf_jit_binary_alloc(proglen, &image,
+ 1, jit_fill_hole);
+ if (!header) {
+ prog = orig_prog;
+ goto out_addrs;
+ }
+ }
+ oldproglen = proglen;
+ cond_resched();
+ }
+
+ if (bpf_jit_enable > 1)
+ bpf_jit_dump(prog->len, proglen, pass + 1, image);
+
+ if (image) {
+ bpf_jit_binary_lock_ro(header);
+ prog->bpf_func = (void *)image;
+ prog->jited = 1;
+ prog->jited_len = proglen;
+ } else {
+ prog = orig_prog;
+ }
+
+out_addrs:
+ kfree(addrs);
+out:
+ if (tmp_blinded)
+ bpf_jit_prog_release_other(prog, prog == orig_prog ?
+ tmp : orig_prog);
+ return prog;
+}
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index bed7e7f4e44c..e01f7ceb9e7a 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -225,7 +225,7 @@ int __init efi_alloc_page_tables(void)
pud = pud_alloc(&init_mm, p4d, EFI_VA_END);
if (!pud) {
- if (pgtable_l5_enabled)
+ if (pgtable_l5_enabled())
free_page((unsigned long) pgd_page_vaddr(*pgd));
free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER);
return -ENOMEM;
diff --git a/arch/x86/platform/intel-mid/intel_mid_vrtc.c b/arch/x86/platform/intel-mid/intel_mid_vrtc.c
index 58024862a7eb..a52914aa3b6c 100644
--- a/arch/x86/platform/intel-mid/intel_mid_vrtc.c
+++ b/arch/x86/platform/intel-mid/intel_mid_vrtc.c
@@ -57,7 +57,7 @@ void vrtc_cmos_write(unsigned char val, unsigned char reg)
}
EXPORT_SYMBOL_GPL(vrtc_cmos_write);
-void vrtc_get_time(struct timespec *now)
+void vrtc_get_time(struct timespec64 *now)
{
u8 sec, min, hour, mday, mon;
unsigned long flags;
@@ -83,18 +83,18 @@ void vrtc_get_time(struct timespec *now)
pr_info("vRTC: sec: %d min: %d hour: %d day: %d "
"mon: %d year: %d\n", sec, min, hour, mday, mon, year);
- now->tv_sec = mktime(year, mon, mday, hour, min, sec);
+ now->tv_sec = mktime64(year, mon, mday, hour, min, sec);
now->tv_nsec = 0;
}
-int vrtc_set_mmss(const struct timespec *now)
+int vrtc_set_mmss(const struct timespec64 *now)
{
unsigned long flags;
struct rtc_time tm;
int year;
int retval = 0;
- rtc_time_to_tm(now->tv_sec, &tm);
+ rtc_time64_to_tm(now->tv_sec, &tm);
if (!rtc_valid_tm(&tm) && tm.tm_year >= 72) {
/*
* tm.year is the number of years since 1900, and the
@@ -110,8 +110,8 @@ int vrtc_set_mmss(const struct timespec *now)
vrtc_cmos_write(tm.tm_sec, RTC_SECONDS);
spin_unlock_irqrestore(&rtc_lock, flags);
} else {
- pr_err("%s: Invalid vRTC value: write of %lx to vRTC failed\n",
- __func__, now->tv_sec);
+ pr_err("%s: Invalid vRTC value: write of %llx to vRTC failed\n",
+ __func__, (s64)now->tv_sec);
retval = -EINVAL;
}
return retval;
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 48b14b534897..67ccf64c8bd8 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -72,7 +72,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
* tables used by the image kernel.
*/
- if (pgtable_l5_enabled) {
+ if (pgtable_l5_enabled()) {
p4d = (p4d_t *)get_safe_page(GFP_ATOMIC);
if (!p4d)
return -ENOMEM;
@@ -98,7 +98,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
set_pgd(pgd + pgd_index(restore_jump_address), new_pgd);
} else {
/* No p4d for 4-level paging: point the pgd to the pud page table */
- pgd_t new_pgd = __pgd(__pa(p4d) | pgprot_val(pgtable_prot));
+ pgd_t new_pgd = __pgd(__pa(pud) | pgprot_val(pgtable_prot));
set_pgd(pgd + pgd_index(restore_jump_address), new_pgd);
}
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 13ed827c7c66..9d529f22fd9d 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -1,5 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
-mainmenu "User Mode Linux/$SUBARCH $KERNELVERSION Kernel Configuration"
+mainmenu "User Mode Linux/$(SUBARCH) $(KERNELVERSION) Kernel Configuration"
+
+comment "Compiler: $(CC_VERSION_TEXT)"
+
+source "scripts/Kconfig.include"
source "arch/um/Kconfig.common"
@@ -16,8 +20,8 @@ config UML_X86
select GENERIC_FIND_FIRST_BIT
config 64BIT
- bool "64-bit kernel" if SUBARCH = "x86"
- default SUBARCH != "i386"
+ bool "64-bit kernel" if "$(SUBARCH)" = "x86"
+ default "$(SUBARCH)" != "i386"
config X86_32
def_bool !64BIT
diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
index 10003359e633..b2d6967262b2 100644
--- a/arch/x86/um/vdso/Makefile
+++ b/arch/x86/um/vdso/Makefile
@@ -23,14 +23,14 @@ $(obj)/vdso.o: $(obj)/vdso.so
targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y)
-export CPPFLAGS_vdso.lds += -P -C
+CPPFLAGS_vdso.lds += -P -C
VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
-$(obj)/vdso.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
+$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
$(call if_changed,vdso)
$(obj)/%.so: OBJCOPYFLAGS := -S
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index a18703be9ead..1804b27f9632 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -115,6 +115,61 @@ static efi_system_table_t __init *xen_efi_probe(void)
return &efi_systab_xen;
}
+/*
+ * Determine whether we're in secure boot mode.
+ *
+ * Please keep the logic in sync with
+ * drivers/firmware/efi/libstub/secureboot.c:efi_get_secureboot().
+ */
+static enum efi_secureboot_mode xen_efi_get_secureboot(void)
+{
+ static efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID;
+ static efi_guid_t shim_guid = EFI_SHIM_LOCK_GUID;
+ efi_status_t status;
+ u8 moksbstate, secboot, setupmode;
+ unsigned long size;
+
+ size = sizeof(secboot);
+ status = efi.get_variable(L"SecureBoot", &efi_variable_guid,
+ NULL, &size, &secboot);
+
+ if (status == EFI_NOT_FOUND)
+ return efi_secureboot_mode_disabled;
+
+ if (status != EFI_SUCCESS)
+ goto out_efi_err;
+
+ size = sizeof(setupmode);
+ status = efi.get_variable(L"SetupMode", &efi_variable_guid,
+ NULL, &size, &setupmode);
+
+ if (status != EFI_SUCCESS)
+ goto out_efi_err;
+
+ if (secboot == 0 || setupmode == 1)
+ return efi_secureboot_mode_disabled;
+
+ /* See if a user has put the shim into insecure mode. */
+ size = sizeof(moksbstate);
+ status = efi.get_variable(L"MokSBStateRT", &shim_guid,
+ NULL, &size, &moksbstate);
+
+ /* If it fails, we don't care why. Default to secure. */
+ if (status != EFI_SUCCESS)
+ goto secure_boot_enabled;
+
+ if (moksbstate == 1)
+ return efi_secureboot_mode_disabled;
+
+ secure_boot_enabled:
+ pr_info("UEFI Secure Boot is enabled.\n");
+ return efi_secureboot_mode_enabled;
+
+ out_efi_err:
+ pr_err("Could not determine UEFI Secure Boot status.\n");
+ return efi_secureboot_mode_unknown;
+}
+
void __init xen_efi_init(void)
{
efi_system_table_t *efi_systab_xen;
@@ -129,6 +184,8 @@ void __init xen_efi_init(void)
boot_params.efi_info.efi_systab = (__u32)__pa(efi_systab_xen);
boot_params.efi_info.efi_systab_hi = (__u32)(__pa(efi_systab_xen) >> 32);
+ boot_params.secure_boot = xen_efi_get_secureboot();
+
set_bit(EFI_BOOT, &efi.flags);
set_bit(EFI_PARAVIRT, &efi.flags);
set_bit(EFI_64BIT, &efi.flags);
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index 826898701045..19c1ff542387 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -65,6 +65,19 @@ static void __init xen_hvm_init_mem_mapping(void)
{
early_memunmap(HYPERVISOR_shared_info, PAGE_SIZE);
HYPERVISOR_shared_info = __va(PFN_PHYS(shared_info_pfn));
+
+ /*
+ * The virtual address of the shared_info page has changed, so
+ * the vcpu_info pointer for VCPU 0 is now stale.
+ *
+ * The prepare_boot_cpu callback will re-initialize it via
+ * xen_vcpu_setup, but we can't rely on that to be called for
+ * old Xen versions (xen_have_vector_callback == 0).
+ *
+ * It is, in any case, bad to have a stale vcpu_info pointer
+ * so reset it now.
+ */
+ xen_vcpu_info_reset(0);
}
static void __init init_hvm_pv_info(void)
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index c36d23aa6c35..357969a3697c 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -421,45 +421,33 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
{
unsigned long va = dtr->address;
unsigned int size = dtr->size + 1;
- unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE);
- unsigned long frames[pages];
- int f;
-
- /*
- * A GDT can be up to 64k in size, which corresponds to 8192
- * 8-byte entries, or 16 4k pages..
- */
+ unsigned long pfn, mfn;
+ int level;
+ pte_t *ptep;
+ void *virt;
- BUG_ON(size > 65536);
+ /* @size should be at most GDT_SIZE which is smaller than PAGE_SIZE. */
+ BUG_ON(size > PAGE_SIZE);
BUG_ON(va & ~PAGE_MASK);
- for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
- int level;
- pte_t *ptep;
- unsigned long pfn, mfn;
- void *virt;
-
- /*
- * The GDT is per-cpu and is in the percpu data area.
- * That can be virtually mapped, so we need to do a
- * page-walk to get the underlying MFN for the
- * hypercall. The page can also be in the kernel's
- * linear range, so we need to RO that mapping too.
- */
- ptep = lookup_address(va, &level);
- BUG_ON(ptep == NULL);
-
- pfn = pte_pfn(*ptep);
- mfn = pfn_to_mfn(pfn);
- virt = __va(PFN_PHYS(pfn));
+ /*
+ * The GDT is per-cpu and is in the percpu data area.
+ * That can be virtually mapped, so we need to do a
+ * page-walk to get the underlying MFN for the
+ * hypercall. The page can also be in the kernel's
+ * linear range, so we need to RO that mapping too.
+ */
+ ptep = lookup_address(va, &level);
+ BUG_ON(ptep == NULL);
- frames[f] = mfn;
+ pfn = pte_pfn(*ptep);
+ mfn = pfn_to_mfn(pfn);
+ virt = __va(PFN_PHYS(pfn));
- make_lowmem_page_readonly((void *)va);
- make_lowmem_page_readonly(virt);
- }
+ make_lowmem_page_readonly((void *)va);
+ make_lowmem_page_readonly(virt);
- if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
+ if (HYPERVISOR_set_gdt(&mfn, size / sizeof(struct desc_struct)))
BUG();
}
@@ -470,34 +458,22 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
{
unsigned long va = dtr->address;
unsigned int size = dtr->size + 1;
- unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE);
- unsigned long frames[pages];
- int f;
-
- /*
- * A GDT can be up to 64k in size, which corresponds to 8192
- * 8-byte entries, or 16 4k pages..
- */
+ unsigned long pfn, mfn;
+ pte_t pte;
- BUG_ON(size > 65536);
+ /* @size should be at most GDT_SIZE which is smaller than PAGE_SIZE. */
+ BUG_ON(size > PAGE_SIZE);
BUG_ON(va & ~PAGE_MASK);
- for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
- pte_t pte;
- unsigned long pfn, mfn;
+ pfn = virt_to_pfn(va);
+ mfn = pfn_to_mfn(pfn);
- pfn = virt_to_pfn(va);
- mfn = pfn_to_mfn(pfn);
+ pte = pfn_pte(pfn, PAGE_KERNEL_RO);
- pte = pfn_pte(pfn, PAGE_KERNEL_RO);
-
- if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
- BUG();
-
- frames[f] = mfn;
- }
+ if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
+ BUG();
- if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
+ if (HYPERVISOR_set_gdt(&mfn, size / sizeof(struct desc_struct)))
BUG();
}
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index d33e7dbe3129..2d76106788a3 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -42,13 +42,11 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr)
}
EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine);
-static void xen_flush_tlb_all(void)
+static noinline void xen_flush_tlb_all(void)
{
struct mmuext_op *op;
struct multicall_space mcs;
- trace_xen_mmu_flush_tlb_all(0);
-
preempt_disable();
mcs = xen_mc_entry(sizeof(*op));
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 486c0a34d00b..2c30cabfda90 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1310,13 +1310,11 @@ unsigned long xen_read_cr2_direct(void)
return this_cpu_read(xen_vcpu_info.arch.cr2);
}
-static void xen_flush_tlb(void)
+static noinline void xen_flush_tlb(void)
{
struct mmuext_op *op;
struct multicall_space mcs;
- trace_xen_mmu_flush_tlb(0);
-
preempt_disable();
mcs = xen_mc_entry(sizeof(*op));
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 29163c43ebbd..e0f1bcf01d63 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -57,7 +57,7 @@ static u64 xen_clocksource_get_cycles(struct clocksource *cs)
return xen_clocksource_read();
}
-static void xen_read_wallclock(struct timespec *ts)
+static void xen_read_wallclock(struct timespec64 *ts)
{
struct shared_info *s = HYPERVISOR_shared_info;
struct pvclock_wall_clock *wall_clock = &(s->wc);
@@ -68,12 +68,12 @@ static void xen_read_wallclock(struct timespec *ts)
put_cpu_var(xen_vcpu);
}
-static void xen_get_wallclock(struct timespec *now)
+static void xen_get_wallclock(struct timespec64 *now)
{
xen_read_wallclock(now);
}
-static int xen_set_wallclock(const struct timespec *now)
+static int xen_set_wallclock(const struct timespec64 *now)
{
return -ENODEV;
}
@@ -461,7 +461,7 @@ static void __init xen_time_init(void)
{
struct pvclock_vcpu_time_info *pvti;
int cpu = smp_processor_id();
- struct timespec tp;
+ struct timespec64 tp;
/* As Dom0 is never moved, no penalty on using TSC there */
if (xen_initial_domain())
@@ -479,7 +479,7 @@ static void __init xen_time_init(void)
/* Set initial system time with full resolution */
xen_read_wallclock(&tp);
- do_settimeofday(&tp);
+ do_settimeofday64(&tp);
setup_force_cpu_cap(X86_FEATURE_TSC);