aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig12
-rw-r--r--arch/x86/Makefile7
-rw-r--r--arch/x86/boot/Makefile7
-rw-r--r--arch/x86/boot/compressed/Makefile18
-rw-r--r--arch/x86/boot/compressed/aslr.c18
-rw-r--r--arch/x86/boot/compressed/early_serial_console.c4
-rw-r--r--arch/x86/boot/compressed/eboot.c62
-rw-r--r--arch/x86/boot/compressed/eboot.h16
-rw-r--r--arch/x86/boot/cpu.c68
-rw-r--r--arch/x86/configs/tiny.config1
-rw-r--r--arch/x86/crypto/Makefile1
-rw-r--r--arch/x86/crypto/aes_ctrby8_avx-x86_64.S20
-rw-r--r--arch/x86/crypto/sha-mb/Makefile11
-rw-r--r--arch/x86/crypto/sha-mb/sha1_mb.c935
-rw-r--r--arch/x86/crypto/sha-mb/sha1_mb_mgr_datastruct.S287
-rw-r--r--arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S327
-rw-r--r--arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c64
-rw-r--r--arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S228
-rw-r--r--arch/x86/crypto/sha-mb/sha1_x8_avx2.S472
-rw-r--r--arch/x86/crypto/sha-mb/sha_mb_ctx.h136
-rw-r--r--arch/x86/crypto/sha-mb/sha_mb_mgr.h110
-rw-r--r--arch/x86/include/asm/cpufeature.h8
-rw-r--r--arch/x86/include/asm/efi.h24
-rw-r--r--arch/x86/include/asm/fixmap.h6
-rw-r--r--arch/x86/include/asm/io_apic.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h32
-rw-r--r--arch/x86/include/asm/kvm_para.h10
-rw-r--r--arch/x86/kernel/cpu/Makefile7
-rw-r--r--arch/x86/kernel/cpu/amd.c7
-rw-r--r--arch/x86/kernel/cpu/common.c4
-rw-r--r--arch/x86/kernel/kprobes/opt.c4
-rw-r--r--arch/x86/kernel/smpboot.c3
-rw-r--r--arch/x86/kvm/cpuid.c31
-rw-r--r--arch/x86/kvm/cpuid.h10
-rw-r--r--arch/x86/kvm/emulate.c51
-rw-r--r--arch/x86/kvm/lapic.c34
-rw-r--r--arch/x86/kvm/mmu.c139
-rw-r--r--arch/x86/kvm/mmu.h5
-rw-r--r--arch/x86/kvm/paging_tmpl.h22
-rw-r--r--arch/x86/kvm/pmu.c24
-rw-r--r--arch/x86/kvm/svm.c40
-rw-r--r--arch/x86/kvm/trace.h41
-rw-r--r--arch/x86/kvm/vmx.c377
-rw-r--r--arch/x86/kvm/x86.c148
-rw-r--r--arch/x86/kvm/x86.h22
-rw-r--r--arch/x86/mm/dump_pagetables.c4
-rw-r--r--arch/x86/mm/mmap.c2
-rw-r--r--arch/x86/pci/fixup.c24
48 files changed, 3468 insertions, 416 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 36327438caf0..e4b1f431c7ed 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -137,6 +137,7 @@ config X86
select HAVE_ACPI_APEI if ACPI
select HAVE_ACPI_APEI_NMI if ACPI
select ACPI_LEGACY_TABLES_LOOKUP if ACPI
+ select X86_FEATURE_NAMES if PROC_FS
config INSTRUCTION_DECODER
def_bool y
@@ -314,6 +315,17 @@ config SMP
If you don't know what to do here, say N.
+config X86_FEATURE_NAMES
+ bool "Processor feature human-readable names" if EMBEDDED
+ default y
+ ---help---
+ This option compiles in a table of x86 feature bits and corresponding
+ names. This is required to support /proc/cpuinfo and a few kernel
+ messages. You can disable this to save space, at the expense of
+ making those few kernel messages show numeric feature bits instead.
+
+ If in doubt, say Y.
+
config X86_X2APIC
bool "Support x2apic"
depends on X86_LOCAL_APIC && X86_64 && IRQ_REMAP
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 60087ca37679..5692d6ac0f18 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -253,12 +253,6 @@ archclean:
$(Q)$(MAKE) $(clean)=arch/x86/tools
$(Q)$(MAKE) $(clean)=arch/x86/purgatory
-PHONY += kvmconfig
-kvmconfig:
- $(if $(wildcard $(objtree)/.config),, $(error You need an existing .config for this target))
- $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m -O $(objtree) $(objtree)/.config $(srctree)/arch/x86/configs/kvm_guest.config
- $(Q)yes "" | $(MAKE) -f $(srctree)/Makefile oldconfig
-
define archhelp
echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)'
echo ' install - Install kernel using'
@@ -272,5 +266,4 @@ define archhelp
echo ' bzdisk/fdimage*/isoimage also accept:'
echo ' FDARGS="..." arguments for the booted kernel'
echo ' FDINITRD=file initrd for the booted kernel'
- echo ' kvmconfig - Enable additional options for guest kernel support'
endef
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index dbe8dd2fe247..5b016e2498f3 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -35,19 +35,22 @@ setup-y += video-vesa.o
setup-y += video-bios.o
targets += $(setup-y)
-hostprogs-y := mkcpustr tools/build
+hostprogs-y := tools/build
+hostprogs-$(CONFIG_X86_FEATURE_NAMES) += mkcpustr
HOST_EXTRACFLAGS += -I$(srctree)/tools/include \
-include include/generated/autoconf.h \
-D__EXPORTED_HEADERS__
+ifdef CONFIG_X86_FEATURE_NAMES
$(obj)/cpu.o: $(obj)/cpustr.h
quiet_cmd_cpustr = CPUSTR $@
cmd_cpustr = $(obj)/mkcpustr > $@
-targets += cpustr.h
+targets += cpustr.h
$(obj)/cpustr.h: $(obj)/mkcpustr FORCE
$(call if_changed,cpustr)
+endif
# ---------------------------------------------------------------------------
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 7a801a310e37..704f58aa79cd 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -26,18 +26,18 @@ LDFLAGS_vmlinux := -T
hostprogs-y := mkpiggy
HOST_EXTRACFLAGS += -I$(srctree)/tools/include
-VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
- $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
- $(obj)/piggy.o $(obj)/cpuflags.o $(obj)/aslr.o
+vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
+ $(obj)/string.o $(obj)/cmdline.o \
+ $(obj)/piggy.o $(obj)/cpuflags.o
+
+vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o
+vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/aslr.o
$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
-ifeq ($(CONFIG_EFI_STUB), y)
- VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \
- $(objtree)/drivers/firmware/efi/libstub/lib.a
-endif
+vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o
-$(obj)/vmlinux: $(VMLINUX_OBJS) FORCE
+$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
$(call if_changed,ld)
@:
@@ -45,7 +45,7 @@ OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
-targets += $(patsubst $(obj)/%,%,$(VMLINUX_OBJS)) vmlinux.bin.all vmlinux.relocs
+targets += $(patsubst $(obj)/%,%,$(vmlinux-objs-y)) vmlinux.bin.all vmlinux.relocs
CMD_RELOCS = arch/x86/tools/relocs
quiet_cmd_relocs = RELOCS $@
diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index fc6091abedb7..7c68808edeb7 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -1,6 +1,5 @@
#include "misc.h"
-#ifdef CONFIG_RANDOMIZE_BASE
#include <asm/msr.h>
#include <asm/archrandom.h>
#include <asm/e820.h>
@@ -183,12 +182,27 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
static bool mem_avoid_overlap(struct mem_vector *img)
{
int i;
+ struct setup_data *ptr;
for (i = 0; i < MEM_AVOID_MAX; i++) {
if (mem_overlaps(img, &mem_avoid[i]))
return true;
}
+ /* Avoid all entries in the setup_data linked list. */
+ ptr = (struct setup_data *)(unsigned long)real_mode->hdr.setup_data;
+ while (ptr) {
+ struct mem_vector avoid;
+
+ avoid.start = (u64)ptr;
+ avoid.size = sizeof(*ptr) + ptr->len;
+
+ if (mem_overlaps(img, &avoid))
+ return true;
+
+ ptr = (struct setup_data *)(unsigned long)ptr->next;
+ }
+
return false;
}
@@ -320,5 +334,3 @@ unsigned char *choose_kernel_location(unsigned char *input,
out:
return (unsigned char *)choice;
}
-
-#endif /* CONFIG_RANDOMIZE_BASE */
diff --git a/arch/x86/boot/compressed/early_serial_console.c b/arch/x86/boot/compressed/early_serial_console.c
index d3d003cb5481..261e81fb9582 100644
--- a/arch/x86/boot/compressed/early_serial_console.c
+++ b/arch/x86/boot/compressed/early_serial_console.c
@@ -1,9 +1,5 @@
#include "misc.h"
-#ifdef CONFIG_EARLY_PRINTK
-
int early_serial_base;
#include "../early_serial_console.c"
-
-#endif
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index f277184e2ac1..de8eebd6f67c 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -19,7 +19,10 @@
static efi_system_table_t *sys_table;
-struct efi_config *efi_early;
+static struct efi_config *efi_early;
+
+#define efi_call_early(f, ...) \
+ efi_early->call(efi_early->f, __VA_ARGS__);
#define BOOT_SERVICES(bits) \
static void setup_boot_services##bits(struct efi_config *c) \
@@ -265,21 +268,25 @@ void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str)
offset = offsetof(typeof(*out), output_string);
output_string = efi_early->text_output + offset;
+ out = (typeof(out))(unsigned long)efi_early->text_output;
func = (u64 *)output_string;
- efi_early->call(*func, efi_early->text_output, str);
+ efi_early->call(*func, out, str);
} else {
struct efi_simple_text_output_protocol_32 *out;
u32 *func;
offset = offsetof(typeof(*out), output_string);
output_string = efi_early->text_output + offset;
+ out = (typeof(out))(unsigned long)efi_early->text_output;
func = (u32 *)output_string;
- efi_early->call(*func, efi_early->text_output, str);
+ efi_early->call(*func, out, str);
}
}
+#include "../../../../drivers/firmware/efi/libstub/efi-stub-helper.c"
+
static void find_bits(unsigned long mask, u8 *pos, u8 *size)
{
u8 first, len;
@@ -360,7 +367,7 @@ free_struct:
return status;
}
-static efi_status_t
+static void
setup_efi_pci32(struct boot_params *params, void **pci_handle,
unsigned long size)
{
@@ -403,8 +410,6 @@ setup_efi_pci32(struct boot_params *params, void **pci_handle,
data = (struct setup_data *)rom;
}
-
- return status;
}
static efi_status_t
@@ -463,7 +468,7 @@ free_struct:
}
-static efi_status_t
+static void
setup_efi_pci64(struct boot_params *params, void **pci_handle,
unsigned long size)
{
@@ -506,11 +511,18 @@ setup_efi_pci64(struct boot_params *params, void **pci_handle,
data = (struct setup_data *)rom;
}
-
- return status;
}
-static efi_status_t setup_efi_pci(struct boot_params *params)
+/*
+ * There's no way to return an informative status from this function,
+ * because any analysis (and printing of error messages) needs to be
+ * done directly at the EFI function call-site.
+ *
+ * For example, EFI_INVALID_PARAMETER could indicate a bug or maybe we
+ * just didn't find any PCI devices, but there's no way to tell outside
+ * the context of the call.
+ */
+static void setup_efi_pci(struct boot_params *params)
{
efi_status_t status;
void **pci_handle = NULL;
@@ -527,7 +539,7 @@ static efi_status_t setup_efi_pci(struct boot_params *params)
size, (void **)&pci_handle);
if (status != EFI_SUCCESS)
- return status;
+ return;
status = efi_call_early(locate_handle,
EFI_LOCATE_BY_PROTOCOL, &pci_proto,
@@ -538,13 +550,12 @@ static efi_status_t setup_efi_pci(struct boot_params *params)
goto free_handle;
if (efi_early->is64)
- status = setup_efi_pci64(params, pci_handle, size);
+ setup_efi_pci64(params, pci_handle, size);
else
- status = setup_efi_pci32(params, pci_handle, size);
+ setup_efi_pci32(params, pci_handle, size);
free_handle:
efi_call_early(free_pool, pci_handle);
- return status;
}
static void
@@ -1032,7 +1043,6 @@ struct boot_params *make_boot_params(struct efi_config *c)
int i;
unsigned long ramdisk_addr;
unsigned long ramdisk_size;
- unsigned long initrd_addr_max;
efi_early = c;
sys_table = (efi_system_table_t *)(unsigned long)efi_early->table;
@@ -1095,15 +1105,20 @@ struct boot_params *make_boot_params(struct efi_config *c)
memset(sdt, 0, sizeof(*sdt));
- if (hdr->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G)
- initrd_addr_max = -1UL;
- else
- initrd_addr_max = hdr->initrd_addr_max;
-
status = handle_cmdline_files(sys_table, image,
(char *)(unsigned long)hdr->cmd_line_ptr,
- "initrd=", initrd_addr_max,
+ "initrd=", hdr->initrd_addr_max,
&ramdisk_addr, &ramdisk_size);
+
+ if (status != EFI_SUCCESS &&
+ hdr->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G) {
+ efi_printk(sys_table, "Trying to load files to higher address\n");
+ status = handle_cmdline_files(sys_table, image,
+ (char *)(unsigned long)hdr->cmd_line_ptr,
+ "initrd=", -1UL,
+ &ramdisk_addr, &ramdisk_size);
+ }
+
if (status != EFI_SUCCESS)
goto fail2;
hdr->ramdisk_image = ramdisk_addr & 0xffffffff;
@@ -1376,10 +1391,7 @@ struct boot_params *efi_main(struct efi_config *c,
setup_graphics(boot_params);
- status = setup_efi_pci(boot_params);
- if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "setup_efi_pci() failed!\n");
- }
+ setup_efi_pci(boot_params);
status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
sizeof(*gdt), (void **)&gdt);
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index d487e727f1ec..c88c31ecad12 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -103,4 +103,20 @@ struct efi_uga_draw_protocol {
void *blt;
};
+struct efi_config {
+ u64 image_handle;
+ u64 table;
+ u64 allocate_pool;
+ u64 allocate_pages;
+ u64 get_memory_map;
+ u64 free_pool;
+ u64 free_pages;
+ u64 locate_handle;
+ u64 handle_protocol;
+ u64 exit_boot_services;
+ u64 text_output;
+ efi_status_t (*call)(unsigned long, ...);
+ bool is64;
+} __packed;
+
#endif /* BOOT_COMPRESSED_EBOOT_H */
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
index 6ec6bb6e9957..29207f69ae8c 100644
--- a/arch/x86/boot/cpu.c
+++ b/arch/x86/boot/cpu.c
@@ -16,7 +16,9 @@
*/
#include "boot.h"
+#ifdef CONFIG_X86_FEATURE_NAMES
#include "cpustr.h"
+#endif
static char *cpu_name(int level)
{
@@ -32,11 +34,48 @@ static char *cpu_name(int level)
}
}
+static void show_cap_strs(u32 *err_flags)
+{
+ int i, j;
+#ifdef CONFIG_X86_FEATURE_NAMES
+ const unsigned char *msg_strs = (const unsigned char *)x86_cap_strs;
+ for (i = 0; i < NCAPINTS; i++) {
+ u32 e = err_flags[i];
+ for (j = 0; j < 32; j++) {
+ if (msg_strs[0] < i ||
+ (msg_strs[0] == i && msg_strs[1] < j)) {
+ /* Skip to the next string */
+ msg_strs += 2;
+ while (*msg_strs++)
+ ;
+ }
+ if (e & 1) {
+ if (msg_strs[0] == i &&
+ msg_strs[1] == j &&
+ msg_strs[2])
+ printf("%s ", msg_strs+2);
+ else
+ printf("%d:%d ", i, j);
+ }
+ e >>= 1;
+ }
+ }
+#else
+ for (i = 0; i < NCAPINTS; i++) {
+ u32 e = err_flags[i];
+ for (j = 0; j < 32; j++) {
+ if (e & 1)
+ printf("%d:%d ", i, j);
+ e >>= 1;
+ }
+ }
+#endif
+}
+
int validate_cpu(void)
{
u32 *err_flags;
int cpu_level, req_level;
- const unsigned char *msg_strs;
check_cpu(&cpu_level, &req_level, &err_flags);
@@ -49,34 +88,9 @@ int validate_cpu(void)
}
if (err_flags) {
- int i, j;
puts("This kernel requires the following features "
"not present on the CPU:\n");
-
- msg_strs = (const unsigned char *)x86_cap_strs;
-
- for (i = 0; i < NCAPINTS; i++) {
- u32 e = err_flags[i];
-
- for (j = 0; j < 32; j++) {
- if (msg_strs[0] < i ||
- (msg_strs[0] == i && msg_strs[1] < j)) {
- /* Skip to the next string */
- msg_strs += 2;
- while (*msg_strs++)
- ;
- }
- if (e & 1) {
- if (msg_strs[0] == i &&
- msg_strs[1] == j &&
- msg_strs[2])
- printf("%s ", msg_strs+2);
- else
- printf("%d:%d ", i, j);
- }
- e >>= 1;
- }
- }
+ show_cap_strs(err_flags);
putchar('\n');
return -1;
} else {
diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config
new file mode 100644
index 000000000000..4e2ecfa23c15
--- /dev/null
+++ b/arch/x86/configs/tiny.config
@@ -0,0 +1 @@
+CONFIG_NOHIGHMEM=y
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index d551165a3159..fd0f848938cc 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
+obj-$(CONFIG_CRYPTO_SHA1_MB) += sha-mb/
obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
index f091f122ed24..2df2a0298f5a 100644
--- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
+++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
@@ -79,9 +79,6 @@
#define xcounter %xmm8
#define xbyteswap %xmm9
#define xkey0 %xmm10
-#define xkey3 %xmm11
-#define xkey6 %xmm12
-#define xkey9 %xmm13
#define xkey4 %xmm11
#define xkey8 %xmm12
#define xkey12 %xmm13
@@ -108,6 +105,10 @@
byteswap_const:
.octa 0x000102030405060708090A0B0C0D0E0F
+ddq_low_msk:
+ .octa 0x0000000000000000FFFFFFFFFFFFFFFF
+ddq_high_add_1:
+ .octa 0x00000000000000010000000000000000
ddq_add_1:
.octa 0x00000000000000000000000000000001
ddq_add_2:
@@ -169,7 +170,12 @@ ddq_add_8:
.rept (by - 1)
club DDQ_DATA, i
club XDATA, i
- vpaddd var_ddq_add(%rip), xcounter, var_xdata
+ vpaddq var_ddq_add(%rip), xcounter, var_xdata
+ vptest ddq_low_msk(%rip), var_xdata
+ jnz 1f
+ vpaddq ddq_high_add_1(%rip), var_xdata, var_xdata
+ vpaddq ddq_high_add_1(%rip), xcounter, xcounter
+ 1:
vpshufb xbyteswap, var_xdata, var_xdata
.set i, (i +1)
.endr
@@ -178,7 +184,11 @@ ddq_add_8:
vpxor xkey0, xdata0, xdata0
club DDQ_DATA, by
- vpaddd var_ddq_add(%rip), xcounter, xcounter
+ vpaddq var_ddq_add(%rip), xcounter, xcounter
+ vptest ddq_low_msk(%rip), xcounter
+ jnz 1f
+ vpaddq ddq_high_add_1(%rip), xcounter, xcounter
+ 1:
.set i, 1
.rept (by - 1)
diff --git a/arch/x86/crypto/sha-mb/Makefile b/arch/x86/crypto/sha-mb/Makefile
new file mode 100644
index 000000000000..2f8756375df5
--- /dev/null
+++ b/arch/x86/crypto/sha-mb/Makefile
@@ -0,0 +1,11 @@
+#
+# Arch-specific CryptoAPI modules.
+#
+
+avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
+ $(comma)4)$(comma)%ymm2,yes,no)
+ifeq ($(avx2_supported),yes)
+ obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb.o
+ sha1-mb-y := sha1_mb.o sha1_mb_mgr_flush_avx2.o \
+ sha1_mb_mgr_init_avx2.o sha1_mb_mgr_submit_avx2.o sha1_x8_avx2.o
+endif
diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c
new file mode 100644
index 000000000000..99eefd812958
--- /dev/null
+++ b/arch/x86/crypto/sha-mb/sha1_mb.c
@@ -0,0 +1,935 @@
+/*
+ * Multi buffer SHA1 algorithm Glue Code
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/sha.h>
+#include <crypto/mcryptd.h>
+#include <crypto/crypto_wq.h>
+#include <asm/byteorder.h>
+#include <asm/i387.h>
+#include <asm/xcr.h>
+#include <asm/xsave.h>
+#include <linux/hardirq.h>
+#include <asm/fpu-internal.h>
+#include "sha_mb_ctx.h"
+
+#define FLUSH_INTERVAL 1000 /* in usec */
+
+static struct mcryptd_alg_state sha1_mb_alg_state;
+
+struct sha1_mb_ctx {
+ struct mcryptd_ahash *mcryptd_tfm;
+};
+
+static inline struct mcryptd_hash_request_ctx *cast_hash_to_mcryptd_ctx(struct sha1_hash_ctx *hash_ctx)
+{
+ struct shash_desc *desc;
+
+ desc = container_of((void *) hash_ctx, struct shash_desc, __ctx);
+ return container_of(desc, struct mcryptd_hash_request_ctx, desc);
+}
+
+static inline struct ahash_request *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
+{
+ return container_of((void *) ctx, struct ahash_request, __ctx);
+}
+
+static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
+ struct shash_desc *desc)
+{
+ rctx->flag = HASH_UPDATE;
+}
+
+static asmlinkage void (*sha1_job_mgr_init)(struct sha1_mb_mgr *state);
+static asmlinkage struct job_sha1* (*sha1_job_mgr_submit)(struct sha1_mb_mgr *state,
+ struct job_sha1 *job);
+static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)(struct sha1_mb_mgr *state);
+static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)(struct sha1_mb_mgr *state);
+
+inline void sha1_init_digest(uint32_t *digest)
+{
+ static const uint32_t initial_digest[SHA1_DIGEST_LENGTH] = {SHA1_H0,
+ SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 };
+ memcpy(digest, initial_digest, sizeof(initial_digest));
+}
+
+inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
+ uint32_t total_len)
+{
+ uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1);
+
+ memset(&padblock[i], 0, SHA1_BLOCK_SIZE);
+ padblock[i] = 0x80;
+
+ i += ((SHA1_BLOCK_SIZE - 1) &
+ (0 - (total_len + SHA1_PADLENGTHFIELD_SIZE + 1)))
+ + 1 + SHA1_PADLENGTHFIELD_SIZE;
+
+#if SHA1_PADLENGTHFIELD_SIZE == 16
+ *((uint64_t *) &padblock[i - 16]) = 0;
+#endif
+
+ *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
+
+ /* Number of extra blocks to hash */
+ return i >> SHA1_LOG2_BLOCK_SIZE;
+}
+
+static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr, struct sha1_hash_ctx *ctx)
+{
+ while (ctx) {
+ if (ctx->status & HASH_CTX_STS_COMPLETE) {
+ /* Clear PROCESSING bit */
+ ctx->status = HASH_CTX_STS_COMPLETE;
+ return ctx;
+ }
+
+ /*
+ * If the extra blocks are empty, begin hashing what remains
+ * in the user's buffer.
+ */
+ if (ctx->partial_block_buffer_length == 0 &&
+ ctx->incoming_buffer_length) {
+
+ const void *buffer = ctx->incoming_buffer;
+ uint32_t len = ctx->incoming_buffer_length;
+ uint32_t copy_len;
+
+ /*
+ * Only entire blocks can be hashed.
+ * Copy remainder to extra blocks buffer.
+ */
+ copy_len = len & (SHA1_BLOCK_SIZE-1);
+
+ if (copy_len) {
+ len -= copy_len;
+ memcpy(ctx->partial_block_buffer,
+ ((const char *) buffer + len),
+ copy_len);
+ ctx->partial_block_buffer_length = copy_len;
+ }
+
+ ctx->incoming_buffer_length = 0;
+
+ /* len should be a multiple of the block size now */
+ assert((len % SHA1_BLOCK_SIZE) == 0);
+
+ /* Set len to the number of blocks to be hashed */
+ len >>= SHA1_LOG2_BLOCK_SIZE;
+
+ if (len) {
+
+ ctx->job.buffer = (uint8_t *) buffer;
+ ctx->job.len = len;
+ ctx = (struct sha1_hash_ctx *) sha1_job_mgr_submit(&mgr->mgr,
+ &ctx->job);
+ continue;
+ }
+ }
+
+ /*
+ * If the extra blocks are not empty, then we are
+ * either on the last block(s) or we need more
+ * user input before continuing.
+ */
+ if (ctx->status & HASH_CTX_STS_LAST) {
+
+ uint8_t *buf = ctx->partial_block_buffer;
+ uint32_t n_extra_blocks = sha1_pad(buf, ctx->total_length);
+
+ ctx->status = (HASH_CTX_STS_PROCESSING |
+ HASH_CTX_STS_COMPLETE);
+ ctx->job.buffer = buf;
+ ctx->job.len = (uint32_t) n_extra_blocks;
+ ctx = (struct sha1_hash_ctx *) sha1_job_mgr_submit(&mgr->mgr, &ctx->job);
+ continue;
+ }
+
+ if (ctx)
+ ctx->status = HASH_CTX_STS_IDLE;
+ return ctx;
+ }
+
+ return NULL;
+}
+
+static struct sha1_hash_ctx *sha1_ctx_mgr_get_comp_ctx(struct sha1_ctx_mgr *mgr)
+{
+ /*
+ * If get_comp_job returns NULL, there are no jobs complete.
+ * If get_comp_job returns a job, verify that it is safe to return to the user.
+ * If it is not ready, resubmit the job to finish processing.
+ * If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned.
+ * Otherwise, all jobs currently being managed by the hash_ctx_mgr still need processing.
+ */
+ struct sha1_hash_ctx *ctx;
+
+ ctx = (struct sha1_hash_ctx *) sha1_job_mgr_get_comp_job(&mgr->mgr);
+ return sha1_ctx_mgr_resubmit(mgr, ctx);
+}
+
+static void sha1_ctx_mgr_init(struct sha1_ctx_mgr *mgr)
+{
+ sha1_job_mgr_init(&mgr->mgr);
+}
+
+static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr,
+ struct sha1_hash_ctx *ctx,
+ const void *buffer,
+ uint32_t len,
+ int flags)
+{
+ if (flags & (~HASH_ENTIRE)) {
+ /* User should not pass anything other than FIRST, UPDATE, or LAST */
+ ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
+ return ctx;
+ }
+
+ if (ctx->status & HASH_CTX_STS_PROCESSING) {
+ /* Cannot submit to a currently processing job. */
+ ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
+ return ctx;
+ }
+
+ if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) {
+ /* Cannot update a finished job. */
+ ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
+ return ctx;
+ }
+
+
+ if (flags & HASH_FIRST) {
+ /* Init digest */
+ sha1_init_digest(ctx->job.result_digest);
+
+ /* Reset byte counter */
+ ctx->total_length = 0;
+
+ /* Clear extra blocks */
+ ctx->partial_block_buffer_length = 0;
+ }
+
+ /* If we made it here, there were no errors during this call to submit */
+ ctx->error = HASH_CTX_ERROR_NONE;
+
+ /* Store buffer ptr info from user */
+ ctx->incoming_buffer = buffer;
+ ctx->incoming_buffer_length = len;
+
+ /* Store the user's request flags and mark this ctx as currently being processed. */
+ ctx->status = (flags & HASH_LAST) ?
+ (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
+ HASH_CTX_STS_PROCESSING;
+
+ /* Advance byte counter */
+ ctx->total_length += len;
+
+ /*
+ * If there is anything currently buffered in the extra blocks,
+ * append to it until it contains a whole block.
+ * Or if the user's buffer contains less than a whole block,
+ * append as much as possible to the extra block.
+ */
+ if ((ctx->partial_block_buffer_length) | (len < SHA1_BLOCK_SIZE)) {
+ /* Compute how many bytes to copy from user buffer into extra block */
+ uint32_t copy_len = SHA1_BLOCK_SIZE - ctx->partial_block_buffer_length;
+ if (len < copy_len)
+ copy_len = len;
+
+ if (copy_len) {
+ /* Copy and update relevant pointers and counters */
+ memcpy(&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
+ buffer, copy_len);
+
+ ctx->partial_block_buffer_length += copy_len;
+ ctx->incoming_buffer = (const void *)((const char *)buffer + copy_len);
+ ctx->incoming_buffer_length = len - copy_len;
+ }
+
+ /* The extra block should never contain more than 1 block here */
+ assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE);
+
+ /* If the extra block buffer contains exactly 1 block, it can be hashed. */
+ if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) {
+ ctx->partial_block_buffer_length = 0;
+
+ ctx->job.buffer = ctx->partial_block_buffer;
+ ctx->job.len = 1;
+ ctx = (struct sha1_hash_ctx *) sha1_job_mgr_submit(&mgr->mgr, &ctx->job);
+ }
+ }
+
+ return sha1_ctx_mgr_resubmit(mgr, ctx);
+}
+
+static struct sha1_hash_ctx *sha1_ctx_mgr_flush(struct sha1_ctx_mgr *mgr)
+{
+ struct sha1_hash_ctx *ctx;
+
+ while (1) {
+ ctx = (struct sha1_hash_ctx *) sha1_job_mgr_flush(&mgr->mgr);
+
+ /* If flush returned 0, there are no more jobs in flight. */
+ if (!ctx)
+ return NULL;
+
+ /*
+ * If flush returned a job, resubmit the job to finish processing.
+ */
+ ctx = sha1_ctx_mgr_resubmit(mgr, ctx);
+
+ /*
+ * If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned.
+ * Otherwise, all jobs currently being managed by the sha1_ctx_mgr
+ * still need processing. Loop.
+ */
+ if (ctx)
+ return ctx;
+ }
+}
+
+static int sha1_mb_init(struct shash_desc *desc)
+{
+ struct sha1_hash_ctx *sctx = shash_desc_ctx(desc);
+
+ hash_ctx_init(sctx);
+ sctx->job.result_digest[0] = SHA1_H0;
+ sctx->job.result_digest[1] = SHA1_H1;
+ sctx->job.result_digest[2] = SHA1_H2;
+ sctx->job.result_digest[3] = SHA1_H3;
+ sctx->job.result_digest[4] = SHA1_H4;
+ sctx->total_length = 0;
+ sctx->partial_block_buffer_length = 0;
+ sctx->status = HASH_CTX_STS_IDLE;
+
+ return 0;
+}
+
+static int sha1_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
+{
+ int i;
+ struct sha1_hash_ctx *sctx = shash_desc_ctx(&rctx->desc);
+ __be32 *dst = (__be32 *) rctx->out;
+
+ for (i = 0; i < 5; ++i)
+ dst[i] = cpu_to_be32(sctx->job.result_digest[i]);
+
+ return 0;
+}
+
+static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
+ struct mcryptd_alg_cstate *cstate, bool flush)
+{
+ int flag = HASH_UPDATE;
+ int nbytes, err = 0;
+ struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
+ struct sha1_hash_ctx *sha_ctx;
+
+ /* more work ? */
+ while (!(rctx->flag & HASH_DONE)) {
+ nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
+ if (nbytes < 0) {
+ err = nbytes;
+ goto out;
+ }
+ /* check if the walk is done */
+ if (crypto_ahash_walk_last(&rctx->walk)) {
+ rctx->flag |= HASH_DONE;
+ if (rctx->flag & HASH_FINAL)
+ flag |= HASH_LAST;
+
+ }
+ sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(&rctx->desc);
+ kernel_fpu_begin();
+ sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, nbytes, flag);
+ if (!sha_ctx) {
+ if (flush)
+ sha_ctx = sha1_ctx_mgr_flush(cstate->mgr);
+ }
+ kernel_fpu_end();
+ if (sha_ctx)
+ rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ else {
+ rctx = NULL;
+ goto out;
+ }
+ }
+
+ /* copy the results */
+ if (rctx->flag & HASH_FINAL)
+ sha1_mb_set_results(rctx);
+
+out:
+ *ret_rctx = rctx;
+ return err;
+}
+
+static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
+ struct mcryptd_alg_cstate *cstate,
+ int err)
+{
+ struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
+ struct sha1_hash_ctx *sha_ctx;
+ struct mcryptd_hash_request_ctx *req_ctx;
+ int ret;
+
+ /* remove from work list */
+ spin_lock(&cstate->work_lock);
+ list_del(&rctx->waiter);
+ spin_unlock(&cstate->work_lock);
+
+ if (irqs_disabled())
+ rctx->complete(&req->base, err);
+ else {
+ local_bh_disable();
+ rctx->complete(&req->base, err);
+ local_bh_enable();
+ }
+
+ /* check to see if there are other jobs that are done */
+ sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr);
+ while (sha_ctx) {
+ req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ ret = sha_finish_walk(&req_ctx, cstate, false);
+ if (req_ctx) {
+ spin_lock(&cstate->work_lock);
+ list_del(&req_ctx->waiter);
+ spin_unlock(&cstate->work_lock);
+
+ req = cast_mcryptd_ctx_to_req(req_ctx);
+ if (irqs_disabled())
+ rctx->complete(&req->base, ret);
+ else {
+ local_bh_disable();
+ rctx->complete(&req->base, ret);
+ local_bh_enable();
+ }
+ }
+ sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr);
+ }
+
+ return 0;
+}
+
+static void sha1_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
+ struct mcryptd_alg_cstate *cstate)
+{
+ unsigned long next_flush;
+ unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
+
+ /* initialize tag */
+ rctx->tag.arrival = jiffies; /* tag the arrival time */
+ rctx->tag.seq_num = cstate->next_seq_num++;
+ next_flush = rctx->tag.arrival + delay;
+ rctx->tag.expire = next_flush;
+
+ spin_lock(&cstate->work_lock);
+ list_add_tail(&rctx->waiter, &cstate->work_list);
+ spin_unlock(&cstate->work_lock);
+
+ mcryptd_arm_flusher(cstate, delay);
+}
+
+static int sha1_mb_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct mcryptd_hash_request_ctx *rctx =
+ container_of(desc, struct mcryptd_hash_request_ctx, desc);
+ struct mcryptd_alg_cstate *cstate =
+ this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
+
+ struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
+ struct sha1_hash_ctx *sha_ctx;
+ int ret = 0, nbytes;
+
+
+ /* sanity check */
+ if (rctx->tag.cpu != smp_processor_id()) {
+ pr_err("mcryptd error: cpu clash\n");
+ goto done;
+ }
+
+ /* need to init context */
+ req_ctx_init(rctx, desc);
+
+ nbytes = crypto_ahash_walk_first(req, &rctx->walk);
+
+ if (nbytes < 0) {
+ ret = nbytes;
+ goto done;
+ }
+
+ if (crypto_ahash_walk_last(&rctx->walk))
+ rctx->flag |= HASH_DONE;
+
+ /* submit */
+ sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(desc);
+ sha1_mb_add_list(rctx, cstate);
+ kernel_fpu_begin();
+ sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, nbytes, HASH_UPDATE);
+ kernel_fpu_end();
+
+ /* check if anything is returned */
+ if (!sha_ctx)
+ return -EINPROGRESS;
+
+ if (sha_ctx->error) {
+ ret = sha_ctx->error;
+ rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ goto done;
+ }
+
+ rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ ret = sha_finish_walk(&rctx, cstate, false);
+
+ if (!rctx)
+ return -EINPROGRESS;
+done:
+ sha_complete_job(rctx, cstate, ret);
+ return ret;
+}
+
+static int sha1_mb_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ struct mcryptd_hash_request_ctx *rctx =
+ container_of(desc, struct mcryptd_hash_request_ctx, desc);
+ struct mcryptd_alg_cstate *cstate =
+ this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
+
+ struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
+ struct sha1_hash_ctx *sha_ctx;
+ int ret = 0, flag = HASH_UPDATE, nbytes;
+
+ /* sanity check */
+ if (rctx->tag.cpu != smp_processor_id()) {
+ pr_err("mcryptd error: cpu clash\n");
+ goto done;
+ }
+
+ /* need to init context */
+ req_ctx_init(rctx, desc);
+
+ nbytes = crypto_ahash_walk_first(req, &rctx->walk);
+
+ if (nbytes < 0) {
+ ret = nbytes;
+ goto done;
+ }
+
+ if (crypto_ahash_walk_last(&rctx->walk)) {
+ rctx->flag |= HASH_DONE;
+ flag = HASH_LAST;
+ }
+ rctx->out = out;
+
+ /* submit */
+ rctx->flag |= HASH_FINAL;
+ sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(desc);
+ sha1_mb_add_list(rctx, cstate);
+
+ kernel_fpu_begin();
+ sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, nbytes, flag);
+ kernel_fpu_end();
+
+ /* check if anything is returned */
+ if (!sha_ctx)
+ return -EINPROGRESS;
+
+ if (sha_ctx->error) {
+ ret = sha_ctx->error;
+ goto done;
+ }
+
+ rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ ret = sha_finish_walk(&rctx, cstate, false);
+ if (!rctx)
+ return -EINPROGRESS;
+done:
+ sha_complete_job(rctx, cstate, ret);
+ return ret;
+}
+
+static int sha1_mb_final(struct shash_desc *desc, u8 *out)
+{
+ struct mcryptd_hash_request_ctx *rctx =
+ container_of(desc, struct mcryptd_hash_request_ctx, desc);
+ struct mcryptd_alg_cstate *cstate =
+ this_cpu_ptr(sha1_mb_alg_state.alg_cstate);
+
+ struct sha1_hash_ctx *sha_ctx;
+ int ret = 0;
+ u8 data;
+
+ /* sanity check */
+ if (rctx->tag.cpu != smp_processor_id()) {
+ pr_err("mcryptd error: cpu clash\n");
+ goto done;
+ }
+
+ /* need to init context */
+ req_ctx_init(rctx, desc);
+
+ rctx->out = out;
+ rctx->flag |= HASH_DONE | HASH_FINAL;
+
+ sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(desc);
+ /* flag HASH_FINAL and 0 data size */
+ sha1_mb_add_list(rctx, cstate);
+ kernel_fpu_begin();
+ sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0, HASH_LAST);
+ kernel_fpu_end();
+
+ /* check if anything is returned */
+ if (!sha_ctx)
+ return -EINPROGRESS;
+
+ if (sha_ctx->error) {
+ ret = sha_ctx->error;
+ rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ goto done;
+ }
+
+ rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ ret = sha_finish_walk(&rctx, cstate, false);
+ if (!rctx)
+ return -EINPROGRESS;
+done:
+ sha_complete_job(rctx, cstate, ret);
+ return ret;
+}
+
+static int sha1_mb_export(struct shash_desc *desc, void *out)
+{
+ struct sha1_hash_ctx *sctx = shash_desc_ctx(desc);
+
+ memcpy(out, sctx, sizeof(*sctx));
+
+ return 0;
+}
+
+static int sha1_mb_import(struct shash_desc *desc, const void *in)
+{
+ struct sha1_hash_ctx *sctx = shash_desc_ctx(desc);
+
+ memcpy(sctx, in, sizeof(*sctx));
+
+ return 0;
+}
+
+
+static struct shash_alg sha1_mb_shash_alg = {
+ .digestsize = SHA1_DIGEST_SIZE,
+ .init = sha1_mb_init,
+ .update = sha1_mb_update,
+ .final = sha1_mb_final,
+ .finup = sha1_mb_finup,
+ .export = sha1_mb_export,
+ .import = sha1_mb_import,
+ .descsize = sizeof(struct sha1_hash_ctx),
+ .statesize = sizeof(struct sha1_hash_ctx),
+ .base = {
+ .cra_name = "__sha1-mb",
+ .cra_driver_name = "__intel_sha1-mb",
+ .cra_priority = 100,
+ /*
+ * use ASYNC flag as some buffers in multi-buffer
+ * algo may not have completed before hashing thread sleep
+ */
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = SHA1_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(sha1_mb_shash_alg.base.cra_list),
+ }
+};
+
+static int sha1_mb_async_init(struct ahash_request *req)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct ahash_request *mcryptd_req = ahash_request_ctx(req);
+ struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
+
+ memcpy(mcryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
+ return crypto_ahash_init(mcryptd_req);
+}
+
+static int sha1_mb_async_update(struct ahash_request *req)
+{
+ struct ahash_request *mcryptd_req = ahash_request_ctx(req);
+
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
+
+ memcpy(mcryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
+ return crypto_ahash_update(mcryptd_req);
+}
+
+static int sha1_mb_async_finup(struct ahash_request *req)
+{
+ struct ahash_request *mcryptd_req = ahash_request_ctx(req);
+
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
+
+ memcpy(mcryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
+ return crypto_ahash_finup(mcryptd_req);
+}
+
+static int sha1_mb_async_final(struct ahash_request *req)
+{
+ struct ahash_request *mcryptd_req = ahash_request_ctx(req);
+
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
+
+ memcpy(mcryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
+ return crypto_ahash_final(mcryptd_req);
+}
+
+static int sha1_mb_async_digest(struct ahash_request *req)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct ahash_request *mcryptd_req = ahash_request_ctx(req);
+ struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
+
+ memcpy(mcryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
+ return crypto_ahash_digest(mcryptd_req);
+}
+
+static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm)
+{
+ struct mcryptd_ahash *mcryptd_tfm;
+ struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct mcryptd_hash_ctx *mctx;
+
+ mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb", 0, 0);
+ if (IS_ERR(mcryptd_tfm))
+ return PTR_ERR(mcryptd_tfm);
+ mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
+ mctx->alg_state = &sha1_mb_alg_state;
+ ctx->mcryptd_tfm = mcryptd_tfm;
+ crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+ sizeof(struct ahash_request) +
+ crypto_ahash_reqsize(&mcryptd_tfm->base));
+
+ return 0;
+}
+
+static void sha1_mb_async_exit_tfm(struct crypto_tfm *tfm)
+{
+ struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ mcryptd_free_ahash(ctx->mcryptd_tfm);
+}
+
+static struct ahash_alg sha1_mb_async_alg = {
+ .init = sha1_mb_async_init,
+ .update = sha1_mb_async_update,
+ .final = sha1_mb_async_final,
+ .finup = sha1_mb_async_finup,
+ .digest = sha1_mb_async_digest,
+ .halg = {
+ .digestsize = SHA1_DIGEST_SIZE,
+ .base = {
+ .cra_name = "sha1",
+ .cra_driver_name = "sha1_mb",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = SHA1_BLOCK_SIZE,
+ .cra_type = &crypto_ahash_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(sha1_mb_async_alg.halg.base.cra_list),
+ .cra_init = sha1_mb_async_init_tfm,
+ .cra_exit = sha1_mb_async_exit_tfm,
+ .cra_ctxsize = sizeof(struct sha1_mb_ctx),
+ .cra_alignmask = 0,
+ },
+ },
+};
+
+static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate)
+{
+ struct mcryptd_hash_request_ctx *rctx;
+ unsigned long cur_time;
+ unsigned long next_flush = 0;
+ struct sha1_hash_ctx *sha_ctx;
+
+
+ cur_time = jiffies;
+
+ while (!list_empty(&cstate->work_list)) {
+ rctx = list_entry(cstate->work_list.next,
+ struct mcryptd_hash_request_ctx, waiter);
+ if time_before(cur_time, rctx->tag.expire)
+ break;
+ kernel_fpu_begin();
+ sha_ctx = (struct sha1_hash_ctx *) sha1_ctx_mgr_flush(cstate->mgr);
+ kernel_fpu_end();
+ if (!sha_ctx) {
+ pr_err("sha1_mb error: nothing got flushed for non-empty list\n");
+ break;
+ }
+ rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
+ sha_finish_walk(&rctx, cstate, true);
+ sha_complete_job(rctx, cstate, 0);
+ }
+
+ if (!list_empty(&cstate->work_list)) {
+ rctx = list_entry(cstate->work_list.next,
+ struct mcryptd_hash_request_ctx, waiter);
+ /* get the hash context and then flush time */
+ next_flush = rctx->tag.expire;
+ mcryptd_arm_flusher(cstate, get_delay(next_flush));
+ }
+ return next_flush;
+}
+
+static int __init sha1_mb_mod_init(void)
+{
+
+ int cpu;
+ int err;
+ struct mcryptd_alg_cstate *cpu_state;
+
+ /* check for dependent cpu features */
+ if (!boot_cpu_has(X86_FEATURE_AVX2) ||
+ !boot_cpu_has(X86_FEATURE_BMI2))
+ return -ENODEV;
+
+ /* initialize multibuffer structures */
+ sha1_mb_alg_state.alg_cstate = alloc_percpu(struct mcryptd_alg_cstate);
+
+ sha1_job_mgr_init = sha1_mb_mgr_init_avx2;
+ sha1_job_mgr_submit = sha1_mb_mgr_submit_avx2;
+ sha1_job_mgr_flush = sha1_mb_mgr_flush_avx2;
+ sha1_job_mgr_get_comp_job = sha1_mb_mgr_get_comp_job_avx2;
+
+ if (!sha1_mb_alg_state.alg_cstate)
+ return -ENOMEM;
+ for_each_possible_cpu(cpu) {
+ cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
+ cpu_state->next_flush = 0;
+ cpu_state->next_seq_num = 0;
+ cpu_state->flusher_engaged = false;
+ INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
+ cpu_state->cpu = cpu;
+ cpu_state->alg_state = &sha1_mb_alg_state;
+ cpu_state->mgr = (struct sha1_ctx_mgr *) kzalloc(sizeof(struct sha1_ctx_mgr), GFP_KERNEL);
+ if (!cpu_state->mgr)
+ goto err2;
+ sha1_ctx_mgr_init(cpu_state->mgr);
+ INIT_LIST_HEAD(&cpu_state->work_list);
+ spin_lock_init(&cpu_state->work_lock);
+ }
+ sha1_mb_alg_state.flusher = &sha1_mb_flusher;
+
+ err = crypto_register_shash(&sha1_mb_shash_alg);
+ if (err)
+ goto err2;
+ err = crypto_register_ahash(&sha1_mb_async_alg);
+ if (err)
+ goto err1;
+
+
+ return 0;
+err1:
+ crypto_unregister_shash(&sha1_mb_shash_alg);
+err2:
+ for_each_possible_cpu(cpu) {
+ cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
+ kfree(cpu_state->mgr);
+ }
+ free_percpu(sha1_mb_alg_state.alg_cstate);
+ return -ENODEV;
+}
+
+static void __exit sha1_mb_mod_fini(void)
+{
+ int cpu;
+ struct mcryptd_alg_cstate *cpu_state;
+
+ crypto_unregister_ahash(&sha1_mb_async_alg);
+ crypto_unregister_shash(&sha1_mb_shash_alg);
+ for_each_possible_cpu(cpu) {
+ cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu);
+ kfree(cpu_state->mgr);
+ }
+ free_percpu(sha1_mb_alg_state.alg_cstate);
+}
+
+module_init(sha1_mb_mod_init);
+module_exit(sha1_mb_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, multi buffer accelerated");
+
+MODULE_ALIAS("sha1");
diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_datastruct.S b/arch/x86/crypto/sha-mb/sha1_mb_mgr_datastruct.S
new file mode 100644
index 000000000000..86688c6e7a25
--- /dev/null
+++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_datastruct.S
@@ -0,0 +1,287 @@
+/*
+ * Header file for multi buffer SHA1 algorithm data structure
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * James Guilford <james.guilford@intel.com>
+ * Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+# Macros for defining data structures
+
+# Usage example
+
+#START_FIELDS # JOB_AES
+### name size align
+#FIELD _plaintext, 8, 8 # pointer to plaintext
+#FIELD _ciphertext, 8, 8 # pointer to ciphertext
+#FIELD _IV, 16, 8 # IV
+#FIELD _keys, 8, 8 # pointer to keys
+#FIELD _len, 4, 4 # length in bytes
+#FIELD _status, 4, 4 # status enumeration
+#FIELD _user_data, 8, 8 # pointer to user data
+#UNION _union, size1, align1, \
+# size2, align2, \
+# size3, align3, \
+# ...
+#END_FIELDS
+#%assign _JOB_AES_size _FIELD_OFFSET
+#%assign _JOB_AES_align _STRUCT_ALIGN
+
+#########################################################################
+
+# Alternate "struc-like" syntax:
+# STRUCT job_aes2
+# RES_Q .plaintext, 1
+# RES_Q .ciphertext, 1
+# RES_DQ .IV, 1
+# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN
+# RES_U .union, size1, align1, \
+# size2, align2, \
+# ...
+# ENDSTRUCT
+# # Following only needed if nesting
+# %assign job_aes2_size _FIELD_OFFSET
+# %assign job_aes2_align _STRUCT_ALIGN
+#
+# RES_* macros take a name, a count and an optional alignment.
+# The count in in terms of the base size of the macro, and the
+# default alignment is the base size.
+# The macros are:
+# Macro Base size
+# RES_B 1
+# RES_W 2
+# RES_D 4
+# RES_Q 8
+# RES_DQ 16
+# RES_Y 32
+# RES_Z 64
+#
+# RES_U defines a union. It's arguments are a name and two or more
+# pairs of "size, alignment"
+#
+# The two assigns are only needed if this structure is being nested
+# within another. Even if the assigns are not done, one can still use
+# STRUCT_NAME_size as the size of the structure.
+#
+# Note that for nesting, you still need to assign to STRUCT_NAME_size.
+#
+# The differences between this and using "struc" directly are that each
+# type is implicitly aligned to its natural length (although this can be
+# over-ridden with an explicit third parameter), and that the structure
+# is padded at the end to its overall alignment.
+#
+
+#########################################################################
+
+#ifndef _SHA1_MB_MGR_DATASTRUCT_ASM_
+#define _SHA1_MB_MGR_DATASTRUCT_ASM_
+
+## START_FIELDS
+.macro START_FIELDS
+ _FIELD_OFFSET = 0
+ _STRUCT_ALIGN = 0
+.endm
+
+## FIELD name size align
+.macro FIELD name size align
+ _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
+ \name = _FIELD_OFFSET
+ _FIELD_OFFSET = _FIELD_OFFSET + (\size)
+.if (\align > _STRUCT_ALIGN)
+ _STRUCT_ALIGN = \align
+.endif
+.endm
+
+## END_FIELDS
+.macro END_FIELDS
+ _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
+.endm
+
+########################################################################
+
+.macro STRUCT p1
+START_FIELDS
+.struc \p1
+.endm
+
+.macro ENDSTRUCT
+ tmp = _FIELD_OFFSET
+ END_FIELDS
+ tmp = (_FIELD_OFFSET - %%tmp)
+.if (tmp > 0)
+ .lcomm tmp
+.endif
+.endstruc
+.endm
+
+## RES_int name size align
+.macro RES_int p1 p2 p3
+ name = \p1
+ size = \p2
+ align = .\p3
+
+ _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
+.align align
+.lcomm name size
+ _FIELD_OFFSET = _FIELD_OFFSET + (size)
+.if (align > _STRUCT_ALIGN)
+ _STRUCT_ALIGN = align
+.endif
+.endm
+
+
+
+# macro RES_B name, size [, align]
+.macro RES_B _name, _size, _align=1
+RES_int _name _size _align
+.endm
+
+# macro RES_W name, size [, align]
+.macro RES_W _name, _size, _align=2
+RES_int _name 2*(_size) _align
+.endm
+
+# macro RES_D name, size [, align]
+.macro RES_D _name, _size, _align=4
+RES_int _name 4*(_size) _align
+.endm
+
+# macro RES_Q name, size [, align]
+.macro RES_Q _name, _size, _align=8
+RES_int _name 8*(_size) _align
+.endm
+
+# macro RES_DQ name, size [, align]
+.macro RES_DQ _name, _size, _align=16
+RES_int _name 16*(_size) _align
+.endm
+
+# macro RES_Y name, size [, align]
+.macro RES_Y _name, _size, _align=32
+RES_int _name 32*(_size) _align
+.endm
+
+# macro RES_Z name, size [, align]
+.macro RES_Z _name, _size, _align=64
+RES_int _name 64*(_size) _align
+.endm
+
+
+#endif
+
+########################################################################
+#### Define constants
+########################################################################
+
+########################################################################
+#### Define SHA1 Out Of Order Data Structures
+########################################################################
+
+START_FIELDS # LANE_DATA
+### name size align
+FIELD _job_in_lane, 8, 8 # pointer to job object
+END_FIELDS
+
+_LANE_DATA_size = _FIELD_OFFSET
+_LANE_DATA_align = _STRUCT_ALIGN
+
+########################################################################
+
+START_FIELDS # SHA1_ARGS_X8
+### name size align
+FIELD _digest, 4*5*8, 16 # transposed digest
+FIELD _data_ptr, 8*8, 8 # array of pointers to data
+END_FIELDS
+
+_SHA1_ARGS_X4_size = _FIELD_OFFSET
+_SHA1_ARGS_X4_align = _STRUCT_ALIGN
+_SHA1_ARGS_X8_size = _FIELD_OFFSET
+_SHA1_ARGS_X8_align = _STRUCT_ALIGN
+
+########################################################################
+
+START_FIELDS # MB_MGR
+### name size align
+FIELD _args, _SHA1_ARGS_X4_size, _SHA1_ARGS_X4_align
+FIELD _lens, 4*8, 8
+FIELD _unused_lanes, 8, 8
+FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align
+END_FIELDS
+
+_MB_MGR_size = _FIELD_OFFSET
+_MB_MGR_align = _STRUCT_ALIGN
+
+_args_digest = _args + _digest
+_args_data_ptr = _args + _data_ptr
+
+
+########################################################################
+#### Define constants
+########################################################################
+
+#define STS_UNKNOWN 0
+#define STS_BEING_PROCESSED 1
+#define STS_COMPLETED 2
+
+########################################################################
+#### Define JOB_SHA1 structure
+########################################################################
+
+START_FIELDS # JOB_SHA1
+
+### name size align
+FIELD _buffer, 8, 8 # pointer to buffer
+FIELD _len, 4, 4 # length in bytes
+FIELD _result_digest, 5*4, 32 # Digest (output)
+FIELD _status, 4, 4
+FIELD _user_data, 8, 8
+END_FIELDS
+
+_JOB_SHA1_size = _FIELD_OFFSET
+_JOB_SHA1_align = _STRUCT_ALIGN
diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S
new file mode 100644
index 000000000000..85c4e1cf7172
--- /dev/null
+++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S
@@ -0,0 +1,327 @@
+/*
+ * Flush routine for SHA1 multibuffer
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * James Guilford <james.guilford@intel.com>
+ * Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <linux/linkage.h>
+#include "sha1_mb_mgr_datastruct.S"
+
+
+.extern sha1_x8_avx2
+
+# LINUX register definitions
+#define arg1 %rdi
+#define arg2 %rsi
+
+# Common definitions
+#define state arg1
+#define job arg2
+#define len2 arg2
+
+# idx must be a register not clobbered by sha1_x8_avx2
+#define idx %r8
+#define DWORD_idx %r8d
+
+#define unused_lanes %rbx
+#define lane_data %rbx
+#define tmp2 %rbx
+#define tmp2_w %ebx
+
+#define job_rax %rax
+#define tmp1 %rax
+#define size_offset %rax
+#define tmp %rax
+#define start_offset %rax
+
+#define tmp3 %arg1
+
+#define extra_blocks %arg2
+#define p %arg2
+
+
+# STACK_SPACE needs to be an odd multiple of 8
+_XMM_SAVE_SIZE = 10*16
+_GPR_SAVE_SIZE = 8*8
+_ALIGN_SIZE = 8
+
+_XMM_SAVE = 0
+_GPR_SAVE = _XMM_SAVE + _XMM_SAVE_SIZE
+STACK_SPACE = _GPR_SAVE + _GPR_SAVE_SIZE + _ALIGN_SIZE
+
+.macro LABEL prefix n
+\prefix\n\():
+.endm
+
+.macro JNE_SKIP i
+jne skip_\i
+.endm
+
+.altmacro
+.macro SET_OFFSET _offset
+offset = \_offset
+.endm
+.noaltmacro
+
+# JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state)
+# arg 1 : rcx : state
+ENTRY(sha1_mb_mgr_flush_avx2)
+ mov %rsp, %r10
+ sub $STACK_SPACE, %rsp
+ and $~31, %rsp
+ mov %rbx, _GPR_SAVE(%rsp)
+ mov %r10, _GPR_SAVE+8*1(%rsp) #save rsp
+ mov %rbp, _GPR_SAVE+8*3(%rsp)
+ mov %r12, _GPR_SAVE+8*4(%rsp)
+ mov %r13, _GPR_SAVE+8*5(%rsp)
+ mov %r14, _GPR_SAVE+8*6(%rsp)
+ mov %r15, _GPR_SAVE+8*7(%rsp)
+
+ # If bit (32+3) is set, then all lanes are empty
+ mov _unused_lanes(state), unused_lanes
+ bt $32+3, unused_lanes
+ jc return_null
+
+ # find a lane with a non-null job
+ xor idx, idx
+ offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane)
+ cmpq $0, offset(state)
+ cmovne one(%rip), idx
+ offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane)
+ cmpq $0, offset(state)
+ cmovne two(%rip), idx
+ offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane)
+ cmpq $0, offset(state)
+ cmovne three(%rip), idx
+ offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane)
+ cmpq $0, offset(state)
+ cmovne four(%rip), idx
+ offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane)
+ cmpq $0, offset(state)
+ cmovne five(%rip), idx
+ offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane)
+ cmpq $0, offset(state)
+ cmovne six(%rip), idx
+ offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane)
+ cmpq $0, offset(state)
+ cmovne seven(%rip), idx
+
+ # copy idx to empty lanes
+copy_lane_data:
+ offset = (_args + _data_ptr)
+ mov offset(state,idx,8), tmp
+
+ I = 0
+.rep 8
+ offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
+ cmpq $0, offset(state)
+.altmacro
+ JNE_SKIP %I
+ offset = (_args + _data_ptr + 8*I)
+ mov tmp, offset(state)
+ offset = (_lens + 4*I)
+ movl $0xFFFFFFFF, offset(state)
+LABEL skip_ %I
+ I = (I+1)
+.noaltmacro
+.endr
+
+ # Find min length
+ vmovdqa _lens+0*16(state), %xmm0
+ vmovdqa _lens+1*16(state), %xmm1
+
+ vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
+ vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
+ vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
+ vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
+ vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword
+
+ vmovd %xmm2, DWORD_idx
+ mov idx, len2
+ and $0xF, idx
+ shr $4, len2
+ jz len_is_0
+
+ vpand clear_low_nibble(%rip), %xmm2, %xmm2
+ vpshufd $0, %xmm2, %xmm2
+
+ vpsubd %xmm2, %xmm0, %xmm0
+ vpsubd %xmm2, %xmm1, %xmm1
+
+ vmovdqa %xmm0, _lens+0*16(state)
+ vmovdqa %xmm1, _lens+1*16(state)
+
+ # "state" and "args" are the same address, arg1
+ # len is arg2
+ call sha1_x8_avx2
+ # state and idx are intact
+
+
+len_is_0:
+ # process completed job "idx"
+ imul $_LANE_DATA_size, idx, lane_data
+ lea _ldata(state, lane_data), lane_data
+
+ mov _job_in_lane(lane_data), job_rax
+ movq $0, _job_in_lane(lane_data)
+ movl $STS_COMPLETED, _status(job_rax)
+ mov _unused_lanes(state), unused_lanes
+ shl $4, unused_lanes
+ or idx, unused_lanes
+ mov unused_lanes, _unused_lanes(state)
+
+ movl $0xFFFFFFFF, _lens(state, idx, 4)
+
+ vmovd _args_digest(state , idx, 4) , %xmm0
+ vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
+ vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
+ vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
+ movl _args_digest+4*32(state, idx, 4), tmp2_w
+
+ vmovdqu %xmm0, _result_digest(job_rax)
+ offset = (_result_digest + 1*16)
+ mov tmp2_w, offset(job_rax)
+
+return:
+
+ mov _GPR_SAVE(%rsp), %rbx
+ mov _GPR_SAVE+8*1(%rsp), %r10 #saved rsp
+ mov _GPR_SAVE+8*3(%rsp), %rbp
+ mov _GPR_SAVE+8*4(%rsp), %r12
+ mov _GPR_SAVE+8*5(%rsp), %r13
+ mov _GPR_SAVE+8*6(%rsp), %r14
+ mov _GPR_SAVE+8*7(%rsp), %r15
+ mov %r10, %rsp
+
+ ret
+
+return_null:
+ xor job_rax, job_rax
+ jmp return
+ENDPROC(sha1_mb_mgr_flush_avx2)
+
+
+#################################################################
+
+.align 16
+ENTRY(sha1_mb_mgr_get_comp_job_avx2)
+ push %rbx
+
+ ## if bit 32+3 is set, then all lanes are empty
+ mov _unused_lanes(state), unused_lanes
+ bt $(32+3), unused_lanes
+ jc .return_null
+
+ # Find min length
+ vmovdqa _lens(state), %xmm0
+ vmovdqa _lens+1*16(state), %xmm1
+
+ vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
+ vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
+ vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
+ vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
+ vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword
+
+ vmovd %xmm2, DWORD_idx
+ test $~0xF, idx
+ jnz .return_null
+
+ # process completed job "idx"
+ imul $_LANE_DATA_size, idx, lane_data
+ lea _ldata(state, lane_data), lane_data
+
+ mov _job_in_lane(lane_data), job_rax
+ movq $0, _job_in_lane(lane_data)
+ movl $STS_COMPLETED, _status(job_rax)
+ mov _unused_lanes(state), unused_lanes
+ shl $4, unused_lanes
+ or idx, unused_lanes
+ mov unused_lanes, _unused_lanes(state)
+
+ movl $0xFFFFFFFF, _lens(state, idx, 4)
+
+ vmovd _args_digest(state, idx, 4), %xmm0
+ vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
+ vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
+ vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
+ movl _args_digest+4*32(state, idx, 4), tmp2_w
+
+ vmovdqu %xmm0, _result_digest(job_rax)
+ movl tmp2_w, _result_digest+1*16(job_rax)
+
+ pop %rbx
+
+ ret
+
+.return_null:
+ xor job_rax, job_rax
+ pop %rbx
+ ret
+ENDPROC(sha1_mb_mgr_get_comp_job_avx2)
+
+.data
+
+.align 16
+clear_low_nibble:
+.octa 0x000000000000000000000000FFFFFFF0
+one:
+.quad 1
+two:
+.quad 2
+three:
+.quad 3
+four:
+.quad 4
+five:
+.quad 5
+six:
+.quad 6
+seven:
+.quad 7
diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c b/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c
new file mode 100644
index 000000000000..4ca7e166a2aa
--- /dev/null
+++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c
@@ -0,0 +1,64 @@
+/*
+ * Initialization code for multi buffer SHA1 algorithm for AVX2
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "sha_mb_mgr.h"
+
+void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state)
+{
+ unsigned int j;
+ state->unused_lanes = 0xF76543210;
+ for (j = 0; j < 8; j++) {
+ state->lens[j] = 0xFFFFFFFF;
+ state->ldata[j].job_in_lane = NULL;
+ }
+}
diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S
new file mode 100644
index 000000000000..2ab9560b53c8
--- /dev/null
+++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S
@@ -0,0 +1,228 @@
+/*
+ * Buffer submit code for multi buffer SHA1 algorithm
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * James Guilford <james.guilford@intel.com>
+ * Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/linkage.h>
+#include "sha1_mb_mgr_datastruct.S"
+
+
+.extern sha1_x8_avx
+
+# LINUX register definitions
+arg1 = %rdi
+arg2 = %rsi
+size_offset = %rcx
+tmp2 = %rcx
+extra_blocks = %rdx
+
+# Common definitions
+#define state arg1
+#define job %rsi
+#define len2 arg2
+#define p2 arg2
+
+# idx must be a register not clobberred by sha1_x8_avx2
+idx = %r8
+DWORD_idx = %r8d
+last_len = %r8
+
+p = %r11
+start_offset = %r11
+
+unused_lanes = %rbx
+BYTE_unused_lanes = %bl
+
+job_rax = %rax
+len = %rax
+DWORD_len = %eax
+
+lane = %rbp
+tmp3 = %rbp
+
+tmp = %r9
+DWORD_tmp = %r9d
+
+lane_data = %r10
+
+# STACK_SPACE needs to be an odd multiple of 8
+STACK_SPACE = 8*8 + 16*10 + 8
+
+# JOB* submit_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job)
+# arg 1 : rcx : state
+# arg 2 : rdx : job
+ENTRY(sha1_mb_mgr_submit_avx2)
+
+ mov %rsp, %r10
+ sub $STACK_SPACE, %rsp
+ and $~31, %rsp
+
+ mov %rbx, (%rsp)
+ mov %r10, 8*2(%rsp) #save old rsp
+ mov %rbp, 8*3(%rsp)
+ mov %r12, 8*4(%rsp)
+ mov %r13, 8*5(%rsp)
+ mov %r14, 8*6(%rsp)
+ mov %r15, 8*7(%rsp)
+
+ mov _unused_lanes(state), unused_lanes
+ mov unused_lanes, lane
+ and $0xF, lane
+ shr $4, unused_lanes
+ imul $_LANE_DATA_size, lane, lane_data
+ movl $STS_BEING_PROCESSED, _status(job)
+ lea _ldata(state, lane_data), lane_data
+ mov unused_lanes, _unused_lanes(state)
+ movl _len(job), DWORD_len
+
+ mov job, _job_in_lane(lane_data)
+ shl $4, len
+ or lane, len
+
+ movl DWORD_len, _lens(state , lane, 4)
+
+ # Load digest words from result_digest
+ vmovdqu _result_digest(job), %xmm0
+ mov _result_digest+1*16(job), DWORD_tmp
+ vmovd %xmm0, _args_digest(state, lane, 4)
+ vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4)
+ vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4)
+ vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4)
+ movl DWORD_tmp, _args_digest+4*32(state , lane, 4)
+
+ mov _buffer(job), p
+ mov p, _args_data_ptr(state, lane, 8)
+
+ cmp $0xF, unused_lanes
+ jne return_null
+
+start_loop:
+ # Find min length
+ vmovdqa _lens(state), %xmm0
+ vmovdqa _lens+1*16(state), %xmm1
+
+ vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
+ vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
+ vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
+ vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
+ vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword
+
+ vmovd %xmm2, DWORD_idx
+ mov idx, len2
+ and $0xF, idx
+ shr $4, len2
+ jz len_is_0
+
+ vpand clear_low_nibble(%rip), %xmm2, %xmm2
+ vpshufd $0, %xmm2, %xmm2
+
+ vpsubd %xmm2, %xmm0, %xmm0
+ vpsubd %xmm2, %xmm1, %xmm1
+
+ vmovdqa %xmm0, _lens + 0*16(state)
+ vmovdqa %xmm1, _lens + 1*16(state)
+
+
+ # "state" and "args" are the same address, arg1
+ # len is arg2
+ call sha1_x8_avx2
+
+ # state and idx are intact
+
+len_is_0:
+ # process completed job "idx"
+ imul $_LANE_DATA_size, idx, lane_data
+ lea _ldata(state, lane_data), lane_data
+
+ mov _job_in_lane(lane_data), job_rax
+ mov _unused_lanes(state), unused_lanes
+ movq $0, _job_in_lane(lane_data)
+ movl $STS_COMPLETED, _status(job_rax)
+ shl $4, unused_lanes
+ or idx, unused_lanes
+ mov unused_lanes, _unused_lanes(state)
+
+ movl $0xFFFFFFFF, _lens(state, idx, 4)
+
+ vmovd _args_digest(state, idx, 4), %xmm0
+ vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
+ vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
+ vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
+ movl 4*32(state, idx, 4), DWORD_tmp
+
+ vmovdqu %xmm0, _result_digest(job_rax)
+ movl DWORD_tmp, _result_digest+1*16(job_rax)
+
+return:
+
+ mov (%rsp), %rbx
+ mov 8*2(%rsp), %r10 #save old rsp
+ mov 8*3(%rsp), %rbp
+ mov 8*4(%rsp), %r12
+ mov 8*5(%rsp), %r13
+ mov 8*6(%rsp), %r14
+ mov 8*7(%rsp), %r15
+ mov %r10, %rsp
+
+ ret
+
+return_null:
+ xor job_rax, job_rax
+ jmp return
+
+ENDPROC(sha1_mb_mgr_submit_avx2)
+
+.data
+
+.align 16
+clear_low_nibble:
+ .octa 0x000000000000000000000000FFFFFFF0
diff --git a/arch/x86/crypto/sha-mb/sha1_x8_avx2.S b/arch/x86/crypto/sha-mb/sha1_x8_avx2.S
new file mode 100644
index 000000000000..8e1b47792b31
--- /dev/null
+++ b/arch/x86/crypto/sha-mb/sha1_x8_avx2.S
@@ -0,0 +1,472 @@
+/*
+ * Multi-buffer SHA1 algorithm hash compute routine
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * James Guilford <james.guilford@intel.com>
+ * Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/linkage.h>
+#include "sha1_mb_mgr_datastruct.S"
+
+## code to compute oct SHA1 using SSE-256
+## outer calling routine takes care of save and restore of XMM registers
+
+## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15# ymm0-15
+##
+## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15
+## Linux preserves: rdi rbp r8
+##
+## clobbers ymm0-15
+
+
+# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
+# "transpose" data in {r0...r7} using temps {t0...t1}
+# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
+# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
+# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
+# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
+# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
+# r4 = {e7 e6 e5 e4 e3 e2 e1 e0}
+# r5 = {f7 f6 f5 f4 f3 f2 f1 f0}
+# r6 = {g7 g6 g5 g4 g3 g2 g1 g0}
+# r7 = {h7 h6 h5 h4 h3 h2 h1 h0}
+#
+# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
+# r0 = {h0 g0 f0 e0 d0 c0 b0 a0}
+# r1 = {h1 g1 f1 e1 d1 c1 b1 a1}
+# r2 = {h2 g2 f2 e2 d2 c2 b2 a2}
+# r3 = {h3 g3 f3 e3 d3 c3 b3 a3}
+# r4 = {h4 g4 f4 e4 d4 c4 b4 a4}
+# r5 = {h5 g5 f5 e5 d5 c5 b5 a5}
+# r6 = {h6 g6 f6 e6 d6 c6 b6 a6}
+# r7 = {h7 g7 f7 e7 d7 c7 b7 a7}
+#
+
+.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1
+ # process top half (r0..r3) {a...d}
+ vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0}
+ vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2}
+ vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0}
+ vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2}
+ vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1}
+ vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2}
+ vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3}
+ vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0}
+
+ # use r2 in place of t0
+ # process bottom half (r4..r7) {e...h}
+ vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0}
+ vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2}
+ vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0}
+ vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2}
+ vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1}
+ vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2}
+ vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3}
+ vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0}
+
+ vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6
+ vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2
+ vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5
+ vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1
+ vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7
+ vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3
+ vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4
+ vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0
+
+.endm
+##
+## Magic functions defined in FIPS 180-1
+##
+# macro MAGIC_F0 F,B,C,D,T ## F = (D ^ (B & (C ^ D)))
+.macro MAGIC_F0 regF regB regC regD regT
+ vpxor \regD, \regC, \regF
+ vpand \regB, \regF, \regF
+ vpxor \regD, \regF, \regF
+.endm
+
+# macro MAGIC_F1 F,B,C,D,T ## F = (B ^ C ^ D)
+.macro MAGIC_F1 regF regB regC regD regT
+ vpxor \regC, \regD, \regF
+ vpxor \regB, \regF, \regF
+.endm
+
+# macro MAGIC_F2 F,B,C,D,T ## F = ((B & C) | (B & D) | (C & D))
+.macro MAGIC_F2 regF regB regC regD regT
+ vpor \regC, \regB, \regF
+ vpand \regC, \regB, \regT
+ vpand \regD, \regF, \regF
+ vpor \regT, \regF, \regF
+.endm
+
+# macro MAGIC_F3 F,B,C,D,T ## F = (B ^ C ^ D)
+.macro MAGIC_F3 regF regB regC regD regT
+ MAGIC_F1 \regF,\regB,\regC,\regD,\regT
+.endm
+
+# PROLD reg, imm, tmp
+.macro PROLD reg imm tmp
+ vpsrld $(32-\imm), \reg, \tmp
+ vpslld $\imm, \reg, \reg
+ vpor \tmp, \reg, \reg
+.endm
+
+.macro PROLD_nd reg imm tmp src
+ vpsrld $(32-\imm), \src, \tmp
+ vpslld $\imm, \src, \reg
+ vpor \tmp, \reg, \reg
+.endm
+
+.macro SHA1_STEP_00_15 regA regB regC regD regE regT regF memW immCNT MAGIC
+ vpaddd \immCNT, \regE, \regE
+ vpaddd \memW*32(%rsp), \regE, \regE
+ PROLD_nd \regT, 5, \regF, \regA
+ vpaddd \regT, \regE, \regE
+ \MAGIC \regF, \regB, \regC, \regD, \regT
+ PROLD \regB, 30, \regT
+ vpaddd \regF, \regE, \regE
+.endm
+
+.macro SHA1_STEP_16_79 regA regB regC regD regE regT regF memW immCNT MAGIC
+ vpaddd \immCNT, \regE, \regE
+ offset = ((\memW - 14) & 15) * 32
+ vmovdqu offset(%rsp), W14
+ vpxor W14, W16, W16
+ offset = ((\memW - 8) & 15) * 32
+ vpxor offset(%rsp), W16, W16
+ offset = ((\memW - 3) & 15) * 32
+ vpxor offset(%rsp), W16, W16
+ vpsrld $(32-1), W16, \regF
+ vpslld $1, W16, W16
+ vpor W16, \regF, \regF
+
+ ROTATE_W
+
+ offset = ((\memW - 0) & 15) * 32
+ vmovdqu \regF, offset(%rsp)
+ vpaddd \regF, \regE, \regE
+ PROLD_nd \regT, 5, \regF, \regA
+ vpaddd \regT, \regE, \regE
+ \MAGIC \regF,\regB,\regC,\regD,\regT ## FUN = MAGIC_Fi(B,C,D)
+ PROLD \regB,30, \regT
+ vpaddd \regF, \regE, \regE
+.endm
+
+########################################################################
+########################################################################
+########################################################################
+
+## FRAMESZ plus pushes must be an odd multiple of 8
+YMM_SAVE = (15-15)*32
+FRAMESZ = 32*16 + YMM_SAVE
+_YMM = FRAMESZ - YMM_SAVE
+
+#define VMOVPS vmovups
+
+IDX = %rax
+inp0 = %r9
+inp1 = %r10
+inp2 = %r11
+inp3 = %r12
+inp4 = %r13
+inp5 = %r14
+inp6 = %r15
+inp7 = %rcx
+arg1 = %rdi
+arg2 = %rsi
+RSP_SAVE = %rdx
+
+# ymm0 A
+# ymm1 B
+# ymm2 C
+# ymm3 D
+# ymm4 E
+# ymm5 F AA
+# ymm6 T0 BB
+# ymm7 T1 CC
+# ymm8 T2 DD
+# ymm9 T3 EE
+# ymm10 T4 TMP
+# ymm11 T5 FUN
+# ymm12 T6 K
+# ymm13 T7 W14
+# ymm14 T8 W15
+# ymm15 T9 W16
+
+
+A = %ymm0
+B = %ymm1
+C = %ymm2
+D = %ymm3
+E = %ymm4
+F = %ymm5
+T0 = %ymm6
+T1 = %ymm7
+T2 = %ymm8
+T3 = %ymm9
+T4 = %ymm10
+T5 = %ymm11
+T6 = %ymm12
+T7 = %ymm13
+T8 = %ymm14
+T9 = %ymm15
+
+AA = %ymm5
+BB = %ymm6
+CC = %ymm7
+DD = %ymm8
+EE = %ymm9
+TMP = %ymm10
+FUN = %ymm11
+K = %ymm12
+W14 = %ymm13
+W15 = %ymm14
+W16 = %ymm15
+
+.macro ROTATE_ARGS
+ TMP_ = E
+ E = D
+ D = C
+ C = B
+ B = A
+ A = TMP_
+.endm
+
+.macro ROTATE_W
+TMP_ = W16
+W16 = W15
+W15 = W14
+W14 = TMP_
+.endm
+
+# 8 streams x 5 32bit words per digest x 4 bytes per word
+#define DIGEST_SIZE (8*5*4)
+
+.align 32
+
+# void sha1_x8_avx2(void **input_data, UINT128 *digest, UINT32 size)
+# arg 1 : pointer to array[4] of pointer to input data
+# arg 2 : size (in blocks) ;; assumed to be >= 1
+#
+ENTRY(sha1_x8_avx2)
+
+ push RSP_SAVE
+
+ #save rsp
+ mov %rsp, RSP_SAVE
+ sub $FRAMESZ, %rsp
+
+ #align rsp to 32 Bytes
+ and $~0x1F, %rsp
+
+ ## Initialize digests
+ vmovdqu 0*32(arg1), A
+ vmovdqu 1*32(arg1), B
+ vmovdqu 2*32(arg1), C
+ vmovdqu 3*32(arg1), D
+ vmovdqu 4*32(arg1), E
+
+ ## transpose input onto stack
+ mov _data_ptr+0*8(arg1),inp0
+ mov _data_ptr+1*8(arg1),inp1
+ mov _data_ptr+2*8(arg1),inp2
+ mov _data_ptr+3*8(arg1),inp3
+ mov _data_ptr+4*8(arg1),inp4
+ mov _data_ptr+5*8(arg1),inp5
+ mov _data_ptr+6*8(arg1),inp6
+ mov _data_ptr+7*8(arg1),inp7
+
+ xor IDX, IDX
+lloop:
+ vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), F
+ I=0
+.rep 2
+ VMOVPS (inp0, IDX), T0
+ VMOVPS (inp1, IDX), T1
+ VMOVPS (inp2, IDX), T2
+ VMOVPS (inp3, IDX), T3
+ VMOVPS (inp4, IDX), T4
+ VMOVPS (inp5, IDX), T5
+ VMOVPS (inp6, IDX), T6
+ VMOVPS (inp7, IDX), T7
+
+ TRANSPOSE8 T0, T1, T2, T3, T4, T5, T6, T7, T8, T9
+ vpshufb F, T0, T0
+ vmovdqu T0, (I*8)*32(%rsp)
+ vpshufb F, T1, T1
+ vmovdqu T1, (I*8+1)*32(%rsp)
+ vpshufb F, T2, T2
+ vmovdqu T2, (I*8+2)*32(%rsp)
+ vpshufb F, T3, T3
+ vmovdqu T3, (I*8+3)*32(%rsp)
+ vpshufb F, T4, T4
+ vmovdqu T4, (I*8+4)*32(%rsp)
+ vpshufb F, T5, T5
+ vmovdqu T5, (I*8+5)*32(%rsp)
+ vpshufb F, T6, T6
+ vmovdqu T6, (I*8+6)*32(%rsp)
+ vpshufb F, T7, T7
+ vmovdqu T7, (I*8+7)*32(%rsp)
+ add $32, IDX
+ I = (I+1)
+.endr
+ # save old digests
+ vmovdqu A,AA
+ vmovdqu B,BB
+ vmovdqu C,CC
+ vmovdqu D,DD
+ vmovdqu E,EE
+
+##
+## perform 0-79 steps
+##
+ vmovdqu K00_19(%rip), K
+## do rounds 0...15
+ I = 0
+.rep 16
+ SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0
+ ROTATE_ARGS
+ I = (I+1)
+.endr
+
+## do rounds 16...19
+ vmovdqu ((16 - 16) & 15) * 32 (%rsp), W16
+ vmovdqu ((16 - 15) & 15) * 32 (%rsp), W15
+.rep 4
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0
+ ROTATE_ARGS
+ I = (I+1)
+.endr
+
+## do rounds 20...39
+ vmovdqu K20_39(%rip), K
+.rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1
+ ROTATE_ARGS
+ I = (I+1)
+.endr
+
+## do rounds 40...59
+ vmovdqu K40_59(%rip), K
+.rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2
+ ROTATE_ARGS
+ I = (I+1)
+.endr
+
+## do rounds 60...79
+ vmovdqu K60_79(%rip), K
+.rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3
+ ROTATE_ARGS
+ I = (I+1)
+.endr
+
+ vpaddd AA,A,A
+ vpaddd BB,B,B
+ vpaddd CC,C,C
+ vpaddd DD,D,D
+ vpaddd EE,E,E
+
+ sub $1, arg2
+ jne lloop
+
+ # write out digests
+ vmovdqu A, 0*32(arg1)
+ vmovdqu B, 1*32(arg1)
+ vmovdqu C, 2*32(arg1)
+ vmovdqu D, 3*32(arg1)
+ vmovdqu E, 4*32(arg1)
+
+ # update input pointers
+ add IDX, inp0
+ add IDX, inp1
+ add IDX, inp2
+ add IDX, inp3
+ add IDX, inp4
+ add IDX, inp5
+ add IDX, inp6
+ add IDX, inp7
+ mov inp0, _data_ptr (arg1)
+ mov inp1, _data_ptr + 1*8(arg1)
+ mov inp2, _data_ptr + 2*8(arg1)
+ mov inp3, _data_ptr + 3*8(arg1)
+ mov inp4, _data_ptr + 4*8(arg1)
+ mov inp5, _data_ptr + 5*8(arg1)
+ mov inp6, _data_ptr + 6*8(arg1)
+ mov inp7, _data_ptr + 7*8(arg1)
+
+ ################
+ ## Postamble
+
+ mov RSP_SAVE, %rsp
+ pop RSP_SAVE
+
+ ret
+ENDPROC(sha1_x8_avx2)
+
+
+.data
+
+.align 32
+K00_19:
+.octa 0x5A8279995A8279995A8279995A827999
+.octa 0x5A8279995A8279995A8279995A827999
+K20_39:
+.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
+.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
+K40_59:
+.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
+.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
+K60_79:
+.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
+.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
+PSHUFFLE_BYTE_FLIP_MASK:
+.octa 0x0c0d0e0f08090a0b0405060700010203
+.octa 0x0c0d0e0f08090a0b0405060700010203
diff --git a/arch/x86/crypto/sha-mb/sha_mb_ctx.h b/arch/x86/crypto/sha-mb/sha_mb_ctx.h
new file mode 100644
index 000000000000..e36069d0c1bd
--- /dev/null
+++ b/arch/x86/crypto/sha-mb/sha_mb_ctx.h
@@ -0,0 +1,136 @@
+/*
+ * Header file for multi buffer SHA context
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SHA_MB_CTX_INTERNAL_H
+#define _SHA_MB_CTX_INTERNAL_H
+
+#include "sha_mb_mgr.h"
+
+#define HASH_UPDATE 0x00
+#define HASH_FIRST 0x01
+#define HASH_LAST 0x02
+#define HASH_ENTIRE 0x03
+#define HASH_DONE 0x04
+#define HASH_FINAL 0x08
+
+#define HASH_CTX_STS_IDLE 0x00
+#define HASH_CTX_STS_PROCESSING 0x01
+#define HASH_CTX_STS_LAST 0x02
+#define HASH_CTX_STS_COMPLETE 0x04
+
+enum hash_ctx_error {
+ HASH_CTX_ERROR_NONE = 0,
+ HASH_CTX_ERROR_INVALID_FLAGS = -1,
+ HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
+ HASH_CTX_ERROR_ALREADY_COMPLETED = -3,
+
+#ifdef HASH_CTX_DEBUG
+ HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4,
+#endif
+};
+
+
+#define hash_ctx_user_data(ctx) ((ctx)->user_data)
+#define hash_ctx_digest(ctx) ((ctx)->job.result_digest)
+#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
+#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE)
+#define hash_ctx_status(ctx) ((ctx)->status)
+#define hash_ctx_error(ctx) ((ctx)->error)
+#define hash_ctx_init(ctx) \
+ do { \
+ (ctx)->error = HASH_CTX_ERROR_NONE; \
+ (ctx)->status = HASH_CTX_STS_COMPLETE; \
+ } while (0)
+
+
+/* Hash Constants and Typedefs */
+#define SHA1_DIGEST_LENGTH 5
+#define SHA1_LOG2_BLOCK_SIZE 6
+
+#define SHA1_PADLENGTHFIELD_SIZE 8
+
+#ifdef SHA_MB_DEBUG
+#define assert(expr) \
+do { \
+ if (unlikely(!(expr))) { \
+ printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
+ #expr, __FILE__, __func__, __LINE__); \
+ } \
+} while (0)
+#else
+#define assert(expr) do {} while (0)
+#endif
+
+struct sha1_ctx_mgr {
+ struct sha1_mb_mgr mgr;
+};
+
+/* typedef struct sha1_ctx_mgr sha1_ctx_mgr; */
+
+struct sha1_hash_ctx {
+ /* Must be at struct offset 0 */
+ struct job_sha1 job;
+ /* status flag */
+ int status;
+ /* error flag */
+ int error;
+
+ uint32_t total_length;
+ const void *incoming_buffer;
+ uint32_t incoming_buffer_length;
+ uint8_t partial_block_buffer[SHA1_BLOCK_SIZE * 2];
+ uint32_t partial_block_buffer_length;
+ void *user_data;
+};
+
+#endif
diff --git a/arch/x86/crypto/sha-mb/sha_mb_mgr.h b/arch/x86/crypto/sha-mb/sha_mb_mgr.h
new file mode 100644
index 000000000000..08ad1a9acfd7
--- /dev/null
+++ b/arch/x86/crypto/sha-mb/sha_mb_mgr.h
@@ -0,0 +1,110 @@
+/*
+ * Header file for multi buffer SHA1 algorithm manager
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * James Guilford <james.guilford@intel.com>
+ * Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef __SHA_MB_MGR_H
+#define __SHA_MB_MGR_H
+
+
+#include <linux/types.h>
+
+#define NUM_SHA1_DIGEST_WORDS 5
+
+enum job_sts { STS_UNKNOWN = 0,
+ STS_BEING_PROCESSED = 1,
+ STS_COMPLETED = 2,
+ STS_INTERNAL_ERROR = 3,
+ STS_ERROR = 4
+};
+
+struct job_sha1 {
+ u8 *buffer;
+ u32 len;
+ u32 result_digest[NUM_SHA1_DIGEST_WORDS] __aligned(32);
+ enum job_sts status;
+ void *user_data;
+};
+
+/* SHA1 out-of-order scheduler */
+
+/* typedef uint32_t sha1_digest_array[5][8]; */
+
+struct sha1_args_x8 {
+ uint32_t digest[5][8];
+ uint8_t *data_ptr[8];
+};
+
+struct sha1_lane_data {
+ struct job_sha1 *job_in_lane;
+};
+
+struct sha1_mb_mgr {
+ struct sha1_args_x8 args;
+
+ uint32_t lens[8];
+
+ /* each byte is index (0...7) of unused lanes */
+ uint64_t unused_lanes;
+ /* byte 4 is set to FF as a flag */
+ struct sha1_lane_data ldata[8];
+};
+
+
+#define SHA1_MB_MGR_NUM_LANES_AVX2 8
+
+void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state);
+struct job_sha1 *sha1_mb_mgr_submit_avx2(struct sha1_mb_mgr *state,
+ struct job_sha1 *job);
+struct job_sha1 *sha1_mb_mgr_flush_avx2(struct sha1_mb_mgr *state);
+struct job_sha1 *sha1_mb_mgr_get_comp_job_avx2(struct sha1_mb_mgr *state);
+
+#endif
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index bb9b258d60e7..094292a63e74 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -202,6 +202,7 @@
#define X86_FEATURE_DECODEASSISTS ( 8*32+12) /* AMD Decode Assists support */
#define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */
#define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */
+#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
@@ -250,8 +251,15 @@
#include <asm/asm.h>
#include <linux/bitops.h>
+#ifdef CONFIG_X86_FEATURE_NAMES
extern const char * const x86_cap_flags[NCAPINTS*32];
extern const char * const x86_power_flags[32];
+#define X86_CAP_FMT "%s"
+#define x86_cap_flag(flag) x86_cap_flags[flag]
+#else
+#define X86_CAP_FMT "%d:%d"
+#define x86_cap_flag(flag) ((flag) >> 5), ((flag) & 31)
+#endif
/*
* In order to save room, we index into this array by doing
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 044a2fd3c5fe..0ec241ede5a2 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -159,30 +159,6 @@ static inline efi_status_t efi_thunk_set_virtual_address_map(
}
#endif /* CONFIG_EFI_MIXED */
-
-/* arch specific definitions used by the stub code */
-
-struct efi_config {
- u64 image_handle;
- u64 table;
- u64 allocate_pool;
- u64 allocate_pages;
- u64 get_memory_map;
- u64 free_pool;
- u64 free_pages;
- u64 locate_handle;
- u64 handle_protocol;
- u64 exit_boot_services;
- u64 text_output;
- efi_status_t (*call)(unsigned long, ...);
- bool is64;
-} __packed;
-
-extern struct efi_config *efi_early;
-
-#define efi_call_early(f, ...) \
- efi_early->call(efi_early->f, __VA_ARGS__);
-
extern bool efi_reboot_required(void);
#else
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index b0910f97a3ea..ffb1733ac91f 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -106,14 +106,14 @@ enum fixed_addresses {
__end_of_permanent_fixed_addresses,
/*
- * 256 temporary boot-time mappings, used by early_ioremap(),
+ * 512 temporary boot-time mappings, used by early_ioremap(),
* before ioremap() is functional.
*
- * If necessary we round it up to the next 256 pages boundary so
+ * If necessary we round it up to the next 512 pages boundary so
* that we can have a single pgd entry and a single pte table:
*/
#define NR_FIX_BTMAPS 64
-#define FIX_BTMAPS_SLOTS 4
+#define FIX_BTMAPS_SLOTS 8
#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
FIX_BTMAP_END =
(__end_of_permanent_fixed_addresses ^
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 478c490f3654..1733ab49ac5e 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -239,6 +239,7 @@ static inline int mp_find_ioapic(u32 gsi) { return 0; }
static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; }
static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) { return gsi; }
static inline void mp_unmap_irq(int irq) { }
+static inline bool mp_should_keep_irq(struct device *dev) { return 1; }
static inline int save_ioapic_entries(void)
{
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7c492ed9087b..7d603a71ab3a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -99,10 +99,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
#define ASYNC_PF_PER_VCPU 64
-struct kvm_vcpu;
-struct kvm;
-struct kvm_async_pf;
-
enum kvm_reg {
VCPU_REGS_RAX = 0,
VCPU_REGS_RCX = 1,
@@ -266,7 +262,8 @@ struct kvm_mmu {
struct x86_exception *fault);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
struct x86_exception *exception);
- gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access);
+ gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
+ struct x86_exception *exception);
int (*sync_page)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp);
void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva);
@@ -481,6 +478,7 @@ struct kvm_vcpu_arch {
u64 mmio_gva;
unsigned access;
gfn_t mmio_gfn;
+ u64 mmio_gen;
struct kvm_pmu pmu;
@@ -576,11 +574,10 @@ struct kvm_arch {
struct kvm_apic_map *apic_map;
unsigned int tss_addr;
- struct page *apic_access_page;
+ bool apic_access_page_done;
gpa_t wall_clock;
- struct page *ept_identity_pagetable;
bool ept_identity_pagetable_done;
gpa_t ept_identity_map_addr;
@@ -665,8 +662,8 @@ struct msr_data {
struct kvm_x86_ops {
int (*cpu_has_kvm_support)(void); /* __init */
int (*disabled_by_bios)(void); /* __init */
- int (*hardware_enable)(void *dummy);
- void (*hardware_disable)(void *dummy);
+ int (*hardware_enable)(void);
+ void (*hardware_disable)(void);
void (*check_processor_compatibility)(void *rtn);
int (*hardware_setup)(void); /* __init */
void (*hardware_unsetup)(void); /* __exit */
@@ -710,7 +707,6 @@ struct kvm_x86_ops {
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
- void (*fpu_activate)(struct kvm_vcpu *vcpu);
void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
void (*tlb_flush)(struct kvm_vcpu *vcpu);
@@ -740,6 +736,7 @@ struct kvm_x86_ops {
void (*hwapic_isr_update)(struct kvm *kvm, int isr);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
+ void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
@@ -772,6 +769,8 @@ struct kvm_x86_ops {
bool (*mpx_supported)(void);
int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
+
+ void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
};
struct kvm_arch_async_pf {
@@ -895,7 +894,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
gfn_t gfn, void *data, int offset, int len,
u32 access);
-void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
static inline int __kvm_irq_line_state(unsigned long *irq_state,
@@ -917,7 +915,6 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu);
int fx_init(struct kvm_vcpu *vcpu);
-void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *new, int bytes);
int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
@@ -926,7 +923,8 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
-gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access);
+gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
+ struct x86_exception *exception);
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception);
gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
@@ -946,7 +944,8 @@ void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
void kvm_enable_tdp(void);
void kvm_disable_tdp(void);
-static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
+static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
+ struct x86_exception *exception)
{
return gpa;
}
@@ -1037,7 +1036,7 @@ asmlinkage void kvm_spurious_fault(void);
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
-int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
@@ -1046,6 +1045,9 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
void kvm_vcpu_reset(struct kvm_vcpu *vcpu);
+void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
+void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+ unsigned long address);
void kvm_define_shared_msr(unsigned index, u32 msr);
void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index c7678e43465b..e62cf897f781 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -2,6 +2,7 @@
#define _ASM_X86_KVM_PARA_H
#include <asm/processor.h>
+#include <asm/alternative.h>
#include <uapi/asm/kvm_para.h>
extern void kvmclock_init(void);
@@ -16,10 +17,15 @@ static inline bool kvm_check_and_clear_guest_paused(void)
}
#endif /* CONFIG_KVM_GUEST */
-/* This instruction is vmcall. On non-VT architectures, it will generate a
- * trap that we will then rewrite to the appropriate instruction.
+#ifdef CONFIG_DEBUG_RODATA
+#define KVM_HYPERCALL \
+ ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL)
+#else
+/* On AMD processors, vmcall will generate a trap that we will
+ * then rewrite to the appropriate instruction.
*/
#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"
+#endif
/* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall
* instruction. The hypervisor may replace it with something else but only the
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 7fd54f09b011..77dcab277710 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -13,10 +13,13 @@ nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_common.o := $(nostackp)
obj-y := intel_cacheinfo.o scattered.o topology.o
-obj-y += proc.o capflags.o powerflags.o common.o
+obj-y += common.o
obj-y += rdrand.o
obj-y += match.o
+obj-$(CONFIG_PROC_FS) += proc.o
+obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
+
obj-$(CONFIG_X86_32) += bugs.o
obj-$(CONFIG_X86_64) += bugs_64.o
@@ -48,6 +51,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o
obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o
+ifdef CONFIG_X86_FEATURE_NAMES
quiet_cmd_mkcapflags = MKCAP $@
cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@
@@ -56,3 +60,4 @@ cpufeature = $(src)/../../include/asm/cpufeature.h
targets += capflags.c
$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
$(call if_changed,mkcapflags)
+endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 60e5497681f5..813d29d00a17 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -525,6 +525,13 @@ static void early_init_amd(struct cpuinfo_x86 *c)
}
#endif
+ /*
+ * This is only needed to tell the kernel whether to use VMCALL
+ * and VMMCALL. VMMCALL is never executed except under virt, so
+ * we can set it unconditionally.
+ */
+ set_cpu_cap(c, X86_FEATURE_VMMCALL);
+
/* F16h erratum 793, CVE-2013-6885 */
if (c->x86 == 0x16 && c->x86_model <= 0xf)
msr_set_bit(MSR_AMD64_LS_CFG, 15);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index e4ab2b42bd6f..c649f236e288 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -346,8 +346,8 @@ static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
continue;
printk(KERN_WARNING
- "CPU: CPU feature %s disabled, no CPUID level 0x%x\n",
- x86_cap_flags[df->feature], df->level);
+ "CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n",
+ x86_cap_flag(df->feature), df->level);
}
}
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index f304773285ae..f1314d0bcf0a 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -338,8 +338,10 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
* a relative jump.
*/
rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
- if (abs(rel) > 0x7fffffff)
+ if (abs(rel) > 0x7fffffff) {
+ __arch_remove_optimized_kprobe(op, 0);
return -ERANGE;
+ }
buf = (u8 *)op->optinsn.insn;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2d872e08fab9..42a2dca984b3 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1284,6 +1284,9 @@ static void remove_siblinginfo(int cpu)
for_each_cpu(sibling, cpu_sibling_mask(cpu))
cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling));
+ for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
+ cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling));
+ cpumask_clear(cpu_llc_shared_mask(cpu));
cpumask_clear(cpu_sibling_mask(cpu));
cpumask_clear(cpu_core_mask(cpu));
c->phys_proc_id = 0;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 38a0afe83c6b..976e3a57f9ea 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -53,14 +53,14 @@ u64 kvm_supported_xcr0(void)
return xcr0;
}
-void kvm_update_cpuid(struct kvm_vcpu *vcpu)
+int kvm_update_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
struct kvm_lapic *apic = vcpu->arch.apic;
best = kvm_find_cpuid_entry(vcpu, 1, 0);
if (!best)
- return;
+ return 0;
/* Update OSXSAVE bit */
if (cpu_has_xsave && best->function == 0x1) {
@@ -88,7 +88,17 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu)
xstate_required_size(vcpu->arch.xcr0);
}
+ /*
+ * The existing code assumes virtual address is 48-bit in the canonical
+ * address checks; exit if it is ever changed.
+ */
+ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+ if (best && ((best->eax & 0xff00) >> 8) != 48 &&
+ ((best->eax & 0xff00) >> 8) != 0)
+ return -EINVAL;
+
kvm_pmu_cpuid_update(vcpu);
+ return 0;
}
static int is_efer_nx(void)
@@ -112,8 +122,8 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
break;
}
}
- if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) {
- entry->edx &= ~(1 << 20);
+ if (entry && (entry->edx & bit(X86_FEATURE_NX)) && !is_efer_nx()) {
+ entry->edx &= ~bit(X86_FEATURE_NX);
printk(KERN_INFO "kvm: guest NX capability removed\n");
}
}
@@ -151,10 +161,9 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
}
vcpu->arch.cpuid_nent = cpuid->nent;
cpuid_fix_nx_cap(vcpu);
- r = 0;
kvm_apic_set_version(vcpu);
kvm_x86_ops->cpuid_update(vcpu);
- kvm_update_cpuid(vcpu);
+ r = kvm_update_cpuid(vcpu);
out_free:
vfree(cpuid_entries);
@@ -178,9 +187,7 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
vcpu->arch.cpuid_nent = cpuid->nent;
kvm_apic_set_version(vcpu);
kvm_x86_ops->cpuid_update(vcpu);
- kvm_update_cpuid(vcpu);
- return 0;
-
+ r = kvm_update_cpuid(vcpu);
out:
return r;
}
@@ -767,6 +774,12 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
if (!best)
best = check_cpuid_limit(vcpu, function, index);
+ /*
+ * Perfmon not yet supported for L2 guest.
+ */
+ if (is_guest_mode(vcpu) && function == 0xa)
+ best = NULL;
+
if (best) {
*eax = best->eax;
*ebx = best->ebx;
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index a5380590ab0e..4452eedfaedd 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -3,7 +3,7 @@
#include "x86.h"
-void kvm_update_cpuid(struct kvm_vcpu *vcpu);
+int kvm_update_cpuid(struct kvm_vcpu *vcpu);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index);
int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
@@ -88,6 +88,14 @@ static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu)
return best && (best->ecx & bit(X86_FEATURE_X2APIC));
}
+static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 0, 0);
+ return best && best->ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx;
+}
+
static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 03954f7900f5..a46207a05835 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -527,6 +527,7 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
u32 error, bool valid)
{
+ WARN_ON(vec > 0x1f);
ctxt->exception.vector = vec;
ctxt->exception.error_code = error;
ctxt->exception.error_code_valid = valid;
@@ -1468,7 +1469,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
return ret;
err_code = selector & 0xfffc;
- err_vec = GP_VECTOR;
+ err_vec = in_task_switch ? TS_VECTOR : GP_VECTOR;
/* can't load system descriptor into segment selector */
if (seg <= VCPU_SREG_GS && !seg_desc.s)
@@ -1503,6 +1504,15 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
if (rpl > cpl || dpl != cpl)
goto exception;
}
+ /* in long-mode d/b must be clear if l is set */
+ if (seg_desc.d && seg_desc.l) {
+ u64 efer = 0;
+
+ ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+ if (efer & EFER_LMA)
+ goto exception;
+ }
+
/* CS(RPL) <- CPL */
selector = (selector & 0xfffc) | cpl;
break;
@@ -1549,8 +1559,7 @@ load:
ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
return X86EMUL_CONTINUE;
exception:
- emulate_exception(ctxt, err_vec, err_code, true);
- return X86EMUL_PROPAGATE_FAULT;
+ return emulate_exception(ctxt, err_vec, err_code, true);
}
static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
@@ -2723,8 +2732,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
if (!next_tss_desc.p ||
((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
desc_limit < 0x2b)) {
- emulate_ts(ctxt, tss_selector & 0xfffc);
- return X86EMUL_PROPAGATE_FAULT;
+ return emulate_ts(ctxt, tss_selector & 0xfffc);
}
if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
@@ -3016,7 +3024,7 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt)
ctxt->dst.val = swab64(ctxt->src.val);
break;
default:
- return X86EMUL_PROPAGATE_FAULT;
+ BUG();
}
return X86EMUL_CONTINUE;
}
@@ -3140,12 +3148,8 @@ static int em_clts(struct x86_emulate_ctxt *ctxt)
static int em_vmcall(struct x86_emulate_ctxt *ctxt)
{
- int rc;
-
- if (ctxt->modrm_mod != 3 || ctxt->modrm_rm != 1)
- return X86EMUL_UNHANDLEABLE;
+ int rc = ctxt->ops->fix_hypercall(ctxt);
- rc = ctxt->ops->fix_hypercall(ctxt);
if (rc != X86EMUL_CONTINUE)
return rc;
@@ -3563,6 +3567,12 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
+static const struct opcode group7_rm0[] = {
+ N,
+ I(SrcNone | Priv | EmulateOnUD, em_vmcall),
+ N, N, N, N, N, N,
+};
+
static const struct opcode group7_rm1[] = {
DI(SrcNone | Priv, monitor),
DI(SrcNone | Priv, mwait),
@@ -3656,7 +3666,7 @@ static const struct group_dual group7 = { {
II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
}, {
- I(SrcNone | Priv | EmulateOnUD, em_vmcall),
+ EXT(0, group7_rm0),
EXT(0, group7_rm1),
N, EXT(0, group7_rm3),
II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
@@ -3687,14 +3697,18 @@ static const struct gprefix pfx_0f_6f_0f_7f = {
I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
};
-static const struct gprefix pfx_vmovntpx = {
- I(0, em_mov), N, N, N,
+static const struct gprefix pfx_0f_2b = {
+ I(0, em_mov), I(0, em_mov), N, N,
};
static const struct gprefix pfx_0f_28_0f_29 = {
I(Aligned, em_mov), I(Aligned, em_mov), N, N,
};
+static const struct gprefix pfx_0f_e7 = {
+ N, I(Sse, em_mov), N, N,
+};
+
static const struct escape escape_d9 = { {
N, N, N, N, N, N, N, I(DstMem, em_fnstcw),
}, {
@@ -3901,7 +3915,7 @@ static const struct opcode twobyte_table[256] = {
N, N, N, N,
GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
- N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx),
+ N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
N, N, N, N,
/* 0x30 - 0x3F */
II(ImplicitOps | Priv, em_wrmsr, wrmsr),
@@ -3965,7 +3979,8 @@ static const struct opcode twobyte_table[256] = {
/* 0xD0 - 0xDF */
N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
/* 0xE0 - 0xEF */
- N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
+ N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
+ N, N, N, N, N, N, N, N,
/* 0xF0 - 0xFF */
N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
};
@@ -4829,8 +4844,10 @@ writeback:
ctxt->eip = ctxt->_eip;
done:
- if (rc == X86EMUL_PROPAGATE_FAULT)
+ if (rc == X86EMUL_PROPAGATE_FAULT) {
+ WARN_ON(ctxt->exception.vector > 0x1f);
ctxt->have_exception = true;
+ }
if (rc == X86EMUL_INTERCEPTED)
return EMULATION_INTERCEPTED;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 08e8a899e005..b8345dd41b25 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -112,17 +112,6 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
struct static_key_deferred apic_hw_disabled __read_mostly;
struct static_key_deferred apic_sw_disabled __read_mostly;
-static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
-{
- if ((kvm_apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) {
- if (val & APIC_SPIV_APIC_ENABLED)
- static_key_slow_dec_deferred(&apic_sw_disabled);
- else
- static_key_slow_inc(&apic_sw_disabled.key);
- }
- apic_set_reg(apic, APIC_SPIV, val);
-}
-
static inline int apic_enabled(struct kvm_lapic *apic)
{
return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
@@ -210,6 +199,20 @@ out:
kvm_vcpu_request_scan_ioapic(kvm);
}
+static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
+{
+ u32 prev = kvm_apic_get_reg(apic, APIC_SPIV);
+
+ apic_set_reg(apic, APIC_SPIV, val);
+ if ((prev ^ val) & APIC_SPIV_APIC_ENABLED) {
+ if (val & APIC_SPIV_APIC_ENABLED) {
+ static_key_slow_dec_deferred(&apic_sw_disabled);
+ recalculate_apic_map(apic->vcpu->kvm);
+ } else
+ static_key_slow_inc(&apic_sw_disabled.key);
+ }
+}
+
static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
{
apic_set_reg(apic, APIC_ID, id << 24);
@@ -706,6 +709,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
int result = 0;
struct kvm_vcpu *vcpu = apic->vcpu;
+ trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
+ trig_mode, vector);
switch (delivery_mode) {
case APIC_DM_LOWEST:
vcpu->arch.apic_arb_prio++;
@@ -727,8 +732,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
}
- trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
- trig_mode, vector, false);
break;
case APIC_DM_REMRD:
@@ -1352,6 +1355,9 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
return;
hrtimer_cancel(&apic->lapic_timer.timer);
+ /* Inject here so clearing tscdeadline won't override new value */
+ if (apic_has_pending_timer(vcpu))
+ kvm_inject_apic_timer_irqs(vcpu);
apic->lapic_timer.tscdeadline = data;
start_apic_timer(apic);
}
@@ -1639,6 +1645,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
if (atomic_read(&apic->lapic_timer.pending) > 0) {
kvm_apic_local_deliver(apic, APIC_LVTT);
+ if (apic_lvtt_tscdeadline(apic))
+ apic->lapic_timer.tscdeadline = 0;
atomic_set(&apic->lapic_timer.pending, 0);
}
}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 931467881da7..3201e93ebd07 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -199,16 +199,20 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask)
EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
/*
- * spte bits of bit 3 ~ bit 11 are used as low 9 bits of generation number,
- * the bits of bits 52 ~ bit 61 are used as high 10 bits of generation
- * number.
+ * the low bit of the generation number is always presumed to be zero.
+ * This disables mmio caching during memslot updates. The concept is
+ * similar to a seqcount but instead of retrying the access we just punt
+ * and ignore the cache.
+ *
+ * spte bits 3-11 are used as bits 1-9 of the generation number,
+ * the bits 52-61 are used as bits 10-19 of the generation number.
*/
-#define MMIO_SPTE_GEN_LOW_SHIFT 3
+#define MMIO_SPTE_GEN_LOW_SHIFT 2
#define MMIO_SPTE_GEN_HIGH_SHIFT 52
-#define MMIO_GEN_SHIFT 19
-#define MMIO_GEN_LOW_SHIFT 9
-#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 1)
+#define MMIO_GEN_SHIFT 20
+#define MMIO_GEN_LOW_SHIFT 10
+#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 2)
#define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1)
#define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1)
@@ -236,12 +240,7 @@ static unsigned int get_mmio_spte_generation(u64 spte)
static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
{
- /*
- * Init kvm generation close to MMIO_MAX_GEN to easily test the
- * code of handling generation number wrap-around.
- */
- return (kvm_memslots(kvm)->generation +
- MMIO_MAX_GEN - 150) & MMIO_GEN_MASK;
+ return kvm_memslots(kvm)->generation & MMIO_GEN_MASK;
}
static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
@@ -296,11 +295,6 @@ static bool check_mmio_spte(struct kvm *kvm, u64 spte)
return likely(kvm_gen == spte_gen);
}
-static inline u64 rsvd_bits(int s, int e)
-{
- return ((1ULL << (e - s + 1)) - 1) << s;
-}
-
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask)
{
@@ -1180,7 +1174,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
* Write-protect on the specified @sptep, @pt_protect indicates whether
* spte write-protection is caused by protecting shadow page table.
*
- * Note: write protection is difference between drity logging and spte
+ * Note: write protection is difference between dirty logging and spte
* protection:
* - for dirty logging, the spte can be set to writable at anytime if
* its dirty bitmap is properly set.
@@ -1268,7 +1262,8 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
}
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
- struct kvm_memory_slot *slot, unsigned long data)
+ struct kvm_memory_slot *slot, gfn_t gfn, int level,
+ unsigned long data)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1276,7 +1271,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
while ((sptep = rmap_get_first(*rmapp, &iter))) {
BUG_ON(!(*sptep & PT_PRESENT_MASK));
- rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep);
+ rmap_printk("kvm_rmap_unmap_hva: spte %p %llx gfn %llx (%d)\n",
+ sptep, *sptep, gfn, level);
drop_spte(kvm, sptep);
need_tlb_flush = 1;
@@ -1286,7 +1282,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
}
static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
- struct kvm_memory_slot *slot, unsigned long data)
+ struct kvm_memory_slot *slot, gfn_t gfn, int level,
+ unsigned long data)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1300,7 +1297,8 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
BUG_ON(!is_shadow_present_pte(*sptep));
- rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", sptep, *sptep);
+ rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n",
+ sptep, *sptep, gfn, level);
need_flush = 1;
@@ -1334,6 +1332,8 @@ static int kvm_handle_hva_range(struct kvm *kvm,
int (*handler)(struct kvm *kvm,
unsigned long *rmapp,
struct kvm_memory_slot *slot,
+ gfn_t gfn,
+ int level,
unsigned long data))
{
int j;
@@ -1363,6 +1363,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
unsigned long idx, idx_end;
unsigned long *rmapp;
+ gfn_t gfn = gfn_start;
/*
* {idx(page_j) | page_j intersects with
@@ -1373,8 +1374,10 @@ static int kvm_handle_hva_range(struct kvm *kvm,
rmapp = __gfn_to_rmap(gfn_start, j, memslot);
- for (; idx <= idx_end; ++idx)
- ret |= handler(kvm, rmapp++, memslot, data);
+ for (; idx <= idx_end;
+ ++idx, gfn += (1UL << KVM_HPAGE_GFN_SHIFT(j)))
+ ret |= handler(kvm, rmapp++, memslot,
+ gfn, j, data);
}
}
@@ -1385,6 +1388,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
unsigned long data,
int (*handler)(struct kvm *kvm, unsigned long *rmapp,
struct kvm_memory_slot *slot,
+ gfn_t gfn, int level,
unsigned long data))
{
return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
@@ -1406,24 +1410,14 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
}
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
- struct kvm_memory_slot *slot, unsigned long data)
+ struct kvm_memory_slot *slot, gfn_t gfn, int level,
+ unsigned long data)
{
u64 *sptep;
struct rmap_iterator uninitialized_var(iter);
int young = 0;
- /*
- * In case of absence of EPT Access and Dirty Bits supports,
- * emulate the accessed bit for EPT, by checking if this page has
- * an EPT mapping, and clearing it if it does. On the next access,
- * a new EPT mapping will be established.
- * This has some overhead, but not as much as the cost of swapping
- * out actively used pages or breaking up actively used hugepages.
- */
- if (!shadow_accessed_mask) {
- young = kvm_unmap_rmapp(kvm, rmapp, slot, data);
- goto out;
- }
+ BUG_ON(!shadow_accessed_mask);
for (sptep = rmap_get_first(*rmapp, &iter); sptep;
sptep = rmap_get_next(&iter)) {
@@ -1435,14 +1429,13 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
(unsigned long *)sptep);
}
}
-out:
- /* @data has hva passed to kvm_age_hva(). */
- trace_kvm_age_page(data, slot, young);
+ trace_kvm_age_page(gfn, level, slot, young);
return young;
}
static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
- struct kvm_memory_slot *slot, unsigned long data)
+ struct kvm_memory_slot *slot, gfn_t gfn,
+ int level, unsigned long data)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1480,13 +1473,33 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
- kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0);
+ kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, gfn, sp->role.level, 0);
kvm_flush_remote_tlbs(vcpu->kvm);
}
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
- return kvm_handle_hva(kvm, hva, hva, kvm_age_rmapp);
+ /*
+ * In case of absence of EPT Access and Dirty Bits supports,
+ * emulate the accessed bit for EPT, by checking if this page has
+ * an EPT mapping, and clearing it if it does. On the next access,
+ * a new EPT mapping will be established.
+ * This has some overhead, but not as much as the cost of swapping
+ * out actively used pages or breaking up actively used hugepages.
+ */
+ if (!shadow_accessed_mask) {
+ /*
+ * We are holding the kvm->mmu_lock, and we are blowing up
+ * shadow PTEs. MMU notifier consumers need to be kept at bay.
+ * This is correct as long as we don't decouple the mmu_lock
+ * protected regions (like invalidate_range_start|end does).
+ */
+ kvm->mmu_notifier_seq++;
+ return kvm_handle_hva_range(kvm, start, end, 0,
+ kvm_unmap_rmapp);
+ }
+
+ return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
}
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
@@ -1749,7 +1762,7 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
return 1;
}
- kvm_mmu_flush_tlb(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
return 0;
}
@@ -1802,7 +1815,7 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
if (flush)
- kvm_mmu_flush_tlb(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
struct mmu_page_path {
@@ -2536,7 +2549,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
true, host_writable)) {
if (write_fault)
*emulate = 1;
- kvm_mmu_flush_tlb(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
if (unlikely(is_mmio_spte(*sptep) && emulate))
@@ -3163,7 +3176,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return;
- vcpu_clear_mmio_info(vcpu, ~0ul);
+ vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
hpa_t root = vcpu->arch.mmu.root_hpa;
@@ -3206,7 +3219,7 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
{
if (exception)
exception->error_code = 0;
- return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access);
+ return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception);
}
static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
@@ -3450,13 +3463,6 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
context->nx = false;
}
-void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
-{
- ++vcpu->stat.tlb_flush;
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb);
-
void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
{
mmu_free_roots(vcpu);
@@ -3518,6 +3524,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
int maxphyaddr = cpuid_maxphyaddr(vcpu);
u64 exb_bit_rsvd = 0;
u64 gbpages_bit_rsvd = 0;
+ u64 nonleaf_bit8_rsvd = 0;
context->bad_mt_xwr = 0;
@@ -3525,6 +3532,14 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
exb_bit_rsvd = rsvd_bits(63, 63);
if (!guest_cpuid_has_gbpages(vcpu))
gbpages_bit_rsvd = rsvd_bits(7, 7);
+
+ /*
+ * Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for
+ * leaf entries) on AMD CPUs only.
+ */
+ if (guest_cpuid_is_amd(vcpu))
+ nonleaf_bit8_rsvd = rsvd_bits(8, 8);
+
switch (context->root_level) {
case PT32_ROOT_LEVEL:
/* no rsvd bits for 2 level 4K page table entries */
@@ -3559,9 +3574,9 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
break;
case PT64_ROOT_LEVEL:
context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
- rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 7);
+ nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51);
context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
- gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51);
+ nonleaf_bit8_rsvd | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51);
context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 51);
context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
@@ -3962,7 +3977,7 @@ static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page,
if (remote_flush)
kvm_flush_remote_tlbs(vcpu->kvm);
else if (local_flush)
- kvm_mmu_flush_tlb(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
@@ -4223,7 +4238,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
vcpu->arch.mmu.invlpg(vcpu, gva);
- kvm_mmu_flush_tlb(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
++vcpu->stat.invlpg;
}
EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
@@ -4433,7 +4448,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm)
* The very rare case: if the generation-number is round,
* zap all shadow pages.
*/
- if (unlikely(kvm_current_mmio_generation(kvm) >= MMIO_MAX_GEN)) {
+ if (unlikely(kvm_current_mmio_generation(kvm) == 0)) {
printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n");
kvm_mmu_invalidate_zap_all_pages(kvm);
}
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index b982112d2ca5..bde8ee725754 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -56,6 +56,11 @@
#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
+static inline u64 rsvd_bits(int s, int e)
+{
+ return ((1ULL << (e - s + 1)) - 1) << s;
+}
+
int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 410776528265..806d58e3c320 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -298,8 +298,7 @@ retry_walk:
}
#endif
walker->max_level = walker->level;
- ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
- (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
+ ASSERT(!is_long_mode(vcpu) && is_pae(vcpu));
accessed_dirty = PT_GUEST_ACCESSED_MASK;
pt_access = pte_access = ACC_ALL;
@@ -321,9 +320,22 @@ retry_walk:
walker->pte_gpa[walker->level - 1] = pte_gpa;
real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
- PFERR_USER_MASK|PFERR_WRITE_MASK);
+ PFERR_USER_MASK|PFERR_WRITE_MASK,
+ &walker->fault);
+
+ /*
+ * FIXME: This can happen if emulation (for of an INS/OUTS
+ * instruction) triggers a nested page fault. The exit
+ * qualification / exit info field will incorrectly have
+ * "guest page access" as the nested page fault's cause,
+ * instead of "guest page structure access". To fix this,
+ * the x86_exception struct should be augmented with enough
+ * information to fix the exit_qualification or exit_info_1
+ * fields.
+ */
if (unlikely(real_gfn == UNMAPPED_GVA))
- goto error;
+ return 0;
+
real_gfn = gpa_to_gfn(real_gfn);
host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn,
@@ -364,7 +376,7 @@ retry_walk:
if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36())
gfn += pse36_gfn_delta(pte);
- real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access);
+ real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access, &walker->fault);
if (real_gpa == UNMAPPED_GVA)
return 0;
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 3dd6accb64ec..8e6b7d869d2f 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -15,6 +15,7 @@
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
+#include <asm/perf_event.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
@@ -463,7 +464,8 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_cpuid_entry2 *entry;
- unsigned bitmap_len;
+ union cpuid10_eax eax;
+ union cpuid10_edx edx;
pmu->nr_arch_gp_counters = 0;
pmu->nr_arch_fixed_counters = 0;
@@ -475,25 +477,27 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
if (!entry)
return;
+ eax.full = entry->eax;
+ edx.full = entry->edx;
- pmu->version = entry->eax & 0xff;
+ pmu->version = eax.split.version_id;
if (!pmu->version)
return;
- pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff,
- INTEL_PMC_MAX_GENERIC);
- pmu->counter_bitmask[KVM_PMC_GP] =
- ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1;
- bitmap_len = (entry->eax >> 24) & 0xff;
- pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1);
+ pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
+ INTEL_PMC_MAX_GENERIC);
+ pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
+ pmu->available_event_types = ~entry->ebx &
+ ((1ull << eax.split.mask_length) - 1);
if (pmu->version == 1) {
pmu->nr_arch_fixed_counters = 0;
} else {
- pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f),
+ pmu->nr_arch_fixed_counters =
+ min_t(int, edx.split.num_counters_fixed,
INTEL_PMC_MAX_FIXED);
pmu->counter_bitmask[KVM_PMC_FIXED] =
- ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1;
+ ((u64)1 << edx.split.bit_width_fixed) - 1;
}
pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ddf742768ecf..f7f6a4a157a6 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -622,7 +622,7 @@ static int has_svm(void)
return 1;
}
-static void svm_hardware_disable(void *garbage)
+static void svm_hardware_disable(void)
{
/* Make sure we clean up behind us */
if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
@@ -633,7 +633,7 @@ static void svm_hardware_disable(void *garbage)
amd_pmu_disable_virt();
}
-static int svm_hardware_enable(void *garbage)
+static int svm_hardware_enable(void)
{
struct svm_cpu_data *sd;
@@ -1257,7 +1257,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
svm->asid_generation = 0;
init_vmcb(svm);
- svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+ svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
+ MSR_IA32_APICBASE_ENABLE;
if (kvm_vcpu_is_bsp(&svm->vcpu))
svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
@@ -1974,10 +1975,26 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
{
struct vcpu_svm *svm = to_svm(vcpu);
- svm->vmcb->control.exit_code = SVM_EXIT_NPF;
- svm->vmcb->control.exit_code_hi = 0;
- svm->vmcb->control.exit_info_1 = fault->error_code;
- svm->vmcb->control.exit_info_2 = fault->address;
+ if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
+ /*
+ * TODO: track the cause of the nested page fault, and
+ * correctly fill in the high bits of exit_info_1.
+ */
+ svm->vmcb->control.exit_code = SVM_EXIT_NPF;
+ svm->vmcb->control.exit_code_hi = 0;
+ svm->vmcb->control.exit_info_1 = (1ULL << 32);
+ svm->vmcb->control.exit_info_2 = fault->address;
+ }
+
+ svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
+ svm->vmcb->control.exit_info_1 |= fault->error_code;
+
+ /*
+ * The present bit is always zero for page structure faults on real
+ * hardware.
+ */
+ if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
+ svm->vmcb->control.exit_info_1 &= ~1;
nested_svm_vmexit(svm);
}
@@ -3031,7 +3048,7 @@ static int cr8_write_interception(struct vcpu_svm *svm)
return 0;
}
-u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
+static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
{
struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
return vmcb->control.tsc_offset +
@@ -4305,6 +4322,10 @@ static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
local_irq_enable();
}
+static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
static struct kvm_x86_ops svm_x86_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -4349,7 +4370,6 @@ static struct kvm_x86_ops svm_x86_ops = {
.cache_reg = svm_cache_reg,
.get_rflags = svm_get_rflags,
.set_rflags = svm_set_rflags,
- .fpu_activate = svm_fpu_activate,
.fpu_deactivate = svm_fpu_deactivate,
.tlb_flush = svm_flush_tlb,
@@ -4406,6 +4426,8 @@ static struct kvm_x86_ops svm_x86_ops = {
.check_intercept = svm_check_intercept,
.handle_external_intr = svm_handle_external_intr,
+
+ .sched_in = svm_sched_in,
};
static int __init svm_init(void)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index e850a7d332be..6b06ab8748dd 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -415,15 +415,14 @@ TRACE_EVENT(kvm_apic_ipi,
);
TRACE_EVENT(kvm_apic_accept_irq,
- TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec, bool coalesced),
- TP_ARGS(apicid, dm, tm, vec, coalesced),
+ TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec),
+ TP_ARGS(apicid, dm, tm, vec),
TP_STRUCT__entry(
__field( __u32, apicid )
__field( __u16, dm )
__field( __u8, tm )
__field( __u8, vec )
- __field( bool, coalesced )
),
TP_fast_assign(
@@ -431,14 +430,12 @@ TRACE_EVENT(kvm_apic_accept_irq,
__entry->dm = dm;
__entry->tm = tm;
__entry->vec = vec;
- __entry->coalesced = coalesced;
),
- TP_printk("apicid %x vec %u (%s|%s)%s",
+ TP_printk("apicid %x vec %u (%s|%s)",
__entry->apicid, __entry->vec,
__print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode),
- __entry->tm ? "level" : "edge",
- __entry->coalesced ? " (coalesced)" : "")
+ __entry->tm ? "level" : "edge")
);
TRACE_EVENT(kvm_eoi,
@@ -850,6 +847,36 @@ TRACE_EVENT(kvm_track_tsc,
#endif /* CONFIG_X86_64 */
+TRACE_EVENT(kvm_ple_window,
+ TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old),
+ TP_ARGS(grow, vcpu_id, new, old),
+
+ TP_STRUCT__entry(
+ __field( bool, grow )
+ __field( unsigned int, vcpu_id )
+ __field( int, new )
+ __field( int, old )
+ ),
+
+ TP_fast_assign(
+ __entry->grow = grow;
+ __entry->vcpu_id = vcpu_id;
+ __entry->new = new;
+ __entry->old = old;
+ ),
+
+ TP_printk("vcpu %u: ple_window %d (%s %d)",
+ __entry->vcpu_id,
+ __entry->new,
+ __entry->grow ? "grow" : "shrink",
+ __entry->old)
+);
+
+#define trace_kvm_ple_window_grow(vcpu_id, new, old) \
+ trace_kvm_ple_window(true, vcpu_id, new, old)
+#define trace_kvm_ple_window_shrink(vcpu_id, new, old) \
+ trace_kvm_ple_window(false, vcpu_id, new, old)
+
#endif /* _TRACE_KVM_H */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bfe11cf124a1..04fa1b8298c8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -125,14 +125,32 @@ module_param(nested, bool, S_IRUGO);
* Time is measured based on a counter that runs at the same rate as the TSC,
* refer SDM volume 3b section 21.6.13 & 22.1.3.
*/
-#define KVM_VMX_DEFAULT_PLE_GAP 128
-#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
+#define KVM_VMX_DEFAULT_PLE_GAP 128
+#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
+#define KVM_VMX_DEFAULT_PLE_WINDOW_GROW 2
+#define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0
+#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX \
+ INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW
+
static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
module_param(ple_gap, int, S_IRUGO);
static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
module_param(ple_window, int, S_IRUGO);
+/* Default doubles per-vcpu window every exit. */
+static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW;
+module_param(ple_window_grow, int, S_IRUGO);
+
+/* Default resets per-vcpu window every exit to ple_window. */
+static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK;
+module_param(ple_window_shrink, int, S_IRUGO);
+
+/* Default is to compute the maximum so we can never overflow. */
+static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
+static int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
+module_param(ple_window_max, int, S_IRUGO);
+
extern const ulong vmx_return;
#define NR_AUTOLOAD_MSRS 8
@@ -379,6 +397,7 @@ struct nested_vmx {
* we must keep them pinned while L2 runs.
*/
struct page *apic_access_page;
+ struct page *virtual_apic_page;
u64 msr_ia32_feature_control;
struct hrtimer preemption_timer;
@@ -484,6 +503,10 @@ struct vcpu_vmx {
/* Support for a guest hypervisor (nested VMX) */
struct nested_vmx nested;
+
+ /* Dynamic PLE window. */
+ int ple_window;
+ bool ple_window_dirty;
};
enum segment_cache_field {
@@ -533,6 +556,7 @@ static int max_shadow_read_only_fields =
ARRAY_SIZE(shadow_read_only_fields);
static unsigned long shadow_read_write_fields[] = {
+ TPR_THRESHOLD,
GUEST_RIP,
GUEST_RSP,
GUEST_CR0,
@@ -743,6 +767,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var);
static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
+static int alloc_identity_pagetable(struct kvm *kvm);
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -2135,7 +2160,7 @@ static u64 guest_read_tsc(void)
* Like guest_read_tsc, but always returns L1's notion of the timestamp
* counter, even if a nested guest (L2) is currently running.
*/
-u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
+static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
{
u64 tsc_offset;
@@ -2330,7 +2355,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING |
- CPU_BASED_PAUSE_EXITING |
+ CPU_BASED_PAUSE_EXITING | CPU_BASED_TPR_SHADOW |
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
/*
* We can allow some features even when not supported by the
@@ -2601,6 +2626,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_CR_PAT:
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
+ if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+ return 1;
vmcs_write64(GUEST_IA32_PAT, data);
vcpu->arch.pat = data;
break;
@@ -2704,7 +2731,7 @@ static void kvm_cpu_vmxon(u64 addr)
: "memory", "cc");
}
-static int hardware_enable(void *garbage)
+static int hardware_enable(void)
{
int cpu = raw_smp_processor_id();
u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
@@ -2768,7 +2795,7 @@ static void kvm_cpu_vmxoff(void)
asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
}
-static void hardware_disable(void *garbage)
+static void hardware_disable(void)
{
if (vmm_exclusive) {
vmclear_local_loaded_vmcss();
@@ -3107,9 +3134,17 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_unrestricted_guest())
enable_unrestricted_guest = 0;
- if (!cpu_has_vmx_flexpriority())
+ if (!cpu_has_vmx_flexpriority()) {
flexpriority_enabled = 0;
+ /*
+ * set_apic_access_page_addr() is used to reload apic access
+ * page upon invalidation. No need to do anything if the
+ * processor does not have the APIC_ACCESS_ADDR VMCS field.
+ */
+ kvm_x86_ops->set_apic_access_page_addr = NULL;
+ }
+
if (!cpu_has_vmx_tpr_shadow())
kvm_x86_ops->update_cr8_intercept = NULL;
@@ -3905,7 +3940,7 @@ static int init_rmode_tss(struct kvm *kvm)
{
gfn_t fn;
u16 data = 0;
- int r, idx, ret = 0;
+ int idx, r;
idx = srcu_read_lock(&kvm->srcu);
fn = kvm->arch.tss_addr >> PAGE_SHIFT;
@@ -3927,32 +3962,32 @@ static int init_rmode_tss(struct kvm *kvm)
r = kvm_write_guest_page(kvm, fn, &data,
RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
sizeof(u8));
- if (r < 0)
- goto out;
-
- ret = 1;
out:
srcu_read_unlock(&kvm->srcu, idx);
- return ret;
+ return r;
}
static int init_rmode_identity_map(struct kvm *kvm)
{
- int i, idx, r, ret;
+ int i, idx, r = 0;
pfn_t identity_map_pfn;
u32 tmp;
if (!enable_ept)
- return 1;
- if (unlikely(!kvm->arch.ept_identity_pagetable)) {
- printk(KERN_ERR "EPT: identity-mapping pagetable "
- "haven't been allocated!\n");
return 0;
- }
+
+ /* Protect kvm->arch.ept_identity_pagetable_done. */
+ mutex_lock(&kvm->slots_lock);
+
if (likely(kvm->arch.ept_identity_pagetable_done))
- return 1;
- ret = 0;
+ goto out2;
+
identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT;
+
+ r = alloc_identity_pagetable(kvm);
+ if (r < 0)
+ goto out2;
+
idx = srcu_read_lock(&kvm->srcu);
r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
if (r < 0)
@@ -3967,10 +4002,13 @@ static int init_rmode_identity_map(struct kvm *kvm)
goto out;
}
kvm->arch.ept_identity_pagetable_done = true;
- ret = 1;
+
out:
srcu_read_unlock(&kvm->srcu, idx);
- return ret;
+
+out2:
+ mutex_unlock(&kvm->slots_lock);
+ return r;
}
static void seg_setup(int seg)
@@ -3995,23 +4033,28 @@ static int alloc_apic_access_page(struct kvm *kvm)
int r = 0;
mutex_lock(&kvm->slots_lock);
- if (kvm->arch.apic_access_page)
+ if (kvm->arch.apic_access_page_done)
goto out;
kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
kvm_userspace_mem.flags = 0;
- kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL;
+ kvm_userspace_mem.guest_phys_addr = APIC_DEFAULT_PHYS_BASE;
kvm_userspace_mem.memory_size = PAGE_SIZE;
r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
if (r)
goto out;
- page = gfn_to_page(kvm, 0xfee00);
+ page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
if (is_error_page(page)) {
r = -EFAULT;
goto out;
}
- kvm->arch.apic_access_page = page;
+ /*
+ * Do not pin the page in memory, so that memory hot-unplug
+ * is able to migrate it.
+ */
+ put_page(page);
+ kvm->arch.apic_access_page_done = true;
out:
mutex_unlock(&kvm->slots_lock);
return r;
@@ -4019,31 +4062,20 @@ out:
static int alloc_identity_pagetable(struct kvm *kvm)
{
- struct page *page;
+ /* Called with kvm->slots_lock held. */
+
struct kvm_userspace_memory_region kvm_userspace_mem;
int r = 0;
- mutex_lock(&kvm->slots_lock);
- if (kvm->arch.ept_identity_pagetable)
- goto out;
+ BUG_ON(kvm->arch.ept_identity_pagetable_done);
+
kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
kvm_userspace_mem.flags = 0;
kvm_userspace_mem.guest_phys_addr =
kvm->arch.ept_identity_map_addr;
kvm_userspace_mem.memory_size = PAGE_SIZE;
r = __kvm_set_memory_region(kvm, &kvm_userspace_mem);
- if (r)
- goto out;
-
- page = gfn_to_page(kvm, kvm->arch.ept_identity_map_addr >> PAGE_SHIFT);
- if (is_error_page(page)) {
- r = -EFAULT;
- goto out;
- }
- kvm->arch.ept_identity_pagetable = page;
-out:
- mutex_unlock(&kvm->slots_lock);
return r;
}
@@ -4402,7 +4434,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
if (ple_gap) {
vmcs_write32(PLE_GAP, ple_gap);
- vmcs_write32(PLE_WINDOW, ple_window);
+ vmx->ple_window = ple_window;
+ vmx->ple_window_dirty = true;
}
vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
@@ -4477,7 +4510,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
kvm_set_cr8(&vmx->vcpu, 0);
- apic_base_msr.data = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
+ apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
if (kvm_vcpu_is_bsp(&vmx->vcpu))
apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
apic_base_msr.host_initiated = true;
@@ -4537,9 +4570,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmcs_write32(TPR_THRESHOLD, 0);
}
- if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
- vmcs_write64(APIC_ACCESS_ADDR,
- page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
+ kvm_vcpu_reload_apic_access_page(vcpu);
if (vmx_vm_has_apicv(vcpu->kvm))
memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
@@ -4729,10 +4760,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
if (ret)
return ret;
kvm->arch.tss_addr = addr;
- if (!init_rmode_tss(kvm))
- return -ENOMEM;
-
- return 0;
+ return init_rmode_tss(kvm);
}
static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
@@ -5521,17 +5549,18 @@ static u64 ept_rsvd_mask(u64 spte, int level)
for (i = 51; i > boot_cpu_data.x86_phys_bits; i--)
mask |= (1ULL << i);
- if (level > 2)
+ if (level == 4)
/* bits 7:3 reserved */
mask |= 0xf8;
- else if (level == 2) {
- if (spte & (1ULL << 7))
- /* 2MB ref, bits 20:12 reserved */
- mask |= 0x1ff000;
- else
- /* bits 6:3 reserved */
- mask |= 0x78;
- }
+ else if (spte & (1ULL << 7))
+ /*
+ * 1GB/2MB page, bits 29:12 or 20:12 reserved respectively,
+ * level == 1 if the hypervisor is using the ignored bit 7.
+ */
+ mask |= (PAGE_SIZE << ((level - 1) * 9)) - PAGE_SIZE;
+ else if (level > 1)
+ /* bits 6:3 reserved */
+ mask |= 0x78;
return mask;
}
@@ -5561,7 +5590,8 @@ static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte,
WARN_ON(1);
}
- if (level == 1 || (level == 2 && (spte & (1ULL << 7)))) {
+ /* bits 5:3 are _not_ reserved for large page or leaf page */
+ if ((rsvd_bits & 0x38) == 0) {
u64 ept_mem_type = (spte & 0x38) >> 3;
if (ept_mem_type == 2 || ept_mem_type == 3 ||
@@ -5676,12 +5706,85 @@ out:
return ret;
}
+static int __grow_ple_window(int val)
+{
+ if (ple_window_grow < 1)
+ return ple_window;
+
+ val = min(val, ple_window_actual_max);
+
+ if (ple_window_grow < ple_window)
+ val *= ple_window_grow;
+ else
+ val += ple_window_grow;
+
+ return val;
+}
+
+static int __shrink_ple_window(int val, int modifier, int minimum)
+{
+ if (modifier < 1)
+ return ple_window;
+
+ if (modifier < ple_window)
+ val /= modifier;
+ else
+ val -= modifier;
+
+ return max(val, minimum);
+}
+
+static void grow_ple_window(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int old = vmx->ple_window;
+
+ vmx->ple_window = __grow_ple_window(old);
+
+ if (vmx->ple_window != old)
+ vmx->ple_window_dirty = true;
+
+ trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
+}
+
+static void shrink_ple_window(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int old = vmx->ple_window;
+
+ vmx->ple_window = __shrink_ple_window(old,
+ ple_window_shrink, ple_window);
+
+ if (vmx->ple_window != old)
+ vmx->ple_window_dirty = true;
+
+ trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
+}
+
+/*
+ * ple_window_actual_max is computed to be one grow_ple_window() below
+ * ple_window_max. (See __grow_ple_window for the reason.)
+ * This prevents overflows, because ple_window_max is int.
+ * ple_window_max effectively rounded down to a multiple of ple_window_grow in
+ * this process.
+ * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
+ */
+static void update_ple_window_actual_max(void)
+{
+ ple_window_actual_max =
+ __shrink_ple_window(max(ple_window_max, ple_window),
+ ple_window_grow, INT_MIN);
+}
+
/*
* Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
* exiting, so only get here on cpu with PAUSE-Loop-Exiting.
*/
static int handle_pause(struct kvm_vcpu *vcpu)
{
+ if (ple_gap)
+ grow_ple_window(vcpu);
+
skip_emulated_instruction(vcpu);
kvm_vcpu_on_spin(vcpu);
@@ -6146,7 +6249,11 @@ static void free_nested(struct vcpu_vmx *vmx)
/* Unpin physical memory we referred to in current vmcs02 */
if (vmx->nested.apic_access_page) {
nested_release_page(vmx->nested.apic_access_page);
- vmx->nested.apic_access_page = 0;
+ vmx->nested.apic_access_page = NULL;
+ }
+ if (vmx->nested.virtual_apic_page) {
+ nested_release_page(vmx->nested.virtual_apic_page);
+ vmx->nested.virtual_apic_page = NULL;
}
nested_free_all_saved_vmcss(vmx);
@@ -6617,7 +6724,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
switch (type) {
case VMX_EPT_EXTENT_GLOBAL:
kvm_mmu_sync_roots(vcpu);
- kvm_mmu_flush_tlb(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
nested_vmx_succeed(vcpu);
break;
default:
@@ -6892,6 +6999,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
case EXIT_REASON_TASK_SWITCH:
return 1;
case EXIT_REASON_CPUID:
+ if (kvm_register_read(vcpu, VCPU_REGS_RAX) == 0xa)
+ return 0;
return 1;
case EXIT_REASON_HLT:
return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
@@ -6936,7 +7045,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
case EXIT_REASON_MCE_DURING_VMENTRY:
return 0;
case EXIT_REASON_TPR_BELOW_THRESHOLD:
- return 1;
+ return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
case EXIT_REASON_APIC_ACCESS:
return nested_cpu_has2(vmcs12,
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
@@ -7057,6 +7166,12 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+ if (is_guest_mode(vcpu) &&
+ nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
+ return;
+
if (irr == -1 || tpr < irr) {
vmcs_write32(TPR_THRESHOLD, 0);
return;
@@ -7094,6 +7209,29 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
vmx_set_msr_bitmap(vcpu);
}
+static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ /*
+ * Currently we do not handle the nested case where L2 has an
+ * APIC access page of its own; that page is still pinned.
+ * Hence, we skip the case where the VCPU is in guest mode _and_
+ * L1 prepared an APIC access page for L2.
+ *
+ * For the case where L1 and L2 share the same APIC access page
+ * (flexpriority=Y but SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES clear
+ * in the vmcs12), this function will only update either the vmcs01
+ * or the vmcs02. If the former, the vmcs02 will be updated by
+ * prepare_vmcs02. If the latter, the vmcs01 will be updated in
+ * the next L2->L1 exit.
+ */
+ if (!is_guest_mode(vcpu) ||
+ !nested_cpu_has2(vmx->nested.current_vmcs12,
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
+ vmcs_write64(APIC_ACCESS_ADDR, hpa);
+}
+
static void vmx_hwapic_isr_update(struct kvm *kvm, int isr)
{
u16 status;
@@ -7387,6 +7525,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (vmx->emulation_required)
return;
+ if (vmx->ple_window_dirty) {
+ vmx->ple_window_dirty = false;
+ vmcs_write32(PLE_WINDOW, vmx->ple_window);
+ }
+
if (vmx->nested.sync_shadow_vmcs) {
copy_vmcs12_to_shadow(vmx);
vmx->nested.sync_shadow_vmcs = false;
@@ -7642,10 +7785,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
if (!kvm->arch.ept_identity_map_addr)
kvm->arch.ept_identity_map_addr =
VMX_EPT_IDENTITY_PAGETABLE_ADDR;
- err = -ENOMEM;
- if (alloc_identity_pagetable(kvm) != 0)
- goto free_vmcs;
- if (!init_rmode_identity_map(kvm))
+ err = init_rmode_identity_map(kvm);
+ if (err)
goto free_vmcs;
}
@@ -7824,6 +7965,55 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
kvm_inject_page_fault(vcpu, fault);
}
+static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+ /* TODO: Also verify bits beyond physical address width are 0 */
+ if (!PAGE_ALIGNED(vmcs12->apic_access_addr))
+ return false;
+
+ /*
+ * Translate L1 physical address to host physical
+ * address for vmcs02. Keep the page pinned, so this
+ * physical address remains valid. We keep a reference
+ * to it so we can release it later.
+ */
+ if (vmx->nested.apic_access_page) /* shouldn't happen */
+ nested_release_page(vmx->nested.apic_access_page);
+ vmx->nested.apic_access_page =
+ nested_get_page(vcpu, vmcs12->apic_access_addr);
+ }
+
+ if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
+ /* TODO: Also verify bits beyond physical address width are 0 */
+ if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr))
+ return false;
+
+ if (vmx->nested.virtual_apic_page) /* shouldn't happen */
+ nested_release_page(vmx->nested.virtual_apic_page);
+ vmx->nested.virtual_apic_page =
+ nested_get_page(vcpu, vmcs12->virtual_apic_page_addr);
+
+ /*
+ * Failing the vm entry is _not_ what the processor does
+ * but it's basically the only possibility we have.
+ * We could still enter the guest if CR8 load exits are
+ * enabled, CR8 store exits are enabled, and virtualize APIC
+ * access is disabled; in this case the processor would never
+ * use the TPR shadow and we could simply clear the bit from
+ * the execution control. But such a configuration is useless,
+ * so let's keep the code simple.
+ */
+ if (!vmx->nested.virtual_apic_page)
+ return false;
+ }
+
+ return true;
+}
+
static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
{
u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
@@ -7849,7 +8039,7 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
/*
* prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
* L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
- * with L0's requirements for its guest (a.k.a. vmsc01), so we can run the L2
+ * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
* guest in a way that will both be appropriate to L1's requests, and our
* needs. In addition to modifying the active vmcs (which is vmcs02), this
* function also has additional necessary side-effects, like setting various
@@ -7970,16 +8160,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) {
/*
- * Translate L1 physical address to host physical
- * address for vmcs02. Keep the page pinned, so this
- * physical address remains valid. We keep a reference
- * to it so we can release it later.
- */
- if (vmx->nested.apic_access_page) /* shouldn't happen */
- nested_release_page(vmx->nested.apic_access_page);
- vmx->nested.apic_access_page =
- nested_get_page(vcpu, vmcs12->apic_access_addr);
- /*
* If translation failed, no matter: This feature asks
* to exit when accessing the given address, and if it
* can never be accessed, this feature won't do
@@ -7994,8 +8174,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
} else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) {
exec_control |=
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
- vmcs_write64(APIC_ACCESS_ADDR,
- page_to_phys(vcpu->kvm->arch.apic_access_page));
+ kvm_vcpu_reload_apic_access_page(vcpu);
}
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
@@ -8024,6 +8203,13 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
exec_control &= ~CPU_BASED_TPR_SHADOW;
exec_control |= vmcs12->cpu_based_vm_exec_control;
+
+ if (exec_control & CPU_BASED_TPR_SHADOW) {
+ vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
+ page_to_phys(vmx->nested.virtual_apic_page));
+ vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
+ }
+
/*
* Merging of IO and MSR bitmaps not currently supported.
* Rather, exit every time.
@@ -8185,8 +8371,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
return 1;
}
- if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
- !PAGE_ALIGNED(vmcs12->apic_access_addr)) {
+ if (!nested_get_vmcs12_pages(vcpu, vmcs12)) {
/*TODO: Also verify bits beyond physical address width are 0*/
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
return 1;
@@ -8790,10 +8975,20 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
/* Unpin physical memory we referred to in vmcs02 */
if (vmx->nested.apic_access_page) {
nested_release_page(vmx->nested.apic_access_page);
- vmx->nested.apic_access_page = 0;
+ vmx->nested.apic_access_page = NULL;
+ }
+ if (vmx->nested.virtual_apic_page) {
+ nested_release_page(vmx->nested.virtual_apic_page);
+ vmx->nested.virtual_apic_page = NULL;
}
/*
+ * We are now running in L2, mmu_notifier will force to reload the
+ * page's hpa for L2 vmcs. Need to reload it for L1 before entering L1.
+ */
+ kvm_vcpu_reload_apic_access_page(vcpu);
+
+ /*
* Exiting from L2 to L1, we're now back to L1 which thinks it just
* finished a VMLAUNCH or VMRESUME instruction, so we need to set the
* success or failure flag accordingly.
@@ -8846,6 +9041,12 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
return X86EMUL_CONTINUE;
}
+static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+ if (ple_gap)
+ shrink_ple_window(vcpu);
+}
+
static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@@ -8890,7 +9091,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
.cache_reg = vmx_cache_reg,
.get_rflags = vmx_get_rflags,
.set_rflags = vmx_set_rflags,
- .fpu_activate = vmx_fpu_activate,
.fpu_deactivate = vmx_fpu_deactivate,
.tlb_flush = vmx_flush_tlb,
@@ -8913,6 +9113,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.enable_irq_window = enable_irq_window,
.update_cr8_intercept = update_cr8_intercept,
.set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
+ .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
.vm_has_apicv = vmx_vm_has_apicv,
.load_eoi_exitmap = vmx_load_eoi_exitmap,
.hwapic_irr_update = vmx_hwapic_irr_update,
@@ -8951,6 +9152,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
.mpx_supported = vmx_mpx_supported,
.check_nested_events = vmx_check_nested_events,
+
+ .sched_in = vmx_sched_in,
};
static int __init vmx_init(void)
@@ -9065,6 +9268,8 @@ static int __init vmx_init(void)
} else
kvm_disable_tdp();
+ update_ple_window_actual_max();
+
return 0;
out7:
@@ -9098,7 +9303,7 @@ static void __exit vmx_exit(void)
free_page((unsigned long)vmx_vmread_bitmap);
#ifdef CONFIG_KEXEC
- rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL);
+ RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
synchronize_rcu();
#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8f1e22d3b286..5430e4b0af29 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -246,7 +246,7 @@ void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
}
EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
-static void drop_user_return_notifiers(void *ignore)
+static void drop_user_return_notifiers(void)
{
unsigned int cpu = smp_processor_id();
struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
@@ -408,12 +408,14 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
}
EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
-void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
+static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
else
vcpu->arch.mmu.inject_page_fault(vcpu, fault);
+
+ return fault->nested_page_fault;
}
void kvm_inject_nmi(struct kvm_vcpu *vcpu)
@@ -457,11 +459,12 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
gfn_t ngfn, void *data, int offset, int len,
u32 access)
{
+ struct x86_exception exception;
gfn_t real_gfn;
gpa_t ngpa;
ngpa = gfn_to_gpa(ngfn);
- real_gfn = mmu->translate_gpa(vcpu, ngpa, access);
+ real_gfn = mmu->translate_gpa(vcpu, ngpa, access, &exception);
if (real_gfn == UNMAPPED_GVA)
return -EFAULT;
@@ -726,7 +729,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
kvm_mmu_sync_roots(vcpu);
- kvm_mmu_flush_tlb(vcpu);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
return 0;
}
@@ -1518,7 +1521,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
pvclock_update_vm_gtod_copy(kvm);
kvm_for_each_vcpu(i, vcpu, kvm)
- set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
/* guest entries allowed */
kvm_for_each_vcpu(i, vcpu, kvm)
@@ -1661,7 +1664,7 @@ static void kvmclock_update_fn(struct work_struct *work)
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
- set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
kvm_vcpu_kick(vcpu);
}
}
@@ -1670,7 +1673,7 @@ static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
{
struct kvm *kvm = v->kvm;
- set_bit(KVM_REQ_CLOCK_UPDATE, &v->requests);
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
schedule_delayed_work(&kvm->arch.kvmclock_update_work,
KVMCLOCK_UPDATE_DELAY);
}
@@ -1723,9 +1726,10 @@ static bool valid_mtrr_type(unsigned t)
return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
}
-static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
int i;
+ u64 mask;
if (!msr_mtrr_valid(msr))
return false;
@@ -1747,14 +1751,31 @@ static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
}
/* variable MTRRs */
- return valid_mtrr_type(data & 0xff);
+ WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR));
+
+ mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
+ if ((msr & 1) == 0) {
+ /* MTRR base */
+ if (!valid_mtrr_type(data & 0xff))
+ return false;
+ mask |= 0xf00;
+ } else
+ /* MTRR mask */
+ mask |= 0x7ff;
+ if (data & mask) {
+ kvm_inject_gp(vcpu, 0);
+ return false;
+ }
+
+ return true;
}
+EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
- if (!mtrr_valid(vcpu, msr, data))
+ if (!kvm_mtrr_valid(vcpu, msr, data))
return 1;
if (msr == MSR_MTRRdefType) {
@@ -1805,7 +1826,7 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
break;
default:
if (msr >= MSR_IA32_MC0_CTL &&
- msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
+ msr < MSR_IA32_MCx_CTL(bank_num)) {
u32 offset = msr - MSR_IA32_MC0_CTL;
/* only 0 or all 1s can be written to IA32_MCi_CTL
* some Linux kernels though clear bit 10 in bank 4 to
@@ -2164,7 +2185,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
- case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
+ case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
return set_msr_mce(vcpu, msr, data);
/* Performance counters are not protected by a CPUID bit,
@@ -2330,7 +2351,7 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
break;
default:
if (msr >= MSR_IA32_MC0_CTL &&
- msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
+ msr < MSR_IA32_MCx_CTL(bank_num)) {
u32 offset = msr - MSR_IA32_MC0_CTL;
data = vcpu->arch.mce_banks[offset];
break;
@@ -2419,7 +2440,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_K7_HWCR:
case MSR_VM_HSAVE_PA:
case MSR_K7_EVNTSEL0:
+ case MSR_K7_EVNTSEL1:
+ case MSR_K7_EVNTSEL2:
+ case MSR_K7_EVNTSEL3:
case MSR_K7_PERFCTR0:
+ case MSR_K7_PERFCTR1:
+ case MSR_K7_PERFCTR2:
+ case MSR_K7_PERFCTR3:
case MSR_K8_INT_PENDING_MSG:
case MSR_AMD64_NB_CFG:
case MSR_FAM10H_MMIO_CONF_BASE:
@@ -2505,7 +2532,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_IA32_MCG_CAP:
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
- case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
+ case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
return get_msr_mce(vcpu, msr, pdata);
case MSR_K7_CLK_CTL:
/*
@@ -2823,7 +2850,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
vcpu->arch.tsc_offset_adjustment = 0;
- set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
}
if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
@@ -4040,16 +4067,16 @@ void kvm_get_segment(struct kvm_vcpu *vcpu,
kvm_x86_ops->get_segment(vcpu, var, seg);
}
-gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
+gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
+ struct x86_exception *exception)
{
gpa_t t_gpa;
- struct x86_exception exception;
BUG_ON(!mmu_is_nested(vcpu));
/* NPT walks are always user-walks */
access |= PFERR_USER_MASK;
- t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception);
+ t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, exception);
return t_gpa;
}
@@ -4906,16 +4933,18 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
}
}
-static void inject_emulated_exception(struct kvm_vcpu *vcpu)
+static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
{
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
if (ctxt->exception.vector == PF_VECTOR)
- kvm_propagate_fault(vcpu, &ctxt->exception);
- else if (ctxt->exception.error_code_valid)
+ return kvm_propagate_fault(vcpu, &ctxt->exception);
+
+ if (ctxt->exception.error_code_valid)
kvm_queue_exception_e(vcpu, ctxt->exception.vector,
ctxt->exception.error_code);
else
kvm_queue_exception(vcpu, ctxt->exception.vector);
+ return false;
}
static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
@@ -4972,7 +5001,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
++vcpu->stat.insn_emulation_fail;
trace_kvm_emulate_insn_failed(vcpu);
- if (!is_guest_mode(vcpu)) {
+ if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
@@ -5224,6 +5253,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
ctxt->interruptibility = 0;
ctxt->have_exception = false;
+ ctxt->exception.vector = -1;
ctxt->perm_ok = false;
ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
@@ -5276,8 +5306,9 @@ restart:
}
if (ctxt->have_exception) {
- inject_emulated_exception(vcpu);
r = EMULATE_DONE;
+ if (inject_emulated_exception(vcpu))
+ return r;
} else if (vcpu->arch.pio.count) {
if (!vcpu->arch.pio.in) {
/* FIXME: return into emulator if single-stepping. */
@@ -5545,7 +5576,7 @@ static void kvm_set_mmio_spte_mask(void)
* entry to generate page fault with PFER.RSV = 1.
*/
/* Mask the reserved physical address bits. */
- mask = ((1ull << (51 - maxphyaddr + 1)) - 1) << maxphyaddr;
+ mask = rsvd_bits(maxphyaddr, 51);
/* Bit 62 is always reserved for 32bit host. */
mask |= 0x3ull << 62;
@@ -5576,7 +5607,7 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_for_each_vcpu(i, vcpu, kvm)
- set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
+ kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
atomic_set(&kvm_guest_has_master_clock, 0);
spin_unlock(&kvm_lock);
}
@@ -5989,6 +6020,44 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
kvm_apic_update_tmr(vcpu, tmr);
}
+static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
+{
+ ++vcpu->stat.tlb_flush;
+ kvm_x86_ops->tlb_flush(vcpu);
+}
+
+void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
+{
+ struct page *page = NULL;
+
+ if (!irqchip_in_kernel(vcpu->kvm))
+ return;
+
+ if (!kvm_x86_ops->set_apic_access_page_addr)
+ return;
+
+ page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+ kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
+
+ /*
+ * Do not pin apic access page in memory, the MMU notifier
+ * will call us again if it is migrated or swapped out.
+ */
+ put_page(page);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
+
+void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+ unsigned long address)
+{
+ /*
+ * The physical address of apic access page is stored in the VMCS.
+ * Update it when it becomes invalid.
+ */
+ if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT))
+ kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+}
+
/*
* Returns 1 to let __vcpu_run() continue the guest execution loop without
* exiting to the userspace. Otherwise, the value will be returned to the
@@ -6018,7 +6087,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
kvm_mmu_sync_roots(vcpu);
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
- kvm_x86_ops->tlb_flush(vcpu);
+ kvm_vcpu_flush_tlb(vcpu);
if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
r = 0;
@@ -6049,6 +6118,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_deliver_pmi(vcpu);
if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
vcpu_scan_ioapic(vcpu);
+ if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
+ kvm_vcpu_reload_apic_access_page(vcpu);
}
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -6934,7 +7005,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
kvm_rip_write(vcpu, 0);
}
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
{
struct kvm *kvm;
struct kvm_vcpu *vcpu;
@@ -6945,7 +7016,7 @@ int kvm_arch_hardware_enable(void *garbage)
bool stable, backwards_tsc = false;
kvm_shared_msr_cpu_online();
- ret = kvm_x86_ops->hardware_enable(garbage);
+ ret = kvm_x86_ops->hardware_enable();
if (ret != 0)
return ret;
@@ -6954,7 +7025,7 @@ int kvm_arch_hardware_enable(void *garbage)
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm_for_each_vcpu(i, vcpu, kvm) {
if (!stable && vcpu->cpu == smp_processor_id())
- set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
if (stable && vcpu->arch.last_host_tsc > local_tsc) {
backwards_tsc = true;
if (vcpu->arch.last_host_tsc > max_tsc)
@@ -7008,8 +7079,7 @@ int kvm_arch_hardware_enable(void *garbage)
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu->arch.tsc_offset_adjustment += delta_cyc;
vcpu->arch.last_host_tsc = local_tsc;
- set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
- &vcpu->requests);
+ kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
}
/*
@@ -7026,10 +7096,10 @@ int kvm_arch_hardware_enable(void *garbage)
return 0;
}
-void kvm_arch_hardware_disable(void *garbage)
+void kvm_arch_hardware_disable(void)
{
- kvm_x86_ops->hardware_disable(garbage);
- drop_user_return_notifiers(garbage);
+ kvm_x86_ops->hardware_disable();
+ drop_user_return_notifiers();
}
int kvm_arch_hardware_setup(void)
@@ -7146,6 +7216,11 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
static_key_slow_dec(&kvm_no_apic_vcpu);
}
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+ kvm_x86_ops->sched_in(vcpu, cpu);
+}
+
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
if (type)
@@ -7237,10 +7312,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
kvm_free_vcpus(kvm);
- if (kvm->arch.apic_access_page)
- put_page(kvm->arch.apic_access_page);
- if (kvm->arch.ept_identity_pagetable)
- put_page(kvm->arch.ept_identity_pagetable);
kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
}
@@ -7643,3 +7714,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 306a1b77581f..7cb9c45a5fe0 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -88,15 +88,23 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
vcpu->arch.mmio_gva = gva & PAGE_MASK;
vcpu->arch.access = access;
vcpu->arch.mmio_gfn = gfn;
+ vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation;
+}
+
+static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.mmio_gen == kvm_memslots(vcpu->kvm)->generation;
}
/*
- * Clear the mmio cache info for the given gva,
- * specially, if gva is ~0ul, we clear all mmio cache info.
+ * Clear the mmio cache info for the given gva. If gva is MMIO_GVA_ANY, we
+ * clear all mmio cache info.
*/
+#define MMIO_GVA_ANY (~(gva_t)0)
+
static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva)
{
- if (gva != (~0ul) && vcpu->arch.mmio_gva != (gva & PAGE_MASK))
+ if (gva != MMIO_GVA_ANY && vcpu->arch.mmio_gva != (gva & PAGE_MASK))
return;
vcpu->arch.mmio_gva = 0;
@@ -104,7 +112,8 @@ static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva)
static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva)
{
- if (vcpu->arch.mmio_gva && vcpu->arch.mmio_gva == (gva & PAGE_MASK))
+ if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gva &&
+ vcpu->arch.mmio_gva == (gva & PAGE_MASK))
return true;
return false;
@@ -112,7 +121,8 @@ static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva)
static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
- if (vcpu->arch.mmio_gfn && vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT)
+ if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gfn &&
+ vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT)
return true;
return false;
@@ -149,6 +159,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
gva_t addr, void *val, unsigned int bytes,
struct x86_exception *exception);
+bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+
#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
| XSTATE_BNDREGS | XSTATE_BNDCSR)
extern u64 host_xcr0;
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 167ffcac16ed..95a427e57887 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -48,7 +48,9 @@ enum address_markers_idx {
LOW_KERNEL_NR,
VMALLOC_START_NR,
VMEMMAP_START_NR,
+# ifdef CONFIG_X86_ESPFIX64
ESPFIX_START_NR,
+# endif
HIGH_KERNEL_NR,
MODULES_VADDR_NR,
MODULES_END_NR,
@@ -71,7 +73,9 @@ static struct addr_marker address_markers[] = {
{ PAGE_OFFSET, "Low Kernel Mapping" },
{ VMALLOC_START, "vmalloc() Area" },
{ VMEMMAP_START, "Vmemmap" },
+# ifdef CONFIG_X86_ESPFIX64
{ ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
+# endif
{ __START_KERNEL_map, "High Kernel Mapping" },
{ MODULES_VADDR, "Modules" },
{ MODULES_END, "End Modules" },
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 25e7e1372bb2..919b91205cd4 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -31,7 +31,7 @@
#include <linux/sched.h>
#include <asm/elf.h>
-struct __read_mostly va_alignment va_align = {
+struct va_alignment __read_mostly va_align = {
.flags = -1,
};
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index c61ea57d1ba1..9a2b7101ae8a 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -326,27 +326,6 @@ static void pci_fixup_video(struct pci_dev *pdev)
struct pci_bus *bus;
u16 config;
- if (!vga_default_device()) {
- resource_size_t start, end;
- int i;
-
- /* Does firmware framebuffer belong to us? */
- for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
- if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
- continue;
-
- start = pci_resource_start(pdev, i);
- end = pci_resource_end(pdev, i);
-
- if (!start || !end)
- continue;
-
- if (screen_info.lfb_base >= start &&
- (screen_info.lfb_base + screen_info.lfb_size) < end)
- vga_set_default_device(pdev);
- }
- }
-
/* Is VGA routed to us? */
bus = pdev->bus;
while (bus) {
@@ -371,8 +350,7 @@ static void pci_fixup_video(struct pci_dev *pdev)
pci_read_config_word(pdev, PCI_COMMAND, &config);
if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) {
pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW;
- dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n");
- vga_set_default_device(pdev);
+ dev_printk(KERN_DEBUG, &pdev->dev, "Video device with shadowed ROM\n");
}
}
}