aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/include
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/include')
-rw-r--r--arch/x86/include/asm/agp.h2
-rw-r--r--arch/x86/include/asm/alternative-asm.h114
-rw-r--r--arch/x86/include/asm/alternative.h149
-rw-r--r--arch/x86/include/asm/asm-prototypes.h13
-rw-r--r--arch/x86/include/asm/bug.h9
-rw-r--r--arch/x86/include/asm/cmpxchg.h2
-rw-r--r--arch/x86/include/asm/cpu.h13
-rw-r--r--arch/x86/include/asm/cpufeature.h41
-rw-r--r--arch/x86/include/asm/cpufeatures.h9
-rw-r--r--arch/x86/include/asm/elf.h10
-rw-r--r--arch/x86/include/asm/entry-common.h16
-rw-r--r--arch/x86/include/asm/floppy.h1
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h131
-rw-r--r--arch/x86/include/asm/idtentry.h17
-rw-r--r--arch/x86/include/asm/inat.h2
-rw-r--r--arch/x86/include/asm/insn-eval.h4
-rw-r--r--arch/x86/include/asm/insn.h46
-rw-r--r--arch/x86/include/asm/intel-family.h52
-rw-r--r--arch/x86/include/asm/intel_pconfig.h2
-rw-r--r--arch/x86/include/asm/intel_pt.h2
-rw-r--r--arch/x86/include/asm/io.h2
-rw-r--r--arch/x86/include/asm/irq_stack.h2
-rw-r--r--arch/x86/include/asm/irqflags.h7
-rw-r--r--arch/x86/include/asm/jump_label.h16
-rw-r--r--arch/x86/include/asm/kexec.h5
-rw-r--r--arch/x86/include/asm/kfence.h7
-rw-r--r--arch/x86/include/asm/kprobes.h21
-rw-r--r--arch/x86/include/asm/kvm_host.h86
-rw-r--r--arch/x86/include/asm/kvm_para.h10
-rw-r--r--arch/x86/include/asm/mem_encrypt.h1
-rw-r--r--arch/x86/include/asm/mshyperv.h100
-rw-r--r--arch/x86/include/asm/msr-index.h6
-rw-r--r--arch/x86/include/asm/msr.h4
-rw-r--r--arch/x86/include/asm/nops.h176
-rw-r--r--arch/x86/include/asm/nospec-branch.h9
-rw-r--r--arch/x86/include/asm/page_64.h33
-rw-r--r--arch/x86/include/asm/page_64_types.h23
-rw-r--r--arch/x86/include/asm/paravirt.h173
-rw-r--r--arch/x86/include/asm/paravirt_types.h216
-rw-r--r--arch/x86/include/asm/pgtable.h2
-rw-r--r--arch/x86/include/asm/processor.h27
-rw-r--r--arch/x86/include/asm/proto.h2
-rw-r--r--arch/x86/include/asm/ptrace.h5
-rw-r--r--arch/x86/include/asm/segment.h30
-rw-r--r--arch/x86/include/asm/set_memory.h4
-rw-r--r--arch/x86/include/asm/setup.h5
-rw-r--r--arch/x86/include/asm/sgx.h378
-rw-r--r--arch/x86/include/asm/smap.h5
-rw-r--r--arch/x86/include/asm/smp.h1
-rw-r--r--arch/x86/include/asm/special_insns.h4
-rw-r--r--arch/x86/include/asm/stackprotector.h79
-rw-r--r--arch/x86/include/asm/suspend_32.h6
-rw-r--r--arch/x86/include/asm/svm.h4
-rw-r--r--arch/x86/include/asm/switch_to.h7
-rw-r--r--arch/x86/include/asm/syscall_wrapper.h1
-rw-r--r--arch/x86/include/asm/thread_info.h8
-rw-r--r--arch/x86/include/asm/tlbflush.h48
-rw-r--r--arch/x86/include/asm/trace/hyperv.h2
-rw-r--r--arch/x86/include/asm/uv/uv_geo.h2
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h2
-rw-r--r--arch/x86/include/asm/vdso/gettimeofday.h3
-rw-r--r--arch/x86/include/asm/vmalloc.h20
-rw-r--r--arch/x86/include/asm/vmx.h1
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h4
-rw-r--r--arch/x86/include/uapi/asm/debugreg.h1
-rw-r--r--arch/x86/include/uapi/asm/kvm.h2
-rw-r--r--arch/x86/include/uapi/asm/msgbuf.h2
-rw-r--r--arch/x86/include/uapi/asm/sgx.h2
-rw-r--r--arch/x86/include/uapi/asm/shmbuf.h2
-rw-r--r--arch/x86/include/uapi/asm/sigcontext.h2
-rw-r--r--arch/x86/include/uapi/asm/vmx.h1
71 files changed, 1178 insertions, 1016 deletions
diff --git a/arch/x86/include/asm/agp.h b/arch/x86/include/asm/agp.h
index 62da760d6d5a..cd7b14322035 100644
--- a/arch/x86/include/asm/agp.h
+++ b/arch/x86/include/asm/agp.h
@@ -9,7 +9,7 @@
* Functions to keep the agpgart mappings coherent with the MMU. The
* GART gives the CPU a physical alias of pages in memory. The alias
* region is mapped uncacheable. Make sure there are no conflicting
- * mappings with different cachability attributes for the same
+ * mappings with different cacheability attributes for the same
* page. This avoids data corruption on some CPUs.
*/
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
deleted file mode 100644
index 464034db299f..000000000000
--- a/arch/x86/include/asm/alternative-asm.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_ALTERNATIVE_ASM_H
-#define _ASM_X86_ALTERNATIVE_ASM_H
-
-#ifdef __ASSEMBLY__
-
-#include <asm/asm.h>
-
-#ifdef CONFIG_SMP
- .macro LOCK_PREFIX
-672: lock
- .pushsection .smp_locks,"a"
- .balign 4
- .long 672b - .
- .popsection
- .endm
-#else
- .macro LOCK_PREFIX
- .endm
-#endif
-
-/*
- * objtool annotation to ignore the alternatives and only consider the original
- * instruction(s).
- */
-.macro ANNOTATE_IGNORE_ALTERNATIVE
- .Lannotate_\@:
- .pushsection .discard.ignore_alts
- .long .Lannotate_\@ - .
- .popsection
-.endm
-
-/*
- * Issue one struct alt_instr descriptor entry (need to put it into
- * the section .altinstructions, see below). This entry contains
- * enough information for the alternatives patching code to patch an
- * instruction. See apply_alternatives().
- */
-.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
- .long \orig - .
- .long \alt - .
- .word \feature
- .byte \orig_len
- .byte \alt_len
- .byte \pad_len
-.endm
-
-/*
- * Define an alternative between two instructions. If @feature is
- * present, early code in apply_alternatives() replaces @oldinstr with
- * @newinstr. ".skip" directive takes care of proper instruction padding
- * in case @newinstr is longer than @oldinstr.
- */
-.macro ALTERNATIVE oldinstr, newinstr, feature
-140:
- \oldinstr
-141:
- .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
-142:
-
- .pushsection .altinstructions,"a"
- altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
- .popsection
-
- .pushsection .altinstr_replacement,"ax"
-143:
- \newinstr
-144:
- .popsection
-.endm
-
-#define old_len 141b-140b
-#define new_len1 144f-143f
-#define new_len2 145f-144f
-
-/*
- * gas compatible max based on the idea from:
- * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
- *
- * The additional "-" is needed because gas uses a "true" value of -1.
- */
-#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
-
-
-/*
- * Same as ALTERNATIVE macro above but for two alternatives. If CPU
- * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
- * @feature2, it replaces @oldinstr with @feature2.
- */
-.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
-140:
- \oldinstr
-141:
- .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
- (alt_max_short(new_len1, new_len2) - (old_len)),0x90
-142:
-
- .pushsection .altinstructions,"a"
- altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
- altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
- .popsection
-
- .pushsection .altinstr_replacement,"ax"
-143:
- \newinstr1
-144:
- \newinstr2
-145:
- .popsection
-.endm
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _ASM_X86_ALTERNATIVE_ASM_H */
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 13adca37c99a..a3c2315aca12 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -2,13 +2,17 @@
#ifndef _ASM_X86_ALTERNATIVE_H
#define _ASM_X86_ALTERNATIVE_H
-#ifndef __ASSEMBLY__
-
#include <linux/types.h>
-#include <linux/stddef.h>
#include <linux/stringify.h>
#include <asm/asm.h>
+#define ALTINSTR_FLAG_INV (1 << 15)
+#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/stddef.h>
+
/*
* Alternative inline assembly for SMP.
*
@@ -61,7 +65,6 @@ struct alt_instr {
u16 cpuid; /* cpuid bit set for replacement */
u8 instrlen; /* length of original instruction */
u8 replacementlen; /* length of new instruction */
- u8 padlen; /* length of build-time padding */
} __packed;
/*
@@ -100,7 +103,6 @@ static inline int alternatives_text_reserved(void *start, void *end)
#define alt_end_marker "663"
#define alt_slen "662b-661b"
-#define alt_pad_len alt_end_marker"b-662b"
#define alt_total_slen alt_end_marker"b-661b"
#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f"
@@ -147,10 +149,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
" .long " b_replacement(num)"f - .\n" /* new instruction */ \
" .word " __stringify(feature) "\n" /* feature bit */ \
" .byte " alt_total_slen "\n" /* source len */ \
- " .byte " alt_rlen(num) "\n" /* replacement len */ \
- " .byte " alt_pad_len "\n" /* pad len */
+ " .byte " alt_rlen(num) "\n" /* replacement len */
-#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \
+#define ALTINSTR_REPLACEMENT(newinstr, num) /* replacement */ \
"# ALT: replacement " #num "\n" \
b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n"
@@ -161,7 +162,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
ALTINSTR_ENTRY(feature, 1) \
".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
+ ALTINSTR_REPLACEMENT(newinstr, 1) \
".popsection\n"
#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
@@ -171,10 +172,15 @@ static inline int alternatives_text_reserved(void *start, void *end)
ALTINSTR_ENTRY(feature2, 2) \
".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
- ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
+ ALTINSTR_REPLACEMENT(newinstr1, 1) \
+ ALTINSTR_REPLACEMENT(newinstr2, 2) \
".popsection\n"
+/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
+#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \
+ ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \
+ newinstr_yes, feature)
+
#define ALTERNATIVE_3(oldinsn, newinsn1, feat1, newinsn2, feat2, newinsn3, feat3) \
OLDINSTR_3(oldinsn, 1, 2, 3) \
".pushsection .altinstructions,\"a\"\n" \
@@ -183,9 +189,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
ALTINSTR_ENTRY(feat3, 3) \
".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinsn1, feat1, 1) \
- ALTINSTR_REPLACEMENT(newinsn2, feat2, 2) \
- ALTINSTR_REPLACEMENT(newinsn3, feat3, 3) \
+ ALTINSTR_REPLACEMENT(newinsn1, 1) \
+ ALTINSTR_REPLACEMENT(newinsn2, 2) \
+ ALTINSTR_REPLACEMENT(newinsn3, 3) \
".popsection\n"
/*
@@ -206,15 +212,15 @@ static inline int alternatives_text_reserved(void *start, void *end)
#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
+#define alternative_ternary(oldinstr, feature, newinstr_yes, newinstr_no) \
+ asm_inline volatile(ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) ::: "memory")
+
/*
* Alternative inline assembly with input.
*
* Peculiarities:
* No memory clobber here.
* Argument numbers start with 1.
- * Best is to use constraints that are fixed size (like (%1) ... "r")
- * If you use variable sized constraints like "m" or "g" in the
- * replacement make sure to pad to the worst case length.
* Leaving an unused argument 0 to keep API compatibility.
*/
#define alternative_input(oldinstr, newinstr, feature, input...) \
@@ -271,6 +277,115 @@ static inline int alternatives_text_reserved(void *start, void *end)
*/
#define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr
+#else /* __ASSEMBLY__ */
+
+#ifdef CONFIG_SMP
+ .macro LOCK_PREFIX
+672: lock
+ .pushsection .smp_locks,"a"
+ .balign 4
+ .long 672b - .
+ .popsection
+ .endm
+#else
+ .macro LOCK_PREFIX
+ .endm
+#endif
+
+/*
+ * objtool annotation to ignore the alternatives and only consider the original
+ * instruction(s).
+ */
+.macro ANNOTATE_IGNORE_ALTERNATIVE
+ .Lannotate_\@:
+ .pushsection .discard.ignore_alts
+ .long .Lannotate_\@ - .
+ .popsection
+.endm
+
+/*
+ * Issue one struct alt_instr descriptor entry (need to put it into
+ * the section .altinstructions, see below). This entry contains
+ * enough information for the alternatives patching code to patch an
+ * instruction. See apply_alternatives().
+ */
+.macro altinstruction_entry orig alt feature orig_len alt_len
+ .long \orig - .
+ .long \alt - .
+ .word \feature
+ .byte \orig_len
+ .byte \alt_len
+.endm
+
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr. ".skip" directive takes care of proper instruction padding
+ * in case @newinstr is longer than @oldinstr.
+ */
+.macro ALTERNATIVE oldinstr, newinstr, feature
+140:
+ \oldinstr
+141:
+ .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
+142:
+
+ .pushsection .altinstructions,"a"
+ altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f
+ .popsection
+
+ .pushsection .altinstr_replacement,"ax"
+143:
+ \newinstr
+144:
+ .popsection
+.endm
+
+#define old_len 141b-140b
+#define new_len1 144f-143f
+#define new_len2 145f-144f
+
+/*
+ * gas compatible max based on the idea from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ *
+ * The additional "-" is needed because gas uses a "true" value of -1.
+ */
+#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
+
+
+/*
+ * Same as ALTERNATIVE macro above but for two alternatives. If CPU
+ * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
+ * @feature2, it replaces @oldinstr with @newinstr2.
+ */
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+140:
+ \oldinstr
+141:
+ .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
+ (alt_max_short(new_len1, new_len2) - (old_len)),0x90
+142:
+
+ .pushsection .altinstructions,"a"
+ altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f
+ altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f
+ .popsection
+
+ .pushsection .altinstr_replacement,"ax"
+143:
+ \newinstr1
+144:
+ \newinstr2
+145:
+ .popsection
+.endm
+
+/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
+#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \
+ ALTERNATIVE_2 oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \
+ newinstr_yes, feature
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 51e2bf27cc9b..4cb726c71ed8 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -19,18 +19,19 @@ extern void cmpxchg8b_emu(void);
#ifdef CONFIG_RETPOLINE
-#define DECL_INDIRECT_THUNK(reg) \
+#undef GEN
+#define GEN(reg) \
extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
-
-#define DECL_RETPOLINE(reg) \
- extern asmlinkage void __x86_retpoline_ ## reg (void);
+#include <asm/GEN-for-each-reg.h>
#undef GEN
-#define GEN(reg) DECL_INDIRECT_THUNK(reg)
+#define GEN(reg) \
+ extern asmlinkage void __x86_indirect_alt_call_ ## reg (void);
#include <asm/GEN-for-each-reg.h>
#undef GEN
-#define GEN(reg) DECL_RETPOLINE(reg)
+#define GEN(reg) \
+ extern asmlinkage void __x86_indirect_alt_jmp_ ## reg (void);
#include <asm/GEN-for-each-reg.h>
#endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index 297fa12e7e27..84b87538a15d 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -7,18 +7,9 @@
/*
* Despite that some emulators terminate on UD2, we use it for WARN().
- *
- * Since various instruction decoders/specs disagree on the encoding of
- * UD0/UD1.
*/
-
-#define ASM_UD0 ".byte 0x0f, 0xff" /* + ModRM (for Intel) */
-#define ASM_UD1 ".byte 0x0f, 0xb9" /* + ModRM */
#define ASM_UD2 ".byte 0x0f, 0x0b"
-
-#define INSN_UD0 0xff0f
#define INSN_UD2 0x0b0f
-
#define LEN_UD2 2
#ifdef CONFIG_GENERIC_BUG
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 4d4ec5cbdc51..94fbe6ae7431 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -22,7 +22,7 @@ extern void __add_wrong_size(void)
/*
* Constants for operation sizes. On 32-bit, the 64-bit size is set to
* -1 because sizeof will never return -1, thereby making those switch
- * case statements guaranteeed dead code which the compiler will
+ * case statements guaranteed dead code which the compiler will
* eliminate, and allowing the "missing symbol in the default case" to
* indicate a usage error.
*/
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index da78ccbd493b..33d41e350c79 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -41,12 +41,14 @@ unsigned int x86_family(unsigned int sig);
unsigned int x86_model(unsigned int sig);
unsigned int x86_stepping(unsigned int sig);
#ifdef CONFIG_CPU_SUP_INTEL
-extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c);
+extern void __init sld_setup(struct cpuinfo_x86 *c);
extern void switch_to_sld(unsigned long tifn);
extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
extern bool handle_guest_split_lock(unsigned long ip);
+extern void handle_bus_lock(struct pt_regs *regs);
+u8 get_this_hybrid_cpu_type(void);
#else
-static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {}
+static inline void __init sld_setup(struct cpuinfo_x86 *c) {}
static inline void switch_to_sld(unsigned long tifn) {}
static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
{
@@ -57,6 +59,13 @@ static inline bool handle_guest_split_lock(unsigned long ip)
{
return false;
}
+
+static inline void handle_bus_lock(struct pt_regs *regs) {}
+
+static inline u8 get_this_hybrid_cpu_type(void)
+{
+ return 0;
+}
#endif
#ifdef CONFIG_IA32_FEAT_CTL
void init_ia32_feat_ctl(struct cpuinfo_x86 *c);
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 1728d4ce5730..16a51e7288d5 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -8,6 +8,7 @@
#include <asm/asm.h>
#include <linux/bitops.h>
+#include <asm/alternative.h>
enum cpuid_leafs
{
@@ -175,39 +176,15 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
*/
static __always_inline bool _static_cpu_has(u16 bit)
{
- asm_volatile_goto("1: jmp 6f\n"
- "2:\n"
- ".skip -(((5f-4f) - (2b-1b)) > 0) * "
- "((5f-4f) - (2b-1b)),0x90\n"
- "3:\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 4f - .\n" /* repl offset */
- " .word %P[always]\n" /* always replace */
- " .byte 3b - 1b\n" /* src len */
- " .byte 5f - 4f\n" /* repl len */
- " .byte 3b - 2b\n" /* pad len */
- ".previous\n"
- ".section .altinstr_replacement,\"ax\"\n"
- "4: jmp %l[t_no]\n"
- "5:\n"
- ".previous\n"
- ".section .altinstructions,\"a\"\n"
- " .long 1b - .\n" /* src offset */
- " .long 0\n" /* no replacement */
- " .word %P[feature]\n" /* feature bit */
- " .byte 3b - 1b\n" /* src len */
- " .byte 0\n" /* repl len */
- " .byte 0\n" /* pad len */
- ".previous\n"
- ".section .altinstr_aux,\"ax\"\n"
- "6:\n"
- " testb %[bitnum],%[cap_byte]\n"
- " jnz %l[t_yes]\n"
- " jmp %l[t_no]\n"
- ".previous\n"
+ asm_volatile_goto(
+ ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]")
+ ".section .altinstr_aux,\"ax\"\n"
+ "6:\n"
+ " testb %[bitnum],%[cap_byte]\n"
+ " jnz %l[t_yes]\n"
+ " jmp %l[t_no]\n"
+ ".previous\n"
: : [feature] "i" (bit),
- [always] "i" (X86_FEATURE_ALWAYS),
[bitnum] "i" (1 << (bit & 7)),
[cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
: : t_yes, t_no);
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index cc96e26d69f7..ac37830ae941 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -84,7 +84,7 @@
/* CPU types for specific tunings: */
#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
+/* FREE, was #define X86_FEATURE_K7 ( 3*32+ 5) "" Athlon */
#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
@@ -236,6 +236,8 @@
#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */
#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
+#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */
+#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
@@ -290,6 +292,8 @@
#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */
#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
+#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */
+#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
@@ -336,6 +340,7 @@
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
+#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */
#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
@@ -354,6 +359,7 @@
#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
+#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */
#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */
#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */
#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */
@@ -374,6 +380,7 @@
#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */
+#define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */
#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 9224d40cdefe..7d7500806af8 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -283,12 +283,12 @@ extern u32 elf_hwcap2;
*
* The decision process for determining the results are:
*
- *              CPU: | lacks NX*  | has NX, ia32     | has NX, x86_64 |
- * ELF:              |            |                  |                |
+ * CPU: | lacks NX* | has NX, ia32 | has NX, x86_64 |
+ * ELF: | | | |
* ---------------------|------------|------------------|----------------|
- * missing PT_GNU_STACK | exec-all   | exec-all         | exec-none      |
- * PT_GNU_STACK == RWX  | exec-stack | exec-stack       | exec-stack     |
- * PT_GNU_STACK == RW   | exec-none  | exec-none        | exec-none      |
+ * missing PT_GNU_STACK | exec-all | exec-all | exec-none |
+ * PT_GNU_STACK == RWX | exec-stack | exec-stack | exec-stack |
+ * PT_GNU_STACK == RW | exec-none | exec-none | exec-none |
*
* exec-all : all PROT_READ user mappings are executable, except when
* backed by files on a noexec-filesystem.
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index 2b87b191b3b8..14ebd2196569 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -2,6 +2,7 @@
#ifndef _ASM_X86_ENTRY_COMMON_H
#define _ASM_X86_ENTRY_COMMON_H
+#include <linux/randomize_kstack.h>
#include <linux/user-return-notifier.h>
#include <asm/nospec-branch.h>
@@ -70,6 +71,21 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
*/
current_thread_info()->status &= ~(TS_COMPAT | TS_I386_REGS_POKED);
#endif
+
+ /*
+ * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
+ * but not enough for x86 stack utilization comfort. To keep
+ * reasonable stack head room, reduce the maximum offset to 8 bits.
+ *
+ * The actual entropy will be further reduced by the compiler when
+ * applying stack alignment constraints (see cc_stack_align4/8 in
+ * arch/x86/Makefile), which will remove the 3 (x86_64) or 2 (ia32)
+ * low bits from any entropy chosen here.
+ *
+ * Therefore, final stack offset entropy will be 5 (x86_64) or
+ * 6 (ia32) bits.
+ */
+ choose_random_kstack_offset(rdtsc() & 0xFF);
}
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h
index d43717b423cb..6ec3fc969ad5 100644
--- a/arch/x86/include/asm/floppy.h
+++ b/arch/x86/include/asm/floppy.h
@@ -74,7 +74,6 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id)
int lcount;
char *lptr;
- st = 1;
for (lcount = virtual_dma_count, lptr = virtual_dma_addr;
lcount; lcount--, lptr++) {
st = inb(virtual_dma_port + FD_STATUS);
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index e6cd3fee562b..606f5cc579b2 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -156,7 +156,7 @@ enum hv_isolation_type {
#define HV_X64_MSR_HYPERCALL 0x40000001
/* MSR used to provide vcpu index */
-#define HV_X64_MSR_VP_INDEX 0x40000002
+#define HV_REGISTER_VP_INDEX 0x40000002
/* MSR used to reset the guest OS. */
#define HV_X64_MSR_RESET 0x40000003
@@ -165,10 +165,10 @@ enum hv_isolation_type {
#define HV_X64_MSR_VP_RUNTIME 0x40000010
/* MSR used to read the per-partition time reference counter */
-#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
+#define HV_REGISTER_TIME_REF_COUNT 0x40000020
/* A partition's reference time stamp counter (TSC) page */
-#define HV_X64_MSR_REFERENCE_TSC 0x40000021
+#define HV_REGISTER_REFERENCE_TSC 0x40000021
/* MSR used to retrieve the TSC frequency */
#define HV_X64_MSR_TSC_FREQUENCY 0x40000022
@@ -183,50 +183,50 @@ enum hv_isolation_type {
#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073
/* Define synthetic interrupt controller model specific registers. */
-#define HV_X64_MSR_SCONTROL 0x40000080
-#define HV_X64_MSR_SVERSION 0x40000081
-#define HV_X64_MSR_SIEFP 0x40000082
-#define HV_X64_MSR_SIMP 0x40000083
-#define HV_X64_MSR_EOM 0x40000084
-#define HV_X64_MSR_SINT0 0x40000090
-#define HV_X64_MSR_SINT1 0x40000091
-#define HV_X64_MSR_SINT2 0x40000092
-#define HV_X64_MSR_SINT3 0x40000093
-#define HV_X64_MSR_SINT4 0x40000094
-#define HV_X64_MSR_SINT5 0x40000095
-#define HV_X64_MSR_SINT6 0x40000096
-#define HV_X64_MSR_SINT7 0x40000097
-#define HV_X64_MSR_SINT8 0x40000098
-#define HV_X64_MSR_SINT9 0x40000099
-#define HV_X64_MSR_SINT10 0x4000009A
-#define HV_X64_MSR_SINT11 0x4000009B
-#define HV_X64_MSR_SINT12 0x4000009C
-#define HV_X64_MSR_SINT13 0x4000009D
-#define HV_X64_MSR_SINT14 0x4000009E
-#define HV_X64_MSR_SINT15 0x4000009F
+#define HV_REGISTER_SCONTROL 0x40000080
+#define HV_REGISTER_SVERSION 0x40000081
+#define HV_REGISTER_SIEFP 0x40000082
+#define HV_REGISTER_SIMP 0x40000083
+#define HV_REGISTER_EOM 0x40000084
+#define HV_REGISTER_SINT0 0x40000090
+#define HV_REGISTER_SINT1 0x40000091
+#define HV_REGISTER_SINT2 0x40000092
+#define HV_REGISTER_SINT3 0x40000093
+#define HV_REGISTER_SINT4 0x40000094
+#define HV_REGISTER_SINT5 0x40000095
+#define HV_REGISTER_SINT6 0x40000096
+#define HV_REGISTER_SINT7 0x40000097
+#define HV_REGISTER_SINT8 0x40000098
+#define HV_REGISTER_SINT9 0x40000099
+#define HV_REGISTER_SINT10 0x4000009A
+#define HV_REGISTER_SINT11 0x4000009B
+#define HV_REGISTER_SINT12 0x4000009C
+#define HV_REGISTER_SINT13 0x4000009D
+#define HV_REGISTER_SINT14 0x4000009E
+#define HV_REGISTER_SINT15 0x4000009F
/*
* Synthetic Timer MSRs. Four timers per vcpu.
*/
-#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0
-#define HV_X64_MSR_STIMER0_COUNT 0x400000B1
-#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2
-#define HV_X64_MSR_STIMER1_COUNT 0x400000B3
-#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4
-#define HV_X64_MSR_STIMER2_COUNT 0x400000B5
-#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6
-#define HV_X64_MSR_STIMER3_COUNT 0x400000B7
+#define HV_REGISTER_STIMER0_CONFIG 0x400000B0
+#define HV_REGISTER_STIMER0_COUNT 0x400000B1
+#define HV_REGISTER_STIMER1_CONFIG 0x400000B2
+#define HV_REGISTER_STIMER1_COUNT 0x400000B3
+#define HV_REGISTER_STIMER2_CONFIG 0x400000B4
+#define HV_REGISTER_STIMER2_COUNT 0x400000B5
+#define HV_REGISTER_STIMER3_CONFIG 0x400000B6
+#define HV_REGISTER_STIMER3_COUNT 0x400000B7
/* Hyper-V guest idle MSR */
#define HV_X64_MSR_GUEST_IDLE 0x400000F0
/* Hyper-V guest crash notification MSR's */
-#define HV_X64_MSR_CRASH_P0 0x40000100
-#define HV_X64_MSR_CRASH_P1 0x40000101
-#define HV_X64_MSR_CRASH_P2 0x40000102
-#define HV_X64_MSR_CRASH_P3 0x40000103
-#define HV_X64_MSR_CRASH_P4 0x40000104
-#define HV_X64_MSR_CRASH_CTL 0x40000105
+#define HV_REGISTER_CRASH_P0 0x40000100
+#define HV_REGISTER_CRASH_P1 0x40000101
+#define HV_REGISTER_CRASH_P2 0x40000102
+#define HV_REGISTER_CRASH_P3 0x40000103
+#define HV_REGISTER_CRASH_P4 0x40000104
+#define HV_REGISTER_CRASH_CTL 0x40000105
/* TSC emulation after migration */
#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
@@ -236,6 +236,32 @@ enum hv_isolation_type {
/* TSC invariant control */
#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118
+/* Register name aliases for temporary compatibility */
+#define HV_X64_MSR_STIMER0_COUNT HV_REGISTER_STIMER0_COUNT
+#define HV_X64_MSR_STIMER0_CONFIG HV_REGISTER_STIMER0_CONFIG
+#define HV_X64_MSR_STIMER1_COUNT HV_REGISTER_STIMER1_COUNT
+#define HV_X64_MSR_STIMER1_CONFIG HV_REGISTER_STIMER1_CONFIG
+#define HV_X64_MSR_STIMER2_COUNT HV_REGISTER_STIMER2_COUNT
+#define HV_X64_MSR_STIMER2_CONFIG HV_REGISTER_STIMER2_CONFIG
+#define HV_X64_MSR_STIMER3_COUNT HV_REGISTER_STIMER3_COUNT
+#define HV_X64_MSR_STIMER3_CONFIG HV_REGISTER_STIMER3_CONFIG
+#define HV_X64_MSR_SCONTROL HV_REGISTER_SCONTROL
+#define HV_X64_MSR_SVERSION HV_REGISTER_SVERSION
+#define HV_X64_MSR_SIMP HV_REGISTER_SIMP
+#define HV_X64_MSR_SIEFP HV_REGISTER_SIEFP
+#define HV_X64_MSR_VP_INDEX HV_REGISTER_VP_INDEX
+#define HV_X64_MSR_EOM HV_REGISTER_EOM
+#define HV_X64_MSR_SINT0 HV_REGISTER_SINT0
+#define HV_X64_MSR_SINT15 HV_REGISTER_SINT15
+#define HV_X64_MSR_CRASH_P0 HV_REGISTER_CRASH_P0
+#define HV_X64_MSR_CRASH_P1 HV_REGISTER_CRASH_P1
+#define HV_X64_MSR_CRASH_P2 HV_REGISTER_CRASH_P2
+#define HV_X64_MSR_CRASH_P3 HV_REGISTER_CRASH_P3
+#define HV_X64_MSR_CRASH_P4 HV_REGISTER_CRASH_P4
+#define HV_X64_MSR_CRASH_CTL HV_REGISTER_CRASH_CTL
+#define HV_X64_MSR_TIME_REF_COUNT HV_REGISTER_TIME_REF_COUNT
+#define HV_X64_MSR_REFERENCE_TSC HV_REGISTER_REFERENCE_TSC
+
/*
* Declare the MSR used to setup pages used to communicate with the hypervisor.
*/
@@ -288,35 +314,6 @@ struct hv_tsc_emulation_status {
#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001
#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12
-
-/* Define hypervisor message types. */
-enum hv_message_type {
- HVMSG_NONE = 0x00000000,
-
- /* Memory access messages. */
- HVMSG_UNMAPPED_GPA = 0x80000000,
- HVMSG_GPA_INTERCEPT = 0x80000001,
-
- /* Timer notification messages. */
- HVMSG_TIMER_EXPIRED = 0x80000010,
-
- /* Error messages. */
- HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020,
- HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021,
- HVMSG_UNSUPPORTED_FEATURE = 0x80000022,
-
- /* Trace buffer complete messages. */
- HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040,
-
- /* Platform-specific processor intercept messages. */
- HVMSG_X64_IOPORT_INTERCEPT = 0x80010000,
- HVMSG_X64_MSR_INTERCEPT = 0x80010001,
- HVMSG_X64_CPUID_INTERCEPT = 0x80010002,
- HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003,
- HVMSG_X64_APIC_EOI = 0x80010004,
- HVMSG_X64_LEGACY_FP_ERROR = 0x80010005
-};
-
struct hv_nested_enlightenments_control {
struct {
__u32 directhypercall:1;
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 5eb3bdf36a41..73d45b0dfff2 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -547,7 +547,7 @@ SYM_CODE_END(spurious_entries_start)
/*
* Dummy trap number so the low level ASM macro vector number checks do not
* match which results in emitting plain IDTENTRY stubs without bells and
- * whistels.
+ * whistles.
*/
#define X86_TRAP_OTHER 0xFFFF
@@ -588,6 +588,21 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_MC, xenpv_exc_machine_check);
#endif
/* NMI */
+
+#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
+/*
+ * Special NOIST entry point for VMX which invokes this on the kernel
+ * stack. asm_exc_nmi() requires an IST to work correctly vs. the NMI
+ * 'executing' marker.
+ *
+ * On 32-bit this just uses the regular NMI entry point because 32-bit does
+ * not have ISTs.
+ */
+DECLARE_IDTENTRY(X86_TRAP_NMI, exc_nmi_noist);
+#else
+#define asm_exc_nmi_noist asm_exc_nmi
+#endif
+
DECLARE_IDTENTRY_NMI(X86_TRAP_NMI, exc_nmi);
#ifdef CONFIG_XEN_PV
DECLARE_IDTENTRY_RAW(X86_TRAP_NMI, xenpv_exc_nmi);
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
index 4cf2ad521f65..b56c5741581a 100644
--- a/arch/x86/include/asm/inat.h
+++ b/arch/x86/include/asm/inat.h
@@ -6,7 +6,7 @@
*
* Written by Masami Hiramatsu <mhiramat@redhat.com>
*/
-#include <asm/inat_types.h>
+#include <asm/inat_types.h> /* __ignore_sync_check__ */
/*
* Internal bits. Don't use bitmasks directly, because these bits are
diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index 98b4dae5e8bc..91d7182ad2d6 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -25,7 +25,7 @@ int insn_fetch_from_user(struct pt_regs *regs,
unsigned char buf[MAX_INSN_SIZE]);
int insn_fetch_from_user_inatomic(struct pt_regs *regs,
unsigned char buf[MAX_INSN_SIZE]);
-bool insn_decode(struct insn *insn, struct pt_regs *regs,
- unsigned char buf[MAX_INSN_SIZE], int buf_size);
+bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs,
+ unsigned char buf[MAX_INSN_SIZE], int buf_size);
#endif /* _ASM_X86_INSN_EVAL_H */
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 95a448fbb44c..05a6ab940f45 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -9,7 +9,7 @@
#include <asm/byteorder.h>
/* insn_attr_t is defined in inat.h */
-#include <asm/inat.h>
+#include <asm/inat.h> /* __ignore_sync_check__ */
#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN)
@@ -132,13 +132,25 @@ struct insn {
#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
-extern void insn_get_prefixes(struct insn *insn);
-extern void insn_get_opcode(struct insn *insn);
-extern void insn_get_modrm(struct insn *insn);
-extern void insn_get_sib(struct insn *insn);
-extern void insn_get_displacement(struct insn *insn);
-extern void insn_get_immediate(struct insn *insn);
-extern void insn_get_length(struct insn *insn);
+extern int insn_get_prefixes(struct insn *insn);
+extern int insn_get_opcode(struct insn *insn);
+extern int insn_get_modrm(struct insn *insn);
+extern int insn_get_sib(struct insn *insn);
+extern int insn_get_displacement(struct insn *insn);
+extern int insn_get_immediate(struct insn *insn);
+extern int insn_get_length(struct insn *insn);
+
+enum insn_mode {
+ INSN_MODE_32,
+ INSN_MODE_64,
+ /* Mode is determined by the current kernel build. */
+ INSN_MODE_KERN,
+ INSN_NUM_MODES,
+};
+
+extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m);
+
+#define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), MAX_INSN_SIZE, INSN_MODE_KERN)
/* Attribute will be determined after getting ModRM (for opcode groups) */
static inline void insn_get_attribute(struct insn *insn)
@@ -149,17 +161,6 @@ static inline void insn_get_attribute(struct insn *insn)
/* Instruction uses RIP-relative addressing */
extern int insn_rip_relative(struct insn *insn);
-/* Init insn for kernel text */
-static inline void kernel_insn_init(struct insn *insn,
- const void *kaddr, int buf_len)
-{
-#ifdef CONFIG_X86_64
- insn_init(insn, kaddr, buf_len, 1);
-#else /* CONFIG_X86_32 */
- insn_init(insn, kaddr, buf_len, 0);
-#endif
-}
-
static inline int insn_is_avx(struct insn *insn)
{
if (!insn->prefixes.got)
@@ -179,13 +180,6 @@ static inline int insn_has_emulate_prefix(struct insn *insn)
return !!insn->emulate_prefix_size;
}
-/* Ensure this instruction is decoded completely */
-static inline int insn_complete(struct insn *insn)
-{
- return insn->opcode.got && insn->modrm.got && insn->sib.got &&
- insn->displacement.got && insn->immediate.got;
-}
-
static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
{
if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 9abe842dbd84..955b06d6325a 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -32,7 +32,9 @@
* _EP - 2 socket server parts
* _EX - 4+ socket server parts
*
- * The #define line may optionally include a comment including platform names.
+ * The #define line may optionally include a comment including platform or core
+ * names. An exception is made for skylake/kabylake where steppings seem to have gotten
+ * their own names :-(
*/
/* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */
@@ -69,35 +71,41 @@
#define INTEL_FAM6_BROADWELL_X 0x4F
#define INTEL_FAM6_BROADWELL_D 0x56
-#define INTEL_FAM6_SKYLAKE_L 0x4E
-#define INTEL_FAM6_SKYLAKE 0x5E
-#define INTEL_FAM6_SKYLAKE_X 0x55
-#define INTEL_FAM6_KABYLAKE_L 0x8E
-#define INTEL_FAM6_KABYLAKE 0x9E
+#define INTEL_FAM6_SKYLAKE_L 0x4E /* Sky Lake */
+#define INTEL_FAM6_SKYLAKE 0x5E /* Sky Lake */
+#define INTEL_FAM6_SKYLAKE_X 0x55 /* Sky Lake */
+/* CASCADELAKE_X 0x55 Sky Lake -- s: 7 */
+/* COOPERLAKE_X 0x55 Sky Lake -- s: 11 */
-#define INTEL_FAM6_CANNONLAKE_L 0x66
+#define INTEL_FAM6_KABYLAKE_L 0x8E /* Sky Lake */
+/* AMBERLAKE_L 0x8E Sky Lake -- s: 9 */
+/* COFFEELAKE_L 0x8E Sky Lake -- s: 10 */
+/* WHISKEYLAKE_L 0x8E Sky Lake -- s: 11,12 */
-#define INTEL_FAM6_ICELAKE_X 0x6A
-#define INTEL_FAM6_ICELAKE_D 0x6C
-#define INTEL_FAM6_ICELAKE 0x7D
-#define INTEL_FAM6_ICELAKE_L 0x7E
-#define INTEL_FAM6_ICELAKE_NNPI 0x9D
+#define INTEL_FAM6_KABYLAKE 0x9E /* Sky Lake */
+/* COFFEELAKE 0x9E Sky Lake -- s: 10-13 */
-#define INTEL_FAM6_TIGERLAKE_L 0x8C
-#define INTEL_FAM6_TIGERLAKE 0x8D
+#define INTEL_FAM6_COMETLAKE 0xA5 /* Sky Lake */
+#define INTEL_FAM6_COMETLAKE_L 0xA6 /* Sky Lake */
-#define INTEL_FAM6_COMETLAKE 0xA5
-#define INTEL_FAM6_COMETLAKE_L 0xA6
+#define INTEL_FAM6_CANNONLAKE_L 0x66 /* Palm Cove */
-#define INTEL_FAM6_ROCKETLAKE 0xA7
+#define INTEL_FAM6_ICELAKE_X 0x6A /* Sunny Cove */
+#define INTEL_FAM6_ICELAKE_D 0x6C /* Sunny Cove */
+#define INTEL_FAM6_ICELAKE 0x7D /* Sunny Cove */
+#define INTEL_FAM6_ICELAKE_L 0x7E /* Sunny Cove */
+#define INTEL_FAM6_ICELAKE_NNPI 0x9D /* Sunny Cove */
-#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F
+#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */
-/* Hybrid Core/Atom Processors */
+#define INTEL_FAM6_ROCKETLAKE 0xA7 /* Cypress Cove */
-#define INTEL_FAM6_LAKEFIELD 0x8A
-#define INTEL_FAM6_ALDERLAKE 0x97
-#define INTEL_FAM6_ALDERLAKE_L 0x9A
+#define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */
+#define INTEL_FAM6_TIGERLAKE 0x8D /* Willow Cove */
+#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Willow Cove */
+
+#define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */
+#define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */
/* "Small Core" Processors (Atom) */
diff --git a/arch/x86/include/asm/intel_pconfig.h b/arch/x86/include/asm/intel_pconfig.h
index 3cb002b1d0f9..994638ef171b 100644
--- a/arch/x86/include/asm/intel_pconfig.h
+++ b/arch/x86/include/asm/intel_pconfig.h
@@ -38,7 +38,7 @@ enum pconfig_leaf {
#define MKTME_INVALID_ENC_ALG 4
#define MKTME_DEVICE_BUSY 5
-/* Hardware requires the structure to be 256 byte alinged. Otherwise #GP(0). */
+/* Hardware requires the structure to be 256 byte aligned. Otherwise #GP(0). */
struct mktme_key_program {
u16 keyid;
u32 keyid_ctrl;
diff --git a/arch/x86/include/asm/intel_pt.h b/arch/x86/include/asm/intel_pt.h
index 423b788f495e..ebe8d2ea44fe 100644
--- a/arch/x86/include/asm/intel_pt.h
+++ b/arch/x86/include/asm/intel_pt.h
@@ -3,7 +3,7 @@
#define _ASM_X86_INTEL_PT_H
#define PT_CPUID_LEAVES 2
-#define PT_CPUID_REGS_NUM 4 /* number of regsters (eax, ebx, ecx, edx) */
+#define PT_CPUID_REGS_NUM 4 /* number of registers (eax, ebx, ecx, edx) */
enum pt_capabilities {
PT_CAP_max_subleaf = 0,
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index d726459d08e5..841a5d104afa 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -159,7 +159,7 @@ static inline void *phys_to_virt(phys_addr_t address)
/*
* ISA I/O bus memory addresses are 1:1 with the physical address.
* However, we truncate the address to unsigned int to avoid undesirable
- * promitions in legacy drivers.
+ * promotions in legacy drivers.
*/
static inline unsigned int isa_virt_to_bus(volatile void *address)
{
diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h
index 9b2a0ff76c73..562854c60808 100644
--- a/arch/x86/include/asm/irq_stack.h
+++ b/arch/x86/include/asm/irq_stack.h
@@ -190,7 +190,7 @@
/*
* Macro to invoke __do_softirq on the irq stack. This is only called from
- * task context when bottom halfs are about to be reenabled and soft
+ * task context when bottom halves are about to be reenabled and soft
* interrupts are pending to be processed. The interrupt stack cannot be in
* use here.
*/
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 144d70ea4393..c5ce9845c999 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -109,18 +109,13 @@ static __always_inline unsigned long arch_local_irq_save(void)
}
#else
-#define ENABLE_INTERRUPTS(x) sti
-#define DISABLE_INTERRUPTS(x) cli
-
#ifdef CONFIG_X86_64
#ifdef CONFIG_DEBUG_ENTRY
-#define SAVE_FLAGS(x) pushfq; popq %rax
+#define SAVE_FLAGS pushfq; popq %rax
#endif
#define INTERRUPT_RETURN jmp native_iret
-#else
-#define INTERRUPT_RETURN iret
#endif
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 06c3cc22a058..610a05374c02 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -6,12 +6,6 @@
#define JUMP_LABEL_NOP_SIZE 5
-#ifdef CONFIG_X86_64
-# define STATIC_KEY_INIT_NOP P6_NOP5_ATOMIC
-#else
-# define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC
-#endif
-
#include <asm/asm.h>
#include <asm/nops.h>
@@ -20,10 +14,10 @@
#include <linux/stringify.h>
#include <linux/types.h>
-static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
+static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
{
asm_volatile_goto("1:"
- ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
+ ".byte " __stringify(BYTES_NOP5) "\n\t"
".pushsection __jump_table, \"aw\" \n\t"
_ASM_ALIGN "\n\t"
".long 1b - ., %l[l_yes] - . \n\t"
@@ -36,7 +30,7 @@ l_yes:
return true;
}
-static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
{
asm_volatile_goto("1:"
".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t"
@@ -63,7 +57,7 @@ l_yes:
.long \target - .Lstatic_jump_after_\@
.Lstatic_jump_after_\@:
.else
- .byte STATIC_KEY_INIT_NOP
+ .byte BYTES_NOP5
.endif
.pushsection __jump_table, "aw"
_ASM_ALIGN
@@ -75,7 +69,7 @@ l_yes:
.macro STATIC_JUMP_IF_FALSE target, key, def
.Lstatic_jump_\@:
.if \def
- .byte STATIC_KEY_INIT_NOP
+ .byte BYTES_NOP5
.else
/* Equivalent to "jmp.d32 \target" */
.byte 0xe9
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 6802c59e8252..0a6e34b07017 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -150,11 +150,6 @@ struct kimage_arch {
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
-
- /* Core ELF header buffer */
- void *elf_headers;
- unsigned long elf_headers_sz;
- unsigned long elf_load_addr;
};
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/include/asm/kfence.h b/arch/x86/include/asm/kfence.h
index 97bbb4a9083a..05b48b33baf0 100644
--- a/arch/x86/include/asm/kfence.h
+++ b/arch/x86/include/asm/kfence.h
@@ -56,8 +56,13 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect)
else
set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
- /* Flush this CPU's TLB. */
+ /*
+ * Flush this CPU's TLB, assuming whoever did the allocation/free is
+ * likely to continue running on this CPU.
+ */
+ preempt_disable();
flush_tlb_one_kernel(addr);
+ preempt_enable();
return true;
}
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index d20a3d6be36e..bd7f5886a789 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -65,10 +65,22 @@ struct arch_specific_insn {
* a post_handler).
*/
unsigned boostable:1;
- unsigned if_modifier:1;
- unsigned is_call:1;
- unsigned is_pushf:1;
- unsigned is_abs_ip:1;
+ unsigned char size; /* The size of insn */
+ union {
+ unsigned char opcode;
+ struct {
+ unsigned char type;
+ } jcc;
+ struct {
+ unsigned char type;
+ unsigned char asize;
+ } loop;
+ struct {
+ unsigned char reg;
+ } indirect;
+ };
+ s32 rel32; /* relative offset must be s32, s16, or s8 */
+ void (*emulate_op)(struct kprobe *p, struct pt_regs *regs);
/* Number of bytes of text poked */
int tp_len;
};
@@ -107,7 +119,6 @@ extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
extern int kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data);
extern int kprobe_int3_handler(struct pt_regs *regs);
-extern int kprobe_debug_handler(struct pt_regs *regs);
#else
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3768819693e5..55efbacfc244 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -113,6 +113,7 @@
#define VALID_PAGE(x) ((x) != INVALID_PAGE)
#define UNMAPPED_GVA (~(gpa_t)0)
+#define INVALID_GPA (~(gpa_t)0)
/* KVM Hugepage definitions for x86 */
#define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G
@@ -199,6 +200,7 @@ enum x86_intercept_stage;
#define KVM_NR_DB_REGS 4
+#define DR6_BUS_LOCK (1 << 11)
#define DR6_BD (1 << 13)
#define DR6_BS (1 << 14)
#define DR6_BT (1 << 15)
@@ -212,7 +214,7 @@ enum x86_intercept_stage;
* DR6_ACTIVE_LOW is also used as the init/reset value for DR6.
*/
#define DR6_ACTIVE_LOW 0xffff0ff0
-#define DR6_VOLATILE 0x0001e00f
+#define DR6_VOLATILE 0x0001e80f
#define DR6_FIXED_1 (DR6_ACTIVE_LOW & ~DR6_VOLATILE)
#define DR7_BP_EN_MASK 0x000000ff
@@ -221,12 +223,22 @@ enum x86_intercept_stage;
#define DR7_FIXED_1 0x00000400
#define DR7_VOLATILE 0xffff2bff
+#define KVM_GUESTDBG_VALID_MASK \
+ (KVM_GUESTDBG_ENABLE | \
+ KVM_GUESTDBG_SINGLESTEP | \
+ KVM_GUESTDBG_USE_HW_BP | \
+ KVM_GUESTDBG_USE_SW_BP | \
+ KVM_GUESTDBG_INJECT_BP | \
+ KVM_GUESTDBG_INJECT_DB)
+
+
#define PFERR_PRESENT_BIT 0
#define PFERR_WRITE_BIT 1
#define PFERR_USER_BIT 2
#define PFERR_RSVD_BIT 3
#define PFERR_FETCH_BIT 4
#define PFERR_PK_BIT 5
+#define PFERR_SGX_BIT 15
#define PFERR_GUEST_FINAL_BIT 32
#define PFERR_GUEST_PAGE_BIT 33
@@ -236,6 +248,7 @@ enum x86_intercept_stage;
#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
#define PFERR_PK_MASK (1U << PFERR_PK_BIT)
+#define PFERR_SGX_MASK (1U << PFERR_SGX_BIT)
#define PFERR_GUEST_FINAL_MASK (1ULL << PFERR_GUEST_FINAL_BIT)
#define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT)
@@ -396,7 +409,7 @@ struct kvm_mmu {
u32 pkru_mask;
u64 *pae_root;
- u64 *lm_root;
+ u64 *pml4_root;
/*
* check zero bits on shadow page table entries, these
@@ -1054,6 +1067,9 @@ struct kvm_arch {
u32 user_space_msr_mask;
struct kvm_x86_msr_filter __rcu *msr_filter;
+ /* Guest can access the SGX PROVISIONKEY. */
+ bool sgx_provisioning_allowed;
+
struct kvm_pmu_event_filter __rcu *pmu_event_filter;
struct task_struct *nx_lpage_recovery_thread;
@@ -1068,25 +1084,36 @@ struct kvm_arch {
bool tdp_mmu_enabled;
/*
- * List of struct kvmp_mmu_pages being used as roots.
+ * List of struct kvm_mmu_pages being used as roots.
* All struct kvm_mmu_pages in the list should have
* tdp_mmu_page set.
- * All struct kvm_mmu_pages in the list should have a positive
- * root_count except when a thread holds the MMU lock and is removing
- * an entry from the list.
+ *
+ * For reads, this list is protected by:
+ * the MMU lock in read mode + RCU or
+ * the MMU lock in write mode
+ *
+ * For writes, this list is protected by:
+ * the MMU lock in read mode + the tdp_mmu_pages_lock or
+ * the MMU lock in write mode
+ *
+ * Roots will remain in the list until their tdp_mmu_root_count
+ * drops to zero, at which point the thread that decremented the
+ * count to zero should remove the root from the list and clean
+ * it up, freeing the root after an RCU grace period.
*/
struct list_head tdp_mmu_roots;
/*
* List of struct kvmp_mmu_pages not being used as roots.
* All struct kvm_mmu_pages in the list should have
- * tdp_mmu_page set and a root_count of 0.
+ * tdp_mmu_page set and a tdp_mmu_root_count of 0.
*/
struct list_head tdp_mmu_pages;
/*
* Protects accesses to the following fields when the MMU lock
* is held in read mode:
+ * - tdp_mmu_roots (above)
* - tdp_mmu_pages (above)
* - the link field of struct kvm_mmu_pages used by the TDP MMU
* - lpage_disallowed_mmu_pages
@@ -1143,6 +1170,9 @@ struct kvm_vcpu_stat {
u64 req_event;
u64 halt_poll_success_ns;
u64 halt_poll_fail_ns;
+ u64 nested_run;
+ u64 directed_yield_attempted;
+ u64 directed_yield_successful;
};
struct x86_instruction_info;
@@ -1269,8 +1299,8 @@ struct kvm_x86_ops {
int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
- void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, unsigned long pgd,
- int pgd_level);
+ void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa,
+ int root_level);
bool (*has_wbinvd_exit)(void);
@@ -1339,6 +1369,7 @@ struct kvm_x86_ops {
int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+ int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
int (*get_msr_feature)(struct kvm_msr_entry *entry);
@@ -1357,6 +1388,7 @@ struct kvm_x86_ops {
struct kvm_x86_nested_ops {
int (*check_events)(struct kvm_vcpu *vcpu);
bool (*hv_timer_pending)(struct kvm_vcpu *vcpu);
+ void (*triple_fault)(struct kvm_vcpu *vcpu);
int (*get_state)(struct kvm_vcpu *vcpu,
struct kvm_nested_state __user *user_kvm_nested_state,
unsigned user_data_size);
@@ -1387,6 +1419,7 @@ struct kvm_arch_async_pf {
bool direct_map;
};
+extern u32 __read_mostly kvm_nr_uret_msrs;
extern u64 __read_mostly host_efer;
extern bool __read_mostly allow_smaller_maxphyaddr;
extern struct kvm_x86_ops kvm_x86_ops;
@@ -1428,9 +1461,6 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
int kvm_mmu_create(struct kvm_vcpu *vcpu);
void kvm_mmu_init_vm(struct kvm *kvm);
void kvm_mmu_uninit_vm(struct kvm *kvm);
-void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
- u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask,
- u64 acc_track_mask, u64 me_mask);
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
@@ -1440,8 +1470,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
const struct kvm_memory_slot *memslot);
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
struct kvm_memory_slot *memslot);
-void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
- struct kvm_memory_slot *memslot);
void kvm_mmu_zap_all(struct kvm *kvm);
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
@@ -1488,7 +1516,7 @@ extern u64 kvm_mce_cap_supported;
/*
* EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing
* userspace I/O) to indicate that the emulation context
- * should be resued as is, i.e. skip initialization of
+ * should be reused as is, i.e. skip initialization of
* emulation context, instruction fetch and decode.
*
* EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware.
@@ -1513,7 +1541,7 @@ extern u64 kvm_mce_cap_supported;
*
* EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware
* backdoor emulation, which is opt in via module param.
- * VMware backoor emulation handles select instructions
+ * VMware backdoor emulation handles select instructions
* and reinjects the #GP for all other cases.
*
* EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which
@@ -1538,6 +1566,11 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data);
int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu);
int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu);
+int kvm_emulate_as_nop(struct kvm_vcpu *vcpu);
+int kvm_emulate_invd(struct kvm_vcpu *vcpu);
+int kvm_emulate_mwait(struct kvm_vcpu *vcpu);
+int kvm_handle_invalid_op(struct kvm_vcpu *vcpu);
+int kvm_emulate_monitor(struct kvm_vcpu *vcpu);
int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in);
int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
@@ -1566,14 +1599,14 @@ void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val);
unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
-int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr);
+int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu);
int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
-bool kvm_rdpmc(struct kvm_vcpu *vcpu);
+int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu);
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
@@ -1614,9 +1647,6 @@ void kvm_update_dr7(struct kvm_vcpu *vcpu);
int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
-int kvm_mmu_load(struct kvm_vcpu *vcpu);
-void kvm_mmu_unload(struct kvm_vcpu *vcpu);
-void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
ulong roots_to_free);
gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
@@ -1735,11 +1765,7 @@ asmlinkage void kvm_spurious_fault(void);
_ASM_EXTABLE(666b, 667b)
#define KVM_ARCH_WANT_MMU_NOTIFIER
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
- unsigned flags);
-int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
-int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
-int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+
int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_cpu_has_extint(struct kvm_vcpu *v);
@@ -1752,9 +1778,15 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit);
-void kvm_define_user_return_msr(unsigned index, u32 msr);
+int kvm_add_user_return_msr(u32 msr);
+int kvm_find_user_return_msr(u32 msr);
int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
+static inline bool kvm_is_supported_user_return_msr(u32 msr)
+{
+ return kvm_find_user_return_msr(msr) >= 0;
+}
+
u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 338119852512..69299878b200 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -7,8 +7,6 @@
#include <linux/interrupt.h>
#include <uapi/asm/kvm_para.h>
-extern void kvmclock_init(void);
-
#ifdef CONFIG_KVM_GUEST
bool kvm_check_and_clear_guest_paused(void);
#else
@@ -86,13 +84,14 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
}
#ifdef CONFIG_KVM_GUEST
+void kvmclock_init(void);
+void kvmclock_disable(void);
bool kvm_para_available(void);
unsigned int kvm_arch_para_features(void);
unsigned int kvm_arch_para_hints(void);
void kvm_async_pf_task_wait_schedule(u32 token);
void kvm_async_pf_task_wake(u32 token);
u32 kvm_read_and_reset_apf_flags(void);
-void kvm_disable_steal_time(void);
bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token);
DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
@@ -137,11 +136,6 @@ static inline u32 kvm_read_and_reset_apf_flags(void)
return 0;
}
-static inline void kvm_disable_steal_time(void)
-{
- return;
-}
-
static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
{
return false;
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 31c4df123aa0..9c80c68d75b5 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -20,7 +20,6 @@
extern u64 sme_me_mask;
extern u64 sev_status;
-extern bool sev_enabled;
void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr,
unsigned long decrypted_kernel_vaddr,
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index ccf60a809a17..67ff0d637e55 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -9,70 +9,29 @@
#include <asm/hyperv-tlfs.h>
#include <asm/nospec-branch.h>
#include <asm/paravirt.h>
+#include <asm/mshyperv.h>
typedef int (*hyperv_fill_flush_list_func)(
struct hv_guest_mapping_flush_list *flush,
void *data);
-#define hv_init_timer(timer, tick) \
- wrmsrl(HV_X64_MSR_STIMER0_COUNT + (2*timer), tick)
-#define hv_init_timer_config(timer, val) \
- wrmsrl(HV_X64_MSR_STIMER0_CONFIG + (2*timer), val)
-
-#define hv_get_simp(val) rdmsrl(HV_X64_MSR_SIMP, val)
-#define hv_set_simp(val) wrmsrl(HV_X64_MSR_SIMP, val)
-
-#define hv_get_siefp(val) rdmsrl(HV_X64_MSR_SIEFP, val)
-#define hv_set_siefp(val) wrmsrl(HV_X64_MSR_SIEFP, val)
-
-#define hv_get_synic_state(val) rdmsrl(HV_X64_MSR_SCONTROL, val)
-#define hv_set_synic_state(val) wrmsrl(HV_X64_MSR_SCONTROL, val)
-
-#define hv_get_vp_index(index) rdmsrl(HV_X64_MSR_VP_INDEX, index)
-
-#define hv_signal_eom() wrmsrl(HV_X64_MSR_EOM, 0)
-
-#define hv_get_synint_state(int_num, val) \
- rdmsrl(HV_X64_MSR_SINT0 + int_num, val)
-#define hv_set_synint_state(int_num, val) \
- wrmsrl(HV_X64_MSR_SINT0 + int_num, val)
-#define hv_recommend_using_aeoi() \
- (!(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED))
+static inline void hv_set_register(unsigned int reg, u64 value)
+{
+ wrmsrl(reg, value);
+}
-#define hv_get_crash_ctl(val) \
- rdmsrl(HV_X64_MSR_CRASH_CTL, val)
+static inline u64 hv_get_register(unsigned int reg)
+{
+ u64 value;
-#define hv_get_time_ref_count(val) \
- rdmsrl(HV_X64_MSR_TIME_REF_COUNT, val)
+ rdmsrl(reg, value);
+ return value;
+}
-#define hv_get_reference_tsc(val) \
- rdmsrl(HV_X64_MSR_REFERENCE_TSC, val)
-#define hv_set_reference_tsc(val) \
- wrmsrl(HV_X64_MSR_REFERENCE_TSC, val)
-#define hv_set_clocksource_vdso(val) \
- ((val).vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK)
-#define hv_enable_vdso_clocksource() \
- vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK);
#define hv_get_raw_timer() rdtsc_ordered()
-#define hv_get_vector() HYPERVISOR_CALLBACK_VECTOR
-
-/*
- * Reference to pv_ops must be inline so objtool
- * detection of noinstr violations can work correctly.
- */
-static __always_inline void hv_setup_sched_clock(void *sched_clock)
-{
-#ifdef CONFIG_PARAVIRT
- pv_ops.time.sched_clock = sched_clock;
-#endif
-}
void hyperv_vector_handler(struct pt_regs *regs);
-static inline void hv_enable_stimer0_percpu_irq(int irq) {}
-static inline void hv_disable_stimer0_percpu_irq(int irq) {}
-
-
#if IS_ENABLED(CONFIG_HYPERV)
extern int hyperv_init_cpuhp;
@@ -189,38 +148,6 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
return hv_status;
}
-/*
- * Rep hypercalls. Callers of this functions are supposed to ensure that
- * rep_count and varhead_size comply with Hyper-V hypercall definition.
- */
-static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
- void *input, void *output)
-{
- u64 control = code;
- u64 status;
- u16 rep_comp;
-
- control |= (u64)varhead_size << HV_HYPERCALL_VARHEAD_OFFSET;
- control |= (u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET;
-
- do {
- status = hv_do_hypercall(control, input, output);
- if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS)
- return status;
-
- /* Bits 32-43 of status have 'Reps completed' data. */
- rep_comp = (status & HV_HYPERCALL_REP_COMP_MASK) >>
- HV_HYPERCALL_REP_COMP_OFFSET;
-
- control &= ~HV_HYPERCALL_REP_START_MASK;
- control |= (u64)rep_comp << HV_HYPERCALL_REP_START_OFFSET;
-
- touch_nmi_watchdog();
- } while (rep_comp < rep_count);
-
- return status;
-}
-
extern struct hv_vp_assist_page **hv_vp_assist_page;
static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
@@ -233,9 +160,6 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
void __init hyperv_init(void);
void hyperv_setup_mmu_ops(void);
-void *hv_alloc_hyperv_page(void);
-void *hv_alloc_hyperv_zeroed_page(void);
-void hv_free_hyperv_page(unsigned long addr);
void set_hv_tscchange_cb(void (*cb)(void));
void clear_hv_tscchange_cb(void);
void hyperv_stop_tsc_emulation(void);
@@ -272,8 +196,6 @@ int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
static inline void hyperv_setup_mmu_ops(void) {}
-static inline void *hv_alloc_hyperv_page(void) { return NULL; }
-static inline void hv_free_hyperv_page(unsigned long addr) {}
static inline void set_hv_tscchange_cb(void (*cb)(void)) {}
static inline void clear_hv_tscchange_cb(void) {}
static inline void hyperv_stop_tsc_emulation(void) {};
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 546d6ecf0a35..742d89a00721 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -185,6 +185,9 @@
#define MSR_PEBS_DATA_CFG 0x000003f2
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
+#define PERF_CAP_METRICS_IDX 15
+#define PERF_CAP_PT_IDX 16
+
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
#define MSR_IA32_RTIT_CTL 0x00000570
@@ -265,6 +268,7 @@
#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
#define DEBUGCTLMSR_BTF_SHIFT 1
#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
+#define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2)
#define DEBUGCTLMSR_TR (1UL << 6)
#define DEBUGCTLMSR_BTS (1UL << 7)
#define DEBUGCTLMSR_BTINT (1UL << 8)
@@ -628,8 +632,6 @@
#define MSR_IA32_APICBASE_ENABLE (1<<11)
#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
-#define MSR_IA32_TSCDEADLINE 0x000006e0
-
#define MSR_IA32_UCODE_WRITE 0x00000079
#define MSR_IA32_UCODE_REV 0x0000008b
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index e16cccdd0420..a3f87f1015d3 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -324,10 +324,6 @@ static inline int wrmsrl_safe(u32 msr, u64 val)
return wrmsr_safe(msr, (u32)val, (u32)(val >> 32));
}
-#define write_tsc(low, high) wrmsr(MSR_IA32_TSC, (low), (high))
-
-#define write_rdtscp_aux(val) wrmsr(MSR_TSC_AUX, (val), 0)
-
struct msr *msrs_alloc(void);
void msrs_free(struct msr *msrs);
int msr_set_bit(u32 msr, u8 bit);
diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h
index 12f12b5cf2ca..c1e5e818ba16 100644
--- a/arch/x86/include/asm/nops.h
+++ b/arch/x86/include/asm/nops.h
@@ -4,89 +4,58 @@
/*
* Define nops for use with alternative() and for tracing.
- *
- * *_NOP5_ATOMIC must be a single instruction.
*/
-#define NOP_DS_PREFIX 0x3e
+#ifndef CONFIG_64BIT
-/* generic versions from gas
- 1: nop
- the following instructions are NOT nops in 64-bit mode,
- for 64-bit mode use K8 or P6 nops instead
- 2: movl %esi,%esi
- 3: leal 0x00(%esi),%esi
- 4: leal 0x00(,%esi,1),%esi
- 6: leal 0x00000000(%esi),%esi
- 7: leal 0x00000000(,%esi,1),%esi
-*/
-#define GENERIC_NOP1 0x90
-#define GENERIC_NOP2 0x89,0xf6
-#define GENERIC_NOP3 0x8d,0x76,0x00
-#define GENERIC_NOP4 0x8d,0x74,0x26,0x00
-#define GENERIC_NOP5 GENERIC_NOP1,GENERIC_NOP4
-#define GENERIC_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00
-#define GENERIC_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00
-#define GENERIC_NOP8 GENERIC_NOP1,GENERIC_NOP7
-#define GENERIC_NOP5_ATOMIC NOP_DS_PREFIX,GENERIC_NOP4
+/*
+ * Generic 32bit nops from GAS:
+ *
+ * 1: nop
+ * 2: movl %esi,%esi
+ * 3: leal 0x0(%esi),%esi
+ * 4: leal 0x0(%esi,%eiz,1),%esi
+ * 5: leal %ds:0x0(%esi,%eiz,1),%esi
+ * 6: leal 0x0(%esi),%esi
+ * 7: leal 0x0(%esi,%eiz,1),%esi
+ * 8: leal %ds:0x0(%esi,%eiz,1),%esi
+ *
+ * Except 5 and 8, which are DS prefixed 4 and 7 resp, where GAS would emit 2
+ * nop instructions.
+ */
+#define BYTES_NOP1 0x90
+#define BYTES_NOP2 0x89,0xf6
+#define BYTES_NOP3 0x8d,0x76,0x00
+#define BYTES_NOP4 0x8d,0x74,0x26,0x00
+#define BYTES_NOP5 0x3e,BYTES_NOP4
+#define BYTES_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00
+#define BYTES_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00
+#define BYTES_NOP8 0x3e,BYTES_NOP7
-/* Opteron 64bit nops
- 1: nop
- 2: osp nop
- 3: osp osp nop
- 4: osp osp osp nop
-*/
-#define K8_NOP1 GENERIC_NOP1
-#define K8_NOP2 0x66,K8_NOP1
-#define K8_NOP3 0x66,K8_NOP2
-#define K8_NOP4 0x66,K8_NOP3
-#define K8_NOP5 K8_NOP3,K8_NOP2
-#define K8_NOP6 K8_NOP3,K8_NOP3
-#define K8_NOP7 K8_NOP4,K8_NOP3
-#define K8_NOP8 K8_NOP4,K8_NOP4
-#define K8_NOP5_ATOMIC 0x66,K8_NOP4
+#else
-/* K7 nops
- uses eax dependencies (arbitrary choice)
- 1: nop
- 2: movl %eax,%eax
- 3: leal (,%eax,1),%eax
- 4: leal 0x00(,%eax,1),%eax
- 6: leal 0x00000000(%eax),%eax
- 7: leal 0x00000000(,%eax,1),%eax
-*/
-#define K7_NOP1 GENERIC_NOP1
-#define K7_NOP2 0x8b,0xc0
-#define K7_NOP3 0x8d,0x04,0x20
-#define K7_NOP4 0x8d,0x44,0x20,0x00
-#define K7_NOP5 K7_NOP4,K7_NOP1
-#define K7_NOP6 0x8d,0x80,0,0,0,0
-#define K7_NOP7 0x8D,0x04,0x05,0,0,0,0
-#define K7_NOP8 K7_NOP7,K7_NOP1
-#define K7_NOP5_ATOMIC NOP_DS_PREFIX,K7_NOP4
+/*
+ * Generic 64bit nops from GAS:
+ *
+ * 1: nop
+ * 2: osp nop
+ * 3: nopl (%eax)
+ * 4: nopl 0x00(%eax)
+ * 5: nopl 0x00(%eax,%eax,1)
+ * 6: osp nopl 0x00(%eax,%eax,1)
+ * 7: nopl 0x00000000(%eax)
+ * 8: nopl 0x00000000(%eax,%eax,1)
+ */
+#define BYTES_NOP1 0x90
+#define BYTES_NOP2 0x66,BYTES_NOP1
+#define BYTES_NOP3 0x0f,0x1f,0x00
+#define BYTES_NOP4 0x0f,0x1f,0x40,0x00
+#define BYTES_NOP5 0x0f,0x1f,0x44,0x00,0x00
+#define BYTES_NOP6 0x66,BYTES_NOP5
+#define BYTES_NOP7 0x0f,0x1f,0x80,0x00,0x00,0x00,0x00
+#define BYTES_NOP8 0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
-/* P6 nops
- uses eax dependencies (Intel-recommended choice)
- 1: nop
- 2: osp nop
- 3: nopl (%eax)
- 4: nopl 0x00(%eax)
- 5: nopl 0x00(%eax,%eax,1)
- 6: osp nopl 0x00(%eax,%eax,1)
- 7: nopl 0x00000000(%eax)
- 8: nopl 0x00000000(%eax,%eax,1)
- Note: All the above are assumed to be a single instruction.
- There is kernel code that depends on this.
-*/
-#define P6_NOP1 GENERIC_NOP1
-#define P6_NOP2 0x66,0x90
-#define P6_NOP3 0x0f,0x1f,0x00
-#define P6_NOP4 0x0f,0x1f,0x40,0
-#define P6_NOP5 0x0f,0x1f,0x44,0x00,0
-#define P6_NOP6 0x66,0x0f,0x1f,0x44,0x00,0
-#define P6_NOP7 0x0f,0x1f,0x80,0,0,0,0
-#define P6_NOP8 0x0f,0x1f,0x84,0x00,0,0,0,0
-#define P6_NOP5_ATOMIC P6_NOP5
+#endif /* CONFIG_64BIT */
#ifdef __ASSEMBLY__
#define _ASM_MK_NOP(x) .byte x
@@ -94,54 +63,19 @@
#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n"
#endif
-#if defined(CONFIG_MK7)
-#define ASM_NOP1 _ASM_MK_NOP(K7_NOP1)
-#define ASM_NOP2 _ASM_MK_NOP(K7_NOP2)
-#define ASM_NOP3 _ASM_MK_NOP(K7_NOP3)
-#define ASM_NOP4 _ASM_MK_NOP(K7_NOP4)
-#define ASM_NOP5 _ASM_MK_NOP(K7_NOP5)
-#define ASM_NOP6 _ASM_MK_NOP(K7_NOP6)
-#define ASM_NOP7 _ASM_MK_NOP(K7_NOP7)
-#define ASM_NOP8 _ASM_MK_NOP(K7_NOP8)
-#define ASM_NOP5_ATOMIC _ASM_MK_NOP(K7_NOP5_ATOMIC)
-#elif defined(CONFIG_X86_P6_NOP)
-#define ASM_NOP1 _ASM_MK_NOP(P6_NOP1)
-#define ASM_NOP2 _ASM_MK_NOP(P6_NOP2)
-#define ASM_NOP3 _ASM_MK_NOP(P6_NOP3)
-#define ASM_NOP4 _ASM_MK_NOP(P6_NOP4)
-#define ASM_NOP5 _ASM_MK_NOP(P6_NOP5)
-#define ASM_NOP6 _ASM_MK_NOP(P6_NOP6)
-#define ASM_NOP7 _ASM_MK_NOP(P6_NOP7)
-#define ASM_NOP8 _ASM_MK_NOP(P6_NOP8)
-#define ASM_NOP5_ATOMIC _ASM_MK_NOP(P6_NOP5_ATOMIC)
-#elif defined(CONFIG_X86_64)
-#define ASM_NOP1 _ASM_MK_NOP(K8_NOP1)
-#define ASM_NOP2 _ASM_MK_NOP(K8_NOP2)
-#define ASM_NOP3 _ASM_MK_NOP(K8_NOP3)
-#define ASM_NOP4 _ASM_MK_NOP(K8_NOP4)
-#define ASM_NOP5 _ASM_MK_NOP(K8_NOP5)
-#define ASM_NOP6 _ASM_MK_NOP(K8_NOP6)
-#define ASM_NOP7 _ASM_MK_NOP(K8_NOP7)
-#define ASM_NOP8 _ASM_MK_NOP(K8_NOP8)
-#define ASM_NOP5_ATOMIC _ASM_MK_NOP(K8_NOP5_ATOMIC)
-#else
-#define ASM_NOP1 _ASM_MK_NOP(GENERIC_NOP1)
-#define ASM_NOP2 _ASM_MK_NOP(GENERIC_NOP2)
-#define ASM_NOP3 _ASM_MK_NOP(GENERIC_NOP3)
-#define ASM_NOP4 _ASM_MK_NOP(GENERIC_NOP4)
-#define ASM_NOP5 _ASM_MK_NOP(GENERIC_NOP5)
-#define ASM_NOP6 _ASM_MK_NOP(GENERIC_NOP6)
-#define ASM_NOP7 _ASM_MK_NOP(GENERIC_NOP7)
-#define ASM_NOP8 _ASM_MK_NOP(GENERIC_NOP8)
-#define ASM_NOP5_ATOMIC _ASM_MK_NOP(GENERIC_NOP5_ATOMIC)
-#endif
+#define ASM_NOP1 _ASM_MK_NOP(BYTES_NOP1)
+#define ASM_NOP2 _ASM_MK_NOP(BYTES_NOP2)
+#define ASM_NOP3 _ASM_MK_NOP(BYTES_NOP3)
+#define ASM_NOP4 _ASM_MK_NOP(BYTES_NOP4)
+#define ASM_NOP5 _ASM_MK_NOP(BYTES_NOP5)
+#define ASM_NOP6 _ASM_MK_NOP(BYTES_NOP6)
+#define ASM_NOP7 _ASM_MK_NOP(BYTES_NOP7)
+#define ASM_NOP8 _ASM_MK_NOP(BYTES_NOP8)
#define ASM_NOP_MAX 8
-#define NOP_ATOMIC5 (ASM_NOP_MAX+1) /* Entry for the 5-byte atomic NOP */
#ifndef __ASSEMBLY__
-extern const unsigned char * const *ideal_nops;
-extern void arch_init_ideal_nops(void);
+extern const unsigned char * const x86_nops[];
#endif
#endif /* _ASM_X86_NOPS_H */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index cb9ad6b73973..3ad8c6d3cbb3 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -7,7 +7,6 @@
#include <linux/objtool.h>
#include <asm/alternative.h>
-#include <asm/alternative-asm.h>
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
#include <asm/unwind_hints.h>
@@ -33,7 +32,7 @@
/*
* Google experimented with loop-unrolling and this turned out to be
- * the optimal version — two calls, each with their own speculation
+ * the optimal version - two calls, each with their own speculation
* trap should their return address end up getting used, in a loop.
*/
#define __FILL_RETURN_BUFFER(reg, nr, sp) \
@@ -81,7 +80,7 @@
.macro JMP_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
- __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+ __stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
#else
jmp *%\reg
@@ -91,7 +90,7 @@
.macro CALL_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
- __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+ __stringify(call __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD
#else
call *%\reg
@@ -129,7 +128,7 @@
ALTERNATIVE_2( \
ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
- "call __x86_retpoline_%V[thunk_target]\n", \
+ "call __x86_indirect_thunk_%V[thunk_target]\n", \
X86_FEATURE_RETPOLINE, \
"lfence;\n" \
ANNOTATE_RETPOLINE_SAFE \
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 939b1cff4a7b..ca840fec7776 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -56,6 +56,39 @@ static inline void clear_page(void *page)
void copy_page(void *to, void *from);
+#ifdef CONFIG_X86_5LEVEL
+/*
+ * User space process size. This is the first address outside the user range.
+ * There are a few constraints that determine this:
+ *
+ * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
+ * address, then that syscall will enter the kernel with a
+ * non-canonical return address, and SYSRET will explode dangerously.
+ * We avoid this particular problem by preventing anything
+ * from being mapped at the maximum canonical address.
+ *
+ * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
+ * CPUs malfunction if they execute code from the highest canonical page.
+ * They'll speculate right off the end of the canonical space, and
+ * bad things happen. This is worked around in the same way as the
+ * Intel problem.
+ *
+ * With page table isolation enabled, we map the LDT in ... [stay tuned]
+ */
+static inline unsigned long task_size_max(void)
+{
+ unsigned long ret;
+
+ alternative_io("movq %[small],%0","movq %[large],%0",
+ X86_FEATURE_LA57,
+ "=r" (ret),
+ [small] "i" ((1ul << 47)-PAGE_SIZE),
+ [large] "i" ((1ul << 56)-PAGE_SIZE));
+
+ return ret;
+}
+#endif /* CONFIG_X86_5LEVEL */
+
#endif /* !__ASSEMBLY__ */
#ifdef CONFIG_X86_VSYSCALL_EMULATION
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 64297eabad63..a8d4ad856568 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -55,30 +55,13 @@
#ifdef CONFIG_X86_5LEVEL
#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled() ? 56 : 47)
+/* See task_size_max() in <asm/page_64.h> */
#else
#define __VIRTUAL_MASK_SHIFT 47
+#define task_size_max() ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
#endif
-/*
- * User space process size. This is the first address outside the user range.
- * There are a few constraints that determine this:
- *
- * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
- * address, then that syscall will enter the kernel with a
- * non-canonical return address, and SYSRET will explode dangerously.
- * We avoid this particular problem by preventing anything
- * from being mapped at the maximum canonical address.
- *
- * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
- * CPUs malfunction if they execute code from the highest canonical page.
- * They'll speculate right off the end of the canonical space, and
- * bad things happen. This is worked around in the same way as the
- * Intel problem.
- *
- * With page table isolation enabled, we map the LDT in ... [stay tuned]
- */
-#define TASK_SIZE_MAX ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
-
+#define TASK_SIZE_MAX task_size_max()
#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
/* This decides where the kernel will search for a free chunk of vm
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 4abf110e2243..da3a1ac82be5 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -15,11 +15,20 @@
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/cpumask.h>
+#include <linux/static_call_types.h>
#include <asm/frame.h>
-static inline unsigned long long paravirt_sched_clock(void)
+u64 dummy_steal_clock(int cpu);
+u64 dummy_sched_clock(void);
+
+DECLARE_STATIC_CALL(pv_steal_clock, dummy_steal_clock);
+DECLARE_STATIC_CALL(pv_sched_clock, dummy_sched_clock);
+
+void paravirt_set_sched_clock(u64 (*func)(void));
+
+static inline u64 paravirt_sched_clock(void)
{
- return PVOP_CALL0(unsigned long long, time.sched_clock);
+ return static_call(pv_sched_clock)();
}
struct static_key;
@@ -33,9 +42,13 @@ bool pv_is_native_vcpu_is_preempted(void);
static inline u64 paravirt_steal_clock(int cpu)
{
- return PVOP_CALL1(u64, time.steal_clock, cpu);
+ return static_call(pv_steal_clock)(cpu);
}
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void __init paravirt_set_cap(void);
+#endif
+
/* The paravirtualized I/O functions */
static inline void slow_down_io(void)
{
@@ -50,7 +63,7 @@ static inline void slow_down_io(void)
void native_flush_tlb_local(void);
void native_flush_tlb_global(void);
void native_flush_tlb_one_user(unsigned long addr);
-void native_flush_tlb_others(const struct cpumask *cpumask,
+void native_flush_tlb_multi(const struct cpumask *cpumask,
const struct flush_tlb_info *info);
static inline void __flush_tlb_local(void)
@@ -68,10 +81,10 @@ static inline void __flush_tlb_one_user(unsigned long addr)
PVOP_VCALL1(mmu.flush_tlb_one_user, addr);
}
-static inline void __flush_tlb_others(const struct cpumask *cpumask,
+static inline void __flush_tlb_multi(const struct cpumask *cpumask,
const struct flush_tlb_info *info)
{
- PVOP_VCALL2(mmu.flush_tlb_others, cpumask, info);
+ PVOP_VCALL2(mmu.flush_tlb_multi, cpumask, info);
}
static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
@@ -122,7 +135,9 @@ static inline void write_cr0(unsigned long x)
static inline unsigned long read_cr2(void)
{
- return PVOP_CALLEE0(unsigned long, mmu.read_cr2);
+ return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2,
+ "mov %%cr2, %%rax;",
+ ALT_NOT(X86_FEATURE_XENPV));
}
static inline void write_cr2(unsigned long x)
@@ -132,12 +147,14 @@ static inline void write_cr2(unsigned long x)
static inline unsigned long __read_cr3(void)
{
- return PVOP_CALL0(unsigned long, mmu.read_cr3);
+ return PVOP_ALT_CALL0(unsigned long, mmu.read_cr3,
+ "mov %%cr3, %%rax;", ALT_NOT(X86_FEATURE_XENPV));
}
static inline void write_cr3(unsigned long x)
{
- PVOP_VCALL1(mmu.write_cr3, x);
+ PVOP_ALT_VCALL1(mmu.write_cr3, x,
+ "mov %%rdi, %%cr3", ALT_NOT(X86_FEATURE_XENPV));
}
static inline void __write_cr4(unsigned long x)
@@ -157,7 +174,7 @@ static inline void halt(void)
static inline void wbinvd(void)
{
- PVOP_VCALL0(cpu.wbinvd);
+ PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV));
}
static inline u64 paravirt_read_msr(unsigned msr)
@@ -371,22 +388,28 @@ static inline void paravirt_release_p4d(unsigned long pfn)
static inline pte_t __pte(pteval_t val)
{
- return (pte_t) { PVOP_CALLEE1(pteval_t, mmu.make_pte, val) };
+ return (pte_t) { PVOP_ALT_CALLEE1(pteval_t, mmu.make_pte, val,
+ "mov %%rdi, %%rax",
+ ALT_NOT(X86_FEATURE_XENPV)) };
}
static inline pteval_t pte_val(pte_t pte)
{
- return PVOP_CALLEE1(pteval_t, mmu.pte_val, pte.pte);
+ return PVOP_ALT_CALLEE1(pteval_t, mmu.pte_val, pte.pte,
+ "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
}
static inline pgd_t __pgd(pgdval_t val)
{
- return (pgd_t) { PVOP_CALLEE1(pgdval_t, mmu.make_pgd, val) };
+ return (pgd_t) { PVOP_ALT_CALLEE1(pgdval_t, mmu.make_pgd, val,
+ "mov %%rdi, %%rax",
+ ALT_NOT(X86_FEATURE_XENPV)) };
}
static inline pgdval_t pgd_val(pgd_t pgd)
{
- return PVOP_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd);
+ return PVOP_ALT_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd,
+ "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
}
#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
@@ -419,12 +442,15 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
static inline pmd_t __pmd(pmdval_t val)
{
- return (pmd_t) { PVOP_CALLEE1(pmdval_t, mmu.make_pmd, val) };
+ return (pmd_t) { PVOP_ALT_CALLEE1(pmdval_t, mmu.make_pmd, val,
+ "mov %%rdi, %%rax",
+ ALT_NOT(X86_FEATURE_XENPV)) };
}
static inline pmdval_t pmd_val(pmd_t pmd)
{
- return PVOP_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd);
+ return PVOP_ALT_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd,
+ "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
}
static inline void set_pud(pud_t *pudp, pud_t pud)
@@ -436,14 +462,16 @@ static inline pud_t __pud(pudval_t val)
{
pudval_t ret;
- ret = PVOP_CALLEE1(pudval_t, mmu.make_pud, val);
+ ret = PVOP_ALT_CALLEE1(pudval_t, mmu.make_pud, val,
+ "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
return (pud_t) { ret };
}
static inline pudval_t pud_val(pud_t pud)
{
- return PVOP_CALLEE1(pudval_t, mmu.pud_val, pud.pud);
+ return PVOP_ALT_CALLEE1(pudval_t, mmu.pud_val, pud.pud,
+ "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
}
static inline void pud_clear(pud_t *pudp)
@@ -462,14 +490,17 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
static inline p4d_t __p4d(p4dval_t val)
{
- p4dval_t ret = PVOP_CALLEE1(p4dval_t, mmu.make_p4d, val);
+ p4dval_t ret = PVOP_ALT_CALLEE1(p4dval_t, mmu.make_p4d, val,
+ "mov %%rdi, %%rax",
+ ALT_NOT(X86_FEATURE_XENPV));
return (p4d_t) { ret };
}
static inline p4dval_t p4d_val(p4d_t p4d)
{
- return PVOP_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d);
+ return PVOP_ALT_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d,
+ "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
}
static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd)
@@ -556,7 +587,9 @@ static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock,
static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock)
{
- PVOP_VCALLEE1(lock.queued_spin_unlock, lock);
+ PVOP_ALT_VCALLEE1(lock.queued_spin_unlock, lock,
+ "movb $0, (%%" _ASM_ARG1 ");",
+ ALT_NOT(X86_FEATURE_PVUNLOCK));
}
static __always_inline void pv_wait(u8 *ptr, u8 val)
@@ -571,7 +604,9 @@ static __always_inline void pv_kick(int cpu)
static __always_inline bool pv_vcpu_is_preempted(long cpu)
{
- return PVOP_CALLEE1(bool, lock.vcpu_is_preempted, cpu);
+ return PVOP_ALT_CALLEE1(bool, lock.vcpu_is_preempted, cpu,
+ "xor %%" _ASM_AX ", %%" _ASM_AX ";",
+ ALT_NOT(X86_FEATURE_VCPUPREEMPT));
}
void __raw_callee_save___native_queued_spin_unlock(struct qspinlock *lock);
@@ -645,17 +680,18 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
#ifdef CONFIG_PARAVIRT_XXL
static inline notrace unsigned long arch_local_save_flags(void)
{
- return PVOP_CALLEE0(unsigned long, irq.save_fl);
+ return PVOP_ALT_CALLEE0(unsigned long, irq.save_fl, "pushf; pop %%rax;",
+ ALT_NOT(X86_FEATURE_XENPV));
}
static inline notrace void arch_local_irq_disable(void)
{
- PVOP_VCALLEE0(irq.irq_disable);
+ PVOP_ALT_VCALLEE0(irq.irq_disable, "cli;", ALT_NOT(X86_FEATURE_XENPV));
}
static inline notrace void arch_local_irq_enable(void)
{
- PVOP_VCALLEE0(irq.irq_enable);
+ PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT(X86_FEATURE_XENPV));
}
static inline notrace unsigned long arch_local_irq_save(void)
@@ -700,84 +736,27 @@ extern void default_banner(void);
.popsection
-#define COND_PUSH(set, mask, reg) \
- .if ((~(set)) & mask); push %reg; .endif
-#define COND_POP(set, mask, reg) \
- .if ((~(set)) & mask); pop %reg; .endif
-
#ifdef CONFIG_X86_64
-
-#define PV_SAVE_REGS(set) \
- COND_PUSH(set, CLBR_RAX, rax); \
- COND_PUSH(set, CLBR_RCX, rcx); \
- COND_PUSH(set, CLBR_RDX, rdx); \
- COND_PUSH(set, CLBR_RSI, rsi); \
- COND_PUSH(set, CLBR_RDI, rdi); \
- COND_PUSH(set, CLBR_R8, r8); \
- COND_PUSH(set, CLBR_R9, r9); \
- COND_PUSH(set, CLBR_R10, r10); \
- COND_PUSH(set, CLBR_R11, r11)
-#define PV_RESTORE_REGS(set) \
- COND_POP(set, CLBR_R11, r11); \
- COND_POP(set, CLBR_R10, r10); \
- COND_POP(set, CLBR_R9, r9); \
- COND_POP(set, CLBR_R8, r8); \
- COND_POP(set, CLBR_RDI, rdi); \
- COND_POP(set, CLBR_RSI, rsi); \
- COND_POP(set, CLBR_RDX, rdx); \
- COND_POP(set, CLBR_RCX, rcx); \
- COND_POP(set, CLBR_RAX, rax)
+#ifdef CONFIG_PARAVIRT_XXL
#define PARA_PATCH(off) ((off) / 8)
#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .quad, 8)
#define PARA_INDIRECT(addr) *addr(%rip)
-#else
-#define PV_SAVE_REGS(set) \
- COND_PUSH(set, CLBR_EAX, eax); \
- COND_PUSH(set, CLBR_EDI, edi); \
- COND_PUSH(set, CLBR_ECX, ecx); \
- COND_PUSH(set, CLBR_EDX, edx)
-#define PV_RESTORE_REGS(set) \
- COND_POP(set, CLBR_EDX, edx); \
- COND_POP(set, CLBR_ECX, ecx); \
- COND_POP(set, CLBR_EDI, edi); \
- COND_POP(set, CLBR_EAX, eax)
-
-#define PARA_PATCH(off) ((off) / 4)
-#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .long, 4)
-#define PARA_INDIRECT(addr) *%cs:addr
-#endif
-#ifdef CONFIG_PARAVIRT_XXL
#define INTERRUPT_RETURN \
- PARA_SITE(PARA_PATCH(PV_CPU_iret), \
- ANNOTATE_RETPOLINE_SAFE; \
- jmp PARA_INDIRECT(pv_ops+PV_CPU_iret);)
-
-#define DISABLE_INTERRUPTS(clobbers) \
- PARA_SITE(PARA_PATCH(PV_IRQ_irq_disable), \
- PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
- ANNOTATE_RETPOLINE_SAFE; \
- call PARA_INDIRECT(pv_ops+PV_IRQ_irq_disable); \
- PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
-
-#define ENABLE_INTERRUPTS(clobbers) \
- PARA_SITE(PARA_PATCH(PV_IRQ_irq_enable), \
- PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
- ANNOTATE_RETPOLINE_SAFE; \
- call PARA_INDIRECT(pv_ops+PV_IRQ_irq_enable); \
- PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
-#endif
+ ANNOTATE_RETPOLINE_SAFE; \
+ ALTERNATIVE_TERNARY("jmp *paravirt_iret(%rip);", \
+ X86_FEATURE_XENPV, "jmp xen_iret;", "jmp native_iret;")
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_PARAVIRT_XXL
#ifdef CONFIG_DEBUG_ENTRY
-#define SAVE_FLAGS(clobbers) \
- PARA_SITE(PARA_PATCH(PV_IRQ_save_fl), \
- PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
- ANNOTATE_RETPOLINE_SAFE; \
- call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl); \
- PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
+.macro PARA_IRQ_save_fl
+ PARA_SITE(PARA_PATCH(PV_IRQ_save_fl),
+ ANNOTATE_RETPOLINE_SAFE;
+ call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl);)
+.endm
+
+#define SAVE_FLAGS ALTERNATIVE "PARA_IRQ_save_fl;", "pushf; pop %rax;", \
+ ALT_NOT(X86_FEATURE_XENPV)
#endif
#endif /* CONFIG_PARAVIRT_XXL */
#endif /* CONFIG_X86_64 */
@@ -800,5 +779,11 @@ static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
{
}
#endif
+
+#ifndef CONFIG_PARAVIRT_SPINLOCKS
+static inline void paravirt_set_cap(void)
+{
+}
+#endif
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_PARAVIRT_H */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index de87087d3bde..d9d6b0203ec4 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -3,7 +3,6 @@
#define _ASM_X86_PARAVIRT_TYPES_H
/* Bitmask of what can be clobbered: usually at least eax. */
-#define CLBR_NONE 0
#define CLBR_EAX (1 << 0)
#define CLBR_ECX (1 << 1)
#define CLBR_EDX (1 << 2)
@@ -15,7 +14,6 @@
#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX)
#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX)
-#define CLBR_SCRATCH (0)
#else
#define CLBR_RAX CLBR_EAX
#define CLBR_RCX CLBR_ECX
@@ -32,12 +30,9 @@
#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \
CLBR_RCX | CLBR_R8 | CLBR_R9)
#define CLBR_RET_REG (CLBR_RAX)
-#define CLBR_SCRATCH (CLBR_R10 | CLBR_R11)
#endif /* X86_64 */
-#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG)
-
#ifndef __ASSEMBLY__
#include <asm/desc_defs.h>
@@ -73,19 +68,6 @@ struct pv_info {
const char *name;
};
-struct pv_init_ops {
- /*
- * Patch may replace one of the defined code sequences with
- * arbitrary code, subject to the same register constraints.
- * This generally means the code is not free to clobber any
- * registers other than EAX. The patch function should return
- * the number of bytes of code generated, as we nop pad the
- * rest in generic code.
- */
- unsigned (*patch)(u8 type, void *insn_buff,
- unsigned long addr, unsigned len);
-} __no_randomize_layout;
-
#ifdef CONFIG_PARAVIRT_XXL
struct pv_lazy_ops {
/* Set deferred update mode, used for batching operations. */
@@ -95,11 +77,6 @@ struct pv_lazy_ops {
} __no_randomize_layout;
#endif
-struct pv_time_ops {
- unsigned long long (*sched_clock)(void);
- unsigned long long (*steal_clock)(int cpu);
-} __no_randomize_layout;
-
struct pv_cpu_ops {
/* hooks for various privileged instructions */
void (*io_delay)(void);
@@ -156,10 +133,6 @@ struct pv_cpu_ops {
u64 (*read_pmc)(int counter);
- /* Normal iret. Jump to this with the standard iret stack
- frame set up. */
- void (*iret)(void);
-
void (*start_context_switch)(struct task_struct *prev);
void (*end_context_switch)(struct task_struct *next);
#endif
@@ -188,8 +161,8 @@ struct pv_mmu_ops {
void (*flush_tlb_user)(void);
void (*flush_tlb_kernel)(void);
void (*flush_tlb_one_user)(unsigned long addr);
- void (*flush_tlb_others)(const struct cpumask *cpus,
- const struct flush_tlb_info *info);
+ void (*flush_tlb_multi)(const struct cpumask *cpus,
+ const struct flush_tlb_info *info);
void (*tlb_remove_table)(struct mmu_gather *tlb, void *table);
@@ -290,8 +263,6 @@ struct pv_lock_ops {
* number for each function using the offset which we use to indicate
* what to patch. */
struct paravirt_patch_template {
- struct pv_init_ops init;
- struct pv_time_ops time;
struct pv_cpu_ops cpu;
struct pv_irq_ops irq;
struct pv_mmu_ops mmu;
@@ -300,6 +271,7 @@ struct paravirt_patch_template {
extern struct pv_info pv_info;
extern struct paravirt_patch_template pv_ops;
+extern void (*paravirt_iret)(void);
#define PARAVIRT_PATCH(x) \
(offsetof(struct paravirt_patch_template, x) / sizeof(void *))
@@ -331,11 +303,7 @@ extern struct paravirt_patch_template pv_ops;
/* Simple instruction patching code. */
#define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
-unsigned paravirt_patch_ident_64(void *insn_buff, unsigned len);
-unsigned paravirt_patch_default(u8 type, void *insn_buff, unsigned long addr, unsigned len);
-unsigned paravirt_patch_insns(void *insn_buff, unsigned len, const char *start, const char *end);
-
-unsigned native_patch(u8 type, void *insn_buff, unsigned long addr, unsigned len);
+unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr, unsigned int len);
int paravirt_disable_iospace(void);
@@ -371,7 +339,7 @@ int paravirt_disable_iospace(void);
* on the stack. All caller-save registers (eax,edx,ecx) are expected
* to be modified (either clobbered or used for return values).
* X86_64, on the other hand, already specifies a register-based calling
- * conventions, returning at %rax, with parameteres going on %rdi, %rsi,
+ * conventions, returning at %rax, with parameters going on %rdi, %rsi,
* %rdx, and %rcx. Note that for this reason, x86_64 does not need any
* special handling for dealing with 4 arguments, unlike i386.
* However, x86_64 also have to clobber all caller saved registers, which
@@ -414,11 +382,9 @@ int paravirt_disable_iospace(void);
* makes sure the incoming and outgoing types are always correct.
*/
#ifdef CONFIG_X86_32
-#define PVOP_VCALL_ARGS \
+#define PVOP_CALL_ARGS \
unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;
-#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
-
#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x))
#define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x))
#define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x))
@@ -434,12 +400,10 @@ int paravirt_disable_iospace(void);
#define VEXTRA_CLOBBERS
#else /* CONFIG_X86_64 */
/* [re]ax isn't an arg, but the return val */
-#define PVOP_VCALL_ARGS \
+#define PVOP_CALL_ARGS \
unsigned long __edi = __edi, __esi = __esi, \
__edx = __edx, __ecx = __ecx, __eax = __eax;
-#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
-
#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x))
#define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x))
@@ -464,152 +428,138 @@ int paravirt_disable_iospace(void);
#define PVOP_TEST_NULL(op) ((void)pv_ops.op)
#endif
-#define PVOP_RETMASK(rettype) \
+#define PVOP_RETVAL(rettype) \
({ unsigned long __mask = ~0UL; \
+ BUILD_BUG_ON(sizeof(rettype) > sizeof(unsigned long)); \
switch (sizeof(rettype)) { \
case 1: __mask = 0xffUL; break; \
case 2: __mask = 0xffffUL; break; \
case 4: __mask = 0xffffffffUL; break; \
default: break; \
} \
- __mask; \
+ __mask & __eax; \
})
-#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \
- pre, post, ...) \
+#define ____PVOP_CALL(ret, op, clbr, call_clbr, extra_clbr, ...) \
({ \
- rettype __ret; \
PVOP_CALL_ARGS; \
PVOP_TEST_NULL(op); \
- /* This is 32-bit specific, but is okay in 64-bit */ \
- /* since this condition will never hold */ \
- if (sizeof(rettype) > sizeof(unsigned long)) { \
- asm volatile(pre \
- paravirt_alt(PARAVIRT_CALL) \
- post \
- : call_clbr, ASM_CALL_CONSTRAINT \
- : paravirt_type(op), \
- paravirt_clobber(clbr), \
- ##__VA_ARGS__ \
- : "memory", "cc" extra_clbr); \
- __ret = (rettype)((((u64)__edx) << 32) | __eax); \
- } else { \
- asm volatile(pre \
- paravirt_alt(PARAVIRT_CALL) \
- post \
- : call_clbr, ASM_CALL_CONSTRAINT \
- : paravirt_type(op), \
- paravirt_clobber(clbr), \
- ##__VA_ARGS__ \
- : "memory", "cc" extra_clbr); \
- __ret = (rettype)(__eax & PVOP_RETMASK(rettype)); \
- } \
- __ret; \
+ asm volatile(paravirt_alt(PARAVIRT_CALL) \
+ : call_clbr, ASM_CALL_CONSTRAINT \
+ : paravirt_type(op), \
+ paravirt_clobber(clbr), \
+ ##__VA_ARGS__ \
+ : "memory", "cc" extra_clbr); \
+ ret; \
})
-#define __PVOP_CALL(rettype, op, pre, post, ...) \
- ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \
- EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__)
-
-#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \
- ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \
- PVOP_CALLEE_CLOBBERS, , \
- pre, post, ##__VA_ARGS__)
-
-
-#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \
+#define ____PVOP_ALT_CALL(ret, op, alt, cond, clbr, call_clbr, \
+ extra_clbr, ...) \
({ \
- PVOP_VCALL_ARGS; \
+ PVOP_CALL_ARGS; \
PVOP_TEST_NULL(op); \
- asm volatile(pre \
- paravirt_alt(PARAVIRT_CALL) \
- post \
+ asm volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL), \
+ alt, cond) \
: call_clbr, ASM_CALL_CONSTRAINT \
: paravirt_type(op), \
paravirt_clobber(clbr), \
##__VA_ARGS__ \
: "memory", "cc" extra_clbr); \
+ ret; \
})
-#define __PVOP_VCALL(op, pre, post, ...) \
- ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \
- VEXTRA_CLOBBERS, \
- pre, post, ##__VA_ARGS__)
+#define __PVOP_CALL(rettype, op, ...) \
+ ____PVOP_CALL(PVOP_RETVAL(rettype), op, CLBR_ANY, \
+ PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, ##__VA_ARGS__)
+
+#define __PVOP_ALT_CALL(rettype, op, alt, cond, ...) \
+ ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op, alt, cond, CLBR_ANY,\
+ PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, \
+ ##__VA_ARGS__)
+
+#define __PVOP_CALLEESAVE(rettype, op, ...) \
+ ____PVOP_CALL(PVOP_RETVAL(rettype), op.func, CLBR_RET_REG, \
+ PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__)
+
+#define __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, ...) \
+ ____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op.func, alt, cond, \
+ CLBR_RET_REG, PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__)
+
+
+#define __PVOP_VCALL(op, ...) \
+ (void)____PVOP_CALL(, op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \
+ VEXTRA_CLOBBERS, ##__VA_ARGS__)
+
+#define __PVOP_ALT_VCALL(op, alt, cond, ...) \
+ (void)____PVOP_ALT_CALL(, op, alt, cond, CLBR_ANY, \
+ PVOP_VCALL_CLOBBERS, VEXTRA_CLOBBERS, \
+ ##__VA_ARGS__)
-#define __PVOP_VCALLEESAVE(op, pre, post, ...) \
- ____PVOP_VCALL(op.func, CLBR_RET_REG, \
- PVOP_VCALLEE_CLOBBERS, , \
- pre, post, ##__VA_ARGS__)
+#define __PVOP_VCALLEESAVE(op, ...) \
+ (void)____PVOP_CALL(, op.func, CLBR_RET_REG, \
+ PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__)
+#define __PVOP_ALT_VCALLEESAVE(op, alt, cond, ...) \
+ (void)____PVOP_ALT_CALL(, op.func, alt, cond, CLBR_RET_REG, \
+ PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__)
#define PVOP_CALL0(rettype, op) \
- __PVOP_CALL(rettype, op, "", "")
+ __PVOP_CALL(rettype, op)
#define PVOP_VCALL0(op) \
- __PVOP_VCALL(op, "", "")
+ __PVOP_VCALL(op)
+#define PVOP_ALT_CALL0(rettype, op, alt, cond) \
+ __PVOP_ALT_CALL(rettype, op, alt, cond)
+#define PVOP_ALT_VCALL0(op, alt, cond) \
+ __PVOP_ALT_VCALL(op, alt, cond)
#define PVOP_CALLEE0(rettype, op) \
- __PVOP_CALLEESAVE(rettype, op, "", "")
+ __PVOP_CALLEESAVE(rettype, op)
#define PVOP_VCALLEE0(op) \
- __PVOP_VCALLEESAVE(op, "", "")
+ __PVOP_VCALLEESAVE(op)
+#define PVOP_ALT_CALLEE0(rettype, op, alt, cond) \
+ __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond)
+#define PVOP_ALT_VCALLEE0(op, alt, cond) \
+ __PVOP_ALT_VCALLEESAVE(op, alt, cond)
#define PVOP_CALL1(rettype, op, arg1) \
- __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
+ __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1))
#define PVOP_VCALL1(op, arg1) \
- __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1))
+ __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1))
+#define PVOP_ALT_VCALL1(op, arg1, alt, cond) \
+ __PVOP_ALT_VCALL(op, alt, cond, PVOP_CALL_ARG1(arg1))
#define PVOP_CALLEE1(rettype, op, arg1) \
- __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1))
+ __PVOP_CALLEESAVE(rettype, op, PVOP_CALL_ARG1(arg1))
#define PVOP_VCALLEE1(op, arg1) \
- __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1))
+ __PVOP_VCALLEESAVE(op, PVOP_CALL_ARG1(arg1))
+#define PVOP_ALT_CALLEE1(rettype, op, arg1, alt, cond) \
+ __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, PVOP_CALL_ARG1(arg1))
+#define PVOP_ALT_VCALLEE1(op, arg1, alt, cond) \
+ __PVOP_ALT_VCALLEESAVE(op, alt, cond, PVOP_CALL_ARG1(arg1))
#define PVOP_CALL2(rettype, op, arg1, arg2) \
- __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
- PVOP_CALL_ARG2(arg2))
+ __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2))
#define PVOP_VCALL2(op, arg1, arg2) \
- __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \
- PVOP_CALL_ARG2(arg2))
-
-#define PVOP_CALLEE2(rettype, op, arg1, arg2) \
- __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
- PVOP_CALL_ARG2(arg2))
-#define PVOP_VCALLEE2(op, arg1, arg2) \
- __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \
- PVOP_CALL_ARG2(arg2))
-
+ __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2))
#define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \
- __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \
+ __PVOP_CALL(rettype, op, PVOP_CALL_ARG1(arg1), \
PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
#define PVOP_VCALL3(op, arg1, arg2, arg3) \
- __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \
+ __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), \
PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3))
-/* This is the only difference in x86_64. We can make it much simpler */
-#ifdef CONFIG_X86_32
#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
__PVOP_CALL(rettype, op, \
- "push %[_arg4];", "lea 4(%%esp),%%esp;", \
- PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
- PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4)))
-#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
- __PVOP_VCALL(op, \
- "push %[_arg4];", "lea 4(%%esp),%%esp;", \
- "0" ((u32)(arg1)), "1" ((u32)(arg2)), \
- "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
-#else
-#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \
- __PVOP_CALL(rettype, op, "", "", \
PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \
- __PVOP_VCALL(op, "", "", \
- PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
+ __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \
PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
-#endif
/* Lazy mode for batching updates / context switch */
enum paravirt_lazy_mode {
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a02c67291cfc..b1099f2d9800 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1244,7 +1244,7 @@ static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
/*
* clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
*
- * dst - pointer to pgd range anwhere on a pgd page
+ * dst - pointer to pgd range anywhere on a pgd page
* src - ""
* count - the number of pgds to copy.
*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f1b9ed5efaa9..154321d29050 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -314,11 +314,6 @@ struct x86_hw_tss {
struct x86_hw_tss {
u32 reserved1;
u64 sp0;
-
- /*
- * We store cpu_current_top_of_stack in sp1 so it's always accessible.
- * Linux does not use ring 1, so sp1 is not otherwise needed.
- */
u64 sp1;
/*
@@ -426,12 +421,7 @@ struct irq_stack {
char stack[IRQ_STACK_SIZE];
} __aligned(IRQ_STACK_SIZE);
-#ifdef CONFIG_X86_32
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
-#else
-/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
-#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
-#endif
#ifdef CONFIG_X86_64
struct fixed_percpu_data {
@@ -439,6 +429,9 @@ struct fixed_percpu_data {
* GCC hardcodes the stack canary as %gs:40. Since the
* irq_stack is the object at %gs:0, we reserve the bottom
* 48 bytes of the irq stack for the canary.
+ *
+ * Once we are willing to require -mstack-protector-guard-symbol=
+ * support for x86_64 stackprotector, we can get rid of this.
*/
char gs_base[40];
unsigned long stack_canary;
@@ -460,17 +453,7 @@ extern asmlinkage void ignore_sysret(void);
void current_save_fsgs(void);
#else /* X86_64 */
#ifdef CONFIG_STACKPROTECTOR
-/*
- * Make sure stack canary segment base is cached-aligned:
- * "For Intel Atom processors, avoid non zero segment base address
- * that is not aligned to cache line boundary at all cost."
- * (Optim Ref Manual Assembly/Compiler Coding Rule 15.)
- */
-struct stack_canary {
- char __pad[20]; /* canary at %gs:20 */
- unsigned long canary;
-};
-DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
+DECLARE_PER_CPU(unsigned long, __stack_chk_guard);
#endif
DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
@@ -527,7 +510,7 @@ struct thread_struct {
struct io_bitmap *io_bitmap;
/*
- * IOPL. Priviledge level dependent I/O permission which is
+ * IOPL. Privilege level dependent I/O permission which is
* emulated via the I/O bitmap to prevent user space from disabling
* interrupts.
*/
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index b6a9d51d1d79..8c5d1910a848 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -4,6 +4,8 @@
#include <asm/ldt.h>
+struct task_struct;
+
/* misc architecture specific prototypes */
void syscall_init(void);
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 409f661481e1..b94f615600d5 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -37,7 +37,10 @@ struct pt_regs {
unsigned short __esh;
unsigned short fs;
unsigned short __fsh;
- /* On interrupt, gs and __gsh store the vector number. */
+ /*
+ * On interrupt, gs and __gsh store the vector number. They never
+ * store gs any more.
+ */
unsigned short gs;
unsigned short __gsh;
/* On interrupt, this is the error code. */
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 7fdd4facfce7..72044026eb3c 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -95,7 +95,7 @@
*
* 26 - ESPFIX small SS
* 27 - per-cpu [ offset to per-cpu data area ]
- * 28 - stack_canary-20 [ for stack protector ] <=== cacheline #8
+ * 28 - unused
* 29 - unused
* 30 - unused
* 31 - TSS for double fault handler
@@ -118,7 +118,6 @@
#define GDT_ENTRY_ESPFIX_SS 26
#define GDT_ENTRY_PERCPU 27
-#define GDT_ENTRY_STACK_CANARY 28
#define GDT_ENTRY_DOUBLEFAULT_TSS 31
@@ -158,12 +157,6 @@
# define __KERNEL_PERCPU 0
#endif
-#ifdef CONFIG_STACKPROTECTOR
-# define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8)
-#else
-# define __KERNEL_STACK_CANARY 0
-#endif
-
#else /* 64-bit: */
#include <asm/cache.h>
@@ -364,22 +357,15 @@ static inline void __loadsegment_fs(unsigned short value)
asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
/*
- * x86-32 user GS accessors:
+ * x86-32 user GS accessors. This is ugly and could do with some cleaning up.
*/
#ifdef CONFIG_X86_32
-# ifdef CONFIG_X86_32_LAZY_GS
-# define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; })
-# define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
-# define task_user_gs(tsk) ((tsk)->thread.gs)
-# define lazy_save_gs(v) savesegment(gs, (v))
-# define lazy_load_gs(v) loadsegment(gs, (v))
-# else /* X86_32_LAZY_GS */
-# define get_user_gs(regs) (u16)((regs)->gs)
-# define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
-# define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
-# define lazy_save_gs(v) do { } while (0)
-# define lazy_load_gs(v) do { } while (0)
-# endif /* X86_32_LAZY_GS */
+# define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; })
+# define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
+# define task_user_gs(tsk) ((tsk)->thread.gs)
+# define lazy_save_gs(v) savesegment(gs, (v))
+# define lazy_load_gs(v) loadsegment(gs, (v))
+# define load_gs_index(v) loadsegment(gs, (v))
#endif /* X86_32 */
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 4352f08bfbb5..43fa081a1adb 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -8,8 +8,8 @@
/*
* The set_memory_* API can be used to change various attributes of a virtual
* address range. The attributes include:
- * Cachability : UnCached, WriteCombining, WriteThrough, WriteBack
- * Executability : eXeutable, NoteXecutable
+ * Cacheability : UnCached, WriteCombining, WriteThrough, WriteBack
+ * Executability : eXecutable, NoteXecutable
* Read/Write : ReadOnly, ReadWrite
* Presence : NotPresent
* Encryption : Encrypted, Decrypted
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 389d851a02c4..a12458a7a8d4 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -130,11 +130,6 @@ void *extend_brk(size_t size, size_t align);
: : "i" (sz)); \
}
-/* Helper for reserving space for arrays of things */
-#define RESERVE_BRK_ARRAY(type, name, entries) \
- type *name; \
- RESERVE_BRK(name, sizeof(type) * entries)
-
extern void probe_roms(void);
#ifdef __i386__
diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
new file mode 100644
index 000000000000..9c31e0ebc55b
--- /dev/null
+++ b/arch/x86/include/asm/sgx.h
@@ -0,0 +1,378 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/**
+ * Copyright(c) 2016-20 Intel Corporation.
+ *
+ * Intel Software Guard Extensions (SGX) support.
+ */
+#ifndef _ASM_X86_SGX_H
+#define _ASM_X86_SGX_H
+
+#include <linux/bits.h>
+#include <linux/types.h>
+
+/*
+ * This file contains both data structures defined by SGX architecture and Linux
+ * defined software data structures and functions. The two should not be mixed
+ * together for better readability. The architectural definitions come first.
+ */
+
+/* The SGX specific CPUID function. */
+#define SGX_CPUID 0x12
+/* EPC enumeration. */
+#define SGX_CPUID_EPC 2
+/* An invalid EPC section, i.e. the end marker. */
+#define SGX_CPUID_EPC_INVALID 0x0
+/* A valid EPC section. */
+#define SGX_CPUID_EPC_SECTION 0x1
+/* The bitmask for the EPC section type. */
+#define SGX_CPUID_EPC_MASK GENMASK(3, 0)
+
+enum sgx_encls_function {
+ ECREATE = 0x00,
+ EADD = 0x01,
+ EINIT = 0x02,
+ EREMOVE = 0x03,
+ EDGBRD = 0x04,
+ EDGBWR = 0x05,
+ EEXTEND = 0x06,
+ ELDU = 0x08,
+ EBLOCK = 0x09,
+ EPA = 0x0A,
+ EWB = 0x0B,
+ ETRACK = 0x0C,
+ EAUG = 0x0D,
+ EMODPR = 0x0E,
+ EMODT = 0x0F,
+};
+
+/**
+ * enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV
+ * %SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not
+ * been completed yet.
+ * %SGX_CHILD_PRESENT: SECS has child pages present in the EPC.
+ * %SGX_INVALID_EINITTOKEN: EINITTOKEN is invalid and enclave signer's
+ * public key does not match IA32_SGXLEPUBKEYHASH.
+ * %SGX_UNMASKED_EVENT: An unmasked event, e.g. INTR, was received
+ */
+enum sgx_return_code {
+ SGX_NOT_TRACKED = 11,
+ SGX_CHILD_PRESENT = 13,
+ SGX_INVALID_EINITTOKEN = 16,
+ SGX_UNMASKED_EVENT = 128,
+};
+
+/* The modulus size for 3072-bit RSA keys. */
+#define SGX_MODULUS_SIZE 384
+
+/**
+ * enum sgx_miscselect - additional information to an SSA frame
+ * %SGX_MISC_EXINFO: Report #PF or #GP to the SSA frame.
+ *
+ * Save State Area (SSA) is a stack inside the enclave used to store processor
+ * state when an exception or interrupt occurs. This enum defines additional
+ * information stored to an SSA frame.
+ */
+enum sgx_miscselect {
+ SGX_MISC_EXINFO = BIT(0),
+};
+
+#define SGX_MISC_RESERVED_MASK GENMASK_ULL(63, 1)
+
+#define SGX_SSA_GPRS_SIZE 184
+#define SGX_SSA_MISC_EXINFO_SIZE 16
+
+/**
+ * enum sgx_attribute - the attributes field in &struct sgx_secs
+ * %SGX_ATTR_INIT: Enclave can be entered (is initialized).
+ * %SGX_ATTR_DEBUG: Allow ENCLS(EDBGRD) and ENCLS(EDBGWR).
+ * %SGX_ATTR_MODE64BIT: Tell that this is a 64-bit enclave.
+ * %SGX_ATTR_PROVISIONKEY: Allow to use provisioning keys for remote
+ * attestation.
+ * %SGX_ATTR_KSS: Allow to use key separation and sharing (KSS).
+ * %SGX_ATTR_EINITTOKENKEY: Allow to use token signing key that is used to
+ * sign cryptographic tokens that can be passed to
+ * EINIT as an authorization to run an enclave.
+ */
+enum sgx_attribute {
+ SGX_ATTR_INIT = BIT(0),
+ SGX_ATTR_DEBUG = BIT(1),
+ SGX_ATTR_MODE64BIT = BIT(2),
+ SGX_ATTR_PROVISIONKEY = BIT(4),
+ SGX_ATTR_EINITTOKENKEY = BIT(5),
+ SGX_ATTR_KSS = BIT(7),
+};
+
+#define SGX_ATTR_RESERVED_MASK (BIT_ULL(3) | BIT_ULL(6) | GENMASK_ULL(63, 8))
+
+/**
+ * struct sgx_secs - SGX Enclave Control Structure (SECS)
+ * @size: size of the address space
+ * @base: base address of the address space
+ * @ssa_frame_size: size of an SSA frame
+ * @miscselect: additional information stored to an SSA frame
+ * @attributes: attributes for enclave
+ * @xfrm: XSave-Feature Request Mask (subset of XCR0)
+ * @mrenclave: SHA256-hash of the enclave contents
+ * @mrsigner: SHA256-hash of the public key used to sign the SIGSTRUCT
+ * @config_id: a user-defined value that is used in key derivation
+ * @isv_prod_id: a user-defined value that is used in key derivation
+ * @isv_svn: a user-defined value that is used in key derivation
+ * @config_svn: a user-defined value that is used in key derivation
+ *
+ * SGX Enclave Control Structure (SECS) is a special enclave page that is not
+ * visible in the address space. In fact, this structure defines the address
+ * range and other global attributes for the enclave and it is the first EPC
+ * page created for any enclave. It is moved from a temporary buffer to an EPC
+ * by the means of ENCLS[ECREATE] function.
+ */
+struct sgx_secs {
+ u64 size;
+ u64 base;
+ u32 ssa_frame_size;
+ u32 miscselect;
+ u8 reserved1[24];
+ u64 attributes;
+ u64 xfrm;
+ u32 mrenclave[8];
+ u8 reserved2[32];
+ u32 mrsigner[8];
+ u8 reserved3[32];
+ u32 config_id[16];
+ u16 isv_prod_id;
+ u16 isv_svn;
+ u16 config_svn;
+ u8 reserved4[3834];
+} __packed;
+
+/**
+ * enum sgx_tcs_flags - execution flags for TCS
+ * %SGX_TCS_DBGOPTIN: If enabled allows single-stepping and breakpoints
+ * inside an enclave. It is cleared by EADD but can
+ * be set later with EDBGWR.
+ */
+enum sgx_tcs_flags {
+ SGX_TCS_DBGOPTIN = 0x01,
+};
+
+#define SGX_TCS_RESERVED_MASK GENMASK_ULL(63, 1)
+#define SGX_TCS_RESERVED_SIZE 4024
+
+/**
+ * struct sgx_tcs - Thread Control Structure (TCS)
+ * @state: used to mark an entered TCS
+ * @flags: execution flags (cleared by EADD)
+ * @ssa_offset: SSA stack offset relative to the enclave base
+ * @ssa_index: the current SSA frame index (cleared by EADD)
+ * @nr_ssa_frames: the number of frames in the SSA stack
+ * @entry_offset: entry point offset relative to the enclave base
+ * @exit_addr: address outside the enclave to exit on an exception or
+ * interrupt
+ * @fs_offset: offset relative to the enclave base to become FS
+ * segment inside the enclave
+ * @gs_offset: offset relative to the enclave base to become GS
+ * segment inside the enclave
+ * @fs_limit: size to become a new FS-limit (only 32-bit enclaves)
+ * @gs_limit: size to become a new GS-limit (only 32-bit enclaves)
+ *
+ * Thread Control Structure (TCS) is an enclave page visible in its address
+ * space that defines an entry point inside the enclave. A thread enters inside
+ * an enclave by supplying address of TCS to ENCLU(EENTER). A TCS can be entered
+ * by only one thread at a time.
+ */
+struct sgx_tcs {
+ u64 state;
+ u64 flags;
+ u64 ssa_offset;
+ u32 ssa_index;
+ u32 nr_ssa_frames;
+ u64 entry_offset;
+ u64 exit_addr;
+ u64 fs_offset;
+ u64 gs_offset;
+ u32 fs_limit;
+ u32 gs_limit;
+ u8 reserved[SGX_TCS_RESERVED_SIZE];
+} __packed;
+
+/**
+ * struct sgx_pageinfo - an enclave page descriptor
+ * @addr: address of the enclave page
+ * @contents: pointer to the page contents
+ * @metadata: pointer either to a SECINFO or PCMD instance
+ * @secs: address of the SECS page
+ */
+struct sgx_pageinfo {
+ u64 addr;
+ u64 contents;
+ u64 metadata;
+ u64 secs;
+} __packed __aligned(32);
+
+
+/**
+ * enum sgx_page_type - bits in the SECINFO flags defining the page type
+ * %SGX_PAGE_TYPE_SECS: a SECS page
+ * %SGX_PAGE_TYPE_TCS: a TCS page
+ * %SGX_PAGE_TYPE_REG: a regular page
+ * %SGX_PAGE_TYPE_VA: a VA page
+ * %SGX_PAGE_TYPE_TRIM: a page in trimmed state
+ */
+enum sgx_page_type {
+ SGX_PAGE_TYPE_SECS,
+ SGX_PAGE_TYPE_TCS,
+ SGX_PAGE_TYPE_REG,
+ SGX_PAGE_TYPE_VA,
+ SGX_PAGE_TYPE_TRIM,
+};
+
+#define SGX_NR_PAGE_TYPES 5
+#define SGX_PAGE_TYPE_MASK GENMASK(7, 0)
+
+/**
+ * enum sgx_secinfo_flags - the flags field in &struct sgx_secinfo
+ * %SGX_SECINFO_R: allow read
+ * %SGX_SECINFO_W: allow write
+ * %SGX_SECINFO_X: allow execution
+ * %SGX_SECINFO_SECS: a SECS page
+ * %SGX_SECINFO_TCS: a TCS page
+ * %SGX_SECINFO_REG: a regular page
+ * %SGX_SECINFO_VA: a VA page
+ * %SGX_SECINFO_TRIM: a page in trimmed state
+ */
+enum sgx_secinfo_flags {
+ SGX_SECINFO_R = BIT(0),
+ SGX_SECINFO_W = BIT(1),
+ SGX_SECINFO_X = BIT(2),
+ SGX_SECINFO_SECS = (SGX_PAGE_TYPE_SECS << 8),
+ SGX_SECINFO_TCS = (SGX_PAGE_TYPE_TCS << 8),
+ SGX_SECINFO_REG = (SGX_PAGE_TYPE_REG << 8),
+ SGX_SECINFO_VA = (SGX_PAGE_TYPE_VA << 8),
+ SGX_SECINFO_TRIM = (SGX_PAGE_TYPE_TRIM << 8),
+};
+
+#define SGX_SECINFO_PERMISSION_MASK GENMASK_ULL(2, 0)
+#define SGX_SECINFO_PAGE_TYPE_MASK (SGX_PAGE_TYPE_MASK << 8)
+#define SGX_SECINFO_RESERVED_MASK ~(SGX_SECINFO_PERMISSION_MASK | \
+ SGX_SECINFO_PAGE_TYPE_MASK)
+
+/**
+ * struct sgx_secinfo - describes attributes of an EPC page
+ * @flags: permissions and type
+ *
+ * Used together with ENCLS leaves that add or modify an EPC page to an
+ * enclave to define page permissions and type.
+ */
+struct sgx_secinfo {
+ u64 flags;
+ u8 reserved[56];
+} __packed __aligned(64);
+
+#define SGX_PCMD_RESERVED_SIZE 40
+
+/**
+ * struct sgx_pcmd - Paging Crypto Metadata (PCMD)
+ * @enclave_id: enclave identifier
+ * @mac: MAC over PCMD, page contents and isvsvn
+ *
+ * PCMD is stored for every swapped page to the regular memory. When ELDU loads
+ * the page back it recalculates the MAC by using an isvsvn number stored in a
+ * VA page. Together these two structures bring integrity and rollback
+ * protection.
+ */
+struct sgx_pcmd {
+ struct sgx_secinfo secinfo;
+ u64 enclave_id;
+ u8 reserved[SGX_PCMD_RESERVED_SIZE];
+ u8 mac[16];
+} __packed __aligned(128);
+
+#define SGX_SIGSTRUCT_RESERVED1_SIZE 84
+#define SGX_SIGSTRUCT_RESERVED2_SIZE 20
+#define SGX_SIGSTRUCT_RESERVED3_SIZE 32
+#define SGX_SIGSTRUCT_RESERVED4_SIZE 12
+
+/**
+ * struct sgx_sigstruct_header - defines author of the enclave
+ * @header1: constant byte string
+ * @vendor: must be either 0x0000 or 0x8086
+ * @date: YYYYMMDD in BCD
+ * @header2: constant byte string
+ * @swdefined: software defined value
+ */
+struct sgx_sigstruct_header {
+ u64 header1[2];
+ u32 vendor;
+ u32 date;
+ u64 header2[2];
+ u32 swdefined;
+ u8 reserved1[84];
+} __packed;
+
+/**
+ * struct sgx_sigstruct_body - defines contents of the enclave
+ * @miscselect: additional information stored to an SSA frame
+ * @misc_mask: required miscselect in SECS
+ * @attributes: attributes for enclave
+ * @xfrm: XSave-Feature Request Mask (subset of XCR0)
+ * @attributes_mask: required attributes in SECS
+ * @xfrm_mask: required XFRM in SECS
+ * @mrenclave: SHA256-hash of the enclave contents
+ * @isvprodid: a user-defined value that is used in key derivation
+ * @isvsvn: a user-defined value that is used in key derivation
+ */
+struct sgx_sigstruct_body {
+ u32 miscselect;
+ u32 misc_mask;
+ u8 reserved2[20];
+ u64 attributes;
+ u64 xfrm;
+ u64 attributes_mask;
+ u64 xfrm_mask;
+ u8 mrenclave[32];
+ u8 reserved3[32];
+ u16 isvprodid;
+ u16 isvsvn;
+} __packed;
+
+/**
+ * struct sgx_sigstruct - an enclave signature
+ * @header: defines author of the enclave
+ * @modulus: the modulus of the public key
+ * @exponent: the exponent of the public key
+ * @signature: the signature calculated over the header and body
+ * @body: defines contents of the enclave
+ * @q1: a value used in RSA signature verification
+ * @q2: a value used in RSA signature verification
+ *
+ * Header and body are the parts that are actually signed. The remaining fields
+ * define the signature of the enclave.
+ */
+struct sgx_sigstruct {
+ struct sgx_sigstruct_header header;
+ u8 modulus[SGX_MODULUS_SIZE];
+ u32 exponent;
+ u8 signature[SGX_MODULUS_SIZE];
+ struct sgx_sigstruct_body body;
+ u8 reserved4[12];
+ u8 q1[SGX_MODULUS_SIZE];
+ u8 q2[SGX_MODULUS_SIZE];
+} __packed;
+
+#define SGX_LAUNCH_TOKEN_SIZE 304
+
+/*
+ * Do not put any hardware-defined SGX structure representations below this
+ * comment!
+ */
+
+#ifdef CONFIG_X86_SGX_KVM
+int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
+ int *trapnr);
+int sgx_virt_einit(void __user *sigstruct, void __user *token,
+ void __user *secs, u64 *lepubkeyhash, int *trapnr);
+#endif
+
+int sgx_set_attribute(unsigned long *allowed_attributes,
+ unsigned int attribute_fd);
+
+#endif /* _ASM_X86_SGX_H */
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index 0bc9b0895f33..d17b39893b79 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -11,6 +11,7 @@
#include <asm/nops.h>
#include <asm/cpufeatures.h>
+#include <asm/alternative.h>
/* "Raw" instruction opcodes */
#define __ASM_CLAC ".byte 0x0f,0x01,0xca"
@@ -18,8 +19,6 @@
#ifdef __ASSEMBLY__
-#include <asm/alternative-asm.h>
-
#ifdef CONFIG_X86_SMAP
#define ASM_CLAC \
@@ -37,8 +36,6 @@
#else /* __ASSEMBLY__ */
-#include <asm/alternative.h>
-
#ifdef CONFIG_X86_SMAP
static __always_inline void clac(void)
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index c0538f82c9a2..630ff08532be 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -132,6 +132,7 @@ void native_play_dead(void);
void play_dead_common(void);
void wbinvd_on_cpu(int cpu);
int wbinvd_on_all_cpus(void);
+void cond_wakeup_cpu0(void);
void native_smp_send_reschedule(int cpu);
void native_send_call_func_ipi(const struct cpumask *mask);
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 1d3cbaef4bb7..2acd6cb62328 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -214,7 +214,7 @@ static inline void clflush(volatile void *__p)
static inline void clflushopt(volatile void *__p)
{
- alternative_io(".byte " __stringify(NOP_DS_PREFIX) "; clflush %P0",
+ alternative_io(".byte 0x3e; clflush %P0",
".byte 0x66; clflush %P0",
X86_FEATURE_CLFLUSHOPT,
"+m" (*(volatile char __force *)__p));
@@ -225,7 +225,7 @@ static inline void clwb(volatile void *__p)
volatile struct { char x[64]; } *p = __p;
asm volatile(ALTERNATIVE_2(
- ".byte " __stringify(NOP_DS_PREFIX) "; clflush (%[pax])",
+ ".byte 0x3e; clflush (%[pax])",
".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */
X86_FEATURE_CLFLUSHOPT,
".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 7fb482f0f25b..b6ffe58c70fa 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -5,30 +5,23 @@
* Stack protector works by putting predefined pattern at the start of
* the stack frame and verifying that it hasn't been overwritten when
* returning from the function. The pattern is called stack canary
- * and unfortunately gcc requires it to be at a fixed offset from %gs.
- * On x86_64, the offset is 40 bytes and on x86_32 20 bytes. x86_64
- * and x86_32 use segment registers differently and thus handles this
- * requirement differently.
+ * and unfortunately gcc historically required it to be at a fixed offset
+ * from the percpu segment base. On x86_64, the offset is 40 bytes.
*
- * On x86_64, %gs is shared by percpu area and stack canary. All
- * percpu symbols are zero based and %gs points to the base of percpu
- * area. The first occupant of the percpu area is always
- * fixed_percpu_data which contains stack_canary at offset 40. Userland
- * %gs is always saved and restored on kernel entry and exit using
- * swapgs, so stack protector doesn't add any complexity there.
+ * The same segment is shared by percpu area and stack canary. On
+ * x86_64, percpu symbols are zero based and %gs (64-bit) points to the
+ * base of percpu area. The first occupant of the percpu area is always
+ * fixed_percpu_data which contains stack_canary at the appropriate
+ * offset. On x86_32, the stack canary is just a regular percpu
+ * variable.
*
- * On x86_32, it's slightly more complicated. As in x86_64, %gs is
- * used for userland TLS. Unfortunately, some processors are much
- * slower at loading segment registers with different value when
- * entering and leaving the kernel, so the kernel uses %fs for percpu
- * area and manages %gs lazily so that %gs is switched only when
- * necessary, usually during task switch.
+ * Putting percpu data in %fs on 32-bit is a minor optimization compared to
+ * using %gs. Since 32-bit userspace normally has %fs == 0, we are likely
+ * to load 0 into %fs on exit to usermode, whereas with percpu data in
+ * %gs, we are likely to load a non-null %gs on return to user mode.
*
- * As gcc requires the stack canary at %gs:20, %gs can't be managed
- * lazily if stack protector is enabled, so the kernel saves and
- * restores userland %gs on kernel entry and exit. This behavior is
- * controlled by CONFIG_X86_32_LAZY_GS and accessors are defined in
- * system.h to hide the details.
+ * Once we are willing to require GCC 8.1 or better for 64-bit stackprotector
+ * support, we can remove some of this complexity.
*/
#ifndef _ASM_STACKPROTECTOR_H
@@ -45,14 +38,6 @@
#include <linux/sched.h>
/*
- * 24 byte read-only segment initializer for stack canary. Linker
- * can't handle the address bit shifting. Address will be set in
- * head_32 for boot CPU and setup_per_cpu_areas() for others.
- */
-#define GDT_STACK_CANARY_INIT \
- [GDT_ENTRY_STACK_CANARY] = GDT_ENTRY_INIT(0x4090, 0, 0x18),
-
-/*
* Initialize the stackprotector canary value.
*
* NOTE: this must only be called from functions that never return
@@ -86,7 +71,7 @@ static __always_inline void boot_init_stack_canary(void)
#ifdef CONFIG_X86_64
this_cpu_write(fixed_percpu_data.stack_canary, canary);
#else
- this_cpu_write(stack_canary.canary, canary);
+ this_cpu_write(__stack_chk_guard, canary);
#endif
}
@@ -95,48 +80,16 @@ static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
#ifdef CONFIG_X86_64
per_cpu(fixed_percpu_data.stack_canary, cpu) = idle->stack_canary;
#else
- per_cpu(stack_canary.canary, cpu) = idle->stack_canary;
-#endif
-}
-
-static inline void setup_stack_canary_segment(int cpu)
-{
-#ifdef CONFIG_X86_32
- unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
- struct desc_struct *gdt_table = get_cpu_gdt_rw(cpu);
- struct desc_struct desc;
-
- desc = gdt_table[GDT_ENTRY_STACK_CANARY];
- set_desc_base(&desc, canary);
- write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S);
-#endif
-}
-
-static inline void load_stack_canary_segment(void)
-{
-#ifdef CONFIG_X86_32
- asm("mov %0, %%gs" : : "r" (__KERNEL_STACK_CANARY) : "memory");
+ per_cpu(__stack_chk_guard, cpu) = idle->stack_canary;
#endif
}
#else /* STACKPROTECTOR */
-#define GDT_STACK_CANARY_INIT
-
/* dummy boot_init_stack_canary() is defined in linux/stackprotector.h */
-static inline void setup_stack_canary_segment(int cpu)
-{ }
-
static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
{ }
-static inline void load_stack_canary_segment(void)
-{
-#ifdef CONFIG_X86_32
- asm volatile ("mov %0, %%gs" : : "r" (0));
-#endif
-}
-
#endif /* STACKPROTECTOR */
#endif /* _ASM_STACKPROTECTOR_H */
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index fdbd9d7b7bca..7b132d0312eb 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -13,12 +13,10 @@
/* image of the saved processor state */
struct saved_context {
/*
- * On x86_32, all segment registers, with the possible exception of
- * gs, are saved at kernel entry in pt_regs.
+ * On x86_32, all segment registers except gs are saved at kernel
+ * entry in pt_regs.
*/
-#ifdef CONFIG_X86_32_LAZY_GS
u16 gs;
-#endif
unsigned long cr0, cr2, cr3, cr4;
u64 misc_enable;
bool misc_enable_saved;
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 1c561945b426..772e60efe243 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -269,7 +269,9 @@ struct vmcb_save_area {
* SEV-ES guests when referenced through the GHCB or for
* saving to the host save area.
*/
- u8 reserved_7[80];
+ u8 reserved_7[72];
+ u32 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */
+ u8 reserved_7b[4];
u32 pkru;
u8 reserved_7a[20];
u64 reserved_8; /* rax already available at 0x01f8 */
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 9f69cc497f4b..b5f0d2ff47e4 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -71,12 +71,7 @@ static inline void update_task_stack(struct task_struct *task)
else
this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0);
#else
- /*
- * x86-64 updates x86_tss.sp1 via cpu_current_top_of_stack. That
- * doesn't work on x86-32 because sp1 and
- * cpu_current_top_of_stack have different values (because of
- * the non-zero stack-padding on 32bit).
- */
+ /* Xen PV enters the kernel on the thread stack. */
if (static_cpu_has(X86_FEATURE_XENPV))
load_sp0(task_top_of_stack(task));
#endif
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index a84333adeef2..80c08c7d5e72 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -80,6 +80,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
}
#define __COND_SYSCALL(abi, name) \
+ __weak long __##abi##_##name(const struct pt_regs *__unused); \
__weak long __##abi##_##name(const struct pt_regs *__unused) \
{ \
return sys_ni_syscall(); \
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 06b740bae431..de406d93b515 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -197,13 +197,7 @@ static inline int arch_within_stack_frames(const void * const stack,
#endif
}
-#else /* !__ASSEMBLY__ */
-
-#ifdef CONFIG_X86_64
-# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
-#endif
-
-#endif
+#endif /* !__ASSEMBLY__ */
/*
* Thread-synchronous status.
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 8c87a2e0b660..fa952eadbc2e 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -90,23 +90,6 @@ struct tlb_state {
u16 next_asid;
/*
- * We can be in one of several states:
- *
- * - Actively using an mm. Our CPU's bit will be set in
- * mm_cpumask(loaded_mm) and is_lazy == false;
- *
- * - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit
- * will not be set in mm_cpumask(&init_mm) and is_lazy == false.
- *
- * - Lazily using a real mm. loaded_mm != &init_mm, our bit
- * is set in mm_cpumask(loaded_mm), but is_lazy == true.
- * We're heuristically guessing that the CR3 load we
- * skipped more than makes up for the overhead added by
- * lazy mode.
- */
- bool is_lazy;
-
- /*
* If set we changed the page tables in such a way that we
* needed an invalidation of all contexts (aka. PCIDs / ASIDs).
* This tells us to go invalidate all the non-loaded ctxs[]
@@ -151,7 +134,27 @@ struct tlb_state {
*/
struct tlb_context ctxs[TLB_NR_DYN_ASIDS];
};
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
+DECLARE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate);
+
+struct tlb_state_shared {
+ /*
+ * We can be in one of several states:
+ *
+ * - Actively using an mm. Our CPU's bit will be set in
+ * mm_cpumask(loaded_mm) and is_lazy == false;
+ *
+ * - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit
+ * will not be set in mm_cpumask(&init_mm) and is_lazy == false.
+ *
+ * - Lazily using a real mm. loaded_mm != &init_mm, our bit
+ * is set in mm_cpumask(loaded_mm), but is_lazy == true.
+ * We're heuristically guessing that the CR3 load we
+ * skipped more than makes up for the overhead added by
+ * lazy mode.
+ */
+ bool is_lazy;
+};
+DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared);
bool nmi_uaccess_okay(void);
#define nmi_uaccess_okay nmi_uaccess_okay
@@ -175,7 +178,7 @@ extern void initialize_tlbstate_and_flush(void);
* - flush_tlb_page(vma, vmaddr) flushes one page
* - flush_tlb_range(vma, start, end) flushes a range of pages
* - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- * - flush_tlb_others(cpumask, info) flushes TLBs on other cpus
+ * - flush_tlb_multi(cpumask, info) flushes TLBs on multiple cpus
*
* ..but the i386 has somewhat limited tlb flushing capabilities,
* and page-granular flushes are available only on i486 and up.
@@ -201,14 +204,15 @@ struct flush_tlb_info {
unsigned long start;
unsigned long end;
u64 new_tlb_gen;
- unsigned int stride_shift;
- bool freed_tables;
+ unsigned int initiating_cpu;
+ u8 stride_shift;
+ u8 freed_tables;
};
void flush_tlb_local(void);
void flush_tlb_one_user(unsigned long addr);
void flush_tlb_one_kernel(unsigned long addr);
-void flush_tlb_others(const struct cpumask *cpumask,
+void flush_tlb_multi(const struct cpumask *cpumask,
const struct flush_tlb_info *info);
#ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h
index 4d705cb4d63b..a8e5a7a2b460 100644
--- a/arch/x86/include/asm/trace/hyperv.h
+++ b/arch/x86/include/asm/trace/hyperv.h
@@ -8,7 +8,7 @@
#if IS_ENABLED(CONFIG_HYPERV)
-TRACE_EVENT(hyperv_mmu_flush_tlb_others,
+TRACE_EVENT(hyperv_mmu_flush_tlb_multi,
TP_PROTO(const struct cpumask *cpus,
const struct flush_tlb_info *info),
TP_ARGS(cpus, info),
diff --git a/arch/x86/include/asm/uv/uv_geo.h b/arch/x86/include/asm/uv/uv_geo.h
index f241451035fb..027a9258dbca 100644
--- a/arch/x86/include/asm/uv/uv_geo.h
+++ b/arch/x86/include/asm/uv/uv_geo.h
@@ -10,7 +10,7 @@
#ifndef _ASM_UV_GEO_H
#define _ASM_UV_GEO_H
-/* Type declaractions */
+/* Type declarations */
/* Size of a geoid_s structure (must be before decl. of geoid_u) */
#define GEOID_SIZE 8
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 5002f52be332..d3e3197917be 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -353,7 +353,7 @@ union uvh_apicid {
*
* Note there are NO leds on a UV system. This register is only
* used by the system controller to monitor system-wide operation.
- * There are 64 regs per node. With Nahelem cpus (2 cores per node,
+ * There are 64 regs per node. With Nehalem cpus (2 cores per node,
* 8 cpus per core, 2 threads per cpu) there are 32 cpu threads on
* a node.
*
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index df01d7349d79..1936f21ed8cd 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -58,7 +58,8 @@ extern struct ms_hyperv_tsc_page hvclock_page
#endif
#ifdef CONFIG_TIME_NS
-static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void)
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
{
return __timens_vdso_data;
}
diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h
index 29837740b520..49ce331f3ac6 100644
--- a/arch/x86/include/asm/vmalloc.h
+++ b/arch/x86/include/asm/vmalloc.h
@@ -1,6 +1,26 @@
#ifndef _ASM_X86_VMALLOC_H
#define _ASM_X86_VMALLOC_H
+#include <asm/cpufeature.h>
+#include <asm/page.h>
#include <asm/pgtable_areas.h>
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+
+#ifdef CONFIG_X86_64
+#define arch_vmap_pud_supported arch_vmap_pud_supported
+static inline bool arch_vmap_pud_supported(pgprot_t prot)
+{
+ return boot_cpu_has(X86_FEATURE_GBPAGES);
+}
+#endif
+
+#define arch_vmap_pmd_supported arch_vmap_pmd_supported
+static inline bool arch_vmap_pmd_supported(pgprot_t prot)
+{
+ return boot_cpu_has(X86_FEATURE_PSE);
+}
+
+#endif
+
#endif /* _ASM_X86_VMALLOC_H */
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 358707f60d99..0ffaa3156a4e 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -373,6 +373,7 @@ enum vmcs_field {
#define GUEST_INTR_STATE_MOV_SS 0x00000002
#define GUEST_INTR_STATE_SMI 0x00000004
#define GUEST_INTR_STATE_NMI 0x00000008
+#define GUEST_INTR_STATE_ENCLAVE_INTR 0x00000010
/* GUEST_ACTIVITY_STATE flags */
#define GUEST_ACTIVITY_ACTIVE 0
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 600a141c8805..b25d3f82c2f3 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -234,7 +234,7 @@ struct boot_params {
* handling of page tables.
*
* These enums should only ever be used by x86 code, and the code that uses
- * it should be well contained and compartamentalized.
+ * it should be well contained and compartmentalized.
*
* KVM and Xen HVM do not have a subarch as these are expected to follow
* standard x86 boot entries. If there is a genuine need for "hypervisor" type
@@ -252,7 +252,7 @@ struct boot_params {
* @X86_SUBARCH_XEN: Used for Xen guest types which follow the PV boot path,
* which start at asm startup_xen() entry point and later jump to the C
* xen_start_kernel() entry point. Both domU and dom0 type of guests are
- * currently supportd through this PV boot path.
+ * currently supported through this PV boot path.
* @X86_SUBARCH_INTEL_MID: Used for Intel MID (Mobile Internet Device) platform
* systems which do not have the PCI legacy interfaces.
* @X86_SUBARCH_CE4100: Used for Intel CE media processor (CE4100) SoC
diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h
index d95d080b30e3..0007ba077c0c 100644
--- a/arch/x86/include/uapi/asm/debugreg.h
+++ b/arch/x86/include/uapi/asm/debugreg.h
@@ -24,6 +24,7 @@
#define DR_TRAP3 (0x8) /* db3 */
#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
+#define DR_BUS_LOCK (0x800) /* bus_lock */
#define DR_STEP (0x4000) /* single-step */
#define DR_SWITCH (0x8000) /* task switch */
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 5a3022c8af82..0662f644aad9 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -437,6 +437,8 @@ struct kvm_vmx_nested_state_hdr {
__u16 flags;
} smm;
+ __u16 pad;
+
__u32 flags;
__u64 preemption_timer_deadline;
};
diff --git a/arch/x86/include/uapi/asm/msgbuf.h b/arch/x86/include/uapi/asm/msgbuf.h
index b3d0664fadc9..ac83e25bbf37 100644
--- a/arch/x86/include/uapi/asm/msgbuf.h
+++ b/arch/x86/include/uapi/asm/msgbuf.h
@@ -12,7 +12,7 @@
* The msqid64_ds structure for x86 architecture with x32 ABI.
*
* On x86-32 and x86-64 we can just use the generic definition, but
- * x32 uses the same binary layout as x86_64, which is differnet
+ * x32 uses the same binary layout as x86_64, which is different
* from other 32-bit architectures.
*/
diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h
index 9034f3007c4e..9690d6899ad9 100644
--- a/arch/x86/include/uapi/asm/sgx.h
+++ b/arch/x86/include/uapi/asm/sgx.h
@@ -152,7 +152,7 @@ struct sgx_enclave_run {
* Most exceptions reported on ENCLU, including those that occur within the
* enclave, are fixed up and reported synchronously instead of being delivered
* via a standard signal. Debug Exceptions (#DB) and Breakpoints (#BP) are
- * never fixed up and are always delivered via standard signals. On synchrously
+ * never fixed up and are always delivered via standard signals. On synchronously
* reported exceptions, -EFAULT is returned and details about the exception are
* recorded in @run.exception, the optional sgx_enclave_exception struct.
*
diff --git a/arch/x86/include/uapi/asm/shmbuf.h b/arch/x86/include/uapi/asm/shmbuf.h
index f0305dc660c9..fce18eaa070c 100644
--- a/arch/x86/include/uapi/asm/shmbuf.h
+++ b/arch/x86/include/uapi/asm/shmbuf.h
@@ -9,7 +9,7 @@
* The shmid64_ds structure for x86 architecture with x32 ABI.
*
* On x86-32 and x86-64 we can just use the generic definition, but
- * x32 uses the same binary layout as x86_64, which is differnet
+ * x32 uses the same binary layout as x86_64, which is different
* from other 32-bit architectures.
*/
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
index 844d60eb1882..d0d9b331d3a1 100644
--- a/arch/x86/include/uapi/asm/sigcontext.h
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -139,7 +139,7 @@ struct _fpstate_32 {
* The 64-bit FPU frame. (FXSAVE format and later)
*
* Note1: If sw_reserved.magic1 == FP_XSTATE_MAGIC1 then the structure is
- * larger: 'struct _xstate'. Note that 'struct _xstate' embedds
+ * larger: 'struct _xstate'. Note that 'struct _xstate' embeds
* 'struct _fpstate' so that you can always assume the _fpstate portion
* exists so that you can check the magic value.
*
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index b8e650a985e3..946d761adbd3 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -27,6 +27,7 @@
#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000
+#define VMX_EXIT_REASONS_SGX_ENCLAVE_MODE 0x08000000
#define EXIT_REASON_EXCEPTION_NMI 0
#define EXIT_REASON_EXTERNAL_INTERRUPT 1