aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/kvm_host.h3
-rw-r--r--arch/x86/include/asm/mce.h4
-rw-r--r--arch/x86/include/asm/mwait.h8
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c23
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c17
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_rapl.c1
-rw-r--r--arch/x86/kernel/i387.c15
-rw-r--r--arch/x86/kernel/process.c51
-rw-r--r--arch/x86/kvm/cpuid.c4
-rw-r--r--arch/x86/kvm/cpuid.h8
-rw-r--r--arch/x86/kvm/mmu.c16
-rw-r--r--arch/x86/kvm/mmu.h4
-rw-r--r--arch/x86/kvm/paging_tmpl.h7
-rw-r--r--arch/x86/kvm/svm.c1
-rw-r--r--arch/x86/kvm/vmx.c8
-rw-r--r--arch/x86/kvm/x86.c36
-rw-r--r--arch/x86/mm/hugetlbpage.c12
18 files changed, 180 insertions, 42 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6ed0c30d6a0c..6c0709ff2f38 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -201,6 +201,7 @@ union kvm_mmu_page_role {
unsigned nxe:1;
unsigned cr0_wp:1;
unsigned smep_andnot_wp:1;
+ unsigned smap_andnot_wp:1;
};
};
@@ -392,6 +393,7 @@ struct kvm_vcpu_arch {
struct kvm_mmu_memory_cache mmu_page_header_cache;
struct fpu guest_fpu;
+ bool eager_fpu;
u64 xcr0;
u64 guest_supported_xcr0;
u32 guest_xstate_size;
@@ -707,6 +709,7 @@ struct kvm_x86_ops {
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
+ void (*fpu_activate)(struct kvm_vcpu *vcpu);
void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
void (*tlb_flush)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 958b90f761e5..40b35a55ce8b 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -34,6 +34,10 @@
#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
#define MCI_STATUS_AR (1ULL<<55) /* Action required */
+/* AMD-specific bits */
+#define MCI_STATUS_DEFERRED (1ULL<<44) /* declare an uncorrected error */
+#define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */
+
/*
* Note that the full MCACOD field of IA32_MCi_STATUS MSR is
* bits 15:0. But bit 12 is the 'F' bit, defined for corrected
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index a1410db38a1a..653dfa7662e1 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -30,6 +30,14 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
:: "a" (eax), "c" (ecx));
}
+static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
+{
+ trace_hardirqs_on();
+ /* "mwait %eax, %ecx;" */
+ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
+ :: "a" (eax), "c" (ecx));
+}
+
/*
* This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
* which can obviate IPI to trigger checking of need_resched.
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 09edd0b65fef..10b46906767f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -3,6 +3,8 @@
enum severity_level {
MCE_NO_SEVERITY,
+ MCE_DEFERRED_SEVERITY,
+ MCE_UCNA_SEVERITY = MCE_DEFERRED_SEVERITY,
MCE_KEEP_SEVERITY,
MCE_SOME_SEVERITY,
MCE_AO_SEVERITY,
@@ -21,7 +23,7 @@ struct mce_bank {
char attrname[ATTR_LEN]; /* attribute name */
};
-int mce_severity(struct mce *a, int tolerant, char **msg);
+int mce_severity(struct mce *a, int tolerant, char **msg, bool is_excp);
struct dentry *mce_get_debugfs_dir(void);
extern struct mce_bank *mce_banks;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index c370e1c4468b..8bb433043a7f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -31,6 +31,7 @@
enum context { IN_KERNEL = 1, IN_USER = 2 };
enum ser { SER_REQUIRED = 1, NO_SER = 2 };
+enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
static struct severity {
u64 mask;
@@ -40,6 +41,7 @@ static struct severity {
unsigned char mcgres;
unsigned char ser;
unsigned char context;
+ unsigned char excp;
unsigned char covered;
char *msg;
} severities[] = {
@@ -48,6 +50,8 @@ static struct severity {
#define USER .context = IN_USER
#define SER .ser = SER_REQUIRED
#define NOSER .ser = NO_SER
+#define EXCP .excp = EXCP_CONTEXT
+#define NOEXCP .excp = NO_EXCP
#define BITCLR(x) .mask = x, .result = 0
#define BITSET(x) .mask = x, .result = x
#define MCGMASK(x, y) .mcgmask = x, .mcgres = y
@@ -62,7 +66,7 @@ static struct severity {
),
MCESEV(
NO, "Not enabled",
- BITCLR(MCI_STATUS_EN)
+ EXCP, BITCLR(MCI_STATUS_EN)
),
MCESEV(
PANIC, "Processor context corrupt",
@@ -71,16 +75,20 @@ static struct severity {
/* When MCIP is not set something is very confused */
MCESEV(
PANIC, "MCIP not set in MCA handler",
- MCGMASK(MCG_STATUS_MCIP, 0)
+ EXCP, MCGMASK(MCG_STATUS_MCIP, 0)
),
/* Neither return not error IP -- no chance to recover -> PANIC */
MCESEV(
PANIC, "Neither restart nor error IP",
- MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
+ EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
),
MCESEV(
PANIC, "In kernel and no restart IP",
- KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
+ EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
+ ),
+ MCESEV(
+ DEFERRED, "Deferred error",
+ NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
),
MCESEV(
KEEP, "Corrected error",
@@ -89,7 +97,7 @@ static struct severity {
/* ignore OVER for UCNA */
MCESEV(
- KEEP, "Uncorrected no action required",
+ UCNA, "Uncorrected no action required",
SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
),
MCESEV(
@@ -178,8 +186,9 @@ static int error_context(struct mce *m)
return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
}
-int mce_severity(struct mce *m, int tolerant, char **msg)
+int mce_severity(struct mce *m, int tolerant, char **msg, bool is_excp)
{
+ enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
enum context ctx = error_context(m);
struct severity *s;
@@ -194,6 +203,8 @@ int mce_severity(struct mce *m, int tolerant, char **msg)
continue;
if (s->context && ctx != s->context)
continue;
+ if (s->excp && excp != s->excp)
+ continue;
if (msg)
*msg = s->msg;
s->covered = 1;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 61a9668cebfd..b5c2276317e2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -660,6 +660,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
struct pt_regs *regs)
{
int i, ret = 0;
+ char *tmp;
for (i = 0; i < mca_cfg.banks; i++) {
m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
@@ -668,8 +669,11 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
if (quirk_no_way_out)
quirk_no_way_out(i, m, regs);
}
- if (mce_severity(m, mca_cfg.tolerant, msg) >= MCE_PANIC_SEVERITY)
+
+ if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+ *msg = tmp;
ret = 1;
+ }
}
return ret;
}
@@ -754,7 +758,7 @@ static void mce_reign(void)
for_each_possible_cpu(cpu) {
int severity = mce_severity(&per_cpu(mces_seen, cpu),
mca_cfg.tolerant,
- &nmsg);
+ &nmsg, true);
if (severity > global_worst) {
msg = nmsg;
global_worst = severity;
@@ -1095,13 +1099,14 @@ void do_machine_check(struct pt_regs *regs, long error_code)
*/
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
- severity = mce_severity(&m, cfg->tolerant, NULL);
+ severity = mce_severity(&m, cfg->tolerant, NULL, true);
/*
- * When machine check was for corrected handler don't touch,
- * unless we're panicing.
+ * When machine check was for corrected/deferred handler don't
+ * touch, unless we're panicing.
*/
- if (severity == MCE_KEEP_SEVERITY && !no_way_out)
+ if ((severity == MCE_KEEP_SEVERITY ||
+ severity == MCE_UCNA_SEVERITY) && !no_way_out)
continue;
__set_bit(i, toclear);
if (severity == MCE_NO_SEVERITY) {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index 8c256749882c..611d821eac1a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -664,6 +664,7 @@ static int __init rapl_pmu_init(void)
break;
case 60: /* Haswell */
case 69: /* Haswell-Celeron */
+ case 61: /* Broadwell */
rapl_cntr_mask = RAPL_IDX_HSW;
rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
break;
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index a9a4229f6161..ae0fdc86b7b6 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -155,6 +155,21 @@ static void init_thread_xstate(void)
xstate_size = sizeof(struct i387_fxsave_struct);
else
xstate_size = sizeof(struct i387_fsave_struct);
+
+ /*
+ * Quirk: we don't yet handle the XSAVES* instructions
+ * correctly, as we don't correctly convert between
+ * standard and compacted format when interfacing
+ * with user-space - so disable it for now.
+ *
+ * The difference is small: with recent CPUs the
+ * compacted format is only marginally smaller than
+ * the standard FPU state format.
+ *
+ * ( This is easy to backport while we are fixing
+ * XSAVES* support. )
+ */
+ setup_clear_cpu_cap(X86_FEATURE_XSAVES);
}
/*
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 4eb204c6b057..5dd6c417b095 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -24,6 +24,7 @@
#include <asm/syscalls.h>
#include <asm/idle.h>
#include <asm/uaccess.h>
+#include <asm/mwait.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/debugreg.h>
@@ -385,6 +386,53 @@ static void amd_e400_idle(void)
default_idle();
}
+/*
+ * Intel Core2 and older machines prefer MWAIT over HALT for C1.
+ * We can't rely on cpuidle installing MWAIT, because it will not load
+ * on systems that support only C1 -- so the boot default must be MWAIT.
+ *
+ * Some AMD machines are the opposite, they depend on using HALT.
+ *
+ * So for default C1, which is used during boot until cpuidle loads,
+ * use MWAIT-C1 on Intel HW that has it, else use HALT.
+ */
+static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
+{
+ if (c->x86_vendor != X86_VENDOR_INTEL)
+ return 0;
+
+ if (!cpu_has(c, X86_FEATURE_MWAIT))
+ return 0;
+
+ return 1;
+}
+
+/*
+ * MONITOR/MWAIT with no hints, used for default default C1 state.
+ * This invokes MWAIT with interrutps enabled and no flags,
+ * which is backwards compatible with the original MWAIT implementation.
+ */
+
+static void mwait_idle(void)
+{
+ if (!current_set_polling_and_test()) {
+ if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
+ smp_mb(); /* quirk */
+ clflush((void *)&current_thread_info()->flags);
+ smp_mb(); /* quirk */
+ }
+
+ __monitor((void *)&current_thread_info()->flags, 0, 0);
+ if (!need_resched())
+ __sti_mwait(0, 0);
+ else
+ local_irq_enable();
+ } else {
+ local_irq_enable();
+ }
+ __current_clr_polling();
+}
+
void select_idle_routine(const struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
@@ -398,6 +446,9 @@ void select_idle_routine(const struct cpuinfo_x86 *c)
/* E400: APIC timer interrupt does not wake up CPU from C1e */
pr_info("using AMD E400 aware idle routine\n");
x86_idle = amd_e400_idle;
+ } else if (prefer_mwait_c1_over_halt(c)) {
+ pr_info("using mwait in idle threads\n");
+ x86_idle = mwait_idle;
} else
x86_idle = default_idle;
}
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 88f92014ba6b..81c6d541d98a 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -16,6 +16,8 @@
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
+#include <asm/i387.h> /* For use_eager_fpu. Ugh! */
+#include <asm/fpu-internal.h> /* For use_eager_fpu. Ugh! */
#include <asm/user.h>
#include <asm/xsave.h>
#include "cpuid.h"
@@ -88,6 +90,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
xstate_required_size(vcpu->arch.xcr0);
}
+ vcpu->arch.eager_fpu = guest_cpuid_has_mpx(vcpu);
+
/*
* The existing code assumes virtual address is 48-bit in the canonical
* address checks; exit if it is ever changed.
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 4452eedfaedd..9bec2b8cdced 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -111,4 +111,12 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
best = kvm_find_cpuid_entry(vcpu, 7, 0);
return best && (best->ebx & bit(X86_FEATURE_RTM));
}
+
+static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 7, 0);
+ return best && (best->ebx & bit(X86_FEATURE_MPX));
+}
#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9c12e63c653f..f696dedb0fa7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3625,8 +3625,8 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
}
}
-void update_permission_bitmask(struct kvm_vcpu *vcpu,
- struct kvm_mmu *mmu, bool ept)
+static void update_permission_bitmask(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *mmu, bool ept)
{
unsigned bit, byte, pfec;
u8 map;
@@ -3807,6 +3807,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
{
bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+ bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
ASSERT(vcpu);
ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
@@ -3824,6 +3825,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu);
vcpu->arch.mmu.base_role.smep_andnot_wp
= smep && !is_write_protection(vcpu);
+ context->base_role.smap_andnot_wp
+ = smap && !is_write_protection(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
@@ -4095,12 +4098,18 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *new, int bytes)
{
gfn_t gfn = gpa >> PAGE_SHIFT;
- union kvm_mmu_page_role mask = { .word = 0 };
struct kvm_mmu_page *sp;
LIST_HEAD(invalid_list);
u64 entry, gentry, *spte;
int npte;
bool remote_flush, local_flush, zap_page;
+ union kvm_mmu_page_role mask = { };
+
+ mask.cr0_wp = 1;
+ mask.cr4_pae = 1;
+ mask.nxe = 1;
+ mask.smep_andnot_wp = 1;
+ mask.smap_andnot_wp = 1;
/*
* If we don't have indirect shadow pages, it means no page is
@@ -4126,7 +4135,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
++vcpu->kvm->stat.mmu_pte_write;
kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
- mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
if (detect_write_misaligned(sp, gpa, bytes) ||
detect_write_flooding(sp)) {
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index bde8ee725754..a6b876443302 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -84,8 +84,6 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
bool execonly);
-void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
- bool ept);
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
{
@@ -179,6 +177,8 @@ static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
int index = (pfec >> 1) +
(smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
+ WARN_ON(pfec & PFERR_RSVD_MASK);
+
return (mmu->permissions[index] >> pte_access) & 1;
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index fd49c867b25a..6e6d115fe9b5 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -718,6 +718,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
mmu_is_nested(vcpu));
if (likely(r != RET_MMIO_PF_INVALID))
return r;
+
+ /*
+ * page fault with PFEC.RSVD = 1 is caused by shadow
+ * page fault, should not be used to walk guest page
+ * table.
+ */
+ error_code &= ~PFERR_RSVD_MASK;
};
r = mmu_topup_memory_caches(vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7527cefc5a43..f7eaee1cbc54 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4370,6 +4370,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.cache_reg = svm_cache_reg,
.get_rflags = svm_get_rflags,
.set_rflags = svm_set_rflags,
+ .fpu_activate = svm_fpu_activate,
.fpu_deactivate = svm_fpu_deactivate,
.tlb_flush = svm_flush_tlb,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ed7039465f16..54bda28e6a12 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2381,8 +2381,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
if (enable_ept) {
/* nested EPT: emulate EPT also to L1 */
- nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT |
- SECONDARY_EXEC_UNRESTRICTED_GUEST;
+ nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT;
nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
VMX_EPT_INVEPT_BIT;
@@ -2396,6 +2395,10 @@ static __init void nested_vmx_setup_ctls_msrs(void)
} else
nested_vmx_ept_caps = 0;
+ if (enable_unrestricted_guest)
+ nested_vmx_secondary_ctls_high |=
+ SECONDARY_EXEC_UNRESTRICTED_GUEST;
+
/* miscellaneous data */
rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA;
@@ -9117,6 +9120,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.cache_reg = vmx_cache_reg,
.get_rflags = vmx_get_rflags,
.set_rflags = vmx_set_rflags,
+ .fpu_activate = vmx_fpu_activate,
.fpu_deactivate = vmx_fpu_deactivate,
.tlb_flush = vmx_flush_tlb,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a38dd816015b..0bb431c3f74e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -678,8 +678,9 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr);
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
unsigned long old_cr4 = kvm_read_cr4(vcpu);
- unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE |
- X86_CR4_PAE | X86_CR4_SMEP;
+ unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
+ X86_CR4_SMEP | X86_CR4_SMAP;
+
if (cr4 & CR4_RESERVED_BITS)
return 1;
@@ -720,9 +721,6 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
(!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
kvm_mmu_reset_context(vcpu);
- if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
- update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);
-
if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
kvm_update_cpuid(vcpu);
@@ -5779,7 +5777,6 @@ int kvm_arch_init(void *opaque)
kvm_set_mmio_spte_mask();
kvm_x86_ops = ops;
- kvm_init_msr_list();
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0);
@@ -6140,6 +6137,8 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
return;
page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+ if (is_error_page(page))
+ return;
kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
/*
@@ -6994,7 +6993,9 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
fpu_save_init(&vcpu->arch.guest_fpu);
__kernel_fpu_end();
++vcpu->stat.fpu_reload;
- kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
+ if (!vcpu->arch.eager_fpu)
+ kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
+
trace_kvm_fpu(0);
}
@@ -7010,11 +7011,21 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
unsigned int id)
{
+ struct kvm_vcpu *vcpu;
+
if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
printk_once(KERN_WARNING
"kvm: SMP vm created on host with unstable TSC; "
"guest TSC will not be reliable\n");
- return kvm_x86_ops->vcpu_create(kvm, id);
+
+ vcpu = kvm_x86_ops->vcpu_create(kvm, id);
+
+ /*
+ * Activate fpu unconditionally in case the guest needs eager FPU. It will be
+ * deactivated soon if it doesn't.
+ */
+ kvm_x86_ops->fpu_activate(vcpu);
+ return vcpu;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
@@ -7210,7 +7221,14 @@ void kvm_arch_hardware_disable(void)
int kvm_arch_hardware_setup(void)
{
- return kvm_x86_ops->hardware_setup();
+ int r;
+
+ r = kvm_x86_ops->hardware_setup();
+ if (r != 0)
+ return r;
+
+ kvm_init_msr_list();
+ return 0;
}
void kvm_arch_hardware_unsetup(void)
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 006cc914994b..9161f764121e 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -52,20 +52,8 @@ int pud_huge(pud_t pud)
return 0;
}
-struct page *
-follow_huge_pmd(struct mm_struct *mm, unsigned long address,
- pmd_t *pmd, int write)
-{
- return NULL;
-}
#else
-struct page *
-follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
-{
- return ERR_PTR(-EINVAL);
-}
-
/*
* pmd_huge() returns 1 if @pmd is hugetlb related entry, that is normal
* hugetlb entry or non-present (migration or hwpoisoned) hugetlb entry.