aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/mwait.h8
-rw-r--r--arch/x86/kernel/process.c51
-rw-r--r--arch/x86/kvm/vmx.c7
-rw-r--r--arch/x86/kvm/x86.c10
-rw-r--r--arch/x86/mm/hugetlbpage.c12
5 files changed, 72 insertions, 16 deletions
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index a1410db38a1a..653dfa7662e1 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -30,6 +30,14 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
:: "a" (eax), "c" (ecx));
}
+static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
+{
+ trace_hardirqs_on();
+ /* "mwait %eax, %ecx;" */
+ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
+ :: "a" (eax), "c" (ecx));
+}
+
/*
* This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
* which can obviate IPI to trigger checking of need_resched.
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e127ddaa2d5a..6ad8a6396b75 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -24,6 +24,7 @@
#include <asm/syscalls.h>
#include <asm/idle.h>
#include <asm/uaccess.h>
+#include <asm/mwait.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/debugreg.h>
@@ -398,6 +399,53 @@ static void amd_e400_idle(void)
default_idle();
}
+/*
+ * Intel Core2 and older machines prefer MWAIT over HALT for C1.
+ * We can't rely on cpuidle installing MWAIT, because it will not load
+ * on systems that support only C1 -- so the boot default must be MWAIT.
+ *
+ * Some AMD machines are the opposite, they depend on using HALT.
+ *
+ * So for default C1, which is used during boot until cpuidle loads,
+ * use MWAIT-C1 on Intel HW that has it, else use HALT.
+ */
+static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
+{
+ if (c->x86_vendor != X86_VENDOR_INTEL)
+ return 0;
+
+ if (!cpu_has(c, X86_FEATURE_MWAIT))
+ return 0;
+
+ return 1;
+}
+
+/*
+ * MONITOR/MWAIT with no hints, used for default default C1 state.
+ * This invokes MWAIT with interrutps enabled and no flags,
+ * which is backwards compatible with the original MWAIT implementation.
+ */
+
+static void mwait_idle(void)
+{
+ if (!current_set_polling_and_test()) {
+ if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
+ smp_mb(); /* quirk */
+ clflush((void *)&current_thread_info()->flags);
+ smp_mb(); /* quirk */
+ }
+
+ __monitor((void *)&current_thread_info()->flags, 0, 0);
+ if (!need_resched())
+ __sti_mwait(0, 0);
+ else
+ local_irq_enable();
+ } else {
+ local_irq_enable();
+ }
+ __current_clr_polling();
+}
+
void select_idle_routine(const struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
@@ -411,6 +459,9 @@ void select_idle_routine(const struct cpuinfo_x86 *c)
/* E400: APIC timer interrupt does not wake up CPU from C1e */
pr_info("using AMD E400 aware idle routine\n");
x86_idle = amd_e400_idle;
+ } else if (prefer_mwait_c1_over_halt(c)) {
+ pr_info("using mwait in idle threads\n");
+ x86_idle = mwait_idle;
} else
x86_idle = default_idle;
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ed7039465f16..47843b04d60f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2381,8 +2381,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
if (enable_ept) {
/* nested EPT: emulate EPT also to L1 */
- nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT |
- SECONDARY_EXEC_UNRESTRICTED_GUEST;
+ nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT;
nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
VMX_EPT_INVEPT_BIT;
@@ -2396,6 +2395,10 @@ static __init void nested_vmx_setup_ctls_msrs(void)
} else
nested_vmx_ept_caps = 0;
+ if (enable_unrestricted_guest)
+ nested_vmx_secondary_ctls_high |=
+ SECONDARY_EXEC_UNRESTRICTED_GUEST;
+
/* miscellaneous data */
rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a38dd816015b..5369ec6a8094 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5779,7 +5779,6 @@ int kvm_arch_init(void *opaque)
kvm_set_mmio_spte_mask();
kvm_x86_ops = ops;
- kvm_init_msr_list();
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0);
@@ -7210,7 +7209,14 @@ void kvm_arch_hardware_disable(void)
int kvm_arch_hardware_setup(void)
{
- return kvm_x86_ops->hardware_setup();
+ int r;
+
+ r = kvm_x86_ops->hardware_setup();
+ if (r != 0)
+ return r;
+
+ kvm_init_msr_list();
+ return 0;
}
void kvm_arch_hardware_unsetup(void)
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 006cc914994b..9161f764121e 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -52,20 +52,8 @@ int pud_huge(pud_t pud)
return 0;
}
-struct page *
-follow_huge_pmd(struct mm_struct *mm, unsigned long address,
- pmd_t *pmd, int write)
-{
- return NULL;
-}
#else
-struct page *
-follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
-{
- return ERR_PTR(-EINVAL);
-}
-
/*
* pmd_huge() returns 1 if @pmd is hugetlb related entry, that is normal
* hugetlb entry or non-present (migration or hwpoisoned) hugetlb entry.