about | summary | refs | log | tree | commit | diff
path: root/arch/x86/kvm/svm.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/svm.c')
-rw-r--r-- arch/x86/kvm/svm.c 283
1 files changed, 221 insertions, 62 deletions
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8a3f9f64f86..82e144a4e51 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4,7 +4,7 @@
* AMD SVM support
*
* Copyright (C) 2006 Qumranet, Inc.
- * Copyright 2010 Red Hat, Inc. and/or its affilates.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
*
* Authors:
* Yaniv Kamay <yaniv@qumranet.com>
@@ -88,6 +88,14 @@ struct nested_state {
/* A VMEXIT is required but not yet emulated */
bool exit_required;
+ /*
+ * If we vmexit during an instruction emulation we need these to restore
+ * the L1 guest rip, rsp and rax after the emulation
+ */
+ unsigned long vmexit_rip;
+ unsigned long vmexit_rsp;
+ unsigned long vmexit_rax;
+
/* cache for intercepts of the guest */
u16 intercept_cr_read;
u16 intercept_cr_write;
@@ -96,6 +104,8 @@ struct nested_state {
u32 intercept_exceptions;
u64 intercept;
+ /* Nested Paging related state */
+ u64 nested_cr3;
};
#define MSRPM_OFFSETS 16
@@ -284,6 +294,15 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)
force_new_asid(vcpu);
}
+/*
+ * Paging root level selected at build time: PT64_ROOT_LEVEL when
+ * CONFIG_X86_64 is defined, PT32E_ROOT_LEVEL otherwise.
+ */
+static int get_npt_level(void)
+{
+	int level;
+
+#ifdef CONFIG_X86_64
+	level = PT64_ROOT_LEVEL;
+#else
+	level = PT32E_ROOT_LEVEL;
+#endif
+
+	return level;
+}
+
static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
vcpu->arch.efer = efer;
@@ -701,6 +720,29 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
seg->base = 0;
}
+/*
+ * Install @offset as the TSC offset of @vcpu.
+ *
+ * Outside of nested mode the active VMCB simply receives @offset.  While
+ * is_nested() holds, the difference the active VMCB currently carries on
+ * top of the hsave VMCB is kept: hsave receives @offset and the active
+ * VMCB receives @offset plus that difference.
+ */
+static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	struct vmcb_control_area *active = &svm->vmcb->control;
+	u64 nested_delta;
+
+	if (!is_nested(svm)) {
+		active->tsc_offset = offset;
+		return;
+	}
+
+	/* Sample the difference before hsave is overwritten. */
+	nested_delta = active->tsc_offset -
+		       svm->nested.hsave->control.tsc_offset;
+	svm->nested.hsave->control.tsc_offset = offset;
+
+	active->tsc_offset = offset + nested_delta;
+}
+
+/*
+ * Add @adjustment to the TSC offset of the active VMCB and, while
+ * is_nested() holds, to the offset stored in the hsave VMCB as well.
+ */
+static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	struct vmcb_control_area *active = &svm->vmcb->control;
+
+	active->tsc_offset += adjustment;
+
+	if (!is_nested(svm))
+		return;
+
+	svm->nested.hsave->control.tsc_offset += adjustment;
+}
+
static void init_vmcb(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
@@ -793,7 +835,7 @@ static void init_vmcb(struct vcpu_svm *svm)
init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
- save->efer = EFER_SVME;
+ svm_set_efer(&svm->vcpu, 0);
save->dr6 = 0xffff0ff0;
save->dr7 = 0x400;
save->rflags = 2;
@@ -804,8 +846,8 @@ static void init_vmcb(struct vcpu_svm *svm)
* This is the guest-visible cr0 value.
* svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
*/
- svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
- (void)kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0);
+ svm->vcpu.arch.cr0 = 0;
+ (void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
save->cr4 = X86_CR4_PAE;
/* rdx = ?? */
@@ -901,7 +943,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
svm->asid_generation = 0;
init_vmcb(svm);
- svm->vmcb->control.tsc_offset = 0-native_read_tsc();
+ kvm_write_tsc(&svm->vcpu, 0);
err = fx_init(&svm->vcpu);
if (err)
@@ -947,20 +989,6 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
int i;
if (unlikely(cpu != vcpu->cpu)) {
- u64 delta;
-
- if (check_tsc_unstable()) {
- /*
- * Make sure that the guest sees a monotonically
- * increasing TSC.
- */
- delta = vcpu->arch.host_tsc - native_read_tsc();
- svm->vmcb->control.tsc_offset += delta;
- if (is_nested(svm))
- svm->nested.hsave->control.tsc_offset += delta;
- }
- vcpu->cpu = cpu;
- kvm_migrate_timers(vcpu);
svm->asid_generation = 0;
}
@@ -976,8 +1004,6 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
++vcpu->stat.host_state_reload;
for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
-
- vcpu->arch.host_tsc = native_read_tsc();
}
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
@@ -995,7 +1021,7 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
switch (reg) {
case VCPU_EXREG_PDPTR:
BUG_ON(!npt_enabled);
- load_pdptrs(vcpu, vcpu->arch.cr3);
+ load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3);
break;
default:
BUG();
@@ -1206,8 +1232,12 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
if (old == new) {
/* cr0 write with ts and mp unchanged */
svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
- if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE)
+ if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) {
+ svm->nested.vmexit_rip = kvm_rip_read(vcpu);
+ svm->nested.vmexit_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
+ svm->nested.vmexit_rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
return;
+ }
}
}
@@ -1581,6 +1611,54 @@ static int vmmcall_interception(struct vcpu_svm *svm)
return 1;
}
+/*
+ * Report the nested_cr3 value cached in the nested state of @vcpu.
+ * Installed as the mmu get_cr3 hook by nested_svm_init_mmu_context().
+ */
+static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
+{
+	return to_svm(vcpu)->nested.nested_cr3;
+}
+
+/*
+ * Store @root in the nested_cr3 field of the active VMCB and request a
+ * new ASID for @vcpu.  Installed as the mmu set_cr3 hook by
+ * nested_svm_init_mmu_context().
+ */
+static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
+				   unsigned long root)
+{
+	struct vmcb_control_area *ctl = &to_svm(vcpu)->vmcb->control;
+
+	ctl->nested_cr3 = root;
+	force_new_asid(vcpu);
+}
+
+/*
+ * Turn the fault recorded in vcpu->arch.fault into an SVM_EXIT_NPF exit:
+ * the error code goes to exit_info_1, the faulting address to exit_info_2,
+ * and nested_svm_vmexit() is then invoked.  Its return value is not used.
+ * Installed as the mmu inject_page_fault hook by
+ * nested_svm_init_mmu_context().
+ */
+static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	struct vmcb_control_area *ctl = &svm->vmcb->control;
+
+	ctl->exit_code    = SVM_EXIT_NPF;
+	ctl->exit_code_hi = 0;
+	ctl->exit_info_1  = vcpu->arch.fault.error_code;
+	ctl->exit_info_2  = vcpu->arch.fault.address;
+
+	nested_svm_vmexit(svm);
+}
+
+/*
+ * Set up vcpu->arch.mmu for running with a guest-provided nested page
+ * table: initialise it through kvm_init_shadow_mmu(), install the
+ * nested_svm_* cr3 and page-fault hooks, set the shadow root level from
+ * get_npt_level() and make vcpu->arch.nested_mmu the walk mmu.
+ *
+ * The hooks are installed whatever kvm_init_shadow_mmu() returned; its
+ * result is handed back to the caller unchanged.
+ */
+static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
+{
+	int ret = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
+
+	vcpu->arch.mmu.set_cr3           = nested_svm_set_tdp_cr3;
+	vcpu->arch.mmu.get_cr3           = nested_svm_get_tdp_cr3;
+	vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
+	vcpu->arch.mmu.shadow_root_level = get_npt_level();
+	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
+
+	return ret;
+}
+
+/*
+ * Point the walk mmu of @vcpu back at vcpu->arch.mmu, reversing the
+ * walk_mmu switch made by nested_svm_init_mmu_context().
+ */
+static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
+}
+
static int nested_svm_check_permissions(struct vcpu_svm *svm)
{
if (!(svm->vcpu.arch.efer & EFER_SVME)
@@ -1629,6 +1707,14 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm)
if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
return false;
+ /*
+ * If a vmexit was already requested (by an intercepted exception,
+ * for instance), do not overwrite it with an "external interrupt"
+ * vmexit.
+ */
+ if (svm->nested.exit_required)
+ return false;
+
svm->vmcb->control.exit_code = SVM_EXIT_INTR;
svm->vmcb->control.exit_info_1 = 0;
svm->vmcb->control.exit_info_2 = 0;
@@ -1896,6 +1982,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
nested_vmcb->save.ds = vmcb->save.ds;
nested_vmcb->save.gdtr = vmcb->save.gdtr;
nested_vmcb->save.idtr = vmcb->save.idtr;
+ nested_vmcb->save.efer = svm->vcpu.arch.efer;
nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu);
nested_vmcb->save.cr3 = svm->vcpu.arch.cr3;
nested_vmcb->save.cr2 = vmcb->save.cr2;
@@ -1917,6 +2004,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
+ nested_vmcb->control.next_rip = vmcb->control.next_rip;
/*
* If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
@@ -1947,6 +2035,8 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
kvm_clear_exception_queue(&svm->vcpu);
kvm_clear_interrupt_queue(&svm->vcpu);
+ svm->nested.nested_cr3 = 0;
+
/* Restore selected save entries */
svm->vmcb->save.es = hsave->save.es;
svm->vmcb->save.cs = hsave->save.cs;
@@ -1973,6 +2063,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
nested_svm_unmap(page);
+ nested_svm_uninit_mmu_context(&svm->vcpu);
kvm_mmu_reset_context(&svm->vcpu);
kvm_mmu_load(&svm->vcpu);
@@ -2012,6 +2103,20 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
return true;
}
+/*
+ * Sanity-check the control area of a guest-supplied VMCB.
+ *
+ * Returns true only if all of the following hold:
+ *  - the VMRUN intercept bit is set,
+ *  - the ASID is non-zero,
+ *  - nested_ctl is zero, or npt_enabled is set.
+ */
+static bool nested_vmcb_checks(struct vmcb *vmcb)
+{
+	const struct vmcb_control_area *ctl = &vmcb->control;
+	bool vmrun_intercepted =
+		(ctl->intercept & (1ULL << INTERCEPT_VMRUN)) != 0;
+	bool asid_valid = ctl->asid != 0;
+	bool npt_usable = !ctl->nested_ctl || npt_enabled;
+
+	return vmrun_intercepted && asid_valid && npt_usable;
+}
+
static bool nested_svm_vmrun(struct vcpu_svm *svm)
{
struct vmcb *nested_vmcb;
@@ -2026,7 +2131,18 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
if (!nested_vmcb)
return false;
- trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, vmcb_gpa,
+ if (!nested_vmcb_checks(nested_vmcb)) {
+ nested_vmcb->control.exit_code = SVM_EXIT_ERR;
+ nested_vmcb->control.exit_code_hi = 0;
+ nested_vmcb->control.exit_info_1 = 0;
+ nested_vmcb->control.exit_info_2 = 0;
+
+ nested_svm_unmap(page);
+
+ return false;
+ }
+
+ trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
nested_vmcb->save.rip,
nested_vmcb->control.int_ctl,
nested_vmcb->control.event_inj,
@@ -2055,7 +2171,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
hsave->save.cr4 = svm->vcpu.arch.cr4;
hsave->save.rflags = vmcb->save.rflags;
- hsave->save.rip = svm->next_rip;
+ hsave->save.rip = kvm_rip_read(&svm->vcpu);
hsave->save.rsp = vmcb->save.rsp;
hsave->save.rax = vmcb->save.rax;
if (npt_enabled)
@@ -2070,6 +2186,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
else
svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
+ if (nested_vmcb->control.nested_ctl) {
+ kvm_mmu_unload(&svm->vcpu);
+ svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
+ nested_svm_init_mmu_context(&svm->vcpu);
+ }
+
/* Load the nested guest state */
svm->vmcb->save.es = nested_vmcb->save.es;
svm->vmcb->save.cs = nested_vmcb->save.cs;
@@ -2227,8 +2349,8 @@ static int vmrun_interception(struct vcpu_svm *svm)
if (nested_svm_check_permissions(svm))
return 1;
- svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
- skip_emulated_instruction(&svm->vcpu);
+ /* Save rip after vmrun instruction */
+ kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3);
if (!nested_svm_vmrun(svm))
return 1;
@@ -2257,6 +2379,7 @@ static int stgi_interception(struct vcpu_svm *svm)
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
skip_emulated_instruction(&svm->vcpu);
+ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
enable_gif(svm);
@@ -2399,6 +2522,23 @@ static int emulate_on_interception(struct vcpu_svm *svm)
return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE;
}
+/*
+ * Exit handler for SVM_EXIT_WRITE_CR0: emulate the intercepted instruction.
+ *
+ * svm_set_cr0() stores rip, rsp and rax in svm->nested.vmexit_* when the
+ * write is handled as a nested exit.  A non-zero vmexit_rip after the
+ * emulation therefore means those three registers must be written back;
+ * vmexit_rip is cleared afterwards so the restore happens only once.
+ *
+ * Returns 1 if emulate_instruction() reported EMULATE_DONE, 0 otherwise.
+ */
+static int cr0_write_interception(struct vcpu_svm *svm)
+{
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+	struct nested_state *nested = &svm->nested;
+	int ret = emulate_instruction(vcpu, 0, 0, 0);
+
+	if (!nested->vmexit_rip)
+		return ret == EMULATE_DONE;
+
+	kvm_register_write(vcpu, VCPU_REGS_RIP, nested->vmexit_rip);
+	kvm_register_write(vcpu, VCPU_REGS_RSP, nested->vmexit_rsp);
+	kvm_register_write(vcpu, VCPU_REGS_RAX, nested->vmexit_rax);
+	nested->vmexit_rip = 0;
+
+	return ret == EMULATE_DONE;
+}
+
static int cr8_write_interception(struct vcpu_svm *svm)
{
struct kvm_run *kvm_run = svm->vcpu.run;
@@ -2542,20 +2682,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
struct vcpu_svm *svm = to_svm(vcpu);
switch (ecx) {
- case MSR_IA32_TSC: {
- u64 tsc_offset = data - native_read_tsc();
- u64 g_tsc_offset = 0;
-
- if (is_nested(svm)) {
- g_tsc_offset = svm->vmcb->control.tsc_offset -
- svm->nested.hsave->control.tsc_offset;
- svm->nested.hsave->control.tsc_offset = tsc_offset;
- }
-
- svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset;
-
+ case MSR_IA32_TSC:
+ kvm_write_tsc(vcpu, data);
break;
- }
case MSR_STAR:
svm->vmcb->save.star = data;
break;
@@ -2643,6 +2772,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
{
struct kvm_run *kvm_run = svm->vcpu.run;
+ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
svm_clear_vintr(svm);
svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
/*
@@ -2672,7 +2802,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_READ_CR4] = emulate_on_interception,
[SVM_EXIT_READ_CR8] = emulate_on_interception,
[SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception,
- [SVM_EXIT_WRITE_CR0] = emulate_on_interception,
+ [SVM_EXIT_WRITE_CR0] = cr0_write_interception,
[SVM_EXIT_WRITE_CR3] = emulate_on_interception,
[SVM_EXIT_WRITE_CR4] = emulate_on_interception,
[SVM_EXIT_WRITE_CR8] = cr8_write_interception,
@@ -2871,7 +3001,8 @@ static int handle_exit(struct kvm_vcpu *vcpu)
if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
- exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH)
+ exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
+ exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
"exit_code 0x%x\n",
__func__, svm->vmcb->control.exit_int_info,
@@ -3088,8 +3219,10 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
svm->int3_injected = 0;
- if (svm->vcpu.arch.hflags & HF_IRET_MASK)
+ if (svm->vcpu.arch.hflags & HF_IRET_MASK) {
svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
+ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+ }
svm->vcpu.arch.nmi_injected = false;
kvm_clear_exception_queue(&svm->vcpu);
@@ -3098,6 +3231,8 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
if (!(exitintinfo & SVM_EXITINTINFO_VALID))
return;
+ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+
vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
@@ -3134,6 +3269,17 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
}
}
+/*
+ * Take back an event that was queued in the VMCB for injection: copy
+ * event_inj and event_inj_err into exit_int_info and exit_int_info_err,
+ * clear event_inj, then run svm_complete_interrupts() on the result.
+ */
+static void svm_cancel_injection(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	struct vmcb_control_area *ctl = &svm->vmcb->control;
+
+	ctl->exit_int_info     = ctl->event_inj;
+	ctl->exit_int_info_err = ctl->event_inj_err;
+	ctl->event_inj         = 0;
+
+	svm_complete_interrupts(svm);
+}
+
#ifdef CONFIG_X86_64
#define R "r"
#else
@@ -3167,9 +3313,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
savesegment(gs, gs_selector);
ldt_selector = kvm_read_ldt();
svm->vmcb->save.cr2 = vcpu->arch.cr2;
- /* required for live migration with NPT */
- if (npt_enabled)
- svm->vmcb->save.cr3 = vcpu->arch.cr3;
clgi();
@@ -3291,16 +3434,22 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (npt_enabled) {
- svm->vmcb->control.nested_cr3 = root;
- force_new_asid(vcpu);
- return;
- }
-
svm->vmcb->save.cr3 = root;
force_new_asid(vcpu);
}
+/*
+ * Store @root in the nested_cr3 field of the VMCB, copy vcpu->arch.cr3
+ * into the VMCB save area, and request a new ASID for @vcpu.
+ */
+static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
+{
+	struct vmcb *vmcb = to_svm(vcpu)->vmcb;
+
+	vmcb->control.nested_cr3 = root;
+
+	/* Keep the guest cr3 in the save area current for live migration */
+	vmcb->save.cr3 = vcpu->arch.cr3;
+
+	force_new_asid(vcpu);
+}
+
static int is_disabled(void)
{
u64 vm_cr;
@@ -3333,15 +3482,6 @@ static bool svm_cpu_has_accelerated_tpr(void)
return false;
}
-static int get_npt_level(void)
-{
-#ifdef CONFIG_X86_64
- return PT64_ROOT_LEVEL;
-#else
- return PT32E_ROOT_LEVEL;
-#endif
-}
-
static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
{
return 0;
@@ -3354,12 +3494,25 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
{
switch (func) {
+ case 0x80000001:
+ if (nested)
+ entry->ecx |= (1 << 2); /* Set SVM bit */
+ break;
case 0x8000000A:
entry->eax = 1; /* SVM revision 1 */
entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper
ASID emulation to nested SVM */
entry->ecx = 0; /* Reserved */
- entry->edx = 0; /* Do not support any additional features */
+ entry->edx = 0; /* Per default do not support any
+ additional features */
+
+ /* Support next_rip if host supports it */
+ if (svm_has(SVM_FEATURE_NRIP))
+ entry->edx |= SVM_FEATURE_NRIP;
+
+ /* Support NPT for the guest if enabled */
+ if (npt_enabled)
+ entry->edx |= SVM_FEATURE_NPT;
break;
}
@@ -3497,6 +3650,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.set_irq = svm_set_irq,
.set_nmi = svm_inject_nmi,
.queue_exception = svm_queue_exception,
+ .cancel_injection = svm_cancel_injection,
.interrupt_allowed = svm_interrupt_allowed,
.nmi_allowed = svm_nmi_allowed,
.get_nmi_mask = svm_get_nmi_mask,
@@ -3519,6 +3673,11 @@ static struct kvm_x86_ops svm_x86_ops = {
.set_supported_cpuid = svm_set_supported_cpuid,
.has_wbinvd_exit = svm_has_wbinvd_exit,
+
+ .write_tsc_offset = svm_write_tsc_offset,
+ .adjust_tsc_offset = svm_adjust_tsc_offset,
+
+ .set_tdp_cr3 = set_tdp_cr3,
};
static int __init svm_init(void)