diff options
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r-- | arch/x86/kvm/emulate.c | 5 | ||||
-rw-r--r-- | arch/x86/kvm/i8254.c | 18 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.c | 48 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.h | 4 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 12 | ||||
-rw-r--r-- | arch/x86/kvm/paging_tmpl.h | 8 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 6 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 13 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 45 | ||||
-rw-r--r-- | arch/x86/kvm/x86.h | 2 |
10 files changed, 90 insertions, 71 deletions
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 5953dcea752..5484d54582c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4207,7 +4207,10 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, case OpMem8: ctxt->memop.bytes = 1; if (ctxt->memop.type == OP_REG) { - ctxt->memop.addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1); + int highbyte_regs = ctxt->rex_prefix == 0; + + ctxt->memop.addr.reg = decode_register(ctxt, ctxt->modrm_rm, + highbyte_regs); fetch_register_operand(&ctxt->memop); } goto mem_common; diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 412a5aa0ef9..518d86471b7 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -37,6 +37,7 @@ #include "irq.h" #include "i8254.h" +#include "x86.h" #ifndef CONFIG_X86_64 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) @@ -349,6 +350,23 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) atomic_set(&ps->pending, 0); ps->irq_ack = 1; + /* + * Do not allow the guest to program periodic timers with small + * interval, since the hrtimers are not throttled by the host + * scheduler. + */ + if (ps->is_periodic) { + s64 min_period = min_timer_period_us * 1000LL; + + if (ps->period < min_period) { + pr_info_ratelimited( + "kvm: requested %lld ns " + "i8254 timer period limited to %lld ns\n", + ps->period, min_period); + ps->period = min_period; + } + } + hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval), HRTIMER_MODE_ABS); } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 0eee2c8b64d..61d9fed5eb3 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -71,9 +71,6 @@ #define VEC_POS(v) ((v) & (32 - 1)) #define REG_POS(v) (((v) >> 5) << 4) -static unsigned int min_timer_period_us = 500; -module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); - static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) { *((u32 *) (apic->regs + reg_off)) = val; @@ -153,6 +150,8 @@ static inline int kvm_apic_id(struct kvm_lapic *apic) return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; } +#define KVM_X2APIC_CID_BITS 0 + static void recalculate_apic_map(struct kvm *kvm) { struct kvm_apic_map *new, *old = NULL; @@ -190,7 +189,8 @@ static void recalculate_apic_map(struct kvm *kvm) if (apic_x2apic_mode(apic)) { new->ldr_bits = 32; new->cid_shift = 16; - new->cid_mask = new->lid_mask = 0xffff; + new->cid_mask = (1 << KVM_X2APIC_CID_BITS) - 1; + new->lid_mask = 0xffff; } else if (kvm_apic_sw_enabled(apic) && !new->cid_mask /* flat mode */ && kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) { @@ -855,7 +855,8 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic) ASSERT(apic != NULL); /* if initial count is 0, current count should also be 0 */ - if (kvm_apic_get_reg(apic, APIC_TMICT) == 0) + if (kvm_apic_get_reg(apic, APIC_TMICT) == 0 || + apic->lapic_timer.period == 0) return 0; remaining = hrtimer_get_remaining(&apic->lapic_timer.timer); @@ -1360,8 +1361,12 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) return; } + if (!kvm_vcpu_is_bsp(apic->vcpu)) + value &= ~MSR_IA32_APICBASE_BSP; + vcpu->arch.apic_base = value; + /* update jump label if enable bit changes */ - if ((vcpu->arch.apic_base ^ value) & MSR_IA32_APICBASE_ENABLE) { + if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) { if (value & MSR_IA32_APICBASE_ENABLE) static_key_slow_dec_deferred(&apic_hw_disabled); else @@ -1369,10 +1374,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) recalculate_apic_map(vcpu->kvm); } - if (!kvm_vcpu_is_bsp(apic->vcpu)) - value &= ~MSR_IA32_APICBASE_BSP; - - vcpu->arch.apic_base = value; if ((old_value ^ value) & X2APIC_ENABLE) { if (value & X2APIC_ENABLE) { u32 id = kvm_apic_id(apic); @@ -1705,7 +1706,6 @@ static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu, void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) { u32 data; - void *vapic; if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention)) apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic); @@ -1713,9 +1713,8 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) return; - vapic = kmap_atomic(vcpu->arch.apic->vapic_page); - data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)); - kunmap_atomic(vapic); + kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, + sizeof(u32)); apic_set_tpr(vcpu->arch.apic, data & 0xff); } @@ -1751,7 +1750,6 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) u32 data, tpr; int max_irr, max_isr; struct kvm_lapic *apic = vcpu->arch.apic; - void *vapic; apic_sync_pv_eoi_to_guest(vcpu, apic); @@ -1767,18 +1765,24 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) max_isr = 0; data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24); - vapic = kmap_atomic(vcpu->arch.apic->vapic_page); - *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)) = data; - kunmap_atomic(vapic); + kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, + sizeof(u32)); } -void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) +int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) { - vcpu->arch.apic->vapic_addr = vapic_addr; - if (vapic_addr) + if (vapic_addr) { + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, + &vcpu->arch.apic->vapic_cache, + vapic_addr, sizeof(u32))) + return -EINVAL; __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention); - else + } else { __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention); + } + + vcpu->arch.apic->vapic_addr = vapic_addr; + return 0; } int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index c730ac9fe80..c8b0d0d2da5 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -34,7 +34,7 @@ struct kvm_lapic { */ void *regs; gpa_t vapic_addr; - struct page *vapic_page; + struct gfn_to_hva_cache vapic_cache; unsigned long pending_events; unsigned int sipi_vector; }; @@ -76,7 +76,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset); void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector); -void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); +int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 004cc87b781..711c649f80b 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2585,6 +2585,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, int emulate = 0; gfn_t pseudo_gfn; + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + return 0; + for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { if (iterator.level == level) { mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, @@ -2748,6 +2751,9 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, bool ret = false; u64 spte = 0ull; + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + return false; + if (!page_fault_can_be_fast(vcpu, error_code)) return false; @@ -3139,6 +3145,9 @@ static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr) struct kvm_shadow_walk_iterator iterator; u64 spte = 0ull; + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + return spte; + walk_shadow_page_lockless_begin(vcpu); for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) if (!is_shadow_present_pte(spte)) @@ -4329,6 +4338,9 @@ int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]) u64 spte; int nr_sptes = 0; + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + return nr_sptes; + walk_shadow_page_lockless_begin(vcpu); for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) { sptes[iterator.level-1] = spte; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index da20860b457..7e6090e1323 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -423,6 +423,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, if (FNAME(gpte_changed)(vcpu, gw, top_level)) goto out_gpte_changed; + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + goto out_gpte_changed; + for (shadow_walk_init(&it, vcpu, addr); shadow_walk_okay(&it) && it.level > gw->level; shadow_walk_next(&it)) { @@ -671,6 +674,11 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) */ mmu_topup_memory_caches(vcpu); + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) { + WARN_ON(1); + return; + } + spin_lock(&vcpu->kvm->mmu_lock); for_each_shadow_entry(vcpu, gva, iterator) { level = iterator.level; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a14a6eaf871..765210d4d92 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2985,10 +2985,8 @@ static int cr8_write_interception(struct vcpu_svm *svm) u8 cr8_prev = kvm_get_cr8(&svm->vcpu); /* instruction emulation calls kvm_set_cr8() */ r = cr_interception(svm); - if (irqchip_in_kernel(svm->vcpu.kvm)) { - clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); + if (irqchip_in_kernel(svm->vcpu.kvm)) return r; - } if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) return r; kvm_run->exit_reason = KVM_EXIT_SET_TPR; @@ -3550,6 +3548,8 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) return; + clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); + if (irr == -1) return; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 260a9193955..7cdafb6dc70 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3399,15 +3399,22 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, var->limit = vmx_read_guest_seg_limit(vmx, seg); var->selector = vmx_read_guest_seg_selector(vmx, seg); ar = vmx_read_guest_seg_ar(vmx, seg); + var->unusable = (ar >> 16) & 1; var->type = ar & 15; var->s = (ar >> 4) & 1; var->dpl = (ar >> 5) & 3; - var->present = (ar >> 7) & 1; + /* + * Some userspaces do not preserve unusable property. Since usable + * segment has to be present according to VMX spec we can use present + * property to amend userspace bug by making unusable segment always + * nonpresent. vmx_segment_access_rights() already marks nonpresent + * segment as unusable. + */ + var->present = !var->unusable; var->avl = (ar >> 12) & 1; var->l = (ar >> 13) & 1; var->db = (ar >> 14) & 1; var->g = (ar >> 15) & 1; - var->unusable = (ar >> 16) & 1; } static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) @@ -7126,8 +7133,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); free_vpid(vmx); - free_nested(vmx); free_loaded_vmcs(vmx->loaded_vmcs); + free_nested(vmx); kfree(vmx->guest_msrs); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vmx); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e8ba99c3418..1be0a9e75d1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -94,6 +94,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); static bool ignore_msrs = 0; module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); +unsigned int min_timer_period_us = 500; +module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); + bool kvm_has_tsc_control; EXPORT_SYMBOL_GPL(kvm_has_tsc_control); u32 kvm_max_guest_tsc_khz; @@ -3138,8 +3141,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&va, argp, sizeof va)) goto out; - r = 0; - kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr); + r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr); break; } case KVM_X86_SETUP_MCE: { @@ -5539,36 +5541,6 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) !kvm_event_needs_reinjection(vcpu); } -static int vapic_enter(struct kvm_vcpu *vcpu) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - struct page *page; - - if (!apic || !apic->vapic_addr) - return 0; - - page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); - if (is_error_page(page)) - return -EFAULT; - - vcpu->arch.apic->vapic_page = page; - return 0; -} - -static void vapic_exit(struct kvm_vcpu *vcpu) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - int idx; - - if (!apic || !apic->vapic_addr) - return; - - idx = srcu_read_lock(&vcpu->kvm->srcu); - kvm_release_page_dirty(apic->vapic_page); - mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); - srcu_read_unlock(&vcpu->kvm->srcu, idx); -} - static void update_cr8_intercept(struct kvm_vcpu *vcpu) { int max_irr, tpr; @@ -5889,11 +5861,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) struct kvm *kvm = vcpu->kvm; vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); - r = vapic_enter(vcpu); - if (r) { - srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); - return r; - } r = 1; while (r > 0) { @@ -5951,8 +5918,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); - vapic_exit(vcpu); - return r; } @@ -6017,7 +5982,7 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) frag->len -= len; } - if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) { + if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) { vcpu->mmio_needed = 0; if (vcpu->mmio_is_write) return 1; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index e224f7a671b..3186542f2fa 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -124,5 +124,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, extern u64 host_xcr0; +extern unsigned int min_timer_period_us; + extern struct static_key kvm_no_apic_vcpu; #endif |