aboutsummaryrefslogtreecommitdiff
path: root/virt
diff options
context:
space:
mode:
authorChristoffer Dall <christoffer.dall@linaro.org>2014-10-08 11:20:51 +0200
committerChristoffer Dall <christoffer.dall@linaro.org>2014-10-08 11:20:51 +0200
commit05fc265939403d2a0789cd071de9f26152842309 (patch)
tree3eff409451740ce6d7a5ab3d250d9928cb0bcc58 /virt
parent3143b4aaa5182e7a7d0bfb51d03bd6216d6bfd53 (diff)
parentd731c6c9b17b8a6b471981b3e310235049190034 (diff)
Merge branch 'lsk/v3.14/topic/kvm' into lsk/lsk-with-kvm-v3.14
Conflicts: arch/arm64/include/asm/debug-monitors.h
Diffstat (limited to 'virt')
-rw-r--r--virt/kvm/Kconfig7
-rw-r--r--virt/kvm/arm/vgic-v2.c265
-rw-r--r--virt/kvm/arm/vgic-v3.c247
-rw-r--r--virt/kvm/arm/vgic.c1045
-rw-r--r--virt/kvm/async_pf.c31
-rw-r--r--virt/kvm/eventfd.c154
-rw-r--r--virt/kvm/irq_comm.c41
-rw-r--r--virt/kvm/irqchip.c107
-rw-r--r--virt/kvm/kvm_main.c203
-rw-r--r--virt/kvm/vfio.c22
10 files changed, 1600 insertions, 522 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index fbe1a48bd629..fc0c5e603eb4 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -6,6 +6,9 @@ config HAVE_KVM
config HAVE_KVM_IRQCHIP
bool
+config HAVE_KVM_IRQFD
+ bool
+
config HAVE_KVM_IRQ_ROUTING
bool
@@ -22,6 +25,10 @@ config KVM_MMIO
config KVM_ASYNC_PF
bool
+# Toggle to switch between direct notification and batch job
+config KVM_ASYNC_PF_SYNC
+ bool
+
config HAVE_KVM_MSI
bool
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
new file mode 100644
index 000000000000..01124ef3690a
--- /dev/null
+++ b/virt/kvm/arm/vgic-v2.c
@@ -0,0 +1,265 @@
+/*
+ * Copyright (C) 2012,2013 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/cpu.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr)
+{
+ struct vgic_lr lr_desc;
+ u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr];
+
+ lr_desc.irq = val & GICH_LR_VIRTUALID;
+ if (lr_desc.irq <= 15)
+ lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
+ else
+ lr_desc.source = 0;
+ lr_desc.state = 0;
+
+ if (val & GICH_LR_PENDING_BIT)
+ lr_desc.state |= LR_STATE_PENDING;
+ if (val & GICH_LR_ACTIVE_BIT)
+ lr_desc.state |= LR_STATE_ACTIVE;
+ if (val & GICH_LR_EOI)
+ lr_desc.state |= LR_EOI_INT;
+
+ return lr_desc;
+}
+
+static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
+ struct vgic_lr lr_desc)
+{
+ u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq;
+
+ if (lr_desc.state & LR_STATE_PENDING)
+ lr_val |= GICH_LR_PENDING_BIT;
+ if (lr_desc.state & LR_STATE_ACTIVE)
+ lr_val |= GICH_LR_ACTIVE_BIT;
+ if (lr_desc.state & LR_EOI_INT)
+ lr_val |= GICH_LR_EOI;
+
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
+}
+
+static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
+ struct vgic_lr lr_desc)
+{
+ if (!(lr_desc.state & LR_STATE_MASK))
+ set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
+}
+
+static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+#if BITS_PER_LONG == 64
+ val = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1];
+ val <<= 32;
+ val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0];
+#else
+ val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr;
+#endif
+ return val;
+}
+
+static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+#if BITS_PER_LONG == 64
+ val = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1];
+ val <<= 32;
+ val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0];
+#else
+ val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
+#endif
+ return val;
+}
+
+static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
+{
+ u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr;
+ u32 ret = 0;
+
+ if (misr & GICH_MISR_EOI)
+ ret |= INT_STATUS_EOI;
+ if (misr & GICH_MISR_U)
+ ret |= INT_STATUS_UNDERFLOW;
+
+ return ret;
+}
+
+static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE;
+}
+
+static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
+}
+
+static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+ u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr;
+
+ vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT;
+ vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT;
+ vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT;
+ vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT;
+}
+
+static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+ u32 vmcr;
+
+ vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK;
+ vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK;
+ vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK;
+ vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK;
+
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
+}
+
+static void vgic_v2_enable(struct kvm_vcpu *vcpu)
+{
+ /*
+ * By forcing VMCR to zero, the GIC will restore the binary
+ * points to their reset values. Anything else resets to zero
+ * anyway.
+ */
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
+
+ /* Get the show on the road... */
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
+}
+
+static const struct vgic_ops vgic_v2_ops = {
+ .get_lr = vgic_v2_get_lr,
+ .set_lr = vgic_v2_set_lr,
+ .sync_lr_elrsr = vgic_v2_sync_lr_elrsr,
+ .get_elrsr = vgic_v2_get_elrsr,
+ .get_eisr = vgic_v2_get_eisr,
+ .get_interrupt_status = vgic_v2_get_interrupt_status,
+ .enable_underflow = vgic_v2_enable_underflow,
+ .disable_underflow = vgic_v2_disable_underflow,
+ .get_vmcr = vgic_v2_get_vmcr,
+ .set_vmcr = vgic_v2_set_vmcr,
+ .enable = vgic_v2_enable,
+};
+
+static struct vgic_params vgic_v2_params;
+
+/**
+ * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT
+ * @node: pointer to the DT node
+ * @ops: address of a pointer to the GICv2 operations
+ * @params: address of a pointer to HW-specific parameters
+ *
+ * Returns 0 if a GICv2 has been found, with the low level operations
+ * in *ops and the HW parameters in *params. Returns an error code
+ * otherwise.
+ */
+int vgic_v2_probe(struct device_node *vgic_node,
+ const struct vgic_ops **ops,
+ const struct vgic_params **params)
+{
+ int ret;
+ struct resource vctrl_res;
+ struct resource vcpu_res;
+ struct vgic_params *vgic = &vgic_v2_params;
+
+ vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
+ if (!vgic->maint_irq) {
+ kvm_err("error getting vgic maintenance irq from DT\n");
+ ret = -ENXIO;
+ goto out;
+ }
+
+ ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
+ if (ret) {
+ kvm_err("Cannot obtain GICH resource\n");
+ goto out;
+ }
+
+ vgic->vctrl_base = of_iomap(vgic_node, 2);
+ if (!vgic->vctrl_base) {
+ kvm_err("Cannot ioremap GICH\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ vgic->nr_lr = readl_relaxed(vgic->vctrl_base + GICH_VTR);
+ vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1;
+
+ ret = create_hyp_io_mappings(vgic->vctrl_base,
+ vgic->vctrl_base + resource_size(&vctrl_res),
+ vctrl_res.start);
+ if (ret) {
+ kvm_err("Cannot map VCTRL into hyp\n");
+ goto out_unmap;
+ }
+
+ if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
+ kvm_err("Cannot obtain GICV resource\n");
+ ret = -ENXIO;
+ goto out_unmap;
+ }
+
+ if (!PAGE_ALIGNED(vcpu_res.start)) {
+ kvm_err("GICV physical address 0x%llx not page aligned\n",
+ (unsigned long long)vcpu_res.start);
+ ret = -ENXIO;
+ goto out_unmap;
+ }
+
+ if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
+ kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
+ (unsigned long long)resource_size(&vcpu_res),
+ PAGE_SIZE);
+ ret = -ENXIO;
+ goto out_unmap;
+ }
+
+ vgic->vcpu_base = vcpu_res.start;
+
+ kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
+ vctrl_res.start, vgic->maint_irq);
+
+ vgic->type = VGIC_V2;
+ *ops = &vgic_v2_ops;
+ *params = vgic;
+ goto out;
+
+out_unmap:
+ iounmap(vgic->vctrl_base);
+out:
+ of_node_put(vgic_node);
+ return ret;
+}
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
new file mode 100644
index 000000000000..1c2c8eef0599
--- /dev/null
+++ b/virt/kvm/arm/vgic-v3.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (C) 2013 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/cpu.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include <linux/irqchip/arm-gic-v3.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+/* These are for GICv2 emulation only */
+#define GICH_LR_VIRTUALID (0x3ffUL << 0)
+#define GICH_LR_PHYSID_CPUID_SHIFT (10)
+#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT)
+
+/*
+ * LRs are stored in reverse order in memory. make sure we index them
+ * correctly.
+ */
+#define LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr)
+
+static u32 ich_vtr_el2;
+
+static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
+{
+ struct vgic_lr lr_desc;
+ u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)];
+
+ lr_desc.irq = val & GICH_LR_VIRTUALID;
+ if (lr_desc.irq <= 15)
+ lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
+ else
+ lr_desc.source = 0;
+ lr_desc.state = 0;
+
+ if (val & ICH_LR_PENDING_BIT)
+ lr_desc.state |= LR_STATE_PENDING;
+ if (val & ICH_LR_ACTIVE_BIT)
+ lr_desc.state |= LR_STATE_ACTIVE;
+ if (val & ICH_LR_EOI)
+ lr_desc.state |= LR_EOI_INT;
+
+ return lr_desc;
+}
+
+static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
+ struct vgic_lr lr_desc)
+{
+ u64 lr_val = (((u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) |
+ lr_desc.irq);
+
+ if (lr_desc.state & LR_STATE_PENDING)
+ lr_val |= ICH_LR_PENDING_BIT;
+ if (lr_desc.state & LR_STATE_ACTIVE)
+ lr_val |= ICH_LR_ACTIVE_BIT;
+ if (lr_desc.state & LR_EOI_INT)
+ lr_val |= ICH_LR_EOI;
+
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
+}
+
+static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
+ struct vgic_lr lr_desc)
+{
+ if (!(lr_desc.state & LR_STATE_MASK))
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
+}
+
+static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr;
+}
+
+static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr;
+}
+
+static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu)
+{
+ u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr;
+ u32 ret = 0;
+
+ if (misr & ICH_MISR_EOI)
+ ret |= INT_STATUS_EOI;
+ if (misr & ICH_MISR_U)
+ ret |= INT_STATUS_UNDERFLOW;
+
+ return ret;
+}
+
+static void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+ u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr;
+
+ vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT;
+ vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT;
+ vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
+ vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
+}
+
+static void vgic_v3_enable_underflow(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr |= ICH_HCR_UIE;
+}
+
+static void vgic_v3_disable_underflow(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr &= ~ICH_HCR_UIE;
+}
+
+static void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+ u32 vmcr;
+
+ vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK;
+ vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK;
+ vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK;
+ vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK;
+
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr;
+}
+
+static void vgic_v3_enable(struct kvm_vcpu *vcpu)
+{
+ /*
+ * By forcing VMCR to zero, the GIC will restore the binary
+ * points to their reset values. Anything else resets to zero
+ * anyway.
+ */
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = 0;
+
+ /* Get the show on the road... */
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr = ICH_HCR_EN;
+}
+
+static const struct vgic_ops vgic_v3_ops = {
+ .get_lr = vgic_v3_get_lr,
+ .set_lr = vgic_v3_set_lr,
+ .sync_lr_elrsr = vgic_v3_sync_lr_elrsr,
+ .get_elrsr = vgic_v3_get_elrsr,
+ .get_eisr = vgic_v3_get_eisr,
+ .get_interrupt_status = vgic_v3_get_interrupt_status,
+ .enable_underflow = vgic_v3_enable_underflow,
+ .disable_underflow = vgic_v3_disable_underflow,
+ .get_vmcr = vgic_v3_get_vmcr,
+ .set_vmcr = vgic_v3_set_vmcr,
+ .enable = vgic_v3_enable,
+};
+
+static struct vgic_params vgic_v3_params;
+
+/**
+ * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
+ * @node: pointer to the DT node
+ * @ops: address of a pointer to the GICv3 operations
+ * @params: address of a pointer to HW-specific parameters
+ *
+ * Returns 0 if a GICv3 has been found, with the low level operations
+ * in *ops and the HW parameters in *params. Returns an error code
+ * otherwise.
+ */
+int vgic_v3_probe(struct device_node *vgic_node,
+ const struct vgic_ops **ops,
+ const struct vgic_params **params)
+{
+ int ret = 0;
+ u32 gicv_idx;
+ struct resource vcpu_res;
+ struct vgic_params *vgic = &vgic_v3_params;
+
+ vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
+ if (!vgic->maint_irq) {
+ kvm_err("error getting vgic maintenance irq from DT\n");
+ ret = -ENXIO;
+ goto out;
+ }
+
+ ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
+
+ /*
+ * The ListRegs field is 5 bits, but there is a architectural
+ * maximum of 16 list registers. Just ignore bit 4...
+ */
+ vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1;
+
+ if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx))
+ gicv_idx = 1;
+
+ gicv_idx += 3; /* Also skip GICD, GICC, GICH */
+ if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) {
+ kvm_err("Cannot obtain GICV region\n");
+ ret = -ENXIO;
+ goto out;
+ }
+
+ if (!PAGE_ALIGNED(vcpu_res.start)) {
+ kvm_err("GICV physical address 0x%llx not page aligned\n",
+ (unsigned long long)vcpu_res.start);
+ ret = -ENXIO;
+ goto out;
+ }
+
+ if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
+ kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
+ (unsigned long long)resource_size(&vcpu_res),
+ PAGE_SIZE);
+ ret = -ENXIO;
+ goto out;
+ }
+
+ vgic->vcpu_base = vcpu_res.start;
+ vgic->vctrl_base = NULL;
+ vgic->type = VGIC_V3;
+
+ kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
+ vcpu_res.start, vgic->maint_irq);
+
+ *ops = &vgic_v3_ops;
+ *params = vgic;
+
+out:
+ of_node_put(vgic_node);
+ return ret;
+}
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 26954a7d9b03..8e1dc03342c3 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -36,21 +36,22 @@
* How the whole thing works (courtesy of Christoffer Dall):
*
* - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
- * something is pending
- * - VGIC pending interrupts are stored on the vgic.irq_state vgic
- * bitmap (this bitmap is updated by both user land ioctls and guest
- * mmio ops, and other in-kernel peripherals such as the
- * arch. timers) and indicate the 'wire' state.
+ * something is pending on the CPU interface.
+ * - Interrupts that are pending on the distributor are stored on the
+ * vgic.irq_pending vgic bitmap (this bitmap is updated by both user land
+ * ioctls and guest mmio ops, and other in-kernel peripherals such as the
+ * arch. timers).
* - Every time the bitmap changes, the irq_pending_on_cpu oracle is
* recalculated
* - To calculate the oracle, we need info for each cpu from
* compute_pending_for_cpu, which considers:
- * - PPI: dist->irq_state & dist->irq_enable
- * - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target
- * - irq_spi_target is a 'formatted' version of the GICD_ICFGR
+ * - PPI: dist->irq_pending & dist->irq_enable
+ * - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target
+ * - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn
* registers, stored on each vcpu. We only keep one bit of
* information per interrupt, making sure that only one vcpu can
* accept the interrupt.
+ * - If any of the above state changes, we must recalculate the oracle.
* - The same is true when injecting an interrupt, except that we only
* consider a single interrupt at a time. The irq_spi_cpu array
* contains the target CPU for each SPI.
@@ -60,13 +61,18 @@
* the 'line' again. This is achieved as such:
*
* - When a level interrupt is moved onto a vcpu, the corresponding
- * bit in irq_active is set. As long as this bit is set, the line
+ * bit in irq_queued is set. As long as this bit is set, the line
* will be ignored for further interrupts. The interrupt is injected
* into the vcpu with the GICH_LR_EOI bit set (generate a
* maintenance interrupt on EOI).
* - When the interrupt is EOIed, the maintenance interrupt fires,
- * and clears the corresponding bit in irq_active. This allow the
+ * and clears the corresponding bit in irq_queued. This allows the
* interrupt line to be sampled again.
+ * - Note that level-triggered interrupts can also be set to pending from
+ * writes to GICD_ISPENDRn and lowering the external input line does not
+ * cause the interrupt to become inactive in such a situation.
+ * Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become
+ * inactive as long as the external input line is held high.
*/
#define VGIC_ADDR_UNDEF (-1)
@@ -76,14 +82,6 @@
#define IMPLEMENTER_ARM 0x43b
#define GICC_ARCH_VERSION_V2 0x2
-/* Physical address of vgic virtual cpu interface */
-static phys_addr_t vgic_vcpu_base;
-
-/* Virtual control interface base address */
-static void __iomem *vgic_vctrl_base;
-
-static struct device_node *vgic_node;
-
#define ACCESS_READ_VALUE (1 << 0)
#define ACCESS_READ_RAZ (0 << 0)
#define ACCESS_READ_MASK(x) ((x) & (1 << 0))
@@ -94,30 +92,76 @@ static struct device_node *vgic_node;
#define ACCESS_WRITE_MASK(x) ((x) & (3 << 1))
static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
+static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
static void vgic_update_state(struct kvm *kvm);
static void vgic_kick_vcpus(struct kvm *kvm);
+static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi);
static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
-static u32 vgic_nr_lr;
+static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
+static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
+static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+
+static const struct vgic_ops *vgic_ops;
+static const struct vgic_params *vgic;
+
+/*
+ * struct vgic_bitmap contains a bitmap made of unsigned longs, but
+ * extracts u32s out of them.
+ *
+ * This does not work on 64-bit BE systems, because the bitmap access
+ * will store two consecutive 32-bit words with the higher-addressed
+ * register's bits at the lower index and the lower-addressed register's
+ * bits at the higher index.
+ *
+ * Therefore, swizzle the register index when accessing the 32-bit word
+ * registers to access the right register's value.
+ */
+#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 64
+#define REG_OFFSET_SWIZZLE 1
+#else
+#define REG_OFFSET_SWIZZLE 0
+#endif
+
+static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs)
+{
+ int nr_longs;
-static unsigned int vgic_maint_irq;
+ nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);
+
+ b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL);
+ if (!b->private)
+ return -ENOMEM;
+
+ b->shared = b->private + nr_cpus;
+
+ return 0;
+}
+
+static void vgic_free_bitmap(struct vgic_bitmap *b)
+{
+ kfree(b->private);
+ b->private = NULL;
+ b->shared = NULL;
+}
static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
int cpuid, u32 offset)
{
offset >>= 2;
if (!offset)
- return x->percpu[cpuid].reg;
+ return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE;
else
- return x->shared.reg + offset - 1;
+ return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
}
static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
int cpuid, int irq)
{
if (irq < VGIC_NR_PRIVATE_IRQS)
- return test_bit(irq, x->percpu[cpuid].reg_ul);
+ return test_bit(irq, x->private + cpuid);
- return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul);
+ return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared);
}
static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
@@ -126,9 +170,9 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
unsigned long *reg;
if (irq < VGIC_NR_PRIVATE_IRQS) {
- reg = x->percpu[cpuid].reg_ul;
+ reg = x->private + cpuid;
} else {
- reg = x->shared.reg_ul;
+ reg = x->shared;
irq -= VGIC_NR_PRIVATE_IRQS;
}
@@ -140,24 +184,49 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
{
- if (unlikely(cpuid >= VGIC_MAX_CPUS))
- return NULL;
- return x->percpu[cpuid].reg_ul;
+ return x->private + cpuid;
}
static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
{
- return x->shared.reg_ul;
+ return x->shared;
+}
+
+static int vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs)
+{
+ int size;
+
+ size = nr_cpus * VGIC_NR_PRIVATE_IRQS;
+ size += nr_irqs - VGIC_NR_PRIVATE_IRQS;
+
+ x->private = kzalloc(size, GFP_KERNEL);
+ if (!x->private)
+ return -ENOMEM;
+
+ x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32);
+ return 0;
+}
+
+static void vgic_free_bytemap(struct vgic_bytemap *b)
+{
+ kfree(b->private);
+ b->private = NULL;
+ b->shared = NULL;
}
static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
{
- offset >>= 2;
- BUG_ON(offset > (VGIC_NR_IRQS / 4));
- if (offset < 8)
- return x->percpu[cpuid] + offset;
- else
- return x->shared + offset - 8;
+ u32 *reg;
+
+ if (offset < VGIC_NR_PRIVATE_IRQS) {
+ reg = x->private;
+ offset += cpuid * VGIC_NR_PRIVATE_IRQS;
+ } else {
+ reg = x->shared;
+ offset -= VGIC_NR_PRIVATE_IRQS;
+ }
+
+ return reg + (offset / sizeof(u32));
}
#define VGIC_CFG_LEVEL 0
@@ -179,46 +248,81 @@ static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq)
return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq);
}
-static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
+static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq);
+}
+
+static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq);
+ vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1);
}
-static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
+static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
+ vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
}
-static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
+static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
+ return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq);
+}
+
+static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1);
+}
+
+static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0);
+}
+
+static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq);
+}
+
+static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
}
static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq);
+ return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq);
}
-static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq)
+static void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1);
+ vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1);
}
-static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq)
+static void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0);
+ vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0);
}
static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
@@ -239,14 +343,19 @@ static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
vcpu->arch.vgic_cpu.pending_shared);
}
+static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
+{
+ return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq);
+}
+
static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
{
- return *((u32 *)mmio->data) & mask;
+ return le32_to_cpu(*((u32 *)mmio->data)) & mask;
}
static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
{
- *((u32 *)mmio->data) = value & mask;
+ *((u32 *)mmio->data) = cpu_to_le32(value) & mask;
}
/**
@@ -330,7 +439,7 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
case 4: /* GICD_TYPER */
reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
- reg |= (VGIC_NR_IRQS >> 5) - 1;
+ reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1;
vgic_reg_access(mmio, &reg, word_offset,
ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
break;
@@ -392,11 +501,33 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
struct kvm_exit_mmio *mmio,
phys_addr_t offset)
{
- u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
- vcpu->vcpu_id, offset);
+ u32 *reg, orig;
+ u32 level_mask;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu->vcpu_id, offset);
+ level_mask = (~(*reg));
+
+ /* Mark both level and edge triggered irqs as pending */
+ reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
+ orig = *reg;
vgic_reg_access(mmio, reg, offset,
ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+
if (mmio->is_write) {
+ /* Set the soft-pending flag only for level-triggered irqs */
+ reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
+ vcpu->vcpu_id, offset);
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+ *reg &= level_mask;
+
+ /* Ignore writes to SGIs */
+ if (offset < 2) {
+ *reg &= ~0xffff;
+ *reg |= orig & 0xffff;
+ }
+
vgic_update_state(vcpu->kvm);
return true;
}
@@ -408,11 +539,34 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
struct kvm_exit_mmio *mmio,
phys_addr_t offset)
{
- u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
- vcpu->vcpu_id, offset);
+ u32 *level_active;
+ u32 *reg, orig;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
+ orig = *reg;
vgic_reg_access(mmio, reg, offset,
ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
if (mmio->is_write) {
+ /* Re-set level triggered level-active interrupts */
+ level_active = vgic_bitmap_get_reg(&dist->irq_level,
+ vcpu->vcpu_id, offset);
+ reg = vgic_bitmap_get_reg(&dist->irq_pending,
+ vcpu->vcpu_id, offset);
+ *reg |= *level_active;
+
+ /* Ignore writes to SGIs */
+ if (offset < 2) {
+ *reg &= ~0xffff;
+ *reg |= orig & 0xffff;
+ }
+
+ /* Clear soft-pending flags */
+ reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
+ vcpu->vcpu_id, offset);
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
+
vgic_update_state(vcpu->kvm);
return true;
}
@@ -548,11 +702,10 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
u32 val;
u32 *reg;
- offset >>= 1;
reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
- vcpu->vcpu_id, offset);
+ vcpu->vcpu_id, offset >> 1);
- if (offset & 2)
+ if (offset & 4)
val = *reg >> 16;
else
val = *reg & 0xffff;
@@ -561,13 +714,13 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
vgic_reg_access(mmio, &val, offset,
ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
if (mmio->is_write) {
- if (offset < 4) {
+ if (offset < 8) {
*reg = ~0U; /* Force PPIs/SGIs to 1 */
return false;
}
val = vgic_cfg_compress(val);
- if (offset & 2) {
+ if (offset & 4) {
*reg &= 0xffff;
*reg |= val << 16;
} else {
@@ -594,18 +747,6 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
return false;
}
-#define LR_CPUID(lr) \
- (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT)
-#define LR_IRQID(lr) \
- ((lr) & GICH_LR_VIRTUALID)
-
-static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu)
-{
- clear_bit(lr_nr, vgic_cpu->lr_used);
- vgic_cpu->vgic_lr[lr_nr] &= ~GICH_LR_STATE;
- vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
-}
-
/**
* vgic_unqueue_irqs - move pending IRQs from LRs to the distributor
* @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
@@ -623,13 +764,10 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
int vcpu_id = vcpu->vcpu_id;
- int i, irq, source_cpu;
- u32 *lr;
+ int i;
for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
- lr = &vgic_cpu->vgic_lr[i];
- irq = LR_IRQID(*lr);
- source_cpu = LR_CPUID(*lr);
+ struct vgic_lr lr = vgic_get_lr(vcpu, i);
/*
* There are three options for the state bits:
@@ -641,7 +779,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
* If the LR holds only an active interrupt (not pending) then
* just leave it alone.
*/
- if ((*lr & GICH_LR_STATE) == GICH_LR_ACTIVE_BIT)
+ if ((lr.state & LR_STATE_MASK) == LR_STATE_ACTIVE)
continue;
/*
@@ -650,18 +788,21 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
* is fine, then we are only setting a few bits that were
* already set.
*/
- vgic_dist_irq_set(vcpu, irq);
- if (irq < VGIC_NR_SGIS)
- dist->irq_sgi_sources[vcpu_id][irq] |= 1 << source_cpu;
- *lr &= ~GICH_LR_PENDING_BIT;
+ vgic_dist_irq_set_pending(vcpu, lr.irq);
+ if (lr.irq < VGIC_NR_SGIS)
+ *vgic_get_sgi_sources(dist, vcpu_id, lr.irq) |= 1 << lr.source;
+ lr.state &= ~LR_STATE_PENDING;
+ vgic_set_lr(vcpu, i, lr);
/*
* If there's no state left on the LR (it could still be
* active), then the LR does not hold any useful info and can
* be marked as free for other use.
*/
- if (!(*lr & GICH_LR_STATE))
- vgic_retire_lr(i, irq, vgic_cpu);
+ if (!(lr.state & LR_STATE_MASK)) {
+ vgic_retire_lr(i, lr.irq, vcpu);
+ vgic_irq_clear_queued(vcpu, lr.irq);
+ }
/* Finally update the VGIC state. */
vgic_update_state(vcpu->kvm);
@@ -675,7 +816,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
int sgi;
- int min_sgi = (offset & ~0x3) * 4;
+ int min_sgi = (offset & ~0x3);
int max_sgi = min_sgi + 3;
int vcpu_id = vcpu->vcpu_id;
u32 reg = 0;
@@ -683,7 +824,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
/* Copy source SGIs from distributor side */
for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
int shift = 8 * (sgi - min_sgi);
- reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift;
+ reg |= ((u32)*vgic_get_sgi_sources(dist, vcpu_id, sgi)) << shift;
}
mmio_data_write(mmio, ~0, reg);
@@ -696,7 +837,7 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
int sgi;
- int min_sgi = (offset & ~0x3) * 4;
+ int min_sgi = (offset & ~0x3);
int max_sgi = min_sgi + 3;
int vcpu_id = vcpu->vcpu_id;
u32 reg;
@@ -707,14 +848,15 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
/* Clear pending SGIs on the distributor */
for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
u8 mask = reg >> (8 * (sgi - min_sgi));
+ u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi);
if (set) {
- if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask)
+ if ((*src & mask) != mask)
updated = true;
- dist->irq_sgi_sources[vcpu_id][sgi] |= mask;
+ *src |= mask;
} else {
- if (dist->irq_sgi_sources[vcpu_id][sgi] & mask)
+ if (*src & mask)
updated = true;
- dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask;
+ *src &= ~mask;
}
}
@@ -753,6 +895,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
struct mmio_range {
phys_addr_t base;
unsigned long len;
+ int bits_per_irq;
bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
phys_addr_t offset);
};
@@ -761,56 +904,67 @@ static const struct mmio_range vgic_dist_ranges[] = {
{
.base = GIC_DIST_CTRL,
.len = 12,
+ .bits_per_irq = 0,
.handle_mmio = handle_mmio_misc,
},
{
.base = GIC_DIST_IGROUP,
- .len = VGIC_NR_IRQS / 8,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
.handle_mmio = handle_mmio_raz_wi,
},
{
.base = GIC_DIST_ENABLE_SET,
- .len = VGIC_NR_IRQS / 8,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
.handle_mmio = handle_mmio_set_enable_reg,
},
{
.base = GIC_DIST_ENABLE_CLEAR,
- .len = VGIC_NR_IRQS / 8,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
.handle_mmio = handle_mmio_clear_enable_reg,
},
{
.base = GIC_DIST_PENDING_SET,
- .len = VGIC_NR_IRQS / 8,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
.handle_mmio = handle_mmio_set_pending_reg,
},
{
.base = GIC_DIST_PENDING_CLEAR,
- .len = VGIC_NR_IRQS / 8,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
.handle_mmio = handle_mmio_clear_pending_reg,
},
{
.base = GIC_DIST_ACTIVE_SET,
- .len = VGIC_NR_IRQS / 8,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
.handle_mmio = handle_mmio_raz_wi,
},
{
.base = GIC_DIST_ACTIVE_CLEAR,
- .len = VGIC_NR_IRQS / 8,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
.handle_mmio = handle_mmio_raz_wi,
},
{
.base = GIC_DIST_PRI,
- .len = VGIC_NR_IRQS,
+ .len = VGIC_MAX_IRQS,
+ .bits_per_irq = 8,
.handle_mmio = handle_mmio_priority_reg,
},
{
.base = GIC_DIST_TARGET,
- .len = VGIC_NR_IRQS,
+ .len = VGIC_MAX_IRQS,
+ .bits_per_irq = 8,
.handle_mmio = handle_mmio_target_reg,
},
{
.base = GIC_DIST_CONFIG,
- .len = VGIC_NR_IRQS / 4,
+ .len = VGIC_MAX_IRQS / 4,
+ .bits_per_irq = 2,
.handle_mmio = handle_mmio_cfg_reg,
},
{
@@ -848,6 +1002,22 @@ struct mmio_range *find_matching_range(const struct mmio_range *ranges,
return NULL;
}
+static bool vgic_validate_access(const struct vgic_dist *dist,
+ const struct mmio_range *range,
+ unsigned long offset)
+{
+ int irq;
+
+ if (!range->bits_per_irq)
+ return true; /* Not an irq-based access */
+
+ irq = offset * 8 / range->bits_per_irq;
+ if (irq >= dist->nr_irqs)
+ return false;
+
+ return true;
+}
+
/**
* vgic_handle_mmio - handle an in-kernel MMIO access
* @vcpu: pointer to the vcpu performing the access
@@ -887,7 +1057,13 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
spin_lock(&vcpu->kvm->arch.vgic.lock);
offset = mmio->phys_addr - range->base - base;
- updated_state = range->handle_mmio(vcpu, mmio, offset);
+ if (vgic_validate_access(dist, range, offset)) {
+ updated_state = range->handle_mmio(vcpu, mmio, offset);
+ } else {
+ vgic_reg_access(mmio, NULL, offset,
+ ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+ updated_state = false;
+ }
spin_unlock(&vcpu->kvm->arch.vgic.lock);
kvm_prepare_mmio(run, mmio);
kvm_handle_mmio_return(vcpu, run);
@@ -898,6 +1074,11 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
return true;
}
+static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi)
+{
+ return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi;
+}
+
static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
{
struct kvm *kvm = vcpu->kvm;
@@ -930,8 +1111,8 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
kvm_for_each_vcpu(c, vcpu, kvm) {
if (target_cpus & 1) {
/* Flag the SGI as pending */
- vgic_dist_irq_set(vcpu, sgi);
- dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id;
+ vgic_dist_irq_set_pending(vcpu, sgi);
+ *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id;
kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
}
@@ -939,32 +1120,38 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
}
}
+static int vgic_nr_shared_irqs(struct vgic_dist *dist)
+{
+ return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
+}
+
static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
unsigned long pending_private, pending_shared;
+ int nr_shared = vgic_nr_shared_irqs(dist);
int vcpu_id;
vcpu_id = vcpu->vcpu_id;
pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
pend_shared = vcpu->arch.vgic_cpu.pending_shared;
- pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id);
+ pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id);
enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
- pending = vgic_bitmap_get_shared_map(&dist->irq_state);
+ pending = vgic_bitmap_get_shared_map(&dist->irq_pending);
enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
- bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS);
+ bitmap_and(pend_shared, pending, enabled, nr_shared);
bitmap_and(pend_shared, pend_shared,
vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
- VGIC_NR_SHARED_IRQS);
+ nr_shared);
pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
- pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS);
+ pending_shared = find_first_bit(pend_shared, nr_shared);
return (pending_private < VGIC_NR_PRIVATE_IRQS ||
- pending_shared < VGIC_NR_SHARED_IRQS);
+ pending_shared < vgic_nr_shared_irqs(dist));
}
/*
@@ -978,20 +1165,85 @@ static void vgic_update_state(struct kvm *kvm)
int c;
if (!dist->enabled) {
- set_bit(0, &dist->irq_pending_on_cpu);
+ set_bit(0, dist->irq_pending_on_cpu);
return;
}
kvm_for_each_vcpu(c, vcpu, kvm) {
if (compute_pending_for_cpu(vcpu)) {
pr_debug("CPU%d has pending interrupts\n", c);
- set_bit(c, &dist->irq_pending_on_cpu);
+ set_bit(c, dist->irq_pending_on_cpu);
}
}
}
-#define MK_LR_PEND(src, irq) \
- (GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq))
+static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
+{
+ return vgic_ops->get_lr(vcpu, lr);
+}
+
+static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
+ struct vgic_lr vlr)
+{
+ vgic_ops->set_lr(vcpu, lr, vlr);
+}
+
+static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
+ struct vgic_lr vlr)
+{
+ vgic_ops->sync_lr_elrsr(vcpu, lr, vlr);
+}
+
+static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
+{
+ return vgic_ops->get_elrsr(vcpu);
+}
+
+static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
+{
+ return vgic_ops->get_eisr(vcpu);
+}
+
+static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
+{
+ return vgic_ops->get_interrupt_status(vcpu);
+}
+
+static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu)
+{
+ vgic_ops->enable_underflow(vcpu);
+}
+
+static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu)
+{
+ vgic_ops->disable_underflow(vcpu);
+}
+
+static inline void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+{
+ vgic_ops->get_vmcr(vcpu, vmcr);
+}
+
+static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+{
+ vgic_ops->set_vmcr(vcpu, vmcr);
+}
+
+static inline void vgic_enable(struct kvm_vcpu *vcpu)
+{
+ vgic_ops->enable(vcpu);
+}
+
+static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
+
+ vlr.state = 0;
+ vgic_set_lr(vcpu, lr_nr, vlr);
+ clear_bit(lr_nr, vgic_cpu->lr_used);
+ vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
+}
/*
* An interrupt may have been disabled after being made pending on the
@@ -1007,13 +1259,13 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
int lr;
- for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
- int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
+ for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) {
+ struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
- if (!vgic_irq_is_enabled(vcpu, irq)) {
- vgic_retire_lr(lr, irq, vgic_cpu);
- if (vgic_irq_is_active(vcpu, irq))
- vgic_irq_clear_active(vcpu, irq);
+ if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
+ vgic_retire_lr(lr, vlr.irq, vcpu);
+ if (vgic_irq_is_queued(vcpu, vlr.irq))
+ vgic_irq_clear_queued(vcpu, vlr.irq);
}
}
}
@@ -1025,40 +1277,48 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ struct vgic_lr vlr;
int lr;
/* Sanitize the input... */
BUG_ON(sgi_source_id & ~7);
BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS);
- BUG_ON(irq >= VGIC_NR_IRQS);
+ BUG_ON(irq >= dist->nr_irqs);
kvm_debug("Queue IRQ%d\n", irq);
lr = vgic_cpu->vgic_irq_lr_map[irq];
/* Do we have an active interrupt for the same CPUID? */
- if (lr != LR_EMPTY &&
- (LR_CPUID(vgic_cpu->vgic_lr[lr]) == sgi_source_id)) {
- kvm_debug("LR%d piggyback for IRQ%d %x\n",
- lr, irq, vgic_cpu->vgic_lr[lr]);
- BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
- vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT;
- return true;
+ if (lr != LR_EMPTY) {
+ vlr = vgic_get_lr(vcpu, lr);
+ if (vlr.source == sgi_source_id) {
+ kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
+ BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
+ vlr.state |= LR_STATE_PENDING;
+ vgic_set_lr(vcpu, lr, vlr);
+ return true;
+ }
}
/* Try to use another LR for this interrupt */
lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
- vgic_cpu->nr_lr);
- if (lr >= vgic_cpu->nr_lr)
+ vgic->nr_lr);
+ if (lr >= vgic->nr_lr)
return false;
kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
- vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq);
vgic_cpu->vgic_irq_lr_map[irq] = lr;
set_bit(lr, vgic_cpu->lr_used);
+ vlr.irq = irq;
+ vlr.source = sgi_source_id;
+ vlr.state = LR_STATE_PENDING;
if (!vgic_irq_is_edge(vcpu, irq))
- vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI;
+ vlr.state |= LR_EOI_INT;
+
+ vgic_set_lr(vcpu, lr, vlr);
return true;
}
@@ -1070,14 +1330,14 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
int vcpu_id = vcpu->vcpu_id;
int c;
- sources = dist->irq_sgi_sources[vcpu_id][irq];
+ sources = *vgic_get_sgi_sources(dist, vcpu_id, irq);
- for_each_set_bit(c, &sources, VGIC_MAX_CPUS) {
+ for_each_set_bit(c, &sources, dist->nr_cpus) {
if (vgic_queue_irq(vcpu, c, irq))
clear_bit(c, &sources);
}
- dist->irq_sgi_sources[vcpu_id][irq] = sources;
+ *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources;
/*
* If the sources bitmap has been cleared it means that we
@@ -1086,7 +1346,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
* our emulated gic and can get rid of them.
*/
if (!sources) {
- vgic_dist_irq_clear(vcpu, irq);
+ vgic_dist_irq_clear_pending(vcpu, irq);
vgic_cpu_irq_clear(vcpu, irq);
return true;
}
@@ -1096,15 +1356,15 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
{
- if (vgic_irq_is_active(vcpu, irq))
+ if (!vgic_can_sample_irq(vcpu, irq))
return true; /* level interrupt, already queued */
if (vgic_queue_irq(vcpu, 0, irq)) {
if (vgic_irq_is_edge(vcpu, irq)) {
- vgic_dist_irq_clear(vcpu, irq);
+ vgic_dist_irq_clear_pending(vcpu, irq);
vgic_cpu_irq_clear(vcpu, irq);
} else {
- vgic_irq_set_active(vcpu, irq);
+ vgic_irq_set_queued(vcpu, irq);
}
return true;
@@ -1149,66 +1409,83 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
}
/* SPIs */
- for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) {
+ for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) {
if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
overflow = 1;
}
epilog:
if (overflow) {
- vgic_cpu->vgic_hcr |= GICH_HCR_UIE;
+ vgic_enable_underflow(vcpu);
} else {
- vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE;
+ vgic_disable_underflow(vcpu);
/*
* We're about to run this VCPU, and we've consumed
* everything the distributor had in store for
* us. Claim we don't have anything pending. We'll
* adjust that if needed while exiting.
*/
- clear_bit(vcpu_id, &dist->irq_pending_on_cpu);
+ clear_bit(vcpu_id, dist->irq_pending_on_cpu);
}
}
static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ u32 status = vgic_get_interrupt_status(vcpu);
bool level_pending = false;
- kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr);
+ kvm_debug("STATUS = %08x\n", status);
- if (vgic_cpu->vgic_misr & GICH_MISR_EOI) {
+ if (status & INT_STATUS_EOI) {
/*
* Some level interrupts have been EOIed. Clear their
* active bit.
*/
- int lr, irq;
+ u64 eisr = vgic_get_eisr(vcpu);
+ unsigned long *eisr_ptr = (unsigned long *)&eisr;
+ int lr;
- for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr,
- vgic_cpu->nr_lr) {
- irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
+ for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
+ struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
+ WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
- vgic_irq_clear_active(vcpu, irq);
- vgic_cpu->vgic_lr[lr] &= ~GICH_LR_EOI;
+ vgic_irq_clear_queued(vcpu, vlr.irq);
+ WARN_ON(vlr.state & LR_STATE_MASK);
+ vlr.state = 0;
+ vgic_set_lr(vcpu, lr, vlr);
+
+ /*
+ * If the IRQ was EOIed it was also ACKed and we we
+ * therefore assume we can clear the soft pending
+ * state (should it had been set) for this interrupt.
+ *
+ * Note: if the IRQ soft pending state was set after
+ * the IRQ was acked, it actually shouldn't be
+ * cleared, but we have no way of knowing that unless
+ * we start trapping ACKs when the soft-pending state
+ * is set.
+ */
+ vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
/* Any additional pending interrupt? */
- if (vgic_dist_irq_is_pending(vcpu, irq)) {
- vgic_cpu_irq_set(vcpu, irq);
+ if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
+ vgic_cpu_irq_set(vcpu, vlr.irq);
level_pending = true;
} else {
- vgic_cpu_irq_clear(vcpu, irq);
+ vgic_dist_irq_clear_pending(vcpu, vlr.irq);
+ vgic_cpu_irq_clear(vcpu, vlr.irq);
}
/*
* Despite being EOIed, the LR may not have
* been marked as empty.
*/
- set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr);
- vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT;
+ vgic_sync_lr_elrsr(vcpu, lr, vlr);
}
}
- if (vgic_cpu->vgic_misr & GICH_MISR_U)
- vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE;
+ if (status & INT_STATUS_UNDERFLOW)
+ vgic_disable_underflow(vcpu);
return level_pending;
}
@@ -1221,30 +1498,32 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ u64 elrsr;
+ unsigned long *elrsr_ptr;
int lr, pending;
bool level_pending;
level_pending = vgic_process_maintenance(vcpu);
+ elrsr = vgic_get_elrsr(vcpu);
+ elrsr_ptr = (unsigned long *)&elrsr;
/* Clear mappings for empty LRs */
- for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr,
- vgic_cpu->nr_lr) {
- int irq;
+ for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) {
+ struct vgic_lr vlr;
if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
continue;
- irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
+ vlr = vgic_get_lr(vcpu, lr);
- BUG_ON(irq >= VGIC_NR_IRQS);
- vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
+ BUG_ON(vlr.irq >= dist->nr_irqs);
+ vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
}
/* Check if we still have something up our sleeve... */
- pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr,
- vgic_cpu->nr_lr);
- if (level_pending || pending < vgic_cpu->nr_lr)
- set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
+ pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
+ if (level_pending || pending < vgic->nr_lr)
+ set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
}
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
@@ -1278,7 +1557,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
if (!irqchip_in_kernel(vcpu->kvm))
return 0;
- return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
+ return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
}
static void vgic_kick_vcpus(struct kvm *kvm)
@@ -1298,34 +1577,36 @@ static void vgic_kick_vcpus(struct kvm *kvm)
static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
{
- int is_edge = vgic_irq_is_edge(vcpu, irq);
- int state = vgic_dist_irq_is_pending(vcpu, irq);
+ int edge_triggered = vgic_irq_is_edge(vcpu, irq);
/*
* Only inject an interrupt if:
* - edge triggered and we have a rising edge
* - level triggered and we change level
*/
- if (is_edge)
+ if (edge_triggered) {
+ int state = vgic_dist_irq_is_pending(vcpu, irq);
return level > state;
- else
+ } else {
+ int state = vgic_dist_irq_get_level(vcpu, irq);
return level != state;
+ }
}
-static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
+static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
unsigned int irq_num, bool level)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
- int is_edge, is_level;
+ int edge_triggered, level_triggered;
int enabled;
bool ret = true;
spin_lock(&dist->lock);
vcpu = kvm_get_vcpu(kvm, cpuid);
- is_edge = vgic_irq_is_edge(vcpu, irq_num);
- is_level = !is_edge;
+ edge_triggered = vgic_irq_is_edge(vcpu, irq_num);
+ level_triggered = !edge_triggered;
if (!vgic_validate_injection(vcpu, irq_num, level)) {
ret = false;
@@ -1339,10 +1620,19 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
- if (level)
- vgic_dist_irq_set(vcpu, irq_num);
- else
- vgic_dist_irq_clear(vcpu, irq_num);
+ if (level) {
+ if (level_triggered)
+ vgic_dist_irq_set_level(vcpu, irq_num);
+ vgic_dist_irq_set_pending(vcpu, irq_num);
+ } else {
+ if (level_triggered) {
+ vgic_dist_irq_clear_level(vcpu, irq_num);
+ if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
+ vgic_dist_irq_clear_pending(vcpu, irq_num);
+ } else {
+ vgic_dist_irq_clear_pending(vcpu, irq_num);
+ }
+ }
enabled = vgic_irq_is_enabled(vcpu, irq_num);
@@ -1351,7 +1641,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
goto out;
}
- if (is_level && vgic_irq_is_active(vcpu, irq_num)) {
+ if (!vgic_can_sample_irq(vcpu, irq_num)) {
/*
* Level interrupt in progress, will be picked up
* when EOId.
@@ -1362,7 +1652,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
if (level) {
vgic_cpu_irq_set(vcpu, irq_num);
- set_bit(cpuid, &dist->irq_pending_on_cpu);
+ set_bit(cpuid, dist->irq_pending_on_cpu);
}
out:
@@ -1388,7 +1678,8 @@ out:
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
bool level)
{
- if (vgic_update_irq_state(kvm, cpuid, irq_num, level))
+ if (likely(vgic_initialized(kvm)) &&
+ vgic_update_irq_pending(kvm, cpuid, irq_num, level))
vgic_kick_vcpus(kvm);
return 0;
@@ -1405,6 +1696,32 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
return IRQ_HANDLED;
}
+void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+ kfree(vgic_cpu->pending_shared);
+ kfree(vgic_cpu->vgic_irq_lr_map);
+ vgic_cpu->pending_shared = NULL;
+ vgic_cpu->vgic_irq_lr_map = NULL;
+}
+
+static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+ int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
+ vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
+ vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL);
+
+ if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) {
+ kvm_vgic_vcpu_destroy(vcpu);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
/**
* kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state
* @vcpu: pointer to the vcpu struct
@@ -1412,16 +1729,13 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
* Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
* this vcpu and enable the VGIC for this VCPU
*/
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
int i;
- if (vcpu->vcpu_id >= VGIC_MAX_CPUS)
- return -EBUSY;
-
- for (i = 0; i < VGIC_NR_IRQS; i++) {
+ for (i = 0; i < dist->nr_irqs; i++) {
if (i < VGIC_NR_PPIS)
vgic_bitmap_set_irq_val(&dist->irq_enabled,
vcpu->vcpu_id, i, 1);
@@ -1433,119 +1747,119 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
}
/*
- * By forcing VMCR to zero, the GIC will restore the binary
- * points to their reset values. Anything else resets to zero
- * anyway.
+ * Store the number of LRs per vcpu, so we don't have to go
+ * all the way to the distributor structure to find out. Only
+ * assembly code should use this one.
*/
- vgic_cpu->vgic_vmcr = 0;
-
- vgic_cpu->nr_lr = vgic_nr_lr;
- vgic_cpu->vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */
+ vgic_cpu->nr_lr = vgic->nr_lr;
- return 0;
+ vgic_enable(vcpu);
}
-static void vgic_init_maintenance_interrupt(void *info)
+void kvm_vgic_destroy(struct kvm *kvm)
{
- enable_percpu_irq(vgic_maint_irq, 0);
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_vgic_vcpu_destroy(vcpu);
+
+ vgic_free_bitmap(&dist->irq_enabled);
+ vgic_free_bitmap(&dist->irq_level);
+ vgic_free_bitmap(&dist->irq_pending);
+ vgic_free_bitmap(&dist->irq_soft_pend);
+ vgic_free_bitmap(&dist->irq_queued);
+ vgic_free_bitmap(&dist->irq_cfg);
+ vgic_free_bytemap(&dist->irq_priority);
+ if (dist->irq_spi_target) {
+ for (i = 0; i < dist->nr_cpus; i++)
+ vgic_free_bitmap(&dist->irq_spi_target[i]);
+ }
+ kfree(dist->irq_sgi_sources);
+ kfree(dist->irq_spi_cpu);
+ kfree(dist->irq_spi_target);
+ kfree(dist->irq_pending_on_cpu);
+ dist->irq_sgi_sources = NULL;
+ dist->irq_spi_cpu = NULL;
+ dist->irq_spi_target = NULL;
+ dist->irq_pending_on_cpu = NULL;
}
-static int vgic_cpu_notify(struct notifier_block *self,
- unsigned long action, void *cpu)
+/*
+ * Allocate and initialize the various data structures. Must be called
+ * with kvm->lock held!
+ */
+static int vgic_init_maps(struct kvm *kvm)
{
- switch (action) {
- case CPU_STARTING:
- case CPU_STARTING_FROZEN:
- vgic_init_maintenance_interrupt(NULL);
- break;
- case CPU_DYING:
- case CPU_DYING_FROZEN:
- disable_percpu_irq(vgic_maint_irq);
- break;
- }
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct kvm_vcpu *vcpu;
+ int nr_cpus, nr_irqs;
+ int ret, i;
- return NOTIFY_OK;
-}
+ if (dist->nr_cpus) /* Already allocated */
+ return 0;
-static struct notifier_block vgic_cpu_nb = {
- .notifier_call = vgic_cpu_notify,
-};
+ nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
+ if (!nr_cpus) /* No vcpus? Can't be good... */
+ return -EINVAL;
-int kvm_vgic_hyp_init(void)
-{
- int ret;
- struct resource vctrl_res;
- struct resource vcpu_res;
+ /*
+ * If nobody configured the number of interrupts, use the
+ * legacy one.
+ */
+ if (!dist->nr_irqs)
+ dist->nr_irqs = VGIC_NR_IRQS_LEGACY;
- vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic");
- if (!vgic_node) {
- kvm_err("error: no compatible vgic node in DT\n");
- return -ENODEV;
- }
+ nr_irqs = dist->nr_irqs;
- vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0);
- if (!vgic_maint_irq) {
- kvm_err("error getting vgic maintenance irq from DT\n");
- ret = -ENXIO;
- goto out;
- }
+ ret = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);
+ ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs);
+ ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs);
+ ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs);
+ ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs);
+ ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs);
+ ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs);
- ret = request_percpu_irq(vgic_maint_irq, vgic_maintenance_handler,
- "vgic", kvm_get_running_vcpus());
- if (ret) {
- kvm_err("Cannot register interrupt %d\n", vgic_maint_irq);
+ if (ret)
goto out;
- }
- ret = register_cpu_notifier(&vgic_cpu_nb);
- if (ret) {
- kvm_err("Cannot register vgic CPU notifier\n");
- goto out_free_irq;
- }
-
- ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
- if (ret) {
- kvm_err("Cannot obtain VCTRL resource\n");
- goto out_free_irq;
- }
-
- vgic_vctrl_base = of_iomap(vgic_node, 2);
- if (!vgic_vctrl_base) {
- kvm_err("Cannot ioremap VCTRL\n");
+ dist->irq_sgi_sources = kzalloc(nr_cpus * VGIC_NR_SGIS, GFP_KERNEL);
+ dist->irq_spi_cpu = kzalloc(nr_irqs - VGIC_NR_PRIVATE_IRQS, GFP_KERNEL);
+ dist->irq_spi_target = kzalloc(sizeof(*dist->irq_spi_target) * nr_cpus,
+ GFP_KERNEL);
+ dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
+ GFP_KERNEL);
+ if (!dist->irq_sgi_sources ||
+ !dist->irq_spi_cpu ||
+ !dist->irq_spi_target ||
+ !dist->irq_pending_on_cpu) {
ret = -ENOMEM;
- goto out_free_irq;
+ goto out;
}
- vgic_nr_lr = readl_relaxed(vgic_vctrl_base + GICH_VTR);
- vgic_nr_lr = (vgic_nr_lr & 0x3f) + 1;
-
- ret = create_hyp_io_mappings(vgic_vctrl_base,
- vgic_vctrl_base + resource_size(&vctrl_res),
- vctrl_res.start);
- if (ret) {
- kvm_err("Cannot map VCTRL into hyp\n");
- goto out_unmap;
- }
+ for (i = 0; i < nr_cpus; i++)
+ ret |= vgic_init_bitmap(&dist->irq_spi_target[i],
+ nr_cpus, nr_irqs);
- kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
- vctrl_res.start, vgic_maint_irq);
- on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
+ if (ret)
+ goto out;
- if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
- kvm_err("Cannot obtain VCPU resource\n");
- ret = -ENXIO;
- goto out_unmap;
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
+ if (ret) {
+ kvm_err("VGIC: Failed to allocate vcpu memory\n");
+ break;
+ }
}
- vgic_vcpu_base = vcpu_res.start;
- goto out;
+ for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
+ vgic_set_target_reg(kvm, 0, i);
-out_unmap:
- iounmap(vgic_vctrl_base);
-out_free_irq:
- free_percpu_irq(vgic_maint_irq, kvm_get_running_vcpus());
out:
- of_node_put(vgic_node);
+ if (ret)
+ kvm_vgic_destroy(kvm);
+
return ret;
}
@@ -1560,6 +1874,7 @@ out:
*/
int kvm_vgic_init(struct kvm *kvm)
{
+ struct kvm_vcpu *vcpu;
int ret = 0, i;
if (!irqchip_in_kernel(kvm))
@@ -1577,18 +1892,26 @@ int kvm_vgic_init(struct kvm *kvm)
goto out;
}
+ ret = vgic_init_maps(kvm);
+ if (ret) {
+ kvm_err("Unable to allocate maps\n");
+ goto out;
+ }
+
ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
- vgic_vcpu_base, KVM_VGIC_V2_CPU_SIZE);
+ vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE);
if (ret) {
kvm_err("Unable to remap VGIC CPU to VCPU\n");
goto out;
}
- for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4)
- vgic_set_target_reg(kvm, 0, i);
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_vgic_vcpu_init(vcpu);
kvm->arch.vgic.ready = true;
out:
+ if (ret)
+ kvm_vgic_destroy(kvm);
mutex_unlock(&kvm->lock);
return ret;
}
@@ -1624,7 +1947,8 @@ int kvm_vgic_create(struct kvm *kvm)
}
spin_lock_init(&kvm->arch.vgic.lock);
- kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
+ kvm->arch.vgic.in_kernel = true;
+ kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
@@ -1639,7 +1963,7 @@ out:
return ret;
}
-static bool vgic_ioaddr_overlap(struct kvm *kvm)
+static int vgic_ioaddr_overlap(struct kvm *kvm)
{
phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;
@@ -1668,10 +1992,11 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
if (addr + size < addr)
return -EINVAL;
+ *ioaddr = addr;
ret = vgic_ioaddr_overlap(kvm);
if (ret)
- return ret;
- *ioaddr = addr;
+ *ioaddr = VGIC_ADDR_UNDEF;
+
return ret;
}
@@ -1722,39 +2047,40 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
struct kvm_exit_mmio *mmio, phys_addr_t offset)
{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- u32 reg, mask = 0, shift = 0;
bool updated = false;
+ struct vgic_vmcr vmcr;
+ u32 *vmcr_field;
+ u32 reg;
+
+ vgic_get_vmcr(vcpu, &vmcr);
switch (offset & ~0x3) {
case GIC_CPU_CTRL:
- mask = GICH_VMCR_CTRL_MASK;
- shift = GICH_VMCR_CTRL_SHIFT;
+ vmcr_field = &vmcr.ctlr;
break;
case GIC_CPU_PRIMASK:
- mask = GICH_VMCR_PRIMASK_MASK;
- shift = GICH_VMCR_PRIMASK_SHIFT;
+ vmcr_field = &vmcr.pmr;
break;
case GIC_CPU_BINPOINT:
- mask = GICH_VMCR_BINPOINT_MASK;
- shift = GICH_VMCR_BINPOINT_SHIFT;
+ vmcr_field = &vmcr.bpr;
break;
case GIC_CPU_ALIAS_BINPOINT:
- mask = GICH_VMCR_ALIAS_BINPOINT_MASK;
- shift = GICH_VMCR_ALIAS_BINPOINT_SHIFT;
+ vmcr_field = &vmcr.abpr;
break;
+ default:
+ BUG();
}
if (!mmio->is_write) {
- reg = (vgic_cpu->vgic_vmcr & mask) >> shift;
+ reg = *vmcr_field;
mmio_data_write(mmio, ~0, reg);
} else {
reg = mmio_data_read(mmio, ~0);
- reg = (reg << shift) & mask;
- if (reg != (vgic_cpu->vgic_vmcr & mask))
+ if (reg != *vmcr_field) {
+ *vmcr_field = reg;
+ vgic_set_vmcr(vcpu, &vmcr);
updated = true;
- vgic_cpu->vgic_vmcr &= ~mask;
- vgic_cpu->vgic_vmcr |= reg;
+ }
}
return updated;
}
@@ -1826,6 +2152,10 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
mutex_lock(&dev->kvm->lock);
+ ret = vgic_init_maps(dev->kvm);
+ if (ret)
+ goto out;
+
if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
ret = -EINVAL;
goto out;
@@ -1923,6 +2253,36 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
return vgic_attr_regs_access(dev, attr, &reg, true);
}
+ case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ u32 val;
+ int ret = 0;
+
+ if (get_user(val, uaddr))
+ return -EFAULT;
+
+ /*
+ * We require:
+ * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs
+ * - at most 1024 interrupts
+ * - a multiple of 32 interrupts
+ */
+ if (val < (VGIC_NR_PRIVATE_IRQS + 32) ||
+ val > VGIC_MAX_IRQS ||
+ (val & 31))
+ return -EINVAL;
+
+ mutex_lock(&dev->kvm->lock);
+
+ if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
+ ret = -EBUSY;
+ else
+ dev->kvm->arch.vgic.nr_irqs = val;
+
+ mutex_unlock(&dev->kvm->lock);
+
+ return ret;
+ }
}
@@ -1959,6 +2319,11 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
r = put_user(reg, uaddr);
break;
}
+ case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr);
+ break;
+ }
}
@@ -1995,6 +2360,8 @@ static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
return vgic_has_attr_regs(vgic_cpu_ranges, offset);
+ case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+ return 0;
}
return -ENXIO;
}
@@ -2009,7 +2376,7 @@ static int vgic_create(struct kvm_device *dev, u32 type)
return kvm_vgic_create(dev->kvm);
}
-struct kvm_device_ops kvm_arm_vgic_v2_ops = {
+static struct kvm_device_ops kvm_arm_vgic_v2_ops = {
.name = "kvm-arm-vgic",
.create = vgic_create,
.destroy = vgic_destroy,
@@ -2017,3 +2384,81 @@ struct kvm_device_ops kvm_arm_vgic_v2_ops = {
.get_attr = vgic_get_attr,
.has_attr = vgic_has_attr,
};
+
+static void vgic_init_maintenance_interrupt(void *info)
+{
+ enable_percpu_irq(vgic->maint_irq, 0);
+}
+
+static int vgic_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *cpu)
+{
+ switch (action) {
+ case CPU_STARTING:
+ case CPU_STARTING_FROZEN:
+ vgic_init_maintenance_interrupt(NULL);
+ break;
+ case CPU_DYING:
+ case CPU_DYING_FROZEN:
+ disable_percpu_irq(vgic->maint_irq);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block vgic_cpu_nb = {
+ .notifier_call = vgic_cpu_notify,
+};
+
+static const struct of_device_id vgic_ids[] = {
+ { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
+ { .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
+ {},
+};
+
+int kvm_vgic_hyp_init(void)
+{
+ const struct of_device_id *matched_id;
+ int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
+ const struct vgic_params **);
+ struct device_node *vgic_node;
+ int ret;
+
+ vgic_node = of_find_matching_node_and_match(NULL,
+ vgic_ids, &matched_id);
+ if (!vgic_node) {
+ kvm_err("error: no compatible GIC node found\n");
+ return -ENODEV;
+ }
+
+ vgic_probe = matched_id->data;
+ ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
+ if (ret)
+ return ret;
+
+ ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
+ "vgic", kvm_get_running_vcpus());
+ if (ret) {
+ kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
+ return ret;
+ }
+
+ ret = register_cpu_notifier(&vgic_cpu_nb);
+ if (ret) {
+ kvm_err("Cannot register vgic CPU notifier\n");
+ goto out_free_irq;
+ }
+
+ /* Callback into for arch code for setup */
+ vgic_arch_setup(vgic);
+
+ on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
+
+ return kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
+ KVM_DEV_TYPE_ARM_VGIC_V2);
+
+out_free_irq:
+ free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
+ return ret;
+}
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index f2c80d5451c3..d6a3d0993d88 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -28,6 +28,21 @@
#include "async_pf.h"
#include <trace/events/kvm.h>
+static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+#ifdef CONFIG_KVM_ASYNC_PF_SYNC
+ kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+#ifndef CONFIG_KVM_ASYNC_PF_SYNC
+ kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+
static struct kmem_cache *async_pf_cache;
int kvm_async_pf_init(void)
@@ -65,11 +80,10 @@ static void async_pf_execute(struct work_struct *work)
might_sleep();
- use_mm(mm);
down_read(&mm->mmap_sem);
- get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
+ get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL);
up_read(&mm->mmap_sem);
- unuse_mm(mm);
+ kvm_async_page_present_sync(vcpu, apf);
spin_lock(&vcpu->async_pf.lock);
list_add_tail(&apf->link, &vcpu->async_pf.done);
@@ -97,11 +111,16 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
list_entry(vcpu->async_pf.queue.next,
typeof(*work), queue);
list_del(&work->queue);
+
+#ifdef CONFIG_KVM_ASYNC_PF_SYNC
+ flush_work(&work->work);
+#else
if (cancel_work_sync(&work->work)) {
mmput(work->mm);
kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
kmem_cache_free(async_pf_cache, work);
}
+#endif
}
spin_lock(&vcpu->async_pf.lock);
@@ -130,7 +149,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
spin_unlock(&vcpu->async_pf.lock);
kvm_arch_async_page_ready(vcpu, work);
- kvm_arch_async_page_present(vcpu, work);
+ kvm_async_page_present_async(vcpu, work);
list_del(&work->queue);
vcpu->async_pf.queued--;
@@ -138,7 +157,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
}
}
-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
struct kvm_arch_async_pf *arch)
{
struct kvm_async_pf *work;
@@ -159,7 +178,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
work->wakeup_all = false;
work->vcpu = vcpu;
work->gva = gva;
- work->addr = gfn_to_hva(vcpu->kvm, gfn);
+ work->addr = hva;
work->arch = *arch;
work->mm = current->mm;
atomic_inc(&work->mm->mm_users);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index abe4d6043b36..0ab1411eb007 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -31,11 +31,14 @@
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>
+#include <linux/srcu.h>
#include <linux/slab.h>
+#include <linux/seqlock.h>
+#include <trace/events/kvm.h>
#include "iodev.h"
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
/*
* --------------------------------------------------------------------
* irqfd: Allows an fd to be used to inject an interrupt to the guest
@@ -74,7 +77,8 @@ struct _irqfd {
struct kvm *kvm;
wait_queue_t wait;
/* Update side is protected by irqfds.lock */
- struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
+ struct kvm_kernel_irq_routing_entry irq_entry;
+ seqcount_t irq_entry_sc;
/* Used for level IRQ fast-path */
int gsi;
struct work_struct inject;
@@ -118,19 +122,22 @@ static void
irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
{
struct _irqfd_resampler *resampler;
+ struct kvm *kvm;
struct _irqfd *irqfd;
+ int idx;
resampler = container_of(kian, struct _irqfd_resampler, notifier);
+ kvm = resampler->kvm;
- kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
+ kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
resampler->notifier.gsi, 0, false);
- rcu_read_lock();
+ idx = srcu_read_lock(&kvm->irq_srcu);
list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
eventfd_signal(irqfd->resamplefd, 1);
- rcu_read_unlock();
+ srcu_read_unlock(&kvm->irq_srcu, idx);
}
static void
@@ -142,7 +149,7 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd)
mutex_lock(&kvm->irqfds.resampler_lock);
list_del_rcu(&irqfd->resampler_link);
- synchronize_rcu();
+ synchronize_srcu(&kvm->irq_srcu);
if (list_empty(&resampler->list)) {
list_del(&resampler->link);
@@ -219,19 +226,24 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
unsigned long flags = (unsigned long)key;
- struct kvm_kernel_irq_routing_entry *irq;
+ struct kvm_kernel_irq_routing_entry irq;
struct kvm *kvm = irqfd->kvm;
+ unsigned seq;
+ int idx;
if (flags & POLLIN) {
- rcu_read_lock();
- irq = rcu_dereference(irqfd->irq_entry);
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ do {
+ seq = read_seqcount_begin(&irqfd->irq_entry_sc);
+ irq = irqfd->irq_entry;
+ } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
/* An event has been signaled, inject an interrupt */
- if (irq)
- kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
+ if (irq.type == KVM_IRQ_ROUTING_MSI)
+ kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
false);
else
schedule_work(&irqfd->inject);
- rcu_read_unlock();
+ srcu_read_unlock(&kvm->irq_srcu, idx);
}
if (flags & POLLHUP) {
@@ -267,34 +279,37 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
}
/* Must be called under irqfds.lock */
-static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
- struct kvm_irq_routing_table *irq_rt)
+static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd)
{
struct kvm_kernel_irq_routing_entry *e;
+ struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
+ int i, n_entries;
- if (irqfd->gsi >= irq_rt->nr_rt_entries) {
- rcu_assign_pointer(irqfd->irq_entry, NULL);
- return;
- }
+ n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);
+
+ write_seqcount_begin(&irqfd->irq_entry_sc);
+
+ irqfd->irq_entry.type = 0;
- hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) {
+ e = entries;
+ for (i = 0; i < n_entries; ++i, ++e) {
/* Only fast-path MSI. */
if (e->type == KVM_IRQ_ROUTING_MSI)
- rcu_assign_pointer(irqfd->irq_entry, e);
- else
- rcu_assign_pointer(irqfd->irq_entry, NULL);
+ irqfd->irq_entry = *e;
}
+
+ write_seqcount_end(&irqfd->irq_entry_sc);
}
static int
kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
{
- struct kvm_irq_routing_table *irq_rt;
struct _irqfd *irqfd, *tmp;
struct fd f;
struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
int ret;
unsigned int events;
+ int idx;
irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
if (!irqfd)
@@ -305,6 +320,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
INIT_LIST_HEAD(&irqfd->list);
INIT_WORK(&irqfd->inject, irqfd_inject);
INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
+ seqcount_init(&irqfd->irq_entry_sc);
f = fdget(args->fd);
if (!f.file) {
@@ -363,7 +379,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
}
list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
- synchronize_rcu();
+ synchronize_srcu(&kvm->irq_srcu);
mutex_unlock(&kvm->irqfds.resampler_lock);
}
@@ -387,9 +403,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
goto fail;
}
- irq_rt = rcu_dereference_protected(kvm->irq_routing,
- lockdep_is_held(&kvm->irqfds.lock));
- irqfd_update(kvm, irqfd, irq_rt);
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ irqfd_update(kvm, irqfd);
+ srcu_read_unlock(&kvm->irq_srcu, idx);
events = f.file->f_op->poll(f.file, &irqfd->pt);
@@ -428,12 +444,73 @@ out:
kfree(irqfd);
return ret;
}
+
+bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+ struct kvm_irq_ack_notifier *kian;
+ int gsi, idx;
+
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
+ if (gsi != -1)
+ hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+ link)
+ if (kian->gsi == gsi) {
+ srcu_read_unlock(&kvm->irq_srcu, idx);
+ return true;
+ }
+
+ srcu_read_unlock(&kvm->irq_srcu, idx);
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
+
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+ struct kvm_irq_ack_notifier *kian;
+ int gsi, idx;
+
+ trace_kvm_ack_irq(irqchip, pin);
+
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
+ if (gsi != -1)
+ hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+ link)
+ if (kian->gsi == gsi)
+ kian->irq_acked(kian);
+ srcu_read_unlock(&kvm->irq_srcu, idx);
+}
+
+void kvm_register_irq_ack_notifier(struct kvm *kvm,
+ struct kvm_irq_ack_notifier *kian)
+{
+ mutex_lock(&kvm->irq_lock);
+ hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
+ mutex_unlock(&kvm->irq_lock);
+#ifdef __KVM_HAVE_IOAPIC
+ kvm_vcpu_request_scan_ioapic(kvm);
+#endif
+}
+
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+ struct kvm_irq_ack_notifier *kian)
+{
+ mutex_lock(&kvm->irq_lock);
+ hlist_del_init_rcu(&kian->link);
+ mutex_unlock(&kvm->irq_lock);
+ synchronize_srcu(&kvm->irq_srcu);
+#ifdef __KVM_HAVE_IOAPIC
+ kvm_vcpu_request_scan_ioapic(kvm);
+#endif
+}
#endif
void
kvm_eventfd_init(struct kvm *kvm)
{
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
spin_lock_init(&kvm->irqfds.lock);
INIT_LIST_HEAD(&kvm->irqfds.items);
INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
@@ -442,7 +519,7 @@ kvm_eventfd_init(struct kvm *kvm)
INIT_LIST_HEAD(&kvm->ioeventfds);
}
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
/*
* shutdown any irqfd's that match fd+gsi
*/
@@ -461,14 +538,14 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
/*
- * This rcu_assign_pointer is needed for when
+ * This clearing of irq_entry.type is needed for when
* another thread calls kvm_irq_routing_update before
* we flush workqueue below (we synchronize with
* kvm_irq_routing_update using irqfds.lock).
- * It is paired with synchronize_rcu done by caller
- * of that function.
*/
- rcu_assign_pointer(irqfd->irq_entry, NULL);
+ write_seqcount_begin(&irqfd->irq_entry_sc);
+ irqfd->irq_entry.type = 0;
+ write_seqcount_end(&irqfd->irq_entry_sc);
irqfd_deactivate(irqfd);
}
}
@@ -523,20 +600,17 @@ kvm_irqfd_release(struct kvm *kvm)
}
/*
- * Change irq_routing and irqfd.
- * Caller must invoke synchronize_rcu afterwards.
+ * Take note of a change in irq routing.
+ * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
*/
-void kvm_irq_routing_update(struct kvm *kvm,
- struct kvm_irq_routing_table *irq_rt)
+void kvm_irq_routing_update(struct kvm *kvm)
{
struct _irqfd *irqfd;
spin_lock_irq(&kvm->irqfds.lock);
- rcu_assign_pointer(kvm->irq_routing, irq_rt);
-
list_for_each_entry(irqfd, &kvm->irqfds.items, list)
- irqfd_update(kvm, irqfd, irq_rt);
+ irqfd_update(kvm, irqfd);
spin_unlock_irq(&kvm->irqfds.lock);
}
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index e2e6b4473a96..963b8995a9e8 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -160,9 +160,10 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
*/
int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
{
+ struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
struct kvm_kernel_irq_routing_entry *e;
int ret = -EINVAL;
- struct kvm_irq_routing_table *irq_rt;
+ int idx;
trace_kvm_set_irq(irq, level, irq_source_id);
@@ -174,17 +175,15 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
* Since there's no easy way to do this, we only support injecting MSI
* which is limited to 1:1 GSI mapping.
*/
- rcu_read_lock();
- irq_rt = rcu_dereference(kvm->irq_routing);
- if (irq < irq_rt->nr_rt_entries)
- hlist_for_each_entry(e, &irq_rt->map[irq], link) {
- if (likely(e->type == KVM_IRQ_ROUTING_MSI))
- ret = kvm_set_msi_inatomic(e, kvm);
- else
- ret = -EWOULDBLOCK;
- break;
- }
- rcu_read_unlock();
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
+ e = &entries[0];
+ if (likely(e->type == KVM_IRQ_ROUTING_MSI))
+ ret = kvm_set_msi_inatomic(e, kvm);
+ else
+ ret = -EWOULDBLOCK;
+ }
+ srcu_read_unlock(&kvm->irq_srcu, idx);
return ret;
}
@@ -253,26 +252,25 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
mutex_lock(&kvm->irq_lock);
hlist_del_rcu(&kimn->link);
mutex_unlock(&kvm->irq_lock);
- synchronize_rcu();
+ synchronize_srcu(&kvm->irq_srcu);
}
void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
bool mask)
{
struct kvm_irq_mask_notifier *kimn;
- int gsi;
+ int idx, gsi;
- rcu_read_lock();
- gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
if (gsi != -1)
hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link)
if (kimn->irq == gsi)
kimn->func(kimn, mask);
- rcu_read_unlock();
+ srcu_read_unlock(&kvm->irq_srcu, idx);
}
-int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
- struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
int r = -EINVAL;
@@ -303,7 +301,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
e->irqchip.pin = ue->u.irqchip.pin + delta;
if (e->irqchip.pin >= max_pin)
goto out;
- rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
break;
case KVM_IRQ_ROUTING_MSI:
e->set = kvm_set_msi;
@@ -322,13 +319,13 @@ out:
#define IOAPIC_ROUTING_ENTRY(irq) \
{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
- .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) }
+ .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)
#ifdef CONFIG_X86
# define PIC_ROUTING_ENTRY(irq) \
{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
- .u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 }
+ .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } }
# define ROUTING_ENTRY2(irq) \
IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq)
#else
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 20dc9e4a8f6c..7f256f31df10 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -26,69 +26,47 @@
#include <linux/kvm_host.h>
#include <linux/slab.h>
+#include <linux/srcu.h>
#include <linux/export.h>
#include <trace/events/kvm.h>
#include "irq.h"
-bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
- struct kvm_irq_ack_notifier *kian;
- int gsi;
-
- rcu_read_lock();
- gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
- if (gsi != -1)
- hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
- link)
- if (kian->gsi == gsi) {
- rcu_read_unlock();
- return true;
- }
-
- rcu_read_unlock();
-
- return false;
-}
-EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
+struct kvm_irq_routing_table {
+ int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
+ struct kvm_kernel_irq_routing_entry *rt_entries;
+ u32 nr_rt_entries;
+ /*
+ * Array indexed by gsi. Each entry contains list of irq chips
+ * the gsi is connected to.
+ */
+ struct hlist_head map[0];
+};
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
+int kvm_irq_map_gsi(struct kvm *kvm,
+ struct kvm_kernel_irq_routing_entry *entries, int gsi)
{
- struct kvm_irq_ack_notifier *kian;
- int gsi;
-
- trace_kvm_ack_irq(irqchip, pin);
-
- rcu_read_lock();
- gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
- if (gsi != -1)
- hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
- link)
- if (kian->gsi == gsi)
- kian->irq_acked(kian);
- rcu_read_unlock();
-}
+ struct kvm_irq_routing_table *irq_rt;
+ struct kvm_kernel_irq_routing_entry *e;
+ int n = 0;
+
+ irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
+ lockdep_is_held(&kvm->irq_lock));
+ if (gsi < irq_rt->nr_rt_entries) {
+ hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
+ entries[n] = *e;
+ ++n;
+ }
+ }
-void kvm_register_irq_ack_notifier(struct kvm *kvm,
- struct kvm_irq_ack_notifier *kian)
-{
- mutex_lock(&kvm->irq_lock);
- hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
- mutex_unlock(&kvm->irq_lock);
-#ifdef __KVM_HAVE_IOAPIC
- kvm_vcpu_request_scan_ioapic(kvm);
-#endif
+ return n;
}
-void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
- struct kvm_irq_ack_notifier *kian)
+int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
- mutex_lock(&kvm->irq_lock);
- hlist_del_init_rcu(&kian->link);
- mutex_unlock(&kvm->irq_lock);
- synchronize_rcu();
-#ifdef __KVM_HAVE_IOAPIC
- kvm_vcpu_request_scan_ioapic(kvm);
-#endif
+ struct kvm_irq_routing_table *irq_rt;
+
+ irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+ return irq_rt->chip[irqchip][pin];
}
int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
@@ -114,9 +92,8 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
bool line_status)
{
- struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
- int ret = -1, i = 0;
- struct kvm_irq_routing_table *irq_rt;
+ struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS];
+ int ret = -1, i, idx;
trace_kvm_set_irq(irq, level, irq_source_id);
@@ -124,12 +101,9 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
* IOAPIC. So set the bit in both. The guest will ignore
* writes to the unused one.
*/
- rcu_read_lock();
- irq_rt = rcu_dereference(kvm->irq_routing);
- if (irq < irq_rt->nr_rt_entries)
- hlist_for_each_entry(e, &irq_rt->map[irq], link)
- irq_set[i++] = *e;
- rcu_read_unlock();
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ i = kvm_irq_map_gsi(kvm, irq_set, irq);
+ srcu_read_unlock(&kvm->irq_srcu, idx);
while(i--) {
int r;
@@ -170,9 +144,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
e->gsi = ue->gsi;
e->type = ue->type;
- r = kvm_set_routing_entry(rt, e, ue);
+ r = kvm_set_routing_entry(e, ue);
if (r)
goto out;
+ if (e->type == KVM_IRQ_ROUTING_IRQCHIP)
+ rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi;
hlist_add_head(&e->link, &rt->map[e->gsi]);
r = 0;
@@ -223,10 +199,11 @@ int kvm_set_irq_routing(struct kvm *kvm,
mutex_lock(&kvm->irq_lock);
old = kvm->irq_routing;
- kvm_irq_routing_update(kvm, new);
+ rcu_assign_pointer(kvm->irq_routing, new);
+ kvm_irq_routing_update(kvm);
mutex_unlock(&kvm->irq_lock);
- synchronize_rcu();
+ synchronize_srcu_expedited(&kvm->irq_srcu);
new = old;
r = 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 03a0381b1cb7..4bdadf1a6754 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -95,14 +95,12 @@ static int hardware_enable_all(void);
static void hardware_disable_all(void);
static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
-static void update_memslots(struct kvm_memslots *slots,
- struct kvm_memory_slot *new, u64 last_generation);
static void kvm_release_pfn_dirty(pfn_t pfn);
static void mark_page_dirty_in_slot(struct kvm *kvm,
struct kvm_memory_slot *memslot, gfn_t gfn);
-bool kvm_rebooting;
+__visible bool kvm_rebooting;
EXPORT_SYMBOL_GPL(kvm_rebooting);
static bool largepages_enabled = true;
@@ -110,7 +108,7 @@ static bool largepages_enabled = true;
bool kvm_is_mmio_pfn(pfn_t pfn)
{
if (pfn_valid(pfn))
- return PageReserved(pfn_to_page(pfn));
+ return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn));
return true;
}
@@ -129,7 +127,8 @@ int vcpu_load(struct kvm_vcpu *vcpu)
struct pid *oldpid = vcpu->pid;
struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
rcu_assign_pointer(vcpu->pid, newpid);
- synchronize_rcu();
+ if (oldpid)
+ synchronize_rcu();
put_pid(oldpid);
}
cpu = get_cpu();
@@ -457,14 +456,16 @@ static struct kvm *kvm_create_vm(unsigned long type)
r = kvm_arch_init_vm(kvm, type);
if (r)
- goto out_err_nodisable;
+ goto out_err_no_disable;
r = hardware_enable_all();
if (r)
- goto out_err_nodisable;
+ goto out_err_no_disable;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
INIT_HLIST_HEAD(&kvm->mask_notifier_list);
+#endif
+#ifdef CONFIG_HAVE_KVM_IRQFD
INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
#endif
@@ -473,10 +474,19 @@ static struct kvm *kvm_create_vm(unsigned long type)
r = -ENOMEM;
kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
if (!kvm->memslots)
- goto out_err_nosrcu;
+ goto out_err_no_srcu;
+
+ /*
+ * Init kvm generation close to the maximum to easily test the
+ * code of handling generation number wrap-around.
+ */
+ kvm->memslots->generation = -150;
+
kvm_init_memslots_id(kvm);
if (init_srcu_struct(&kvm->srcu))
- goto out_err_nosrcu;
+ goto out_err_no_srcu;
+ if (init_srcu_struct(&kvm->irq_srcu))
+ goto out_err_no_irq_srcu;
for (i = 0; i < KVM_NR_BUSES; i++) {
kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
GFP_KERNEL);
@@ -505,10 +515,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
return kvm;
out_err:
+ cleanup_srcu_struct(&kvm->irq_srcu);
+out_err_no_irq_srcu:
cleanup_srcu_struct(&kvm->srcu);
-out_err_nosrcu:
+out_err_no_srcu:
hardware_disable_all();
-out_err_nodisable:
+out_err_no_disable:
for (i = 0; i < KVM_NR_BUSES; i++)
kfree(kvm->buses[i]);
kfree(kvm->memslots);
@@ -604,6 +616,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_arch_destroy_vm(kvm);
kvm_destroy_devices(kvm);
kvm_free_physmem(kvm);
+ cleanup_srcu_struct(&kvm->irq_srcu);
cleanup_srcu_struct(&kvm->srcu);
kvm_arch_free_vm(kvm);
hardware_disable_all();
@@ -682,8 +695,7 @@ static void sort_memslots(struct kvm_memslots *slots)
}
static void update_memslots(struct kvm_memslots *slots,
- struct kvm_memory_slot *new,
- u64 last_generation)
+ struct kvm_memory_slot *new)
{
if (new) {
int id = new->id;
@@ -694,15 +706,13 @@ static void update_memslots(struct kvm_memslots *slots,
if (new->npages != npages)
sort_memslots(slots);
}
-
- slots->generation = last_generation + 1;
}
static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
{
u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
-#ifdef KVM_CAP_READONLY_MEM
+#ifdef __KVM_HAVE_READONLY_MEM
valid_flags |= KVM_MEM_READONLY;
#endif
@@ -717,10 +727,24 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
{
struct kvm_memslots *old_memslots = kvm->memslots;
- update_memslots(slots, new, kvm->memslots->generation);
+ /*
+ * Set the low bit in the generation, which disables SPTE caching
+ * until the end of synchronize_srcu_expedited.
+ */
+ WARN_ON(old_memslots->generation & 1);
+ slots->generation = old_memslots->generation + 1;
+
+ update_memslots(slots, new);
rcu_assign_pointer(kvm->memslots, slots);
synchronize_srcu_expedited(&kvm->srcu);
+ /*
+ * Increment the new memslot generation a second time. This prevents
+ * vm exits that race with memslot updates from caching a memslot
+ * generation that will (potentially) be valid forever.
+ */
+ slots->generation++;
+
kvm_arch_memslots_updated(kvm);
return old_memslots;
@@ -771,7 +795,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
npages = mem->memory_size >> PAGE_SHIFT;
- r = -EINVAL;
if (npages > KVM_MEM_MAX_NR_PAGES)
goto out;
@@ -785,7 +808,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
new.npages = npages;
new.flags = mem->flags;
- r = -EINVAL;
if (npages) {
if (!old.npages)
change = KVM_MR_CREATE;
@@ -841,7 +863,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
}
if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
- r = -ENOMEM;
slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
GFP_KERNEL);
if (!slots)
@@ -1070,9 +1091,9 @@ EXPORT_SYMBOL_GPL(gfn_to_hva);
* If writable is set to false, the hva returned by this function is only
* allowed to be read.
*/
-unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
+unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot,
+ gfn_t gfn, bool *writable)
{
- struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
if (!kvm_is_error_hva(hva) && writable)
@@ -1081,6 +1102,13 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
return hva;
}
+unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
+{
+ struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+
+ return gfn_to_hva_memslot_prot(slot, gfn, writable);
+}
+
static int kvm_read_hva(void *data, void __user *hva, int len)
{
return __copy_from_user(data, hva, len);
@@ -1720,7 +1748,7 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
rcu_read_lock();
pid = rcu_dereference(target->pid);
if (pid)
- task = get_pid_task(target->pid, PIDTYPE_PID);
+ task = get_pid_task(pid, PIDTYPE_PID);
rcu_read_unlock();
if (!task)
return ret;
@@ -1763,8 +1791,7 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
bool eligible;
eligible = !vcpu->spin_loop.in_spin_loop ||
- (vcpu->spin_loop.in_spin_loop &&
- vcpu->spin_loop.dy_eligible);
+ vcpu->spin_loop.dy_eligible;
if (vcpu->spin_loop.in_spin_loop)
kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
@@ -1804,7 +1831,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
continue;
if (vcpu == me)
continue;
- if (waitqueue_active(&vcpu->wq))
+ if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
continue;
if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
continue;
@@ -2254,6 +2281,33 @@ struct kvm_device *kvm_device_from_filp(struct file *filp)
return filp->private_data;
}
+static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
+#ifdef CONFIG_KVM_MPIC
+ [KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops,
+ [KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops,
+#endif
+
+#ifdef CONFIG_KVM_XICS
+ [KVM_DEV_TYPE_XICS] = &kvm_xics_ops,
+#endif
+
+#ifdef CONFIG_S390
+ [KVM_DEV_TYPE_FLIC] = &kvm_flic_ops,
+#endif
+};
+
+int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
+{
+ if (type >= ARRAY_SIZE(kvm_device_ops_table))
+ return -ENOSPC;
+
+ if (kvm_device_ops_table[type] != NULL)
+ return -EEXIST;
+
+ kvm_device_ops_table[type] = ops;
+ return 0;
+}
+
static int kvm_ioctl_create_device(struct kvm *kvm,
struct kvm_create_device *cd)
{
@@ -2262,31 +2316,12 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
int ret;
- switch (cd->type) {
-#ifdef CONFIG_KVM_MPIC
- case KVM_DEV_TYPE_FSL_MPIC_20:
- case KVM_DEV_TYPE_FSL_MPIC_42:
- ops = &kvm_mpic_ops;
- break;
-#endif
-#ifdef CONFIG_KVM_XICS
- case KVM_DEV_TYPE_XICS:
- ops = &kvm_xics_ops;
- break;
-#endif
-#ifdef CONFIG_KVM_VFIO
- case KVM_DEV_TYPE_VFIO:
- ops = &kvm_vfio_ops;
- break;
-#endif
-#ifdef CONFIG_KVM_ARM_VGIC
- case KVM_DEV_TYPE_ARM_VGIC_V2:
- ops = &kvm_arm_vgic_v2_ops;
- break;
-#endif
- default:
+ if (cd->type >= ARRAY_SIZE(kvm_device_ops_table))
+ return -ENODEV;
+
+ ops = kvm_device_ops_table[cd->type];
+ if (ops == NULL)
return -ENODEV;
- }
if (test)
return 0;
@@ -2316,6 +2351,34 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
return 0;
}
+static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
+{
+ switch (arg) {
+ case KVM_CAP_USER_MEMORY:
+ case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
+ case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
+#ifdef CONFIG_KVM_APIC_ARCHITECTURE
+ case KVM_CAP_SET_BOOT_CPU_ID:
+#endif
+ case KVM_CAP_INTERNAL_ERROR_DATA:
+#ifdef CONFIG_HAVE_KVM_MSI
+ case KVM_CAP_SIGNAL_MSI:
+#endif
+#ifdef CONFIG_HAVE_KVM_IRQFD
+ case KVM_CAP_IRQFD_RESAMPLE:
+#endif
+ case KVM_CAP_CHECK_EXTENSION_VM:
+ return 1;
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+ case KVM_CAP_IRQ_ROUTING:
+ return KVM_MAX_IRQ_ROUTES;
+#endif
+ default:
+ break;
+ }
+ return kvm_vm_ioctl_check_extension(kvm, arg);
+}
+
static long kvm_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -2479,6 +2542,9 @@ static long kvm_vm_ioctl(struct file *filp,
r = 0;
break;
}
+ case KVM_CHECK_EXTENSION:
+ r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
+ break;
default:
r = kvm_arch_vm_ioctl(filp, ioctl, arg);
if (r == -ENOTTY)
@@ -2563,33 +2629,6 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
return r;
}
-static long kvm_dev_ioctl_check_extension_generic(long arg)
-{
- switch (arg) {
- case KVM_CAP_USER_MEMORY:
- case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
- case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
-#ifdef CONFIG_KVM_APIC_ARCHITECTURE
- case KVM_CAP_SET_BOOT_CPU_ID:
-#endif
- case KVM_CAP_INTERNAL_ERROR_DATA:
-#ifdef CONFIG_HAVE_KVM_MSI
- case KVM_CAP_SIGNAL_MSI:
-#endif
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
- case KVM_CAP_IRQFD_RESAMPLE:
-#endif
- return 1;
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
- case KVM_CAP_IRQ_ROUTING:
- return KVM_MAX_IRQ_ROUTES;
-#endif
- default:
- break;
- }
- return kvm_dev_ioctl_check_extension(arg);
-}
-
static long kvm_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -2597,7 +2636,6 @@ static long kvm_dev_ioctl(struct file *filp,
switch (ioctl) {
case KVM_GET_API_VERSION:
- r = -EINVAL;
if (arg)
goto out;
r = KVM_API_VERSION;
@@ -2606,10 +2644,9 @@ static long kvm_dev_ioctl(struct file *filp,
r = kvm_dev_ioctl_create_vm(arg);
break;
case KVM_CHECK_EXTENSION:
- r = kvm_dev_ioctl_check_extension_generic(arg);
+ r = kvm_vm_ioctl_check_extension_generic(NULL, arg);
break;
case KVM_GET_VCPU_MMAP_SIZE:
- r = -EINVAL;
if (arg)
goto out;
r = PAGE_SIZE; /* struct kvm_run */
@@ -2654,7 +2691,7 @@ static void hardware_enable_nolock(void *junk)
cpumask_set_cpu(cpu, cpus_hardware_enabled);
- r = kvm_arch_hardware_enable(NULL);
+ r = kvm_arch_hardware_enable();
if (r) {
cpumask_clear_cpu(cpu, cpus_hardware_enabled);
@@ -2679,7 +2716,7 @@ static void hardware_disable_nolock(void *junk)
if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
return;
cpumask_clear_cpu(cpu, cpus_hardware_enabled);
- kvm_arch_hardware_disable(NULL);
+ kvm_arch_hardware_disable();
}
static void hardware_disable(void)
@@ -3108,6 +3145,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
if (vcpu->preempted)
vcpu->preempted = false;
+ kvm_arch_sched_in(vcpu, cpu);
+
kvm_arch_vcpu_load(vcpu, cpu);
}
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index b4f9507ae650..4c8a2e17ed81 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -233,6 +233,16 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */
}
+static int kvm_vfio_create(struct kvm_device *dev, u32 type);
+
+static struct kvm_device_ops kvm_vfio_ops = {
+ .name = "kvm-vfio",
+ .create = kvm_vfio_create,
+ .destroy = kvm_vfio_destroy,
+ .set_attr = kvm_vfio_set_attr,
+ .has_attr = kvm_vfio_has_attr,
+};
+
static int kvm_vfio_create(struct kvm_device *dev, u32 type)
{
struct kvm_device *tmp;
@@ -255,10 +265,8 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
return 0;
}
-struct kvm_device_ops kvm_vfio_ops = {
- .name = "kvm-vfio",
- .create = kvm_vfio_create,
- .destroy = kvm_vfio_destroy,
- .set_attr = kvm_vfio_set_attr,
- .has_attr = kvm_vfio_has_attr,
-};
+static int __init kvm_vfio_ops_init(void)
+{
+ return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
+}
+module_init(kvm_vfio_ops_init);