From 0282abf078c3353a178ab77a115828ce333181dd Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 10 Nov 2015 12:11:08 -0700 Subject: vfio/pci: Hide device PCIe capability on non-express buses for PCIe VMs When we have a PCIe VM, such as Q35, guests start to care more about valid configurations of devices relative to the VM view of the PCI topology. Windows will error with a Code 10 for an assigned device if a PCIe capability is found for a device on a conventional bus. We also have the possibility of IOMMUs, like VT-d, where the where the guest may be acutely aware of valid express capabilities on physical hardware. Some devices, like tg3 are adversely affected by this due to driver dependencies on the PCIe capability. The only solution for such devices is to attach them to an express capable bus in the VM. Signed-off-by: Alex Williamson --- hw/vfio/pci.c | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) (limited to 'hw') diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 8fadbcf682..035007f707 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -28,6 +28,7 @@ #include "config.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" +#include "hw/pci/pci_bridge.h" #include "qemu/error-report.h" #include "qemu/range.h" #include "sysemu/kvm.h" @@ -1524,13 +1525,38 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size) } if (!pci_bus_is_express(vdev->pdev.bus)) { + PCIBus *bus = vdev->pdev.bus; + PCIDevice *bridge; + /* - * Use express capability as-is on PCI bus. It doesn't make much - * sense to even expose, but some drivers (ex. tg3) depend on it - * and guests don't seem to be particular about it. We'll need - * to revist this or force express devices to express buses if we - * ever expose an IOMMU to the guest. + * Traditionally PCI device assignment exposes the PCIe capability + * as-is on non-express buses. The reason being that some drivers + * simply assume that it's there, for example tg3. However when + * we're running on a native PCIe machine type, like Q35, we need + * to hide the PCIe capability. The reason for this is twofold; + * first Windows guests get a Code 10 error when the PCIe capability + * is exposed in this configuration. Therefore express devices won't + * work at all unless they're attached to express buses in the VM. + * Second, a native PCIe machine introduces the possibility of fine + * granularity IOMMUs supporting both translation and isolation. + * Guest code to discover the IOMMU visibility of a device, such as + * IOMMU grouping code on Linux, is very aware of device types and + * valid transitions between bus types. An express device on a non- + * express bus is not a valid combination on bare metal systems. + * + * Drivers that require a PCIe capability to make the device + * functional are simply going to need to have their devices placed + * on a PCIe bus in the VM. */ + while (!pci_bus_is_root(bus)) { + bridge = pci_bridge_get_device(bus); + bus = bridge->bus; + } + + if (pci_bus_is_express(bus)) { + return 0; + } + } else if (pci_bus_is_root(vdev->pdev.bus)) { /* * On a Root Complex bus Endpoints become Root Complex Integrated -- cgit v1.2.3 From bdd81addf4033ce26e6cd180b060f63095f3ded9 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 10 Nov 2015 12:11:08 -0700 Subject: vfio: Use g_new() & friends where that makes obvious sense g_new(T, n) is neater than g_malloc(sizeof(T) * n). It's also safer, for two reasons. One, it catches multiplication overflowing size_t. Two, it returns T * rather than void *, which lets the compiler catch more type errors. This commit only touches allocations with size arguments of the form sizeof(T). Same Coccinelle semantic patch as in commit b45c03f. Signed-off-by: Markus Armbruster Signed-off-by: Alex Williamson --- hw/vfio/pci-quirks.c | 16 ++++++++-------- hw/vfio/pci.c | 4 ++-- hw/vfio/platform.c | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'hw') diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index c675d1bc30..30c68a1e2b 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -284,7 +284,7 @@ static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev) } quirk = g_malloc0(sizeof(*quirk)); - quirk->mem = g_malloc0(sizeof(MemoryRegion)); + quirk->mem = g_new0(MemoryRegion, 1); quirk->nr_mem = 1; memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev, @@ -319,7 +319,7 @@ static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr) } quirk = g_malloc0(sizeof(*quirk)); - quirk->mem = g_malloc0(sizeof(MemoryRegion) * 2); + quirk->mem = g_new0(MemoryRegion, 2); quirk->nr_mem = 2; window = quirk->data = g_malloc0(sizeof(*window) + sizeof(VFIOConfigWindowMatch)); @@ -368,7 +368,7 @@ static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice *vdev, int nr) quirk = g_malloc0(sizeof(*quirk)); mirror = quirk->data = g_malloc0(sizeof(*mirror)); - mirror->mem = quirk->mem = g_malloc0(sizeof(MemoryRegion)); + mirror->mem = quirk->mem = g_new0(MemoryRegion, 1); quirk->nr_mem = 1; mirror->vdev = vdev; mirror->offset = 0x4000; @@ -544,7 +544,7 @@ static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev) quirk = g_malloc0(sizeof(*quirk)); quirk->data = data = g_malloc0(sizeof(*data)); - quirk->mem = g_malloc0(sizeof(MemoryRegion) * 2); + quirk->mem = g_new0(MemoryRegion, 2); quirk->nr_mem = 2; data->vdev = vdev; @@ -661,7 +661,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr) } quirk = g_malloc0(sizeof(*quirk)); - quirk->mem = g_malloc0(sizeof(MemoryRegion) * 4); + quirk->mem = g_new0(MemoryRegion, 4); quirk->nr_mem = 4; bar5 = quirk->data = g_malloc0(sizeof(*bar5) + (sizeof(VFIOConfigWindowMatch) * 2)); @@ -756,7 +756,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr) quirk = g_malloc0(sizeof(*quirk)); mirror = quirk->data = g_malloc0(sizeof(*mirror)); - mirror->mem = quirk->mem = g_malloc0(sizeof(MemoryRegion)); + mirror->mem = quirk->mem = g_new0(MemoryRegion, 1); quirk->nr_mem = 1; mirror->vdev = vdev; mirror->offset = 0x88000; @@ -775,7 +775,7 @@ static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr) if (vdev->has_vga) { quirk = g_malloc0(sizeof(*quirk)); mirror = quirk->data = g_malloc0(sizeof(*mirror)); - mirror->mem = quirk->mem = g_malloc0(sizeof(MemoryRegion)); + mirror->mem = quirk->mem = g_new0(MemoryRegion, 1); quirk->nr_mem = 1; mirror->vdev = vdev; mirror->offset = 0x1800; @@ -938,7 +938,7 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) } quirk = g_malloc0(sizeof(*quirk)); - quirk->mem = g_malloc0(sizeof(MemoryRegion) * 2); + quirk->mem = g_new0(MemoryRegion, 2); quirk->nr_mem = 2; quirk->data = rtl = g_malloc0(sizeof(*rtl)); rtl->vdev = vdev; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 035007f707..1fb868c244 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -587,7 +587,7 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev) { vfio_disable_interrupts(vdev); - vdev->msi_vectors = g_malloc0(vdev->msix->entries * sizeof(VFIOMSIVector)); + vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->msix->entries); vdev->interrupt = VFIO_INT_MSIX; @@ -623,7 +623,7 @@ static void vfio_msi_enable(VFIOPCIDevice *vdev) vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev); retry: - vdev->msi_vectors = g_malloc0(vdev->nr_vectors * sizeof(VFIOMSIVector)); + vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->nr_vectors); for (i = 0; i < vdev->nr_vectors; i++) { VFIOMSIVector *vector = &vdev->msi_vectors[i]; diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index 5c1156ca3b..289b498ca9 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -478,7 +478,7 @@ static int vfio_populate_device(VFIODevice *vbasedev) struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) }; VFIORegion *ptr; - vdev->regions[i] = g_malloc0(sizeof(VFIORegion)); + vdev->regions[i] = g_new0(VFIORegion, 1); ptr = vdev->regions[i]; reg_info.index = i; ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info); -- cgit v1.2.3