aboutsummaryrefslogtreecommitdiff
path: root/hw/pci
diff options
context:
space:
mode:
Diffstat (limited to 'hw/pci')
-rw-r--r--hw/pci/Kconfig3
-rw-r--r--hw/pci/meson.build11
-rw-r--r--hw/pci/msi.c62
-rw-r--r--hw/pci/msix.c59
-rw-r--r--hw/pci/pci-hmp-cmds.c239
-rw-r--r--hw/pci/pci-internal.h24
-rw-r--r--hw/pci/pci-qmp-cmds.c199
-rw-r--r--hw/pci/pci-stub.c9
-rw-r--r--hw/pci/pci.c918
-rw-r--r--hw/pci/pci_bridge.c63
-rw-r--r--hw/pci/pci_host.c37
-rw-r--r--hw/pci/pcie.c213
-rw-r--r--hw/pci/pcie_aer.c144
-rw-r--r--hw/pci/pcie_doe.c367
-rw-r--r--hw/pci/pcie_host.c2
-rw-r--r--hw/pci/pcie_port.c74
-rw-r--r--hw/pci/pcie_sriov.c318
-rw-r--r--hw/pci/shpc.c169
-rw-r--r--hw/pci/slotid_cap.c2
-rw-r--r--hw/pci/trace-events20
20 files changed, 2198 insertions, 735 deletions
diff --git a/hw/pci/Kconfig b/hw/pci/Kconfig
index 77f8b005ff..fe70902cd8 100644
--- a/hw/pci/Kconfig
+++ b/hw/pci/Kconfig
@@ -8,6 +8,9 @@ config PCI_EXPRESS
config PCI_DEVICES
bool
+config PCIE_DEVICES
+ bool
+
config MSI_NONBROKEN
# selected by interrupt controllers that do not support MSI,
# or support it and have a good implementation. See commit
diff --git a/hw/pci/meson.build b/hw/pci/meson.build
index 5c4bbac817..b9c34b2acf 100644
--- a/hw/pci/meson.build
+++ b/hw/pci/meson.build
@@ -5,6 +5,9 @@ pci_ss.add(files(
'pci.c',
'pci_bridge.c',
'pci_host.c',
+ 'pci-hmp-cmds.c',
+ 'pci-qmp-cmds.c',
+ 'pcie_sriov.c',
'shpc.c',
'slotid_cap.c'
))
@@ -12,8 +15,8 @@ pci_ss.add(files(
# allow plugging PCIe devices into PCI buses, include them even if
# CONFIG_PCI_EXPRESS=n.
pci_ss.add(files('pcie.c', 'pcie_aer.c'))
-softmmu_ss.add(when: 'CONFIG_PCI_EXPRESS', if_true: files('pcie_port.c', 'pcie_host.c'))
-softmmu_ss.add_all(when: 'CONFIG_PCI', if_true: pci_ss)
+pci_ss.add(files('pcie_doe.c'))
+system_ss.add(when: 'CONFIG_PCI_EXPRESS', if_true: files('pcie_port.c', 'pcie_host.c'))
+system_ss.add_all(when: 'CONFIG_PCI', if_true: pci_ss)
-softmmu_ss.add(when: 'CONFIG_PCI', if_false: files('pci-stub.c'))
-softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('pci-stub.c'))
+system_ss.add(when: 'CONFIG_PCI', if_false: files('pci-stub.c'))
diff --git a/hw/pci/msi.c b/hw/pci/msi.c
index 47d2b0f33c..8104ac1d91 100644
--- a/hw/pci/msi.c
+++ b/hw/pci/msi.c
@@ -23,6 +23,9 @@
#include "hw/xen/xen.h"
#include "qemu/range.h"
#include "qapi/error.h"
+#include "sysemu/xen.h"
+
+#include "hw/i386/kvm/xen_evtchn.h"
/* PCI_MSI_ADDRESS_LO */
#define PCI_MSI_ADDRESS_LO_MASK (~0x3)
@@ -134,7 +137,7 @@ void msi_set_message(PCIDevice *dev, MSIMessage msg)
pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data);
}
-MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector)
+static MSIMessage msi_prepare_message(PCIDevice *dev, unsigned int vector)
{
uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
@@ -159,6 +162,11 @@ MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector)
return msg;
}
+MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector)
+{
+ return dev->msi_prepare_message(dev, vector);
+}
+
bool msi_enabled(const PCIDevice *dev)
{
return msi_present(dev) &&
@@ -241,6 +249,8 @@ int msi_init(struct PCIDevice *dev, uint8_t offset,
0xffffffff >> (PCI_MSI_VECTORS_MAX - nr_vectors));
}
+ dev->msi_prepare_message = msi_prepare_message;
+
return 0;
}
@@ -256,6 +266,7 @@ void msi_uninit(struct PCIDevice *dev)
cap_size = msi_cap_sizeof(flags);
pci_del_capability(dev, PCI_CAP_ID_MSI, cap_size);
dev->cap_present &= ~QEMU_PCI_CAP_MSI;
+ dev->msi_prepare_message = NULL;
MSI_DEV_PRINTF(dev, "uninit\n");
}
@@ -298,7 +309,7 @@ bool msi_is_masked(const PCIDevice *dev, unsigned int vector)
}
data = pci_get_word(dev->config + msi_data_off(dev, msi64bit));
- if (xen_is_pirq_msi(data)) {
+ if (xen_enabled() && xen_is_pirq_msi(data)) {
return false;
}
@@ -307,6 +318,38 @@ bool msi_is_masked(const PCIDevice *dev, unsigned int vector)
return mask & (1U << vector);
}
+void msi_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp)
+{
+ uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
+ bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
+ uint32_t irq_state, vector_mask, pending;
+
+ if (vector >= PCI_MSI_VECTORS_MAX) {
+ error_setg(errp, "msi: vector %d not allocated. max vector is %d",
+ vector, (PCI_MSI_VECTORS_MAX - 1));
+ return;
+ }
+
+ vector_mask = (1U << vector);
+
+ irq_state = pci_get_long(dev->config + msi_mask_off(dev, msi64bit));
+
+ if (mask) {
+ irq_state |= vector_mask;
+ } else {
+ irq_state &= ~vector_mask;
+ }
+
+ pci_set_long(dev->config + msi_mask_off(dev, msi64bit), irq_state);
+
+ pending = pci_get_long(dev->config + msi_pending_off(dev, msi64bit));
+ if (!mask && (pending & vector_mask)) {
+ pending &= ~vector_mask;
+ pci_set_long(dev->config + msi_pending_off(dev, msi64bit), pending);
+ msi_notify(dev, vector);
+ }
+}
+
void msi_notify(PCIDevice *dev, unsigned int vector)
{
uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
@@ -334,11 +377,7 @@ void msi_notify(PCIDevice *dev, unsigned int vector)
void msi_send_message(PCIDevice *dev, MSIMessage msg)
{
- MemTxAttrs attrs = {};
-
- attrs.requester_id = pci_requester_id(dev);
- address_space_stl_le(&dev->bus_master_as, msg.address, msg.data,
- attrs, NULL);
+ dev->msi_trigger(dev, msg);
}
/* Normally called by pci_default_write_config(). */
@@ -378,6 +417,15 @@ void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len)
fprintf(stderr, "\n");
#endif
+ if (xen_mode == XEN_EMULATE) {
+ for (vector = 0; vector < msi_nr_vectors(flags); vector++) {
+ MSIMessage msg = msi_prepare_message(dev, vector);
+
+ xen_evtchn_snoop_msi(dev, false, vector, msg.address, msg.data,
+ msi_is_masked(dev, vector));
+ }
+ }
+
if (!(flags & PCI_MSI_FLAGS_ENABLE)) {
return;
}
diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index ae9331cd0b..487e49834e 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -26,12 +26,14 @@
#include "qapi/error.h"
#include "trace.h"
+#include "hw/i386/kvm/xen_evtchn.h"
+
/* MSI enable bit and maskall bit are in byte 1 in FLAGS register */
#define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1)
#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
#define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
-MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
+static MSIMessage msix_prepare_message(PCIDevice *dev, unsigned vector)
{
uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
MSIMessage msg;
@@ -41,6 +43,11 @@ MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
return msg;
}
+MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
+{
+ return dev->msix_prepare_message(dev, vector);
+}
+
/*
* Special API for POWER to configure the vectors through
* a side channel. Should never be used by devices.
@@ -119,6 +126,13 @@ static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
{
bool is_masked = msix_is_masked(dev, vector);
+ if (xen_mode == XEN_EMULATE) {
+ MSIMessage msg = msix_prepare_message(dev, vector);
+
+ xen_evtchn_snoop_msi(dev, true, vector, msg.address, msg.data,
+ is_masked);
+ }
+
if (is_masked == was_masked) {
return;
}
@@ -131,6 +145,26 @@ static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
}
}
+void msix_set_mask(PCIDevice *dev, int vector, bool mask)
+{
+ unsigned offset;
+ bool was_masked;
+
+ assert(vector < dev->msix_entries_nr);
+
+ offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
+
+ was_masked = msix_is_masked(dev, vector);
+
+ if (mask) {
+ dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
+ } else {
+ dev->msix_table[offset] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
+ }
+
+ msix_handle_mask_update(dev, vector, was_masked);
+}
+
static bool msix_masked(PCIDevice *dev)
{
return dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK;
@@ -344,6 +378,8 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries,
"msix-pba", pba_size);
memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio);
+ dev->msix_prepare_message = msix_prepare_message;
+
return 0;
}
@@ -429,6 +465,7 @@ void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
g_free(dev->msix_entry_used);
dev->msix_entry_used = NULL;
dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
+ dev->msix_prepare_message = NULL;
}
void msix_uninit_exclusive_bar(PCIDevice *dev)
@@ -489,7 +526,9 @@ void msix_notify(PCIDevice *dev, unsigned vector)
{
MSIMessage msg;
- if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) {
+ assert(vector < dev->msix_entries_nr);
+
+ if (!dev->msix_entry_used[vector]) {
return;
}
@@ -525,20 +564,17 @@ void msix_reset(PCIDevice *dev)
* don't want to follow the spec suggestion can declare all vectors as used. */
/* Mark vector as used. */
-int msix_vector_use(PCIDevice *dev, unsigned vector)
+void msix_vector_use(PCIDevice *dev, unsigned vector)
{
- if (vector >= dev->msix_entries_nr) {
- return -EINVAL;
- }
-
+ assert(vector < dev->msix_entries_nr);
dev->msix_entry_used[vector]++;
- return 0;
}
/* Mark vector as unused. */
void msix_vector_unuse(PCIDevice *dev, unsigned vector)
{
- if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) {
+ assert(vector < dev->msix_entries_nr);
+ if (!dev->msix_entry_used[vector]) {
return;
}
if (--dev->msix_entry_used[vector]) {
@@ -612,6 +648,7 @@ undo:
}
dev->msix_vector_use_notifier = NULL;
dev->msix_vector_release_notifier = NULL;
+ dev->msix_vector_poll_notifier = NULL;
return ret;
}
@@ -648,7 +685,7 @@ static int get_msix_state(QEMUFile *f, void *pv, size_t size,
return 0;
}
-static VMStateInfo vmstate_info_msix = {
+static const VMStateInfo vmstate_info_msix = {
.name = "msix state",
.get = get_msix_state,
.put = put_msix_state,
@@ -656,7 +693,7 @@ static VMStateInfo vmstate_info_msix = {
const VMStateDescription vmstate_msix = {
.name = "msix",
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
{
.name = "msix",
.version_id = 0,
diff --git a/hw/pci/pci-hmp-cmds.c b/hw/pci/pci-hmp-cmds.c
new file mode 100644
index 0000000000..b09fce9377
--- /dev/null
+++ b/hw/pci/pci-hmp-cmds.c
@@ -0,0 +1,239 @@
+/*
+ * HMP commands related to PCI
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/pci_device.h"
+#include "monitor/hmp.h"
+#include "monitor/monitor.h"
+#include "pci-internal.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qapi-commands-pci.h"
+#include "qemu/cutils.h"
+
+static void hmp_info_pci_device(Monitor *mon, const PciDeviceInfo *dev)
+{
+ PciMemoryRegionList *region;
+
+ monitor_printf(mon, " Bus %2" PRId64 ", ", dev->bus);
+ monitor_printf(mon, "device %3" PRId64 ", function %" PRId64 ":\n",
+ dev->slot, dev->function);
+ monitor_printf(mon, " ");
+
+ if (dev->class_info->desc) {
+ monitor_puts(mon, dev->class_info->desc);
+ } else {
+ monitor_printf(mon, "Class %04" PRId64, dev->class_info->q_class);
+ }
+
+ monitor_printf(mon, ": PCI device %04" PRIx64 ":%04" PRIx64 "\n",
+ dev->id->vendor, dev->id->device);
+ if (dev->id->has_subsystem_vendor && dev->id->has_subsystem) {
+ monitor_printf(mon, " PCI subsystem %04" PRIx64 ":%04" PRIx64 "\n",
+ dev->id->subsystem_vendor, dev->id->subsystem);
+ }
+
+ if (dev->has_irq) {
+ monitor_printf(mon, " IRQ %" PRId64 ", pin %c\n",
+ dev->irq, (char)('A' + dev->irq_pin - 1));
+ }
+
+ if (dev->pci_bridge) {
+ monitor_printf(mon, " BUS %" PRId64 ".\n",
+ dev->pci_bridge->bus->number);
+ monitor_printf(mon, " secondary bus %" PRId64 ".\n",
+ dev->pci_bridge->bus->secondary);
+ monitor_printf(mon, " subordinate bus %" PRId64 ".\n",
+ dev->pci_bridge->bus->subordinate);
+
+ monitor_printf(mon, " IO range [0x%04"PRIx64", 0x%04"PRIx64"]\n",
+ dev->pci_bridge->bus->io_range->base,
+ dev->pci_bridge->bus->io_range->limit);
+
+ monitor_printf(mon,
+ " memory range [0x%08"PRIx64", 0x%08"PRIx64"]\n",
+ dev->pci_bridge->bus->memory_range->base,
+ dev->pci_bridge->bus->memory_range->limit);
+
+ monitor_printf(mon, " prefetchable memory range "
+ "[0x%08"PRIx64", 0x%08"PRIx64"]\n",
+ dev->pci_bridge->bus->prefetchable_range->base,
+ dev->pci_bridge->bus->prefetchable_range->limit);
+ }
+
+ for (region = dev->regions; region; region = region->next) {
+ uint64_t addr, size;
+
+ addr = region->value->address;
+ size = region->value->size;
+
+ monitor_printf(mon, " BAR%" PRId64 ": ", region->value->bar);
+
+ if (!strcmp(region->value->type, "io")) {
+ monitor_printf(mon, "I/O at 0x%04" PRIx64
+ " [0x%04" PRIx64 "].\n",
+ addr, addr + size - 1);
+ } else {
+ monitor_printf(mon, "%d bit%s memory at 0x%08" PRIx64
+ " [0x%08" PRIx64 "].\n",
+ region->value->mem_type_64 ? 64 : 32,
+ region->value->prefetch ? " prefetchable" : "",
+ addr, addr + size - 1);
+ }
+ }
+
+ monitor_printf(mon, " id \"%s\"\n", dev->qdev_id);
+
+ if (dev->pci_bridge) {
+ if (dev->pci_bridge->has_devices) {
+ PciDeviceInfoList *cdev;
+ for (cdev = dev->pci_bridge->devices; cdev; cdev = cdev->next) {
+ hmp_info_pci_device(mon, cdev->value);
+ }
+ }
+ }
+}
+
+void hmp_info_pci(Monitor *mon, const QDict *qdict)
+{
+ PciInfoList *info_list, *info;
+
+ info_list = qmp_query_pci(&error_abort);
+
+ for (info = info_list; info; info = info->next) {
+ PciDeviceInfoList *dev;
+
+ for (dev = info->value->devices; dev; dev = dev->next) {
+ hmp_info_pci_device(mon, dev->value);
+ }
+ }
+
+ qapi_free_PciInfoList(info_list);
+}
+
+void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent)
+{
+ PCIDevice *d = (PCIDevice *)dev;
+ int class = pci_get_word(d->config + PCI_CLASS_DEVICE);
+ const pci_class_desc *desc = get_class_desc(class);
+ char ctxt[64];
+ PCIIORegion *r;
+ int i;
+
+ if (desc->desc) {
+ snprintf(ctxt, sizeof(ctxt), "%s", desc->desc);
+ } else {
+ snprintf(ctxt, sizeof(ctxt), "Class %04x", class);
+ }
+
+ monitor_printf(mon, "%*sclass %s, addr %02x:%02x.%x, "
+ "pci id %04x:%04x (sub %04x:%04x)\n",
+ indent, "", ctxt, pci_dev_bus_num(d),
+ PCI_SLOT(d->devfn), PCI_FUNC(d->devfn),
+ pci_get_word(d->config + PCI_VENDOR_ID),
+ pci_get_word(d->config + PCI_DEVICE_ID),
+ pci_get_word(d->config + PCI_SUBSYSTEM_VENDOR_ID),
+ pci_get_word(d->config + PCI_SUBSYSTEM_ID));
+ for (i = 0; i < PCI_NUM_REGIONS; i++) {
+ r = &d->io_regions[i];
+ if (!r->size) {
+ continue;
+ }
+ monitor_printf(mon, "%*sbar %d: %s at 0x%"FMT_PCIBUS
+ " [0x%"FMT_PCIBUS"]\n",
+ indent, "",
+ i, r->type & PCI_BASE_ADDRESS_SPACE_IO ? "i/o" : "mem",
+ r->addr, r->addr + r->size - 1);
+ }
+}
+
+void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict)
+{
+ Error *err = NULL;
+ const char *id = qdict_get_str(qdict, "id");
+ const char *error_name;
+ uint32_t error_status;
+ unsigned int num;
+ bool correctable;
+ PCIDevice *dev;
+ PCIEAERErr aer_err;
+ int ret;
+
+ ret = pci_qdev_find_device(id, &dev);
+ if (ret == -ENODEV) {
+ error_setg(&err, "device '%s' not found", id);
+ goto out;
+ }
+ if (ret < 0 || !pci_is_express(dev)) {
+ error_setg(&err, "device '%s' is not a PCIe device", id);
+ goto out;
+ }
+
+ error_name = qdict_get_str(qdict, "error_status");
+ if (pcie_aer_parse_error_string(error_name, &error_status, &correctable)) {
+ if (qemu_strtoui(error_name, NULL, 0, &num) < 0) {
+ error_setg(&err, "invalid error status value '%s'", error_name);
+ goto out;
+ }
+ error_status = num;
+ correctable = qdict_get_try_bool(qdict, "correctable", false);
+ } else {
+ if (qdict_haskey(qdict, "correctable")) {
+ error_setg(&err, "-c is only valid with numeric error status");
+ goto out;
+ }
+ }
+ aer_err.status = error_status;
+ aer_err.source_id = pci_requester_id(dev);
+
+ aer_err.flags = 0;
+ if (correctable) {
+ aer_err.flags |= PCIE_AER_ERR_IS_CORRECTABLE;
+ }
+ if (qdict_get_try_bool(qdict, "advisory_non_fatal", false)) {
+ aer_err.flags |= PCIE_AER_ERR_MAYBE_ADVISORY;
+ }
+ if (qdict_haskey(qdict, "header0")) {
+ aer_err.flags |= PCIE_AER_ERR_HEADER_VALID;
+ }
+ if (qdict_haskey(qdict, "prefix0")) {
+ aer_err.flags |= PCIE_AER_ERR_TLP_PREFIX_PRESENT;
+ }
+
+ aer_err.header[0] = qdict_get_try_int(qdict, "header0", 0);
+ aer_err.header[1] = qdict_get_try_int(qdict, "header1", 0);
+ aer_err.header[2] = qdict_get_try_int(qdict, "header2", 0);
+ aer_err.header[3] = qdict_get_try_int(qdict, "header3", 0);
+
+ aer_err.prefix[0] = qdict_get_try_int(qdict, "prefix0", 0);
+ aer_err.prefix[1] = qdict_get_try_int(qdict, "prefix1", 0);
+ aer_err.prefix[2] = qdict_get_try_int(qdict, "prefix2", 0);
+ aer_err.prefix[3] = qdict_get_try_int(qdict, "prefix3", 0);
+
+ ret = pcie_aer_inject_error(dev, &aer_err);
+ if (ret < 0) {
+ error_setg_errno(&err, -ret, "failed to inject error");
+ goto out;
+ }
+
+
+ monitor_printf(mon, "OK id: %s root bus: %s, bus: %x devfn: %x.%x\n",
+ id, pci_root_bus_path(dev), pci_dev_bus_num(dev),
+ PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+
+out:
+ hmp_handle_error(mon, err);
+}
diff --git a/hw/pci/pci-internal.h b/hw/pci/pci-internal.h
new file mode 100644
index 0000000000..a7d6d8a732
--- /dev/null
+++ b/hw/pci/pci-internal.h
@@ -0,0 +1,24 @@
+#ifndef HW_PCI_PCI_INTERNAL_H
+#define HW_PCI_PCI_INTERNAL_H
+
+#include "qemu/queue.h"
+
+typedef struct {
+ uint16_t class;
+ const char *desc;
+ const char *fw_name;
+ uint16_t fw_ign_bits;
+} pci_class_desc;
+
+typedef QLIST_HEAD(, PCIHostState) PCIHostStateList;
+
+extern PCIHostStateList pci_host_bridges;
+
+const pci_class_desc *get_class_desc(int class);
+PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num);
+void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent);
+
+int pcie_aer_parse_error_string(const char *error_name,
+ uint32_t *status, bool *correctable);
+
+#endif
diff --git a/hw/pci/pci-qmp-cmds.c b/hw/pci/pci-qmp-cmds.c
new file mode 100644
index 0000000000..5d9f4817f5
--- /dev/null
+++ b/hw/pci/pci-qmp-cmds.c
@@ -0,0 +1,199 @@
+/*
+ * QMP commands related to PCI
+ *
+ * Copyright (c) 2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/pci_bridge.h"
+#include "pci-internal.h"
+#include "qapi/qapi-commands-pci.h"
+
+static PciDeviceInfoList *qmp_query_pci_devices(PCIBus *bus, int bus_num);
+
+static PciMemoryRegionList *qmp_query_pci_regions(const PCIDevice *dev)
+{
+ PciMemoryRegionList *head = NULL, **tail = &head;
+ int i;
+
+ for (i = 0; i < PCI_NUM_REGIONS; i++) {
+ const PCIIORegion *r = &dev->io_regions[i];
+ PciMemoryRegion *region;
+
+ if (!r->size) {
+ continue;
+ }
+
+ region = g_malloc0(sizeof(*region));
+
+ if (r->type & PCI_BASE_ADDRESS_SPACE_IO) {
+ region->type = g_strdup("io");
+ } else {
+ region->type = g_strdup("memory");
+ region->has_prefetch = true;
+ region->prefetch = !!(r->type & PCI_BASE_ADDRESS_MEM_PREFETCH);
+ region->has_mem_type_64 = true;
+ region->mem_type_64 = !!(r->type & PCI_BASE_ADDRESS_MEM_TYPE_64);
+ }
+
+ region->bar = i;
+ region->address = r->addr;
+ region->size = r->size;
+
+ QAPI_LIST_APPEND(tail, region);
+ }
+
+ return head;
+}
+
+static PciBridgeInfo *qmp_query_pci_bridge(PCIDevice *dev, PCIBus *bus,
+ int bus_num)
+{
+ PciBridgeInfo *info;
+ PciMemoryRange *range;
+
+ info = g_new0(PciBridgeInfo, 1);
+
+ info->bus = g_new0(PciBusInfo, 1);
+ info->bus->number = dev->config[PCI_PRIMARY_BUS];
+ info->bus->secondary = dev->config[PCI_SECONDARY_BUS];
+ info->bus->subordinate = dev->config[PCI_SUBORDINATE_BUS];
+
+ range = info->bus->io_range = g_new0(PciMemoryRange, 1);
+ range->base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_IO);
+ range->limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_IO);
+
+ range = info->bus->memory_range = g_new0(PciMemoryRange, 1);
+ range->base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_MEMORY);
+ range->limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_MEMORY);
+
+ range = info->bus->prefetchable_range = g_new0(PciMemoryRange, 1);
+ range->base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_MEM_PREFETCH);
+ range->limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_MEM_PREFETCH);
+
+ if (dev->config[PCI_SECONDARY_BUS] != 0) {
+ PCIBus *child_bus = pci_find_bus_nr(bus,
+ dev->config[PCI_SECONDARY_BUS]);
+ if (child_bus) {
+ info->has_devices = true;
+ info->devices = qmp_query_pci_devices(child_bus,
+ dev->config[PCI_SECONDARY_BUS]);
+ }
+ }
+
+ return info;
+}
+
+static PciDeviceInfo *qmp_query_pci_device(PCIDevice *dev, PCIBus *bus,
+ int bus_num)
+{
+ const pci_class_desc *desc;
+ PciDeviceInfo *info;
+ uint8_t type;
+ int class;
+
+ info = g_new0(PciDeviceInfo, 1);
+ info->bus = bus_num;
+ info->slot = PCI_SLOT(dev->devfn);
+ info->function = PCI_FUNC(dev->devfn);
+
+ info->class_info = g_new0(PciDeviceClass, 1);
+ class = pci_get_word(dev->config + PCI_CLASS_DEVICE);
+ info->class_info->q_class = class;
+ desc = get_class_desc(class);
+ if (desc->desc) {
+ info->class_info->desc = g_strdup(desc->desc);
+ }
+
+ info->id = g_new0(PciDeviceId, 1);
+ info->id->vendor = pci_get_word(dev->config + PCI_VENDOR_ID);
+ info->id->device = pci_get_word(dev->config + PCI_DEVICE_ID);
+ info->regions = qmp_query_pci_regions(dev);
+ info->qdev_id = g_strdup(dev->qdev.id ? dev->qdev.id : "");
+
+ info->irq_pin = dev->config[PCI_INTERRUPT_PIN];
+ if (dev->config[PCI_INTERRUPT_PIN] != 0) {
+ info->has_irq = true;
+ info->irq = dev->config[PCI_INTERRUPT_LINE];
+ }
+
+ type = dev->config[PCI_HEADER_TYPE] & ~PCI_HEADER_TYPE_MULTI_FUNCTION;
+ if (type == PCI_HEADER_TYPE_BRIDGE) {
+ info->pci_bridge = qmp_query_pci_bridge(dev, bus, bus_num);
+ } else if (type == PCI_HEADER_TYPE_NORMAL) {
+ info->id->has_subsystem = info->id->has_subsystem_vendor = true;
+ info->id->subsystem = pci_get_word(dev->config + PCI_SUBSYSTEM_ID);
+ info->id->subsystem_vendor =
+ pci_get_word(dev->config + PCI_SUBSYSTEM_VENDOR_ID);
+ } else if (type == PCI_HEADER_TYPE_CARDBUS) {
+ info->id->has_subsystem = info->id->has_subsystem_vendor = true;
+ info->id->subsystem = pci_get_word(dev->config + PCI_CB_SUBSYSTEM_ID);
+ info->id->subsystem_vendor =
+ pci_get_word(dev->config + PCI_CB_SUBSYSTEM_VENDOR_ID);
+ }
+
+ return info;
+}
+
+static PciDeviceInfoList *qmp_query_pci_devices(PCIBus *bus, int bus_num)
+{
+ PciDeviceInfoList *head = NULL, **tail = &head;
+ PCIDevice *dev;
+ int devfn;
+
+ for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
+ dev = bus->devices[devfn];
+ if (dev) {
+ QAPI_LIST_APPEND(tail, qmp_query_pci_device(dev, bus, bus_num));
+ }
+ }
+
+ return head;
+}
+
+static PciInfo *qmp_query_pci_bus(PCIBus *bus, int bus_num)
+{
+ PciInfo *info = NULL;
+
+ bus = pci_find_bus_nr(bus, bus_num);
+ if (bus) {
+ info = g_malloc0(sizeof(*info));
+ info->bus = bus_num;
+ info->devices = qmp_query_pci_devices(bus, bus_num);
+ }
+
+ return info;
+}
+
+PciInfoList *qmp_query_pci(Error **errp)
+{
+ PciInfoList *head = NULL, **tail = &head;
+ PCIHostState *host_bridge;
+
+ QLIST_FOREACH(host_bridge, &pci_host_bridges, next) {
+ QAPI_LIST_APPEND(tail,
+ qmp_query_pci_bus(host_bridge->bus,
+ pci_bus_num(host_bridge->bus)));
+ }
+
+ return head;
+}
diff --git a/hw/pci/pci-stub.c b/hw/pci/pci-stub.c
index 3a027c42e4..f0508682d2 100644
--- a/hw/pci/pci-stub.c
+++ b/hw/pci/pci-stub.c
@@ -19,11 +19,9 @@
*/
#include "qemu/osdep.h"
-#include "sysemu/sysemu.h"
#include "monitor/monitor.h"
-#include "qapi/error.h"
+#include "monitor/hmp.h"
#include "qapi/qapi-commands-pci.h"
-#include "qapi/qmp/qerror.h"
#include "hw/pci/pci.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
@@ -33,10 +31,13 @@ bool pci_available;
PciInfoList *qmp_query_pci(Error **errp)
{
- error_setg(errp, QERR_UNSUPPORTED);
return NULL;
}
+void hmp_info_pci(Monitor *mon, const QDict *qdict)
+{
+}
+
void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict)
{
monitor_printf(mon, "PCI devices not supported\n");
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 23d2ae2ab2..e7a39cb203 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -23,7 +23,6 @@
*/
#include "qemu/osdep.h"
-#include "qemu-common.h"
#include "qemu/datadir.h"
#include "qemu/units.h"
#include "hw/irq.h"
@@ -35,9 +34,9 @@
#include "hw/qdev-properties-system.h"
#include "migration/qemu-file-types.h"
#include "migration/vmstate.h"
-#include "monitor/monitor.h"
#include "net/net.h"
#include "sysemu/numa.h"
+#include "sysemu/runstate.h"
#include "sysemu/sysemu.h"
#include "hw/loader.h"
#include "qemu/error-report.h"
@@ -48,8 +47,11 @@
#include "hw/hotplug.h"
#include "hw/boards.h"
#include "qapi/error.h"
-#include "qapi/qapi-commands-pci.h"
#include "qemu/cutils.h"
+#include "pci-internal.h"
+
+#include "hw/xen/xen.h"
+#include "hw/i386/kvm/xen_evtchn.h"
//#define DEBUG_PCI
#ifdef DEBUG_PCI
@@ -60,10 +62,10 @@
bool pci_available = true;
-static void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent);
static char *pcibus_get_dev_path(DeviceState *dev);
static char *pcibus_get_fw_dev_path(DeviceState *dev);
-static void pcibus_reset(BusState *qbus);
+static void pcibus_reset_hold(Object *obj);
+static bool pcie_has_upstream_port(PCIDevice *dev);
static Property pci_props[] = {
DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
@@ -79,6 +81,10 @@ static Property pci_props[] = {
DEFINE_PROP_STRING("failover_pair_id", PCIDevice,
failover_pair_id),
DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0),
+ DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present,
+ QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
+ DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
+ QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
DEFINE_PROP_END_OF_LIST()
};
@@ -86,7 +92,7 @@ static const VMStateDescription vmstate_pcibus = {
.name = "PCIBUS",
.version_id = 1,
.minimum_version_id = 1,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_INT32_EQUAL(nirq, PCIBus, NULL),
VMSTATE_VARRAY_INT32(irq_count, PCIBus,
nirq, 0, vmstate_info_int32,
@@ -95,6 +101,21 @@ static const VMStateDescription vmstate_pcibus = {
}
};
+static gint g_cmp_uint32(gconstpointer a, gconstpointer b, gpointer user_data)
+{
+ return a - b;
+}
+
+static GSequence *pci_acpi_index_list(void)
+{
+ static GSequence *used_acpi_index_list;
+
+ if (!used_acpi_index_list) {
+ used_acpi_index_list = g_sequence_new(NULL);
+ }
+ return used_acpi_index_list;
+}
+
static void pci_init_bus_master(PCIDevice *pci_dev)
{
AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
@@ -126,7 +147,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp)
bus->machine_done.notify = pcibus_machine_done;
qemu_add_machine_init_done_notifier(&bus->machine_done);
- vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_pcibus, bus);
+ vmstate_register_any(NULL, &vmstate_pcibus, bus);
}
static void pcie_bus_realize(BusState *qbus, Error **errp)
@@ -181,13 +202,15 @@ static void pci_bus_class_init(ObjectClass *klass, void *data)
{
BusClass *k = BUS_CLASS(klass);
PCIBusClass *pbc = PCI_BUS_CLASS(klass);
+ ResettableClass *rc = RESETTABLE_CLASS(klass);
k->print_dev = pcibus_dev_print;
k->get_dev_path = pcibus_get_dev_path;
k->get_fw_dev_path = pcibus_get_fw_dev_path;
k->realize = pci_bus_realize;
k->unrealize = pci_bus_unrealize;
- k->reset = pcibus_reset;
+
+ rc->phases.hold = pcibus_reset_hold;
pbc->bus_num = pcibus_num;
pbc->numa_node = pcibus_numa_node;
@@ -201,6 +224,11 @@ static const TypeInfo pci_bus_info = {
.class_init = pci_bus_class_init,
};
+static const TypeInfo cxl_interface_info = {
+ .name = INTERFACE_CXL_DEVICE,
+ .parent = TYPE_INTERFACE,
+};
+
static const TypeInfo pcie_interface_info = {
.name = INTERFACE_PCIE_DEVICE,
.parent = TYPE_INTERFACE,
@@ -224,7 +252,12 @@ static const TypeInfo pcie_bus_info = {
.class_init = pcie_bus_class_init,
};
-static PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num);
+static const TypeInfo cxl_bus_info = {
+ .name = TYPE_CXL_BUS,
+ .parent = TYPE_PCIE_BUS,
+ .class_init = pcie_bus_class_init,
+};
+
static void pci_update_mappings(PCIDevice *d);
static void pci_irq_handler(void *opaque, int irq_num, int level);
static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, Error **);
@@ -233,12 +266,15 @@ static void pci_del_option_rom(PCIDevice *pdev);
static uint16_t pci_default_sub_vendor_id = PCI_SUBVENDOR_ID_REDHAT_QUMRANET;
static uint16_t pci_default_sub_device_id = PCI_SUBDEVICE_ID_QEMU;
-static QLIST_HEAD(, PCIHostState) pci_host_bridges;
+PCIHostStateList pci_host_bridges;
int pci_bar(PCIDevice *d, int reg)
{
uint8_t type;
+ /* PCIe virtual functions do not have their own BARs */
+ assert(!pci_is_vf(d));
+
if (reg != PCI_ROM_SLOT)
return PCI_BASE_ADDRESS_0 + reg * 4;
@@ -269,8 +305,13 @@ static void pci_change_irq_level(PCIDevice *pci_dev, int irq_num, int change)
{
PCIBus *bus;
for (;;) {
+ int dev_irq = irq_num;
bus = pci_get_bus(pci_dev);
+ assert(bus->map_irq);
irq_num = bus->map_irq(pci_dev, irq_num);
+ trace_pci_route_irq(dev_irq, DEVICE(pci_dev)->canonical_path, irq_num,
+ pci_bus_is_root(bus) ? "root-complex"
+ : DEVICE(bus->parent_dev)->canonical_path);
if (bus->set_irq)
break;
pci_dev = bus->parent_dev;
@@ -304,10 +345,50 @@ void pci_device_deassert_intx(PCIDevice *dev)
}
}
-static void pci_do_device_reset(PCIDevice *dev)
+static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg)
+{
+ MemTxAttrs attrs = {};
+
+ /*
+ * Xen uses the high bits of the address to contain some of the bits
+ * of the PIRQ#. Therefore we can't just send the write cycle and
+ * trust that it's caught by the APIC at 0xfee00000 because the
+ * target of the write might be e.g. 0x0x1000fee46000 for PIRQ#4166.
+ * So we intercept the delivery here instead of in kvm_send_msi().
+ */
+ if (xen_mode == XEN_EMULATE &&
+ xen_evtchn_deliver_pirq_msi(msg.address, msg.data)) {
+ return;
+ }
+ attrs.requester_id = pci_requester_id(dev);
+ address_space_stl_le(&dev->bus_master_as, msg.address, msg.data,
+ attrs, NULL);
+}
+
+static void pci_reset_regions(PCIDevice *dev)
{
int r;
+ if (pci_is_vf(dev)) {
+ return;
+ }
+
+ for (r = 0; r < PCI_NUM_REGIONS; ++r) {
+ PCIIORegion *region = &dev->io_regions[r];
+ if (!region->size) {
+ continue;
+ }
+ if (!(region->type & PCI_BASE_ADDRESS_SPACE_IO) &&
+ region->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+ pci_set_quad(dev->config + pci_bar(dev, r), region->type);
+ } else {
+ pci_set_long(dev->config + pci_bar(dev, r), region->type);
+ }
+ }
+}
+
+static void pci_do_device_reset(PCIDevice *dev)
+{
pci_device_deassert_intx(dev);
assert(dev->irq_state == 0);
@@ -323,23 +404,12 @@ static void pci_do_device_reset(PCIDevice *dev)
pci_get_word(dev->wmask + PCI_INTERRUPT_LINE) |
pci_get_word(dev->w1cmask + PCI_INTERRUPT_LINE));
dev->config[PCI_CACHE_LINE_SIZE] = 0x0;
- for (r = 0; r < PCI_NUM_REGIONS; ++r) {
- PCIIORegion *region = &dev->io_regions[r];
- if (!region->size) {
- continue;
- }
-
- if (!(region->type & PCI_BASE_ADDRESS_SPACE_IO) &&
- region->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
- pci_set_quad(dev->config + pci_bar(dev, r), region->type);
- } else {
- pci_set_long(dev->config + pci_bar(dev, r), region->type);
- }
- }
+ pci_reset_regions(dev);
pci_update_mappings(dev);
msi_reset(dev);
msix_reset(dev);
+ pcie_sriov_pf_reset(dev);
}
/*
@@ -348,18 +418,18 @@ static void pci_do_device_reset(PCIDevice *dev)
*/
void pci_device_reset(PCIDevice *dev)
{
- qdev_reset_all(&dev->qdev);
+ device_cold_reset(&dev->qdev);
pci_do_device_reset(dev);
}
/*
* Trigger pci bus reset under a given bus.
- * Called via qbus_reset_all on RST# assert, after the devices
- * have been reset qdev_reset_all-ed already.
+ * Called via bus_cold_reset on RST# assert, after the devices
+ * have been reset device_cold_reset-ed already.
*/
-static void pcibus_reset(BusState *qbus)
+static void pcibus_reset_hold(Object *obj)
{
- PCIBus *bus = DO_UPCAST(PCIBus, qbus, qbus);
+ PCIBus *bus = PCI_BUS(obj);
int i;
for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
@@ -432,16 +502,15 @@ bool pci_bus_bypass_iommu(PCIBus *bus)
return host_bridge->bypass_iommu;
}
-static void pci_root_bus_init(PCIBus *bus, DeviceState *parent,
- MemoryRegion *address_space_mem,
- MemoryRegion *address_space_io,
- uint8_t devfn_min)
+static void pci_root_bus_internal_init(PCIBus *bus, DeviceState *parent,
+ MemoryRegion *mem, MemoryRegion *io,
+ uint8_t devfn_min)
{
assert(PCI_FUNC(devfn_min) == 0);
bus->devfn_min = devfn_min;
bus->slot_reserved_mask = 0x0;
- bus->address_space_mem = address_space_mem;
- bus->address_space_io = address_space_io;
+ bus->address_space_mem = mem;
+ bus->address_space_io = io;
bus->flags |= PCI_BUS_IS_ROOT;
/* host bridge */
@@ -455,32 +524,28 @@ static void pci_bus_uninit(PCIBus *bus)
pci_host_bus_unregister(BUS(bus)->parent);
}
-bool pci_bus_is_express(PCIBus *bus)
+bool pci_bus_is_express(const PCIBus *bus)
{
return object_dynamic_cast(OBJECT(bus), TYPE_PCIE_BUS);
}
-void pci_root_bus_new_inplace(PCIBus *bus, size_t bus_size, DeviceState *parent,
- const char *name,
- MemoryRegion *address_space_mem,
- MemoryRegion *address_space_io,
- uint8_t devfn_min, const char *typename)
+void pci_root_bus_init(PCIBus *bus, size_t bus_size, DeviceState *parent,
+ const char *name,
+ MemoryRegion *mem, MemoryRegion *io,
+ uint8_t devfn_min, const char *typename)
{
- qbus_create_inplace(bus, bus_size, typename, parent, name);
- pci_root_bus_init(bus, parent, address_space_mem, address_space_io,
- devfn_min);
+ qbus_init(bus, bus_size, typename, parent, name);
+ pci_root_bus_internal_init(bus, parent, mem, io, devfn_min);
}
PCIBus *pci_root_bus_new(DeviceState *parent, const char *name,
- MemoryRegion *address_space_mem,
- MemoryRegion *address_space_io,
+ MemoryRegion *mem, MemoryRegion *io,
uint8_t devfn_min, const char *typename)
{
PCIBus *bus;
- bus = PCI_BUS(qbus_create(typename, parent, name));
- pci_root_bus_init(bus, parent, address_space_mem, address_space_io,
- devfn_min);
+ bus = PCI_BUS(qbus_new(typename, parent, name));
+ pci_root_bus_internal_init(bus, parent, mem, io, devfn_min);
return bus;
}
@@ -491,16 +556,21 @@ void pci_root_bus_cleanup(PCIBus *bus)
qbus_unrealize(BUS(bus));
}
-void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
+void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq,
void *irq_opaque, int nirq)
{
bus->set_irq = set_irq;
- bus->map_irq = map_irq;
bus->irq_opaque = irq_opaque;
bus->nirq = nirq;
+ g_free(bus->irq_count);
bus->irq_count = g_malloc0(nirq * sizeof(bus->irq_count[0]));
}
+void pci_bus_map_irqs(PCIBus *bus, pci_map_irq_fn map_irq)
+{
+ bus->map_irq = map_irq;
+}
+
void pci_bus_irqs_cleanup(PCIBus *bus)
{
bus->set_irq = NULL;
@@ -508,21 +578,21 @@ void pci_bus_irqs_cleanup(PCIBus *bus)
bus->irq_opaque = NULL;
bus->nirq = 0;
g_free(bus->irq_count);
+ bus->irq_count = NULL;
}
PCIBus *pci_register_root_bus(DeviceState *parent, const char *name,
pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
void *irq_opaque,
- MemoryRegion *address_space_mem,
- MemoryRegion *address_space_io,
+ MemoryRegion *mem, MemoryRegion *io,
uint8_t devfn_min, int nirq,
const char *typename)
{
PCIBus *bus;
- bus = pci_root_bus_new(parent, name, address_space_mem,
- address_space_io, devfn_min, typename);
- pci_bus_irqs(bus, set_irq, map_irq, irq_opaque, nirq);
+ bus = pci_root_bus_new(parent, name, mem, io, devfn_min, typename);
+ pci_bus_irqs(bus, set_irq, irq_opaque, nirq);
+ pci_bus_map_irqs(bus, map_irq);
return bus;
}
@@ -546,7 +616,7 @@ void pci_bus_range(PCIBus *bus, int *min_bus, int *max_bus)
for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
PCIDevice *dev = bus->devices[i];
- if (dev && PCI_DEVICE_GET_CLASS(dev)->is_bridge) {
+ if (dev && IS_PCI_BRIDGE(dev)) {
*min_bus = MIN(*min_bus, dev->config[PCI_SECONDARY_BUS]);
*max_bus = MAX(*max_bus, dev->config[PCI_SUBORDINATE_BUS]);
}
@@ -562,7 +632,6 @@ static int get_pci_config_device(QEMUFile *f, void *pv, size_t size,
const VMStateField *field)
{
PCIDevice *s = container_of(pv, PCIDevice, config);
- PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(s);
uint8_t *config;
int i;
@@ -584,9 +653,8 @@ static int get_pci_config_device(QEMUFile *f, void *pv, size_t size,
memcpy(s->config, config, size);
pci_update_mappings(s);
- if (pc->is_bridge) {
- PCIBridge *b = PCI_BRIDGE(s);
- pci_bridge_update_mappings(b);
+ if (IS_PCI_BRIDGE(s)) {
+ pci_bridge_update_mappings(PCI_BRIDGE(s));
}
memory_region_set_enabled(&s->bus_master_enable_region,
@@ -608,7 +676,7 @@ static int put_pci_config_device(QEMUFile *f, void *pv, size_t size,
return 0;
}
-static VMStateInfo vmstate_info_pci_config = {
+static const VMStateInfo vmstate_info_pci_config = {
.name = "pci config",
.get = get_pci_config_device,
.put = put_pci_config_device,
@@ -649,7 +717,7 @@ static int put_pci_irq_state(QEMUFile *f, void *pv, size_t size,
return 0;
}
-static VMStateInfo vmstate_info_pci_irq_state = {
+static const VMStateInfo vmstate_info_pci_irq_state = {
.name = "pci irq state",
.get = get_pci_irq_state,
.put = put_pci_irq_state,
@@ -669,7 +737,7 @@ const VMStateDescription vmstate_pci_device = {
.name = "PCIDevice",
.version_id = 2,
.minimum_version_id = 1,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_INT32_POSITIVE_LE(version_id, PCIDevice),
VMSTATE_BUFFER_UNSAFE_INFO_TEST(config, PCIDevice,
migrate_is_not_pcie,
@@ -821,7 +889,7 @@ static void pci_init_w1cmask(PCIDevice *dev)
static void pci_init_mask_bridge(PCIDevice *d)
{
/* PCI_PRIMARY_BUS, PCI_SECONDARY_BUS, PCI_SUBORDINATE_BUS and
- PCI_SEC_LETENCY_TIMER */
+ PCI_SEC_LATENCY_TIMER */
memset(d->wmask + PCI_PRIMARY_BUS, 0xff, 4);
/* base and limit */
@@ -885,6 +953,16 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
}
/*
+ * With SR/IOV and ARI, a device at function 0 need not be a multifunction
+ * device, as it may just be a VF that ended up with function 0 in
+ * the legacy PCI interpretation. Avoid failing in such cases:
+ */
+ if (pci_is_vf(dev) &&
+ dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+ return;
+ }
+
+ /*
* multifunction bit is interpreted in two ways as follows.
* - all functions must set the bit to 1.
* Example: Intel X53
@@ -947,6 +1025,9 @@ static void do_pci_unregister_device(PCIDevice *pci_dev)
pci_get_bus(pci_dev)->devices[pci_dev->devfn] = NULL;
pci_config_free(pci_dev);
+ if (xen_mode == XEN_EMULATE) {
+ xen_evtchn_remove_pci_device(pci_dev);
+ }
if (memory_region_is_mapped(&pci_dev->bus_master_enable_region)) {
memory_region_del_subregion(&pci_dev->bus_master_container_region,
&pci_dev->bus_master_enable_region);
@@ -1039,6 +1120,21 @@ static bool pci_bus_devfn_reserved(PCIBus *bus, int devfn)
return bus->slot_reserved_mask & (1UL << PCI_SLOT(devfn));
}
+uint32_t pci_bus_get_slot_reserved_mask(PCIBus *bus)
+{
+ return bus->slot_reserved_mask;
+}
+
+void pci_bus_set_slot_reserved_mask(PCIBus *bus, uint32_t mask)
+{
+ bus->slot_reserved_mask |= mask;
+}
+
+void pci_bus_clear_slot_reserved_mask(PCIBus *bus, uint32_t mask)
+{
+ bus->slot_reserved_mask &= ~mask;
+}
+
/* -1 for devfn means auto assign */
static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
const char *name, int devfn,
@@ -1050,9 +1146,10 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
Error *local_err = NULL;
DeviceState *dev = DEVICE(pci_dev);
PCIBus *bus = pci_get_bus(pci_dev);
+ bool is_bridge = IS_PCI_BRIDGE(pci_dev);
/* Only pci bridges can be attached to extra PCI root buses */
- if (pci_bus_is_root(bus) && bus->parent_dev && !pc->is_bridge) {
+ if (pci_bus_is_root(bus) && bus->parent_dev && !is_bridge) {
error_setg(errp,
"PCI: Only PCI/PCIe bridges can be plugged into %s",
bus->parent_dev->name);
@@ -1078,12 +1175,18 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
return NULL;
} else if (!pci_bus_devfn_available(bus, devfn)) {
error_setg(errp, "PCI: slot %d function %d not available for %s,"
- " in use by %s",
+ " in use by %s,id=%s",
PCI_SLOT(devfn), PCI_FUNC(devfn), name,
- bus->devices[devfn]->name);
+ bus->devices[devfn]->name, bus->devices[devfn]->qdev.id);
return NULL;
- } else if (dev->hotplugged &&
- pci_get_function_0(pci_dev)) {
+ } /*
+ * Populating function 0 triggers a scan from the guest that
+ * exposes other non-zero functions. Hence we need to ensure that
+ * function 0 wasn't added yet.
+ */
+ else if (dev->hotplugged &&
+ !pci_is_vf(pci_dev) &&
+ pci_get_function_0(pci_dev)) {
error_setg(errp, "PCI: slot %d function 0 already occupied by %s,"
" new func %s cannot be exposed to guest.",
PCI_SLOT(pci_get_function_0(pci_dev)->devfn),
@@ -1113,7 +1216,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
pci_config_set_revision(pci_dev->config, pc->revision);
pci_config_set_class(pci_dev->config, pc->class_id);
- if (!pc->is_bridge) {
+ if (!is_bridge) {
if (pc->subsystem_vendor_id || pc->subsystem_id) {
pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID,
pc->subsystem_vendor_id);
@@ -1130,7 +1233,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
pci_init_cmask(pci_dev);
pci_init_wmask(pci_dev);
pci_init_w1cmask(pci_dev);
- if (pc->is_bridge) {
+ if (is_bridge) {
pci_init_mask_bridge(pci_dev);
}
pci_init_multifunction(bus, pci_dev, &local_err);
@@ -1180,6 +1283,19 @@ static void pci_qdev_unrealize(DeviceState *dev)
pci_device_deassert_intx(pci_dev);
do_pci_unregister_device(pci_dev);
+
+ pci_dev->msi_trigger = NULL;
+
+ /*
+ * clean up acpi-index so it could reused by another device
+ */
+ if (pci_dev->acpi_index) {
+ GSequence *used_indexes = pci_acpi_index_list();
+
+ g_sequence_remove(g_sequence_lookup(used_indexes,
+ GINT_TO_POINTER(pci_dev->acpi_index),
+ g_cmp_uint32, NULL));
+ }
}
void pci_register_bar(PCIDevice *pci_dev, int region_num,
@@ -1191,6 +1307,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
pcibus_t size = memory_region_size(memory);
uint8_t hdr_type;
+ assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */
assert(region_num >= 0);
assert(region_num < PCI_NUM_REGIONS);
assert(is_power_of_2(size));
@@ -1294,22 +1411,54 @@ pcibus_t pci_get_bar_addr(PCIDevice *pci_dev, int region_num)
return pci_dev->io_regions[region_num].addr;
}
-static pcibus_t pci_bar_address(PCIDevice *d,
- int reg, uint8_t type, pcibus_t size)
+static pcibus_t pci_config_get_bar_addr(PCIDevice *d, int reg,
+ uint8_t type, pcibus_t size)
+{
+ pcibus_t new_addr;
+ if (!pci_is_vf(d)) {
+ int bar = pci_bar(d, reg);
+ if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+ new_addr = pci_get_quad(d->config + bar);
+ } else {
+ new_addr = pci_get_long(d->config + bar);
+ }
+ } else {
+ PCIDevice *pf = d->exp.sriov_vf.pf;
+ uint16_t sriov_cap = pf->exp.sriov_cap;
+ int bar = sriov_cap + PCI_SRIOV_BAR + reg * 4;
+ uint16_t vf_offset =
+ pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
+ uint16_t vf_stride =
+ pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
+ uint32_t vf_num = (d->devfn - (pf->devfn + vf_offset)) / vf_stride;
+
+ if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+ new_addr = pci_get_quad(pf->config + bar);
+ } else {
+ new_addr = pci_get_long(pf->config + bar);
+ }
+ new_addr += vf_num * size;
+ }
+ /* The ROM slot has a specific enable bit, keep it intact */
+ if (reg != PCI_ROM_SLOT) {
+ new_addr &= ~(size - 1);
+ }
+ return new_addr;
+}
+
+pcibus_t pci_bar_address(PCIDevice *d,
+ int reg, uint8_t type, pcibus_t size)
{
pcibus_t new_addr, last_addr;
- int bar = pci_bar(d, reg);
uint16_t cmd = pci_get_word(d->config + PCI_COMMAND);
- Object *machine = qdev_get_machine();
- ObjectClass *oc = object_get_class(machine);
- MachineClass *mc = MACHINE_CLASS(oc);
+ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
bool allow_0_address = mc->pci_allow_0_address;
if (type & PCI_BASE_ADDRESS_SPACE_IO) {
if (!(cmd & PCI_COMMAND_IO)) {
return PCI_BAR_UNMAPPED;
}
- new_addr = pci_get_long(d->config + bar) & ~(size - 1);
+ new_addr = pci_config_get_bar_addr(d, reg, type, size);
last_addr = new_addr + size - 1;
/* Check if 32 bit BAR wraps around explicitly.
* TODO: make priorities correct and remove this work around.
@@ -1324,11 +1473,7 @@ static pcibus_t pci_bar_address(PCIDevice *d,
if (!(cmd & PCI_COMMAND_MEMORY)) {
return PCI_BAR_UNMAPPED;
}
- if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
- new_addr = pci_get_quad(d->config + bar);
- } else {
- new_addr = pci_get_long(d->config + bar);
- }
+ new_addr = pci_config_get_bar_addr(d, reg, type, size);
/* the ROM slot has a specific enable bit */
if (reg == PCI_ROM_SLOT && !(new_addr & PCI_ROM_ADDRESS_ENABLE)) {
return PCI_BAR_UNMAPPED;
@@ -1380,6 +1525,9 @@ static void pci_update_mappings(PCIDevice *d)
continue;
new_addr = pci_bar_address(d, i, r->type, r->size);
+ if (!d->has_power) {
+ new_addr = PCI_BAR_UNMAPPED;
+ }
/* This bar isn't changed */
if (new_addr == r->addr)
@@ -1387,7 +1535,7 @@ static void pci_update_mappings(PCIDevice *d)
/* now do the real mapping */
if (r->addr != PCI_BAR_UNMAPPED) {
- trace_pci_update_mappings_del(d, pci_dev_bus_num(d),
+ trace_pci_update_mappings_del(d->name, pci_dev_bus_num(d),
PCI_SLOT(d->devfn),
PCI_FUNC(d->devfn),
i, r->addr, r->size);
@@ -1395,7 +1543,7 @@ static void pci_update_mappings(PCIDevice *d)
}
r->addr = new_addr;
if (r->addr != PCI_BAR_UNMAPPED) {
- trace_pci_update_mappings_add(d, pci_dev_bus_num(d),
+ trace_pci_update_mappings_add(d->name, pci_dev_bus_num(d),
PCI_SLOT(d->devfn),
PCI_FUNC(d->devfn),
i, r->addr, r->size);
@@ -1461,15 +1609,16 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
range_covers_byte(addr, l, PCI_COMMAND))
pci_update_mappings(d);
- if (range_covers_byte(addr, l, PCI_COMMAND)) {
+ if (ranges_overlap(addr, l, PCI_COMMAND, 2)) {
pci_update_irq_disabled(d, was_irq_disabled);
memory_region_set_enabled(&d->bus_master_enable_region,
- pci_get_word(d->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER);
+ (pci_get_word(d->config + PCI_COMMAND)
+ & PCI_COMMAND_MASTER) && d->has_power);
}
msi_write_config(d, addr, val_in, l);
msix_write_config(d, addr, val_in, l);
+ pcie_sriov_config_write(d, addr, val_in, l);
}
/***********************************************************/
@@ -1494,11 +1643,6 @@ static void pci_irq_handler(void *opaque, int irq_num, int level)
pci_change_irq_level(pci_dev, irq_num, change);
}
-static inline int pci_intx(PCIDevice *pci_dev)
-{
- return pci_get_byte(pci_dev->config + PCI_INTERRUPT_PIN) - 1;
-}
-
qemu_irq pci_allocate_irq(PCIDevice *pci_dev)
{
int intx = pci_intx(pci_dev);
@@ -1525,8 +1669,12 @@ PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin)
PCIBus *bus;
do {
+ int dev_irq = pin;
bus = pci_get_bus(dev);
pin = bus->map_irq(dev, pin);
+ trace_pci_route_irq(dev_irq, DEVICE(dev)->canonical_path, pin,
+ pci_bus_is_root(bus) ? "root-complex"
+ : DEVICE(bus->parent_dev)->canonical_path);
dev = bus->parent_dev;
} while (dev);
@@ -1573,7 +1721,7 @@ void pci_device_set_intx_routing_notifier(PCIDevice *dev,
* 9.1: Interrupt routing. Table 9-1
*
* the PCI Express Base Specification, Revision 2.1
- * 2.2.8.1: INTx interrutp signaling - Rules
+ * 2.2.8.1: INTx interrupt signaling - Rules
* the Implementation Note
* Table 2-20
*/
@@ -1589,13 +1737,6 @@ int pci_swizzle_map_irq_fn(PCIDevice *pci_dev, int pin)
/***********************************************************/
/* monitor info on PCI */
-typedef struct {
- uint16_t class;
- const char *desc;
- const char *fw_name;
- uint16_t fw_ign_bits;
-} pci_class_desc;
-
static const pci_class_desc pci_class_descriptions[] =
{
{ 0x0001, "VGA controller", "display"},
@@ -1645,7 +1786,7 @@ static const pci_class_desc pci_class_descriptions[] =
{ 0x0902, "Mouse", "mouse"},
{ 0x0A00, "Dock station", "dock", 0x00ff},
{ 0x0B00, "i386 cpu", "cpu", 0x00ff},
- { 0x0c00, "Fireware contorller", "fireware"},
+ { 0x0c00, "Firewire controller", "firewire"},
{ 0x0c01, "Access bus controller", "access-bus"},
{ 0x0c02, "SSA controller", "ssa"},
{ 0x0c03, "USB controller", "usb"},
@@ -1654,11 +1795,9 @@ static const pci_class_desc pci_class_descriptions[] =
{ 0, NULL}
};
-static void pci_for_each_device_under_bus_reverse(PCIBus *bus,
- void (*fn)(PCIBus *b,
- PCIDevice *d,
- void *opaque),
- void *opaque)
+void pci_for_each_device_under_bus_reverse(PCIBus *bus,
+ pci_bus_dev_fn fn,
+ void *opaque)
{
PCIDevice *d;
int devfn;
@@ -1672,8 +1811,7 @@ static void pci_for_each_device_under_bus_reverse(PCIBus *bus,
}
void pci_for_each_device_reverse(PCIBus *bus, int bus_num,
- void (*fn)(PCIBus *b, PCIDevice *d, void *opaque),
- void *opaque)
+ pci_bus_dev_fn fn, void *opaque)
{
bus = pci_find_bus_nr(bus, bus_num);
@@ -1682,10 +1820,8 @@ void pci_for_each_device_reverse(PCIBus *bus, int bus_num,
}
}
-static void pci_for_each_device_under_bus(PCIBus *bus,
- void (*fn)(PCIBus *b, PCIDevice *d,
- void *opaque),
- void *opaque)
+void pci_for_each_device_under_bus(PCIBus *bus,
+ pci_bus_dev_fn fn, void *opaque)
{
PCIDevice *d;
int devfn;
@@ -1699,8 +1835,7 @@ static void pci_for_each_device_under_bus(PCIBus *bus,
}
void pci_for_each_device(PCIBus *bus, int bus_num,
- void (*fn)(PCIBus *b, PCIDevice *d, void *opaque),
- void *opaque)
+ pci_bus_dev_fn fn, void *opaque)
{
bus = pci_find_bus_nr(bus, bus_num);
@@ -1709,7 +1844,7 @@ void pci_for_each_device(PCIBus *bus, int bus_num,
}
}
-static const pci_class_desc *get_class_desc(int class)
+const pci_class_desc *get_class_desc(int class)
{
const pci_class_desc *desc;
@@ -1721,277 +1856,54 @@ static const pci_class_desc *get_class_desc(int class)
return desc;
}
-static PciDeviceInfoList *qmp_query_pci_devices(PCIBus *bus, int bus_num);
-
-static PciMemoryRegionList *qmp_query_pci_regions(const PCIDevice *dev)
+void pci_init_nic_devices(PCIBus *bus, const char *default_model)
{
- PciMemoryRegionList *head = NULL, **tail = &head;
- int i;
-
- for (i = 0; i < PCI_NUM_REGIONS; i++) {
- const PCIIORegion *r = &dev->io_regions[i];
- PciMemoryRegion *region;
-
- if (!r->size) {
- continue;
- }
-
- region = g_malloc0(sizeof(*region));
-
- if (r->type & PCI_BASE_ADDRESS_SPACE_IO) {
- region->type = g_strdup("io");
- } else {
- region->type = g_strdup("memory");
- region->has_prefetch = true;
- region->prefetch = !!(r->type & PCI_BASE_ADDRESS_MEM_PREFETCH);
- region->has_mem_type_64 = true;
- region->mem_type_64 = !!(r->type & PCI_BASE_ADDRESS_MEM_TYPE_64);
- }
-
- region->bar = i;
- region->address = r->addr;
- region->size = r->size;
-
- QAPI_LIST_APPEND(tail, region);
- }
-
- return head;
-}
-
-static PciBridgeInfo *qmp_query_pci_bridge(PCIDevice *dev, PCIBus *bus,
- int bus_num)
-{
- PciBridgeInfo *info;
- PciMemoryRange *range;
-
- info = g_new0(PciBridgeInfo, 1);
-
- info->bus = g_new0(PciBusInfo, 1);
- info->bus->number = dev->config[PCI_PRIMARY_BUS];
- info->bus->secondary = dev->config[PCI_SECONDARY_BUS];
- info->bus->subordinate = dev->config[PCI_SUBORDINATE_BUS];
-
- range = info->bus->io_range = g_new0(PciMemoryRange, 1);
- range->base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_IO);
- range->limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_IO);
-
- range = info->bus->memory_range = g_new0(PciMemoryRange, 1);
- range->base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_MEMORY);
- range->limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_MEMORY);
-
- range = info->bus->prefetchable_range = g_new0(PciMemoryRange, 1);
- range->base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_MEM_PREFETCH);
- range->limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_MEM_PREFETCH);
-
- if (dev->config[PCI_SECONDARY_BUS] != 0) {
- PCIBus *child_bus = pci_find_bus_nr(bus, dev->config[PCI_SECONDARY_BUS]);
- if (child_bus) {
- info->has_devices = true;
- info->devices = qmp_query_pci_devices(child_bus, dev->config[PCI_SECONDARY_BUS]);
- }
- }
-
- return info;
+ qemu_create_nic_bus_devices(&bus->qbus, TYPE_PCI_DEVICE, default_model,
+ "virtio", "virtio-net-pci");
}
-static PciDeviceInfo *qmp_query_pci_device(PCIDevice *dev, PCIBus *bus,
- int bus_num)
+bool pci_init_nic_in_slot(PCIBus *rootbus, const char *model,
+ const char *alias, const char *devaddr)
{
- const pci_class_desc *desc;
- PciDeviceInfo *info;
- uint8_t type;
- int class;
-
- info = g_new0(PciDeviceInfo, 1);
- info->bus = bus_num;
- info->slot = PCI_SLOT(dev->devfn);
- info->function = PCI_FUNC(dev->devfn);
-
- info->class_info = g_new0(PciDeviceClass, 1);
- class = pci_get_word(dev->config + PCI_CLASS_DEVICE);
- info->class_info->q_class = class;
- desc = get_class_desc(class);
- if (desc->desc) {
- info->class_info->has_desc = true;
- info->class_info->desc = g_strdup(desc->desc);
- }
-
- info->id = g_new0(PciDeviceId, 1);
- info->id->vendor = pci_get_word(dev->config + PCI_VENDOR_ID);
- info->id->device = pci_get_word(dev->config + PCI_DEVICE_ID);
- info->regions = qmp_query_pci_regions(dev);
- info->qdev_id = g_strdup(dev->qdev.id ? dev->qdev.id : "");
-
- info->irq_pin = dev->config[PCI_INTERRUPT_PIN];
- if (dev->config[PCI_INTERRUPT_PIN] != 0) {
- info->has_irq = true;
- info->irq = dev->config[PCI_INTERRUPT_LINE];
- }
-
- type = dev->config[PCI_HEADER_TYPE] & ~PCI_HEADER_TYPE_MULTI_FUNCTION;
- if (type == PCI_HEADER_TYPE_BRIDGE) {
- info->has_pci_bridge = true;
- info->pci_bridge = qmp_query_pci_bridge(dev, bus, bus_num);
- } else if (type == PCI_HEADER_TYPE_NORMAL) {
- info->id->has_subsystem = info->id->has_subsystem_vendor = true;
- info->id->subsystem = pci_get_word(dev->config + PCI_SUBSYSTEM_ID);
- info->id->subsystem_vendor =
- pci_get_word(dev->config + PCI_SUBSYSTEM_VENDOR_ID);
- } else if (type == PCI_HEADER_TYPE_CARDBUS) {
- info->id->has_subsystem = info->id->has_subsystem_vendor = true;
- info->id->subsystem = pci_get_word(dev->config + PCI_CB_SUBSYSTEM_ID);
- info->id->subsystem_vendor =
- pci_get_word(dev->config + PCI_CB_SUBSYSTEM_VENDOR_ID);
- }
-
- return info;
-}
-
-static PciDeviceInfoList *qmp_query_pci_devices(PCIBus *bus, int bus_num)
-{
- PciDeviceInfoList *head = NULL, **tail = &head;
- PCIDevice *dev;
- int devfn;
-
- for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
- dev = bus->devices[devfn];
- if (dev) {
- QAPI_LIST_APPEND(tail, qmp_query_pci_device(dev, bus, bus_num));
- }
- }
-
- return head;
-}
-
-static PciInfo *qmp_query_pci_bus(PCIBus *bus, int bus_num)
-{
- PciInfo *info = NULL;
-
- bus = pci_find_bus_nr(bus, bus_num);
- if (bus) {
- info = g_malloc0(sizeof(*info));
- info->bus = bus_num;
- info->devices = qmp_query_pci_devices(bus, bus_num);
- }
-
- return info;
-}
-
-PciInfoList *qmp_query_pci(Error **errp)
-{
- PciInfoList *head = NULL, **tail = &head;
- PCIHostState *host_bridge;
-
- QLIST_FOREACH(host_bridge, &pci_host_bridges, next) {
- QAPI_LIST_APPEND(tail,
- qmp_query_pci_bus(host_bridge->bus,
- pci_bus_num(host_bridge->bus)));
- }
-
- return head;
-}
-
-/* Initialize a PCI NIC. */
-PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus,
- const char *default_model,
- const char *default_devaddr)
-{
- const char *devaddr = nd->devaddr ? nd->devaddr : default_devaddr;
- GSList *list;
- GPtrArray *pci_nic_models;
- PCIBus *bus;
+ NICInfo *nd = qemu_find_nic_info(model, true, alias);
+ int dom, busnr, devfn;
PCIDevice *pci_dev;
- DeviceState *dev;
- int devfn;
- int i;
- int dom, busnr;
unsigned slot;
+ PCIBus *bus;
- if (nd->model && !strcmp(nd->model, "virtio")) {
- g_free(nd->model);
- nd->model = g_strdup("virtio-net-pci");
- }
-
- list = object_class_get_list_sorted(TYPE_PCI_DEVICE, false);
- pci_nic_models = g_ptr_array_new();
- while (list) {
- DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, list->data,
- TYPE_DEVICE);
- GSList *next;
- if (test_bit(DEVICE_CATEGORY_NETWORK, dc->categories) &&
- dc->user_creatable) {
- const char *name = object_class_get_name(list->data);
- /*
- * A network device might also be something else than a NIC, see
- * e.g. the "rocker" device. Thus we have to look for the "netdev"
- * property, too. Unfortunately, some devices like virtio-net only
- * create this property during instance_init, so we have to create
- * a temporary instance here to be able to check it.
- */
- Object *obj = object_new_with_class(OBJECT_CLASS(dc));
- if (object_property_find(obj, "netdev")) {
- g_ptr_array_add(pci_nic_models, (gpointer)name);
- }
- object_unref(obj);
- }
- next = list->next;
- g_slist_free_1(list);
- list = next;
- }
- g_ptr_array_add(pci_nic_models, NULL);
-
- if (qemu_show_nic_models(nd->model, (const char **)pci_nic_models->pdata)) {
- exit(0);
+ if (!nd) {
+ return false;
}
- i = qemu_find_nic_model(nd, (const char **)pci_nic_models->pdata,
- default_model);
- if (i < 0) {
+ if (!devaddr || pci_parse_devaddr(devaddr, &dom, &busnr, &slot, NULL) < 0) {
+ error_report("Invalid PCI device address %s for device %s",
+ devaddr, model);
exit(1);
}
- if (!rootbus) {
- error_report("No primary PCI bus");
+ if (dom != 0) {
+ error_report("No support for non-zero PCI domains");
exit(1);
}
- assert(!rootbus->parent_dev);
-
- if (!devaddr) {
- devfn = -1;
- busnr = 0;
- } else {
- if (pci_parse_devaddr(devaddr, &dom, &busnr, &slot, NULL) < 0) {
- error_report("Invalid PCI device address %s for device %s",
- devaddr, nd->model);
- exit(1);
- }
-
- if (dom != 0) {
- error_report("No support for non-zero PCI domains");
- exit(1);
- }
-
- devfn = PCI_DEVFN(slot, 0);
- }
+ devfn = PCI_DEVFN(slot, 0);
bus = pci_find_bus_nr(rootbus, busnr);
if (!bus) {
error_report("Invalid PCI device address %s for device %s",
- devaddr, nd->model);
+ devaddr, model);
exit(1);
}
- pci_dev = pci_new(devfn, nd->model);
- dev = &pci_dev->qdev;
- qdev_set_nic_properties(dev, nd);
+ pci_dev = pci_new(devfn, model);
+ qdev_set_nic_properties(&pci_dev->qdev, nd);
pci_realize_and_unref(pci_dev, bus, &error_fatal);
- g_ptr_array_free(pci_nic_models, true);
- return pci_dev;
+ return true;
}
PCIDevice *pci_vga_init(PCIBus *bus)
{
+ vga_interface_created = true;
switch (vga_interface_type) {
case VGA_CIRRUS:
return pci_create_simple(bus, -1, "cirrus-vga");
@@ -2028,7 +1940,7 @@ static bool pci_root_bus_in_range(PCIBus *bus, int bus_num)
for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
PCIDevice *dev = bus->devices[i];
- if (dev && PCI_DEVICE_GET_CLASS(dev)->is_bridge) {
+ if (dev && IS_PCI_BRIDGE(dev)) {
if (pci_secondary_bus_in_range(dev, bus_num)) {
return true;
}
@@ -2038,7 +1950,7 @@ static bool pci_root_bus_in_range(PCIBus *bus, int bus_num)
return false;
}
-static PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num)
+PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num)
{
PCIBus *sec;
@@ -2078,10 +1990,8 @@ static PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num)
return NULL;
}
-void pci_for_each_bus_depth_first(PCIBus *bus,
- void *(*begin)(PCIBus *bus, void *parent_state),
- void (*end)(PCIBus *bus, void *state),
- void *parent_state)
+void pci_for_each_bus_depth_first(PCIBus *bus, pci_bus_ret_fn begin,
+ pci_bus_fn end, void *parent_state)
{
PCIBus *sec;
void *state;
@@ -2116,6 +2026,8 @@ PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn)
return bus->devices[devfn];
}
+#define ONBOARD_INDEX_MAX (16 * 1024 - 1)
+
static void pci_qdev_realize(DeviceState *qdev, Error **errp)
{
PCIDevice *pci_dev = (PCIDevice *)qdev;
@@ -2125,6 +2037,35 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
bool is_default_rom;
uint16_t class_id;
+ /*
+ * capped by systemd (see: udev-builtin-net_id.c)
+ * as it's the only known user honor it to avoid users
+ * misconfigure QEMU and then wonder why acpi-index doesn't work
+ */
+ if (pci_dev->acpi_index > ONBOARD_INDEX_MAX) {
+ error_setg(errp, "acpi-index should be less or equal to %u",
+ ONBOARD_INDEX_MAX);
+ return;
+ }
+
+ /*
+ * make sure that acpi-index is unique across all present PCI devices
+ */
+ if (pci_dev->acpi_index) {
+ GSequence *used_indexes = pci_acpi_index_list();
+
+ if (g_sequence_lookup(used_indexes,
+ GINT_TO_POINTER(pci_dev->acpi_index),
+ g_cmp_uint32, NULL)) {
+ error_setg(errp, "a PCI device with acpi-index = %" PRIu32
+ " already exist", pci_dev->acpi_index);
+ return;
+ }
+ g_sequence_insert_sorted(used_indexes,
+ GINT_TO_POINTER(pci_dev->acpi_index),
+ g_cmp_uint32, NULL);
+ }
+
if (pci_dev->romsize != -1 && !is_power_of_2(pci_dev->romsize)) {
error_setg(errp, "ROM size %u is not a power of two", pci_dev->romsize);
return;
@@ -2138,6 +2079,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
}
+ if (object_class_dynamic_cast(klass, INTERFACE_CXL_DEVICE)) {
+ pci_dev->cap_present |= QEMU_PCIE_CAP_CXL;
+ }
+
pci_dev = do_pci_register_device(pci_dev,
object_get_typename(OBJECT(qdev)),
pci_dev->devfn, errp);
@@ -2153,6 +2098,25 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
}
}
+ /*
+ * A PCIe Downstream Port that do not have ARI Forwarding enabled must
+ * associate only Device 0 with the device attached to the bus
+ * representing the Link from the Port (PCIe base spec rev 4.0 ver 0.3,
+ * sec 7.3.1).
+ * With ARI, PCI_SLOT() can return non-zero value as the traditional
+ * 5-bit Device Number and 3-bit Function Number fields in its associated
+ * Routing IDs, Requester IDs and Completer IDs are interpreted as a
+ * single 8-bit Function Number. Hence, ignore ARI capable devices.
+ */
+ if (pci_is_express(pci_dev) &&
+ !pcie_find_capability(pci_dev, PCI_EXT_CAP_ID_ARI) &&
+ pcie_has_upstream_port(pci_dev) &&
+ PCI_SLOT(pci_dev->devfn)) {
+ warn_report("PCI: slot %d is not valid for %s,"
+ " parent device only allows plugging into slot 0.",
+ PCI_SLOT(pci_dev->devfn), pci_dev->name);
+ }
+
if (pci_dev->failover_pair_id) {
if (!pci_bus_is_express(pci_get_bus(pci_dev))) {
error_setg(errp, "failover primary device must be on "
@@ -2190,10 +2154,14 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
pci_qdev_unrealize(DEVICE(pci_dev));
return;
}
+
+ pci_set_power(pci_dev, true);
+
+ pci_dev->msi_trigger = pci_msi_trigger;
}
-PCIDevice *pci_new_multifunction(int devfn, bool multifunction,
- const char *name)
+static PCIDevice *pci_new_internal(int devfn, bool multifunction,
+ const char *name)
{
DeviceState *dev;
@@ -2203,9 +2171,14 @@ PCIDevice *pci_new_multifunction(int devfn, bool multifunction,
return PCI_DEVICE(dev);
}
+PCIDevice *pci_new_multifunction(int devfn, const char *name)
+{
+ return pci_new_internal(devfn, true, name);
+}
+
PCIDevice *pci_new(int devfn, const char *name)
{
- return pci_new_multifunction(devfn, false, name);
+ return pci_new_internal(devfn, false, name);
}
bool pci_realize_and_unref(PCIDevice *dev, PCIBus *bus, Error **errp)
@@ -2214,17 +2187,18 @@ bool pci_realize_and_unref(PCIDevice *dev, PCIBus *bus, Error **errp)
}
PCIDevice *pci_create_simple_multifunction(PCIBus *bus, int devfn,
- bool multifunction,
const char *name)
{
- PCIDevice *dev = pci_new_multifunction(devfn, multifunction, name);
+ PCIDevice *dev = pci_new_multifunction(devfn, name);
pci_realize_and_unref(dev, bus, &error_fatal);
return dev;
}
PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
{
- return pci_create_simple_multifunction(bus, devfn, false, name);
+ PCIDevice *dev = pci_new(devfn, name);
+ pci_realize_and_unref(dev, bus, &error_fatal);
+ return dev;
}
static uint8_t pci_find_space(PCIDevice *pdev, uint8_t size)
@@ -2337,16 +2311,21 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size)
static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom,
Error **errp)
{
- int64_t size;
- char *path;
- void *ptr;
+ int64_t size = 0;
+ g_autofree char *path = NULL;
char name[32];
const VMStateDescription *vmsd;
- if (!pdev->romfile)
- return;
- if (strlen(pdev->romfile) == 0)
+ /*
+ * In case of incoming migration ROM will come with migration stream, no
+ * reason to load the file. Neither we want to fail if local ROM file
+ * mismatches with specified romsize.
+ */
+ bool load_file = !runstate_check(RUN_STATE_INMIGRATE);
+
+ if (!pdev->romfile || !strlen(pdev->romfile)) {
return;
+ }
if (!pdev->rom_bar) {
/*
@@ -2373,57 +2352,57 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom,
return;
}
- path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile);
- if (path == NULL) {
- path = g_strdup(pdev->romfile);
- }
+ if (load_file || pdev->romsize == -1) {
+ path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile);
+ if (path == NULL) {
+ path = g_strdup(pdev->romfile);
+ }
- size = get_image_size(path);
- if (size < 0) {
- error_setg(errp, "failed to find romfile \"%s\"", pdev->romfile);
- g_free(path);
- return;
- } else if (size == 0) {
- error_setg(errp, "romfile \"%s\" is empty", pdev->romfile);
- g_free(path);
- return;
- } else if (size > 2 * GiB) {
- error_setg(errp, "romfile \"%s\" too large (size cannot exceed 2 GiB)",
- pdev->romfile);
- g_free(path);
- return;
- }
- if (pdev->romsize != -1) {
- if (size > pdev->romsize) {
- error_setg(errp, "romfile \"%s\" (%u bytes) is too large for ROM size %u",
- pdev->romfile, (uint32_t)size, pdev->romsize);
- g_free(path);
+ size = get_image_size(path);
+ if (size < 0) {
+ error_setg(errp, "failed to find romfile \"%s\"", pdev->romfile);
+ return;
+ } else if (size == 0) {
+ error_setg(errp, "romfile \"%s\" is empty", pdev->romfile);
+ return;
+ } else if (size > 2 * GiB) {
+ error_setg(errp,
+ "romfile \"%s\" too large (size cannot exceed 2 GiB)",
+ pdev->romfile);
return;
}
- } else {
- pdev->romsize = pow2ceil(size);
+ if (pdev->romsize != -1) {
+ if (size > pdev->romsize) {
+ error_setg(errp, "romfile \"%s\" (%u bytes) "
+ "is too large for ROM size %u",
+ pdev->romfile, (uint32_t)size, pdev->romsize);
+ return;
+ }
+ } else {
+ pdev->romsize = pow2ceil(size);
+ }
}
vmsd = qdev_get_vmsd(DEVICE(pdev));
+ snprintf(name, sizeof(name), "%s.rom",
+ vmsd ? vmsd->name : object_get_typename(OBJECT(pdev)));
- if (vmsd) {
- snprintf(name, sizeof(name), "%s.rom", vmsd->name);
- } else {
- snprintf(name, sizeof(name), "%s.rom", object_get_typename(OBJECT(pdev)));
- }
pdev->has_rom = true;
- memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, pdev->romsize, &error_fatal);
- ptr = memory_region_get_ram_ptr(&pdev->rom);
- if (load_image_size(path, ptr, size) < 0) {
- error_setg(errp, "failed to load romfile \"%s\"", pdev->romfile);
- g_free(path);
- return;
- }
- g_free(path);
+ memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, pdev->romsize,
+ &error_fatal);
- if (is_default_rom) {
- /* Only the default rom images will be patched (if needed). */
- pci_patch_ids(pdev, ptr, size);
+ if (load_file) {
+ void *ptr = memory_region_get_ram_ptr(&pdev->rom);
+
+ if (load_image_size(path, ptr, size) < 0) {
+ error_setg(errp, "failed to load romfile \"%s\"", pdev->romfile);
+ return;
+ }
+
+ if (is_default_rom) {
+ /* Only the default rom images will be patched (if needed). */
+ pci_patch_ids(pdev, ptr, size);
+ }
}
pci_register_bar(pdev, PCI_ROM_SLOT, 0, &pdev->rom);
@@ -2510,44 +2489,6 @@ uint8_t pci_find_capability(PCIDevice *pdev, uint8_t cap_id)
return pci_find_capability_list(pdev, cap_id, NULL);
}
-static void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent)
-{
- PCIDevice *d = (PCIDevice *)dev;
- const pci_class_desc *desc;
- char ctxt[64];
- PCIIORegion *r;
- int i, class;
-
- class = pci_get_word(d->config + PCI_CLASS_DEVICE);
- desc = pci_class_descriptions;
- while (desc->desc && class != desc->class)
- desc++;
- if (desc->desc) {
- snprintf(ctxt, sizeof(ctxt), "%s", desc->desc);
- } else {
- snprintf(ctxt, sizeof(ctxt), "Class %04x", class);
- }
-
- monitor_printf(mon, "%*sclass %s, addr %02x:%02x.%x, "
- "pci id %04x:%04x (sub %04x:%04x)\n",
- indent, "", ctxt, pci_dev_bus_num(d),
- PCI_SLOT(d->devfn), PCI_FUNC(d->devfn),
- pci_get_word(d->config + PCI_VENDOR_ID),
- pci_get_word(d->config + PCI_DEVICE_ID),
- pci_get_word(d->config + PCI_SUBSYSTEM_VENDOR_ID),
- pci_get_word(d->config + PCI_SUBSYSTEM_ID));
- for (i = 0; i < PCI_NUM_REGIONS; i++) {
- r = &d->io_regions[i];
- if (!r->size)
- continue;
- monitor_printf(mon, "%*sbar %d: %s at 0x%"FMT_PCIBUS
- " [0x%"FMT_PCIBUS"]\n",
- indent, "",
- i, r->type & PCI_BASE_ADDRESS_SPACE_IO ? "i/o" : "mem",
- r->addr, r->addr + r->size - 1);
- }
-}
-
static char *pci_dev_fw_name(DeviceState *dev, char *buf, int len)
{
PCIDevice *d = (PCIDevice *)dev;
@@ -2579,15 +2520,15 @@ static char *pci_dev_fw_name(DeviceState *dev, char *buf, int len)
static char *pcibus_get_fw_dev_path(DeviceState *dev)
{
PCIDevice *d = (PCIDevice *)dev;
- char path[50], name[33];
- int off;
+ char name[33];
+ int has_func = !!PCI_FUNC(d->devfn);
- off = snprintf(path, sizeof(path), "%s@%x",
- pci_dev_fw_name(dev, name, sizeof name),
- PCI_SLOT(d->devfn));
- if (PCI_FUNC(d->devfn))
- snprintf(path + off, sizeof(path) + off, ",%x", PCI_FUNC(d->devfn));
- return g_strdup(path);
+ return g_strdup_printf("%s@%x%s%.*x",
+ pci_dev_fw_name(dev, name, sizeof(name)),
+ PCI_SLOT(d->devfn),
+ has_func ? "," : "",
+ has_func,
+ PCI_FUNC(d->devfn));
}
static char *pcibus_get_dev_path(DeviceState *dev)
@@ -2701,7 +2642,9 @@ static void pci_device_class_base_init(ObjectClass *klass, void *data)
object_class_dynamic_cast(klass, INTERFACE_CONVENTIONAL_PCI_DEVICE);
ObjectClass *pcie =
object_class_dynamic_cast(klass, INTERFACE_PCIE_DEVICE);
- assert(conventional || pcie);
+ ObjectClass *cxl =
+ object_class_dynamic_cast(klass, INTERFACE_CXL_DEVICE);
+ assert(conventional || pcie || cxl);
}
}
@@ -2711,7 +2654,7 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
PCIBus *iommu_bus = bus;
uint8_t devfn = dev->devfn;
- while (iommu_bus && !iommu_bus->iommu_fn && iommu_bus->parent_dev) {
+ while (iommu_bus && !iommu_bus->iommu_ops && iommu_bus->parent_dev) {
PCIBus *parent_bus = pci_get_bus(iommu_bus->parent_dev);
/*
@@ -2750,22 +2693,29 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
iommu_bus = parent_bus;
}
- if (!pci_bus_bypass_iommu(bus) && iommu_bus && iommu_bus->iommu_fn) {
- return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, devfn);
+ if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) {
+ return iommu_bus->iommu_ops->get_address_space(bus,
+ iommu_bus->iommu_opaque, devfn);
}
return &address_space_memory;
}
-void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque)
+void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
{
- bus->iommu_fn = fn;
+ /*
+ * If called, pci_setup_iommu() should provide a minimum set of
+ * useful callbacks for the bus.
+ */
+ assert(ops);
+ assert(ops->get_address_space);
+
+ bus->iommu_ops = ops;
bus->iommu_opaque = opaque;
}
static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
{
Range *range = opaque;
- PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
uint16_t cmd = pci_get_word(dev->config + PCI_COMMAND);
int i;
@@ -2773,7 +2723,7 @@ static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
return;
}
- if (pc->is_bridge) {
+ if (IS_PCI_BRIDGE(dev)) {
pcibus_t base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_MEM_PREFETCH);
pcibus_t limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_MEM_PREFETCH);
@@ -2861,6 +2811,22 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector)
return msg;
}
+void pci_set_power(PCIDevice *d, bool state)
+{
+ if (d->has_power == state) {
+ return;
+ }
+
+ d->has_power = state;
+ pci_update_mappings(d);
+ memory_region_set_enabled(&d->bus_master_enable_region,
+ (pci_get_word(d->config + PCI_COMMAND)
+ & PCI_COMMAND_MASTER) && d->has_power);
+ if (!d->has_power) {
+ pci_device_reset(d);
+ }
+}
+
static const TypeInfo pci_device_type_info = {
.name = TYPE_PCI_DEVICE,
.parent = TYPE_DEVICE,
@@ -2875,7 +2841,9 @@ static void pci_register_types(void)
{
type_register_static(&pci_bus_info);
type_register_static(&pcie_bus_info);
+ type_register_static(&cxl_bus_info);
type_register_static(&conventional_pci_interface_info);
+ type_register_static(&cxl_interface_info);
type_register_static(&pcie_interface_info);
type_register_static(&pci_device_type_info);
}
diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
index 3789c17edc..6a4e38856d 100644
--- a/hw/pci/pci_bridge.c
+++ b/hw/pci/pci_bridge.c
@@ -36,6 +36,9 @@
#include "qemu/module.h"
#include "qemu/range.h"
#include "qapi/error.h"
+#include "hw/acpi/acpi_aml_interface.h"
+#include "hw/acpi/pci.h"
+#include "hw/qdev-properties.h"
/* PCI bridge subsystem vendor ID helper functions */
#define PCI_SSVID_SIZEOF 8
@@ -182,11 +185,11 @@ static void pci_bridge_init_vga_aliases(PCIBridge *br, PCIBus *parent,
}
}
-static PCIBridgeWindows *pci_bridge_region_init(PCIBridge *br)
+static void pci_bridge_region_init(PCIBridge *br)
{
PCIDevice *pd = PCI_DEVICE(br);
PCIBus *parent = pci_get_bus(pd);
- PCIBridgeWindows *w = g_new(PCIBridgeWindows, 1);
+ PCIBridgeWindows *w = &br->windows;
uint16_t cmd = pci_get_word(pd->config + PCI_COMMAND);
pci_bridge_init_alias(br, &w->alias_pref_mem,
@@ -209,8 +212,6 @@ static PCIBridgeWindows *pci_bridge_region_init(PCIBridge *br)
cmd & PCI_COMMAND_IO);
pci_bridge_init_vga_aliases(br, parent, w->alias_vga);
-
- return w;
}
static void pci_bridge_region_del(PCIBridge *br, PCIBridgeWindows *w)
@@ -232,19 +233,18 @@ static void pci_bridge_region_cleanup(PCIBridge *br, PCIBridgeWindows *w)
object_unparent(OBJECT(&w->alias_vga[QEMU_PCI_VGA_IO_LO]));
object_unparent(OBJECT(&w->alias_vga[QEMU_PCI_VGA_IO_HI]));
object_unparent(OBJECT(&w->alias_vga[QEMU_PCI_VGA_MEM]));
- g_free(w);
}
void pci_bridge_update_mappings(PCIBridge *br)
{
- PCIBridgeWindows *w = br->windows;
+ PCIBridgeWindows *w = &br->windows;
/* Make updates atomic to: handle the case of one VCPU updating the bridge
* while another accesses an unaffected region. */
memory_region_transaction_begin();
- pci_bridge_region_del(br, br->windows);
+ pci_bridge_region_del(br, w);
pci_bridge_region_cleanup(br, w);
- br->windows = pci_bridge_region_init(br);
+ pci_bridge_region_init(br);
memory_region_transaction_commit();
}
@@ -275,7 +275,7 @@ void pci_bridge_write_config(PCIDevice *d,
newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL);
if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) {
/* Trigger hot reset on 0->1 transition. */
- qbus_reset_all(BUS(&s->sec_bus));
+ bus_cold_reset(BUS(&s->sec_bus));
}
}
@@ -374,8 +374,8 @@ void pci_bridge_initfn(PCIDevice *dev, const char *typename)
br->bus_name = dev->qdev.id;
}
- qbus_create_inplace(sec_bus, sizeof(br->sec_bus), typename, DEVICE(dev),
- br->bus_name);
+ qbus_init(sec_bus, sizeof(br->sec_bus), typename, DEVICE(dev),
+ br->bus_name);
sec_bus->parent_dev = dev;
sec_bus->map_irq = br->map_irq ? br->map_irq : pci_swizzle_map_irq_fn;
sec_bus->address_space_mem = &br->address_space_mem;
@@ -383,9 +383,14 @@ void pci_bridge_initfn(PCIDevice *dev, const char *typename)
sec_bus->address_space_io = &br->address_space_io;
memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io",
4 * GiB);
- br->windows = pci_bridge_region_init(br);
+ pci_bridge_region_init(br);
QLIST_INIT(&sec_bus->child);
QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling);
+
+ /* For express secondary buses, secondary latency timer is RO 0 */
+ if (pci_bus_is_express(sec_bus) && !br->pcie_writeable_slt_bug) {
+ dev->wmask[PCI_SEC_LATENCY_TIMER] = 0;
+ }
}
/* default qdev clean up function for PCI-to-PCI bridge */
@@ -394,8 +399,8 @@ void pci_bridge_exitfn(PCIDevice *pci_dev)
PCIBridge *s = PCI_BRIDGE(pci_dev);
assert(QLIST_EMPTY(&s->sec_bus.child));
QLIST_REMOVE(&s->sec_bus, sibling);
- pci_bridge_region_del(s, s->windows);
- pci_bridge_region_cleanup(s, s->windows);
+ pci_bridge_region_del(s, &s->windows);
+ pci_bridge_region_cleanup(s, &s->windows);
/* object_unparent() is called automatically during device deletion */
}
@@ -448,11 +453,11 @@ int pci_bridge_qemu_reserve_cap_init(PCIDevice *dev, int cap_offset,
PCIBridgeQemuCap cap = {
.len = cap_len,
.type = REDHAT_PCI_CAP_RESOURCE_RESERVE,
- .bus_res = res_reserve.bus,
- .io = res_reserve.io,
- .mem = res_reserve.mem_non_pref,
- .mem_pref_32 = res_reserve.mem_pref_32,
- .mem_pref_64 = res_reserve.mem_pref_64
+ .bus_res = cpu_to_le32(res_reserve.bus),
+ .io = cpu_to_le64(res_reserve.io),
+ .mem = cpu_to_le32(res_reserve.mem_non_pref),
+ .mem_pref_32 = cpu_to_le32(res_reserve.mem_pref_32),
+ .mem_pref_64 = cpu_to_le64(res_reserve.mem_pref_64)
};
int offset = pci_add_capability(dev, PCI_CAP_ID_VNDR,
@@ -467,11 +472,31 @@ int pci_bridge_qemu_reserve_cap_init(PCIDevice *dev, int cap_offset,
return 0;
}
+static Property pci_bridge_properties[] = {
+ DEFINE_PROP_BOOL("x-pci-express-writeable-slt-bug", PCIBridge,
+ pcie_writeable_slt_bug, false),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void pci_bridge_class_init(ObjectClass *klass, void *data)
+{
+ AcpiDevAmlIfClass *adevc = ACPI_DEV_AML_IF_CLASS(klass);
+ DeviceClass *k = DEVICE_CLASS(klass);
+
+ device_class_set_props(k, pci_bridge_properties);
+ adevc->build_dev_aml = build_pci_bridge_aml;
+}
+
static const TypeInfo pci_bridge_type_info = {
.name = TYPE_PCI_BRIDGE,
.parent = TYPE_PCI_DEVICE,
.instance_size = sizeof(PCIBridge),
+ .class_init = pci_bridge_class_init,
.abstract = true,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_ACPI_DEV_AML_IF },
+ { },
+ },
};
static void pci_bridge_register_types(void)
diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
index cf02f0d6a5..dfe6fe6184 100644
--- a/hw/pci/pci_host.c
+++ b/hw/pci/pci_host.c
@@ -62,6 +62,17 @@ static void pci_adjust_config_limit(PCIBus *bus, uint32_t *limit)
}
}
+static bool is_pci_dev_ejected(PCIDevice *pci_dev)
+{
+ /*
+ * device unplug was requested and the guest acked it,
+ * so we stop responding config accesses even if the
+ * device is not deleted (failover flow)
+ */
+ return pci_dev && pci_dev->partially_hotplugged &&
+ !pci_dev->qdev.pending_deleted_event;
+}
+
void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
uint32_t limit, uint32_t val, uint32_t len)
{
@@ -74,11 +85,13 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
/* non-zero functions are only exposed when function 0 is present,
* allowing direct removal of unexposed functions.
*/
- if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) {
+ if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
+ !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) {
return;
}
- trace_pci_cfg_write(pci_dev->name, PCI_SLOT(pci_dev->devfn),
+ trace_pci_cfg_write(pci_dev->name, pci_dev_bus_num(pci_dev),
+ PCI_SLOT(pci_dev->devfn),
PCI_FUNC(pci_dev->devfn), addr, val);
pci_dev->config_write(pci_dev, addr, val, MIN(len, limit - addr));
}
@@ -97,12 +110,14 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
/* non-zero functions are only exposed when function 0 is present,
* allowing direct removal of unexposed functions.
*/
- if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) {
+ if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
+ !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) {
return ~0x0;
}
ret = pci_dev->config_read(pci_dev, addr, MIN(len, limit - addr));
- trace_pci_cfg_read(pci_dev->name, PCI_SLOT(pci_dev->devfn),
+ trace_pci_cfg_read(pci_dev->name, pci_dev_bus_num(pci_dev),
+ PCI_SLOT(pci_dev->devfn),
PCI_FUNC(pci_dev->devfn), addr, ret);
return ret;
@@ -114,6 +129,9 @@ void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, unsigned len)
uint32_t config_addr = addr & (PCI_CONFIG_SPACE_SIZE - 1);
if (!pci_dev) {
+ trace_pci_cfg_write("empty", extract32(addr, 16, 8),
+ extract32(addr, 11, 5), extract32(addr, 8, 3),
+ config_addr, val);
return;
}
@@ -127,6 +145,9 @@ uint32_t pci_data_read(PCIBus *s, uint32_t addr, unsigned len)
uint32_t config_addr = addr & (PCI_CONFIG_SPACE_SIZE - 1);
if (!pci_dev) {
+ trace_pci_cfg_read("empty", extract32(addr, 16, 8),
+ extract32(addr, 11, 5), extract32(addr, 8, 3),
+ config_addr, ~0x0);
return ~0x0;
}
@@ -139,7 +160,7 @@ static void pci_host_config_write(void *opaque, hwaddr addr,
{
PCIHostState *s = opaque;
- PCI_DPRINTF("%s addr " TARGET_FMT_plx " len %d val %"PRIx64"\n",
+ PCI_DPRINTF("%s addr " HWADDR_FMT_plx " len %d val %"PRIx64"\n",
__func__, addr, len, val);
if (addr != 0 || len != 4) {
return;
@@ -153,7 +174,7 @@ static uint64_t pci_host_config_read(void *opaque, hwaddr addr,
PCIHostState *s = opaque;
uint32_t val = s->config_reg;
- PCI_DPRINTF("%s addr " TARGET_FMT_plx " len %d val %"PRIx32"\n",
+ PCI_DPRINTF("%s addr " HWADDR_FMT_plx " len %d val %"PRIx32"\n",
__func__, addr, len, val);
return val;
}
@@ -213,7 +234,7 @@ const VMStateDescription vmstate_pcihost = {
.needed = pci_host_needed,
.version_id = 1,
.minimum_version_id = 1,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_UINT32(config_reg, PCIHostState),
VMSTATE_END_OF_LIST()
}
@@ -222,7 +243,7 @@ const VMStateDescription vmstate_pcihost = {
static Property pci_host_properties_common[] = {
DEFINE_PROP_BOOL("x-config-reg-migration-enabled", PCIHostState,
mig_enabled, true),
- DEFINE_PROP_BOOL("bypass-iommu", PCIHostState, bypass_iommu, false),
+ DEFINE_PROP_BOOL(PCI_HOST_BYPASS_IOMMU, PCIHostState, bypass_iommu, false),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 6e95d82903..4b2f0805c6 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -28,6 +28,7 @@
#include "hw/pci/pcie_regs.h"
#include "hw/pci/pcie_port.h"
#include "qemu/range.h"
+#include "trace.h"
//#define DEBUG_PCIE
#ifdef DEBUG_PCIE
@@ -39,6 +40,28 @@
#define PCIE_DEV_PRINTF(dev, fmt, ...) \
PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)
+static bool pcie_sltctl_powered_off(uint16_t sltctl)
+{
+ return (sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_OFF
+ && (sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_OFF;
+}
+
+static const char *pcie_led_state_to_str(uint16_t value)
+{
+ switch (value) {
+ case PCI_EXP_SLTCTL_PWR_IND_ON:
+ case PCI_EXP_SLTCTL_ATTN_IND_ON:
+ return "on";
+ case PCI_EXP_SLTCTL_PWR_IND_BLINK:
+ case PCI_EXP_SLTCTL_ATTN_IND_BLINK:
+ return "blink";
+ case PCI_EXP_SLTCTL_PWR_IND_OFF:
+ case PCI_EXP_SLTCTL_ATTN_IND_OFF:
+ return "off";
+ default:
+ return "invalid";
+ }
+}
/***************************************************************************
* pci express capability helper functions
@@ -148,6 +171,14 @@ static void pcie_cap_fill_slot_lnk(PCIDevice *dev)
pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2,
PCI_EXP_LNKCAP2_SLS_16_0GB);
}
+ if (s->speed > QEMU_PCI_EXP_LNK_16GT) {
+ pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2,
+ PCI_EXP_LNKCAP2_SLS_32_0GB);
+ }
+ if (s->speed > QEMU_PCI_EXP_LNK_32GT) {
+ pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2,
+ PCI_EXP_LNKCAP2_SLS_64_0GB);
+ }
}
}
@@ -269,6 +300,13 @@ uint8_t pcie_cap_get_type(const PCIDevice *dev)
PCI_EXP_FLAGS_TYPE) >> PCI_EXP_FLAGS_TYPE_SHIFT;
}
+uint8_t pcie_cap_get_version(const PCIDevice *dev)
+{
+ uint32_t pos = dev->exp.exp_cap;
+ assert(pos > 0);
+ return pci_get_word(dev->config + pos + PCI_EXP_FLAGS) & PCI_EXP_FLAGS_VERS;
+}
+
/* MSI/MSI-X */
/* pci express interrupt message number */
/* 7.8.2 PCI Express Capabilities Register: Interrupt Message Number */
@@ -353,7 +391,7 @@ static void hotplug_event_notify(PCIDevice *dev)
msix_notify(dev, pcie_cap_flags_get_vector(dev));
} else if (msi_enabled(dev)) {
msi_notify(dev, pcie_cap_flags_get_vector(dev));
- } else {
+ } else if (pci_intx(dev) != -1) {
pci_set_irq(dev, dev->exp.hpev_notified);
}
}
@@ -361,11 +399,47 @@ static void hotplug_event_notify(PCIDevice *dev)
static void hotplug_event_clear(PCIDevice *dev)
{
hotplug_event_update_event_status(dev);
- if (!msix_enabled(dev) && !msi_enabled(dev) && !dev->exp.hpev_notified) {
+ if (!msix_enabled(dev) && !msi_enabled(dev) && pci_intx(dev) != -1 &&
+ !dev->exp.hpev_notified) {
pci_irq_deassert(dev);
}
}
+void pcie_cap_slot_enable_power(PCIDevice *dev)
+{
+ uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
+ uint32_t sltcap = pci_get_long(exp_cap + PCI_EXP_SLTCAP);
+
+ if (sltcap & PCI_EXP_SLTCAP_PCP) {
+ pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTCTL,
+ PCI_EXP_SLTCTL_PCC);
+ }
+}
+
+static void pcie_set_power_device(PCIBus *bus, PCIDevice *dev, void *opaque)
+{
+ bool *power = opaque;
+
+ pci_set_power(dev, *power);
+}
+
+static void pcie_cap_update_power(PCIDevice *hotplug_dev)
+{
+ uint8_t *exp_cap = hotplug_dev->config + hotplug_dev->exp.exp_cap;
+ PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(hotplug_dev));
+ uint32_t sltcap = pci_get_long(exp_cap + PCI_EXP_SLTCAP);
+ uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL);
+ bool power = true;
+
+ if (sltcap & PCI_EXP_SLTCAP_PCP) {
+ power = (sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_ON;
+ /* Don't we need to check also (sltctl & PCI_EXP_SLTCTL_PIC) ? */
+ }
+
+ pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
+ pcie_set_power_device, &power);
+}
+
/*
* A PCI Express Hot-Plug Event has occurred, so update slot status register
* and notify OS of the event if necessary.
@@ -423,6 +497,11 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
PCIDevice *pci_dev = PCI_DEVICE(dev);
uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP);
+ if (pci_is_vf(pci_dev)) {
+ /* Virtual function cannot be physically disconnected */
+ return;
+ }
+
/* Don't send event when device is enabled during qemu machine creation:
* it is present on boot, no hotplug event is necessary. We do send an
* event when the device is disabled later. */
@@ -434,6 +513,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA,
PCI_EXP_LNKSTA_DLLLA);
}
+ pcie_cap_update_power(hotplug_pdev);
return;
}
@@ -451,6 +531,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
}
pcie_cap_slot_event(hotplug_pdev,
PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP);
+ pcie_cap_update_power(hotplug_pdev);
}
}
@@ -472,6 +553,25 @@ static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque)
object_unparent(OBJECT(dev));
}
+static void pcie_cap_slot_do_unplug(PCIDevice *dev)
+{
+ PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
+ uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
+ uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP);
+
+ pci_for_each_device_under_bus(sec_bus, pcie_unplug_device, NULL);
+
+ pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
+ PCI_EXP_SLTSTA_PDS);
+ if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA ||
+ (lnkcap & PCI_EXP_LNKCAP_DLLLARC)) {
+ pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
+ PCI_EXP_LNKSTA_DLLLA);
+ }
+ pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
+ PCI_EXP_SLTSTA_PDC);
+}
+
void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
@@ -481,6 +581,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev);
uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap;
uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP);
+ uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL);
/* Check if hot-unplug is disabled on the slot */
if ((sltcap & PCI_EXP_SLTCAP_HPC) == 0) {
@@ -496,7 +597,15 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
return;
}
+ if ((sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_BLINK) {
+ error_setg(errp, "Hot-unplug failed: "
+ "guest is busy (power indicator blinking)");
+ return;
+ }
+
dev->pending_deleted_event = true;
+ dev->pending_deleted_expires_ms =
+ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 5000; /* 5 secs */
/* In case user cancel the operation of multi-function hot-add,
* remove the function that is unexposed to guest individually,
@@ -509,6 +618,15 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
return;
}
+ if (pcie_sltctl_powered_off(sltctl)) {
+ /* slot is powered off -> unplug without round-trip to the guest */
+ pcie_cap_slot_do_unplug(hotplug_pdev);
+ hotplug_event_notify(hotplug_pdev);
+ pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
+ PCI_EXP_SLTSTA_ABP);
+ return;
+ }
+
pcie_cap_slot_push_attention_button(hotplug_pdev);
}
@@ -531,11 +649,11 @@ void pcie_cap_slot_init(PCIDevice *dev, PCIESlot *s)
PCI_EXP_SLTCAP_ABP);
/*
- * Enable native hot-plug on all hot-plugged bridges unless
- * hot-plug is disabled on the slot.
+ * Expose native hot-plug on all bridges if hot-plug is enabled on the slot.
+ * (unless broken 6.1 ABI is enforced for compat reasons)
*/
if (s->hotplug &&
- (s->native_hotplug || DEVICE(dev)->hotplugged)) {
+ (!s->hide_native_hotplug_cap || DEVICE(dev)->hotplugged)) {
pci_long_test_and_set_mask(dev->config + pos + PCI_EXP_SLTCAP,
PCI_EXP_SLTCAP_HPS |
PCI_EXP_SLTCAP_HPC);
@@ -554,8 +672,8 @@ void pcie_cap_slot_init(PCIDevice *dev, PCIESlot *s)
PCI_EXP_SLTCTL_PIC |
PCI_EXP_SLTCTL_AIC);
pci_word_test_and_set_mask(dev->config + pos + PCI_EXP_SLTCTL,
- PCI_EXP_SLTCTL_PIC_OFF |
- PCI_EXP_SLTCTL_AIC_OFF);
+ PCI_EXP_SLTCTL_PWR_IND_OFF |
+ PCI_EXP_SLTCTL_ATTN_IND_OFF);
pci_word_test_and_set_mask(dev->wmask + pos + PCI_EXP_SLTCTL,
PCI_EXP_SLTCTL_PIC |
PCI_EXP_SLTCTL_AIC |
@@ -574,6 +692,10 @@ void pcie_cap_slot_init(PCIDevice *dev, PCIESlot *s)
pci_word_test_and_set_mask(dev->w1cmask + pos + PCI_EXP_SLTSTA,
PCI_EXP_HP_EV_SUPPORTED);
+ /* Avoid migration abortion when this device hot-removed by guest */
+ pci_word_test_and_clear_mask(dev->cmask + pos + PCI_EXP_SLTSTA,
+ PCI_EXP_SLTSTA_PDS);
+
dev->exp.hpev_notified = false;
qbus_set_hotplug_handler(BUS(pci_bridge_get_sec_bus(PCI_BRIDGE(dev))),
@@ -599,7 +721,8 @@ void pcie_cap_slot_reset(PCIDevice *dev)
PCI_EXP_SLTCTL_PDCE |
PCI_EXP_SLTCTL_ABPE);
pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL,
- PCI_EXP_SLTCTL_AIC_OFF);
+ PCI_EXP_SLTCTL_PWR_IND_OFF |
+ PCI_EXP_SLTCTL_ATTN_IND_OFF);
if (dev->cap_present & QEMU_PCIE_SLTCAP_PCP) {
/* Downstream ports enforce device number 0. */
@@ -614,7 +737,8 @@ void pcie_cap_slot_reset(PCIDevice *dev)
PCI_EXP_SLTCTL_PCC);
}
- pic = populated ? PCI_EXP_SLTCTL_PIC_ON : PCI_EXP_SLTCTL_PIC_OFF;
+ pic = populated ?
+ PCI_EXP_SLTCTL_PWR_IND_ON : PCI_EXP_SLTCTL_PWR_IND_OFF;
pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL, pic);
}
@@ -625,6 +749,7 @@ void pcie_cap_slot_reset(PCIDevice *dev)
PCI_EXP_SLTSTA_PDC |
PCI_EXP_SLTSTA_ABP);
+ pcie_cap_update_power(dev);
hotplug_event_update_event_status(dev);
}
@@ -636,6 +761,28 @@ void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slt_ctl, uint16_t *slt_sta)
*slt_sta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
}
+static void find_child_fn(PCIBus *bus, PCIDevice *dev, void *opaque)
+{
+ PCIDevice **child = opaque;
+
+ if (!*child) {
+ *child = dev;
+ }
+}
+
+/*
+ * Returns the plugged device or first function of multifunction plugged device
+ */
+static PCIDevice *pcie_cap_slot_find_child(PCIDevice *dev)
+{
+ PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
+ PCIDevice *child = NULL;
+
+ pci_for_each_device(sec_bus, pci_bus_num(sec_bus), find_child_fn, &child);
+
+ return child;
+}
+
void pcie_cap_slot_write_config(PCIDevice *dev,
uint16_t old_slt_ctl, uint16_t old_slt_sta,
uint32_t addr, uint32_t val, int len)
@@ -643,7 +790,6 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
uint32_t pos = dev->exp.exp_cap;
uint8_t *exp_cap = dev->config + pos;
uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
- uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP);
if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) {
/*
@@ -681,6 +827,22 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
sltsta);
}
+ if (trace_event_get_state_backends(TRACE_PCIE_CAP_SLOT_WRITE_CONFIG)) {
+ DeviceState *parent = DEVICE(dev);
+ DeviceState *child = DEVICE(pcie_cap_slot_find_child(dev));
+
+ trace_pcie_cap_slot_write_config(
+ parent->canonical_path,
+ child ? child->canonical_path : "no-child",
+ (sltsta & PCI_EXP_SLTSTA_PDS) ? "present" : "not present",
+ pcie_led_state_to_str(old_slt_ctl & PCI_EXP_SLTCTL_PIC),
+ pcie_led_state_to_str(val & PCI_EXP_SLTCTL_PIC),
+ pcie_led_state_to_str(old_slt_ctl & PCI_EXP_SLTCTL_AIC),
+ pcie_led_state_to_str(val & PCI_EXP_SLTCTL_AIC),
+ (old_slt_ctl & PCI_EXP_SLTCTL_PWR_OFF) ? "off" : "on",
+ (val & PCI_EXP_SLTCTL_PWR_OFF) ? "off" : "on");
+ }
+
/*
* If the slot is populated, power indicator is off and power
* controller is off, it is safe to detach the devices.
@@ -689,24 +851,12 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
* this is a work around for guests that overwrite
* control of powered off slots before powering them on.
*/
- if ((sltsta & PCI_EXP_SLTSTA_PDS) && (val & PCI_EXP_SLTCTL_PCC) &&
- (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF &&
- (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) ||
- (old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) {
- PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
- pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
- pcie_unplug_device, NULL);
-
- pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
- PCI_EXP_SLTSTA_PDS);
- if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA ||
- (lnkcap & PCI_EXP_LNKCAP_DLLLARC)) {
- pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
- PCI_EXP_LNKSTA_DLLLA);
- }
- pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
- PCI_EXP_SLTSTA_PDC);
+ if ((sltsta & PCI_EXP_SLTSTA_PDS) && pcie_sltctl_powered_off(val) &&
+ !pcie_sltctl_powered_off(old_slt_ctl))
+ {
+ pcie_cap_slot_do_unplug(dev);
}
+ pcie_cap_update_power(dev);
hotplug_event_notify(dev);
@@ -733,6 +883,7 @@ int pcie_cap_slot_post_load(void *opaque, int version_id)
{
PCIDevice *dev = opaque;
hotplug_event_update_event_status(dev);
+ pcie_cap_update_power(dev);
return 0;
}
@@ -876,8 +1027,8 @@ void pcie_add_capability(PCIDevice *dev,
uint16_t offset, uint16_t size)
{
assert(offset >= PCI_CONFIG_SPACE_SIZE);
- assert(offset < offset + size);
- assert(offset + size <= PCIE_CONFIG_SPACE_SIZE);
+ assert(offset < (uint16_t)(offset + size));
+ assert((uint16_t)(offset + size) <= PCIE_CONFIG_SPACE_SIZE);
assert(size >= 8);
assert(pci_is_express(dev));
@@ -952,8 +1103,10 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev)
*/
/* ARI */
-void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn)
+void pcie_ari_init(PCIDevice *dev, uint16_t offset)
{
+ uint16_t nextfn = dev->cap_present & QEMU_PCIE_ARI_NEXTFN_1 ? 1 : 0;
+
pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER,
offset, PCI_ARI_SIZEOF);
pci_set_long(dev->config + offset + PCI_ARI_CAP, (nextfn & 0xff) << 8);
diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
index 27f9cc56af..2c85a78fcd 100644
--- a/hw/pci/pcie_aer.c
+++ b/hw/pci/pcie_aer.c
@@ -19,17 +19,14 @@
*/
#include "qemu/osdep.h"
-#include "sysemu/sysemu.h"
-#include "qapi/qmp/qdict.h"
#include "migration/vmstate.h"
-#include "monitor/monitor.h"
#include "hw/pci/pci_bridge.h"
#include "hw/pci/pcie.h"
#include "hw/pci/msix.h"
#include "hw/pci/msi.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pcie_regs.h"
-#include "qapi/error.h"
+#include "pci-internal.h"
//#define DEBUG_PCIE
#ifdef DEBUG_PCIE
@@ -44,13 +41,6 @@
#define PCI_ERR_SRC_COR_OFFS 0
#define PCI_ERR_SRC_UNCOR_OFFS 2
-typedef struct PCIEErrorDetails {
- const char *id;
- const char *root_bus;
- int bus;
- int devfn;
-} PCIEErrorDetails;
-
/* From 6.2.7 Error Listing and Rules. Table 6-2, 6-3 and 6-4 */
static uint32_t pcie_aer_uncor_default_severity(uint32_t status)
{
@@ -123,6 +113,13 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset,
pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
PCI_ERR_UNC_SUPPORTED);
+ if (dev->cap_present & QEMU_PCIE_ERR_UNC_MASK) {
+ pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK,
+ PCI_ERR_UNC_MASK_DEFAULT);
+ pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK,
+ PCI_ERR_UNC_SUPPORTED);
+ }
+
pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
PCI_ERR_UNC_SEVERITY_DEFAULT);
pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_SEVER,
@@ -198,8 +195,16 @@ static void pcie_aer_update_uncor_status(PCIDevice *dev)
static bool
pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg)
{
+ uint16_t devctl = pci_get_word(dev->config + dev->exp.exp_cap +
+ PCI_EXP_DEVCTL);
if (!(pcie_aer_msg_is_uncor(msg) &&
- (pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR))) {
+ (pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR)) &&
+ !((msg->severity == PCI_ERR_ROOT_CMD_NONFATAL_EN) &&
+ (devctl & PCI_EXP_DEVCTL_NFERE)) &&
+ !((msg->severity == PCI_ERR_ROOT_CMD_COR_EN) &&
+ (devctl & PCI_EXP_DEVCTL_CERE)) &&
+ !((msg->severity == PCI_ERR_ROOT_CMD_FATAL_EN) &&
+ (devctl & PCI_EXP_DEVCTL_FERE))) {
return false;
}
@@ -290,7 +295,7 @@ static void pcie_aer_root_notify(PCIDevice *dev)
msix_notify(dev, pcie_aer_root_get_vector(dev));
} else if (msi_enabled(dev)) {
msi_notify(dev, pcie_aer_root_get_vector(dev));
- } else {
+ } else if (pci_intx(dev) != -1) {
pci_irq_assert(dev);
}
}
@@ -319,11 +324,11 @@ static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
* it isn't implemented in qemu right now.
* So just discard the error for now.
* OS which cares of aer would receive errors via
- * native aer mechanims, so this wouldn't matter.
+ * native aer mechanisms, so this wouldn't matter.
*/
}
- /* Errro Message Received: Root Error Status register */
+ /* Error Message Received: Root Error Status register */
switch (msg->severity) {
case PCI_ERR_ROOT_CMD_COR_EN:
if (root_status & PCI_ERR_ROOT_COR_RCV) {
@@ -631,7 +636,7 @@ static bool pcie_aer_inject_uncor_error(PCIEAERInject *inj, bool is_fatal)
* Figure 6-2: Flowchart Showing Sequence of Device Error Signaling and Logging
* Operations
*/
-static int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err)
+int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err)
{
uint8_t *aer_cap = NULL;
uint16_t devctl = 0;
@@ -774,7 +779,9 @@ void pcie_aer_root_write_config(PCIDevice *dev,
uint32_t root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
/* 6.2.4.1.2 Interrupt Generation */
if (!msix_enabled(dev) && !msi_enabled(dev)) {
- pci_set_irq(dev, !!(root_cmd & enabled_cmd));
+ if (pci_intx(dev) != -1) {
+ pci_set_irq(dev, !!(root_cmd & enabled_cmd));
+ }
return;
}
@@ -790,7 +797,7 @@ static const VMStateDescription vmstate_pcie_aer_err = {
.name = "PCIE_AER_ERROR",
.version_id = 1,
.minimum_version_id = 1,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_UINT32(status, PCIEAERErr),
VMSTATE_UINT16(source_id, PCIEAERErr),
VMSTATE_UINT16(flags, PCIEAERErr),
@@ -811,7 +818,7 @@ const VMStateDescription vmstate_pcie_aer_log = {
.name = "PCIE_AER_ERROR_LOG",
.version_id = 1,
.minimum_version_id = 1,
- .fields = (VMStateField[]) {
+ .fields = (const VMStateField[]) {
VMSTATE_UINT16(log_num, PCIEAERLog),
VMSTATE_UINT16_EQUAL(log_max, PCIEAERLog, NULL),
VMSTATE_VALIDATE("log_num <= log_max", pcie_aer_state_log_num_valid),
@@ -931,8 +938,8 @@ static const struct PCIEAERErrorName pcie_aer_error_list[] = {
},
};
-static int pcie_aer_parse_error_string(const char *error_name,
- uint32_t *status, bool *correctable)
+int pcie_aer_parse_error_string(const char *error_name,
+ uint32_t *status, bool *correctable)
{
int i;
@@ -948,98 +955,3 @@ static int pcie_aer_parse_error_string(const char *error_name,
}
return -EINVAL;
}
-
-/*
- * Inject an error described by @qdict.
- * On success, set @details to show where error was sent.
- * Return negative errno if injection failed and a message was emitted.
- */
-static int do_pcie_aer_inject_error(Monitor *mon,
- const QDict *qdict,
- PCIEErrorDetails *details)
-{
- const char *id = qdict_get_str(qdict, "id");
- const char *error_name;
- uint32_t error_status;
- bool correctable;
- PCIDevice *dev;
- PCIEAERErr err;
- int ret;
-
- ret = pci_qdev_find_device(id, &dev);
- if (ret < 0) {
- monitor_printf(mon,
- "id or pci device path is invalid or device not "
- "found. %s\n", id);
- return ret;
- }
- if (!pci_is_express(dev)) {
- monitor_printf(mon, "the device doesn't support pci express. %s\n",
- id);
- return -ENOSYS;
- }
-
- error_name = qdict_get_str(qdict, "error_status");
- if (pcie_aer_parse_error_string(error_name, &error_status, &correctable)) {
- char *e = NULL;
- error_status = strtoul(error_name, &e, 0);
- correctable = qdict_get_try_bool(qdict, "correctable", false);
- if (!e || *e != '\0') {
- monitor_printf(mon, "invalid error status value. \"%s\"",
- error_name);
- return -EINVAL;
- }
- }
- err.status = error_status;
- err.source_id = pci_requester_id(dev);
-
- err.flags = 0;
- if (correctable) {
- err.flags |= PCIE_AER_ERR_IS_CORRECTABLE;
- }
- if (qdict_get_try_bool(qdict, "advisory_non_fatal", false)) {
- err.flags |= PCIE_AER_ERR_MAYBE_ADVISORY;
- }
- if (qdict_haskey(qdict, "header0")) {
- err.flags |= PCIE_AER_ERR_HEADER_VALID;
- }
- if (qdict_haskey(qdict, "prefix0")) {
- err.flags |= PCIE_AER_ERR_TLP_PREFIX_PRESENT;
- }
-
- err.header[0] = qdict_get_try_int(qdict, "header0", 0);
- err.header[1] = qdict_get_try_int(qdict, "header1", 0);
- err.header[2] = qdict_get_try_int(qdict, "header2", 0);
- err.header[3] = qdict_get_try_int(qdict, "header3", 0);
-
- err.prefix[0] = qdict_get_try_int(qdict, "prefix0", 0);
- err.prefix[1] = qdict_get_try_int(qdict, "prefix1", 0);
- err.prefix[2] = qdict_get_try_int(qdict, "prefix2", 0);
- err.prefix[3] = qdict_get_try_int(qdict, "prefix3", 0);
-
- ret = pcie_aer_inject_error(dev, &err);
- if (ret < 0) {
- monitor_printf(mon, "failed to inject error: %s\n",
- strerror(-ret));
- return ret;
- }
- details->id = id;
- details->root_bus = pci_root_bus_path(dev);
- details->bus = pci_dev_bus_num(dev);
- details->devfn = dev->devfn;
-
- return 0;
-}
-
-void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict)
-{
- PCIEErrorDetails data;
-
- if (do_pcie_aer_inject_error(mon, qdict, &data) < 0) {
- return;
- }
-
- monitor_printf(mon, "OK id: %s root bus: %s, bus: %x devfn: %x.%x\n",
- data.id, data.root_bus, data.bus,
- PCI_SLOT(data.devfn), PCI_FUNC(data.devfn));
-}
diff --git a/hw/pci/pcie_doe.c b/hw/pci/pcie_doe.c
new file mode 100644
index 0000000000..2210f86968
--- /dev/null
+++ b/hw/pci/pcie_doe.c
@@ -0,0 +1,367 @@
+/*
+ * PCIe Data Object Exchange
+ *
+ * Copyright (C) 2021 Avery Design Systems, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qemu/range.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/pcie.h"
+#include "hw/pci/pcie_doe.h"
+#include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
+
+#define DWORD_BYTE 4
+
+typedef struct DoeDiscoveryReq {
+ DOEHeader header;
+ uint8_t index;
+ uint8_t reserved[3];
+} QEMU_PACKED DoeDiscoveryReq;
+
+typedef struct DoeDiscoveryRsp {
+ DOEHeader header;
+ uint16_t vendor_id;
+ uint8_t data_obj_type;
+ uint8_t next_index;
+} QEMU_PACKED DoeDiscoveryRsp;
+
+static bool pcie_doe_discovery(DOECap *doe_cap)
+{
+ DoeDiscoveryReq *req = pcie_doe_get_write_mbox_ptr(doe_cap);
+ DoeDiscoveryRsp rsp;
+ uint8_t index = req->index;
+ DOEProtocol *prot;
+
+ /* Discard request if length does not match DoeDiscoveryReq */
+ if (pcie_doe_get_obj_len(req) <
+ DIV_ROUND_UP(sizeof(DoeDiscoveryReq), DWORD_BYTE)) {
+ return false;
+ }
+
+ rsp.header = (DOEHeader) {
+ .vendor_id = PCI_VENDOR_ID_PCI_SIG,
+ .data_obj_type = PCI_SIG_DOE_DISCOVERY,
+ .length = DIV_ROUND_UP(sizeof(DoeDiscoveryRsp), DWORD_BYTE),
+ };
+
+ /* Point to the requested protocol, index 0 must be Discovery */
+ if (index == 0) {
+ rsp.vendor_id = PCI_VENDOR_ID_PCI_SIG;
+ rsp.data_obj_type = PCI_SIG_DOE_DISCOVERY;
+ } else {
+ if (index < doe_cap->protocol_num) {
+ prot = &doe_cap->protocols[index - 1];
+ rsp.vendor_id = prot->vendor_id;
+ rsp.data_obj_type = prot->data_obj_type;
+ } else {
+ rsp.vendor_id = 0xFFFF;
+ rsp.data_obj_type = 0xFF;
+ }
+ }
+
+ if (index + 1 == doe_cap->protocol_num) {
+ rsp.next_index = 0;
+ } else {
+ rsp.next_index = index + 1;
+ }
+
+ pcie_doe_set_rsp(doe_cap, &rsp);
+
+ return true;
+}
+
+static void pcie_doe_reset_mbox(DOECap *st)
+{
+ st->read_mbox_idx = 0;
+ st->read_mbox_len = 0;
+ st->write_mbox_len = 0;
+
+ memset(st->read_mbox, 0, PCI_DOE_DW_SIZE_MAX * DWORD_BYTE);
+ memset(st->write_mbox, 0, PCI_DOE_DW_SIZE_MAX * DWORD_BYTE);
+}
+
+void pcie_doe_init(PCIDevice *dev, DOECap *doe_cap, uint16_t offset,
+ DOEProtocol *protocols, bool intr, uint16_t vec)
+{
+ pcie_add_capability(dev, PCI_EXT_CAP_ID_DOE, 0x1, offset,
+ PCI_DOE_SIZEOF);
+
+ doe_cap->pdev = dev;
+ doe_cap->offset = offset;
+
+ if (intr && (msi_present(dev) || msix_present(dev))) {
+ doe_cap->cap.intr = intr;
+ doe_cap->cap.vec = vec;
+ }
+
+ doe_cap->write_mbox = g_malloc0(PCI_DOE_DW_SIZE_MAX * DWORD_BYTE);
+ doe_cap->read_mbox = g_malloc0(PCI_DOE_DW_SIZE_MAX * DWORD_BYTE);
+
+ pcie_doe_reset_mbox(doe_cap);
+
+ doe_cap->protocols = protocols;
+ for (; protocols->vendor_id; protocols++) {
+ doe_cap->protocol_num++;
+ }
+ assert(doe_cap->protocol_num < PCI_DOE_PROTOCOL_NUM_MAX);
+
+ /* Increment to allow for the discovery protocol */
+ doe_cap->protocol_num++;
+}
+
+void pcie_doe_fini(DOECap *doe_cap)
+{
+ g_free(doe_cap->read_mbox);
+ g_free(doe_cap->write_mbox);
+ g_free(doe_cap);
+}
+
+uint32_t pcie_doe_build_protocol(DOEProtocol *p)
+{
+ return DATA_OBJ_BUILD_HEADER1(p->vendor_id, p->data_obj_type);
+}
+
+void *pcie_doe_get_write_mbox_ptr(DOECap *doe_cap)
+{
+ return doe_cap->write_mbox;
+}
+
+/*
+ * Copy the response to read mailbox buffer
+ * This might be called in self-defined handle_request() if a DOE response is
+ * required in the corresponding protocol
+ */
+void pcie_doe_set_rsp(DOECap *doe_cap, void *rsp)
+{
+ uint32_t len = pcie_doe_get_obj_len(rsp);
+
+ memcpy(doe_cap->read_mbox + doe_cap->read_mbox_len, rsp, len * DWORD_BYTE);
+ doe_cap->read_mbox_len += len;
+}
+
+uint32_t pcie_doe_get_obj_len(void *obj)
+{
+ uint32_t len;
+
+ if (!obj) {
+ return 0;
+ }
+
+ /* Only lower 18 bits are valid */
+ len = DATA_OBJ_LEN_MASK(((DOEHeader *)obj)->length);
+
+ /* PCIe r6.0 Table 6.29: a value of 00000h indicates 2^18 DW */
+ return (len) ? len : PCI_DOE_DW_SIZE_MAX;
+}
+
+static void pcie_doe_irq_assert(DOECap *doe_cap)
+{
+ PCIDevice *dev = doe_cap->pdev;
+
+ if (doe_cap->cap.intr && doe_cap->ctrl.intr) {
+ if (doe_cap->status.intr) {
+ return;
+ }
+ doe_cap->status.intr = 1;
+
+ if (msix_enabled(dev)) {
+ msix_notify(dev, doe_cap->cap.vec);
+ } else if (msi_enabled(dev)) {
+ msi_notify(dev, doe_cap->cap.vec);
+ }
+ }
+}
+
+static void pcie_doe_set_ready(DOECap *doe_cap, bool rdy)
+{
+ doe_cap->status.ready = rdy;
+
+ if (rdy) {
+ pcie_doe_irq_assert(doe_cap);
+ }
+}
+
+static void pcie_doe_set_error(DOECap *doe_cap, bool err)
+{
+ doe_cap->status.error = err;
+
+ if (err) {
+ pcie_doe_irq_assert(doe_cap);
+ }
+}
+
+/*
+ * Check incoming request in write_mbox for protocol format
+ */
+static void pcie_doe_prepare_rsp(DOECap *doe_cap)
+{
+ bool success = false;
+ int p;
+ bool (*handle_request)(DOECap *) = NULL;
+
+ if (doe_cap->status.error) {
+ return;
+ }
+
+ if (doe_cap->write_mbox[0] ==
+ DATA_OBJ_BUILD_HEADER1(PCI_VENDOR_ID_PCI_SIG, PCI_SIG_DOE_DISCOVERY)) {
+ handle_request = pcie_doe_discovery;
+ } else {
+ for (p = 0; p < doe_cap->protocol_num - 1; p++) {
+ if (doe_cap->write_mbox[0] ==
+ pcie_doe_build_protocol(&doe_cap->protocols[p])) {
+ handle_request = doe_cap->protocols[p].handle_request;
+ break;
+ }
+ }
+ }
+
+ /*
+ * PCIe r6 DOE 6.30.1:
+ * If the number of DW transferred does not match the
+ * indicated Length for a data object, then the
+ * data object must be silently discarded.
+ */
+ if (handle_request && (doe_cap->write_mbox_len ==
+ pcie_doe_get_obj_len(pcie_doe_get_write_mbox_ptr(doe_cap)))) {
+ success = handle_request(doe_cap);
+ }
+
+ if (success) {
+ pcie_doe_set_ready(doe_cap, 1);
+ } else {
+ pcie_doe_reset_mbox(doe_cap);
+ }
+}
+
+/*
+ * Read from DOE config space.
+ * Return false if the address not within DOE_CAP range.
+ */
+bool pcie_doe_read_config(DOECap *doe_cap, uint32_t addr, int size,
+ uint32_t *buf)
+{
+ uint32_t shift;
+ uint16_t doe_offset = doe_cap->offset;
+
+ if (!range_covers_byte(doe_offset + PCI_EXP_DOE_CAP,
+ PCI_DOE_SIZEOF - 4, addr)) {
+ return false;
+ }
+
+ addr -= doe_offset;
+ *buf = 0;
+
+ if (range_covers_byte(PCI_EXP_DOE_CAP, DWORD_BYTE, addr)) {
+ *buf = FIELD_DP32(*buf, PCI_DOE_CAP_REG, INTR_SUPP,
+ doe_cap->cap.intr);
+ *buf = FIELD_DP32(*buf, PCI_DOE_CAP_REG, DOE_INTR_MSG_NUM,
+ doe_cap->cap.vec);
+ } else if (range_covers_byte(PCI_EXP_DOE_CTRL, DWORD_BYTE, addr)) {
+ /* Must return ABORT=0 and GO=0 */
+ *buf = FIELD_DP32(*buf, PCI_DOE_CAP_CONTROL, DOE_INTR_EN,
+ doe_cap->ctrl.intr);
+ } else if (range_covers_byte(PCI_EXP_DOE_STATUS, DWORD_BYTE, addr)) {
+ *buf = FIELD_DP32(*buf, PCI_DOE_CAP_STATUS, DOE_BUSY,
+ doe_cap->status.busy);
+ *buf = FIELD_DP32(*buf, PCI_DOE_CAP_STATUS, DOE_INTR_STATUS,
+ doe_cap->status.intr);
+ *buf = FIELD_DP32(*buf, PCI_DOE_CAP_STATUS, DOE_ERROR,
+ doe_cap->status.error);
+ *buf = FIELD_DP32(*buf, PCI_DOE_CAP_STATUS, DATA_OBJ_RDY,
+ doe_cap->status.ready);
+ /* Mailbox should be DW accessed */
+ } else if (addr == PCI_EXP_DOE_RD_DATA_MBOX && size == DWORD_BYTE) {
+ if (doe_cap->status.ready && !doe_cap->status.error) {
+ *buf = doe_cap->read_mbox[doe_cap->read_mbox_idx];
+ }
+ }
+
+ /* Process Alignment */
+ shift = addr % DWORD_BYTE;
+ *buf = extract32(*buf, shift * 8, size * 8);
+
+ return true;
+}
+
+/*
+ * Write to DOE config space.
+ * Return if the address not within DOE_CAP range or receives an abort
+ */
+void pcie_doe_write_config(DOECap *doe_cap,
+ uint32_t addr, uint32_t val, int size)
+{
+ uint16_t doe_offset = doe_cap->offset;
+ uint32_t shift;
+
+ if (!range_covers_byte(doe_offset + PCI_EXP_DOE_CAP,
+ PCI_DOE_SIZEOF - 4, addr)) {
+ return;
+ }
+
+ /* Process Alignment */
+ shift = addr % DWORD_BYTE;
+ addr -= (doe_offset + shift);
+ val = deposit32(val, shift * 8, size * 8, val);
+
+ switch (addr) {
+ case PCI_EXP_DOE_CTRL:
+ if (FIELD_EX32(val, PCI_DOE_CAP_CONTROL, DOE_ABORT)) {
+ pcie_doe_set_ready(doe_cap, 0);
+ pcie_doe_set_error(doe_cap, 0);
+ pcie_doe_reset_mbox(doe_cap);
+ return;
+ }
+
+ if (FIELD_EX32(val, PCI_DOE_CAP_CONTROL, DOE_GO)) {
+ pcie_doe_prepare_rsp(doe_cap);
+ }
+
+ if (FIELD_EX32(val, PCI_DOE_CAP_CONTROL, DOE_INTR_EN)) {
+ doe_cap->ctrl.intr = 1;
+ /* Clear interrupt bit located within the first byte */
+ } else if (shift == 0) {
+ doe_cap->ctrl.intr = 0;
+ }
+ break;
+ case PCI_EXP_DOE_STATUS:
+ if (FIELD_EX32(val, PCI_DOE_CAP_STATUS, DOE_INTR_STATUS)) {
+ doe_cap->status.intr = 0;
+ }
+ break;
+ case PCI_EXP_DOE_RD_DATA_MBOX:
+ /* Mailbox should be DW accessed */
+ if (size != DWORD_BYTE) {
+ return;
+ }
+ doe_cap->read_mbox_idx++;
+ if (doe_cap->read_mbox_idx == doe_cap->read_mbox_len) {
+ pcie_doe_reset_mbox(doe_cap);
+ pcie_doe_set_ready(doe_cap, 0);
+ } else if (doe_cap->read_mbox_idx > doe_cap->read_mbox_len) {
+ /* Underflow */
+ pcie_doe_set_error(doe_cap, 1);
+ }
+ break;
+ case PCI_EXP_DOE_WR_DATA_MBOX:
+ /* Mailbox should be DW accessed */
+ if (size != DWORD_BYTE) {
+ return;
+ }
+ doe_cap->write_mbox[doe_cap->write_mbox_len] = val;
+ doe_cap->write_mbox_len++;
+ break;
+ case PCI_EXP_DOE_CAP:
+ /* fallthrough */
+ default:
+ break;
+ }
+}
diff --git a/hw/pci/pcie_host.c b/hw/pci/pcie_host.c
index 5abbe83220..3717e1a086 100644
--- a/hw/pci/pcie_host.c
+++ b/hw/pci/pcie_host.c
@@ -20,7 +20,7 @@
*/
#include "qemu/osdep.h"
-#include "hw/pci/pci.h"
+#include "hw/pci/pci_device.h"
#include "hw/pci/pcie_host.h"
#include "qemu/module.h"
diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c
index da850e8dde..20ff2b39e8 100644
--- a/hw/pci/pcie_port.c
+++ b/hw/pci/pcie_port.c
@@ -136,6 +136,76 @@ static void pcie_port_class_init(ObjectClass *oc, void *data)
device_class_set_props(dc, pcie_port_props);
}
+PCIDevice *pcie_find_port_by_pn(PCIBus *bus, uint8_t pn)
+{
+ int devfn;
+
+ for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
+ PCIDevice *d = bus->devices[devfn];
+ PCIEPort *port;
+
+ if (!d || !pci_is_express(d) || !d->exp.exp_cap) {
+ continue;
+ }
+
+ if (!object_dynamic_cast(OBJECT(d), TYPE_PCIE_PORT)) {
+ continue;
+ }
+
+ port = PCIE_PORT(d);
+ if (port->port == pn) {
+ return d;
+ }
+ }
+
+ return NULL;
+}
+
+/* Find first port in devfn number order */
+PCIDevice *pcie_find_port_first(PCIBus *bus)
+{
+ int devfn;
+
+ for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
+ PCIDevice *d = bus->devices[devfn];
+
+ if (!d || !pci_is_express(d) || !d->exp.exp_cap) {
+ continue;
+ }
+
+ if (object_dynamic_cast(OBJECT(d), TYPE_PCIE_PORT)) {
+ return d;
+ }
+ }
+
+ return NULL;
+}
+
+int pcie_count_ds_ports(PCIBus *bus)
+{
+ int dsp_count = 0;
+ int devfn;
+
+ for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
+ PCIDevice *d = bus->devices[devfn];
+
+ if (!d || !pci_is_express(d) || !d->exp.exp_cap) {
+ continue;
+ }
+ if (object_dynamic_cast(OBJECT(d), TYPE_PCIE_PORT)) {
+ dsp_count++;
+ }
+ }
+ return dsp_count;
+}
+
+static bool pcie_slot_is_hotpluggbale_bus(HotplugHandler *plug_handler,
+ BusState *bus)
+{
+ PCIESlot *s = PCIE_SLOT(bus->parent);
+ return s->hotplug;
+}
+
static const TypeInfo pcie_port_type_info = {
.name = TYPE_PCIE_PORT,
.parent = TYPE_PCI_BRIDGE,
@@ -148,7 +218,8 @@ static Property pcie_slot_props[] = {
DEFINE_PROP_UINT8("chassis", PCIESlot, chassis, 0),
DEFINE_PROP_UINT16("slot", PCIESlot, slot, 0),
DEFINE_PROP_BOOL("hotplug", PCIESlot, hotplug, true),
- DEFINE_PROP_BOOL("native-hotplug", PCIESlot, native_hotplug, true),
+ DEFINE_PROP_BOOL("x-do-not-expose-native-hotplug-cap", PCIESlot,
+ hide_native_hotplug_cap, false),
DEFINE_PROP_END_OF_LIST()
};
@@ -162,6 +233,7 @@ static void pcie_slot_class_init(ObjectClass *oc, void *data)
hc->plug = pcie_cap_slot_plug_cb;
hc->unplug = pcie_cap_slot_unplug_cb;
hc->unplug_request = pcie_cap_slot_unplug_request_cb;
+ hc->is_hotpluggable_bus = pcie_slot_is_hotpluggbale_bus;
}
static const TypeInfo pcie_slot_type_info = {
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
new file mode 100644
index 0000000000..e9b23221d7
--- /dev/null
+++ b/hw/pci/pcie_sriov.c
@@ -0,0 +1,318 @@
+/*
+ * pcie_sriov.c:
+ *
+ * Implementation of SR/IOV emulation support.
+ *
+ * Copyright (c) 2015-2017 Knut Omang <knut.omang@oracle.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "hw/pci/pci_device.h"
+#include "hw/pci/pcie.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/qdev-properties.h"
+#include "qemu/error-report.h"
+#include "qemu/range.h"
+#include "qapi/error.h"
+#include "trace.h"
+
+static PCIDevice *register_vf(PCIDevice *pf, int devfn,
+ const char *name, uint16_t vf_num);
+static void unregister_vfs(PCIDevice *dev);
+
+void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
+ const char *vfname, uint16_t vf_dev_id,
+ uint16_t init_vfs, uint16_t total_vfs,
+ uint16_t vf_offset, uint16_t vf_stride)
+{
+ uint8_t *cfg = dev->config + offset;
+ uint8_t *wmask;
+
+ pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1,
+ offset, PCI_EXT_CAP_SRIOV_SIZEOF);
+ dev->exp.sriov_cap = offset;
+ dev->exp.sriov_pf.num_vfs = 0;
+ dev->exp.sriov_pf.vfname = g_strdup(vfname);
+ dev->exp.sriov_pf.vf = NULL;
+
+ pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset);
+ pci_set_word(cfg + PCI_SRIOV_VF_STRIDE, vf_stride);
+
+ /*
+ * Mandatory page sizes to support.
+ * Device implementations can call pcie_sriov_pf_add_sup_pgsize()
+ * to set more bits:
+ */
+ pci_set_word(cfg + PCI_SRIOV_SUP_PGSIZE, SRIOV_SUP_PGSIZE_MINREQ);
+
+ /*
+ * Default is to use 4K pages, software can modify it
+ * to any of the supported bits
+ */
+ pci_set_word(cfg + PCI_SRIOV_SYS_PGSIZE, 0x1);
+
+ /* Set up device ID and initial/total number of VFs available */
+ pci_set_word(cfg + PCI_SRIOV_VF_DID, vf_dev_id);
+ pci_set_word(cfg + PCI_SRIOV_INITIAL_VF, init_vfs);
+ pci_set_word(cfg + PCI_SRIOV_TOTAL_VF, total_vfs);
+ pci_set_word(cfg + PCI_SRIOV_NUM_VF, 0);
+
+ /* Write enable control bits */
+ wmask = dev->wmask + offset;
+ pci_set_word(wmask + PCI_SRIOV_CTRL,
+ PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI);
+ pci_set_word(wmask + PCI_SRIOV_NUM_VF, 0xffff);
+ pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, 0x553);
+
+ qdev_prop_set_bit(&dev->qdev, "multifunction", true);
+}
+
+void pcie_sriov_pf_exit(PCIDevice *dev)
+{
+ unregister_vfs(dev);
+ g_free((char *)dev->exp.sriov_pf.vfname);
+ dev->exp.sriov_pf.vfname = NULL;
+}
+
+void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
+ uint8_t type, dma_addr_t size)
+{
+ uint32_t addr;
+ uint64_t wmask;
+ uint16_t sriov_cap = dev->exp.sriov_cap;
+
+ assert(sriov_cap > 0);
+ assert(region_num >= 0);
+ assert(region_num < PCI_NUM_REGIONS);
+ assert(region_num != PCI_ROM_SLOT);
+
+ wmask = ~(size - 1);
+ addr = sriov_cap + PCI_SRIOV_BAR + region_num * 4;
+
+ pci_set_long(dev->config + addr, type);
+ if (!(type & PCI_BASE_ADDRESS_SPACE_IO) &&
+ type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+ pci_set_quad(dev->wmask + addr, wmask);
+ pci_set_quad(dev->cmask + addr, ~0ULL);
+ } else {
+ pci_set_long(dev->wmask + addr, wmask & 0xffffffff);
+ pci_set_long(dev->cmask + addr, 0xffffffff);
+ }
+ dev->exp.sriov_pf.vf_bar_type[region_num] = type;
+}
+
+void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
+ MemoryRegion *memory)
+{
+ PCIIORegion *r;
+ PCIBus *bus = pci_get_bus(dev);
+ uint8_t type;
+ pcibus_t size = memory_region_size(memory);
+
+ assert(pci_is_vf(dev)); /* PFs must use pci_register_bar */
+ assert(region_num >= 0);
+ assert(region_num < PCI_NUM_REGIONS);
+ type = dev->exp.sriov_vf.pf->exp.sriov_pf.vf_bar_type[region_num];
+
+ if (!is_power_of_2(size)) {
+ error_report("%s: PCI region size must be a power"
+ " of two - type=0x%x, size=0x%"FMT_PCIBUS,
+ __func__, type, size);
+ exit(1);
+ }
+
+ r = &dev->io_regions[region_num];
+ r->memory = memory;
+ r->address_space =
+ type & PCI_BASE_ADDRESS_SPACE_IO
+ ? bus->address_space_io
+ : bus->address_space_mem;
+ r->size = size;
+ r->type = type;
+
+ r->addr = pci_bar_address(dev, region_num, r->type, r->size);
+ if (r->addr != PCI_BAR_UNMAPPED) {
+ memory_region_add_subregion_overlap(r->address_space,
+ r->addr, r->memory, 1);
+ }
+}
+
+static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name,
+ uint16_t vf_num)
+{
+ PCIDevice *dev = pci_new(devfn, name);
+ dev->exp.sriov_vf.pf = pf;
+ dev->exp.sriov_vf.vf_number = vf_num;
+ PCIBus *bus = pci_get_bus(pf);
+ Error *local_err = NULL;
+
+ qdev_realize(&dev->qdev, &bus->qbus, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return NULL;
+ }
+
+ /* set vid/did according to sr/iov spec - they are not used */
+ pci_config_set_vendor_id(dev->config, 0xffff);
+ pci_config_set_device_id(dev->config, 0xffff);
+
+ return dev;
+}
+
+static void register_vfs(PCIDevice *dev)
+{
+ uint16_t num_vfs;
+ uint16_t i;
+ uint16_t sriov_cap = dev->exp.sriov_cap;
+ uint16_t vf_offset =
+ pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET);
+ uint16_t vf_stride =
+ pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE);
+ int32_t devfn = dev->devfn + vf_offset;
+
+ assert(sriov_cap > 0);
+ num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
+ if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) {
+ return;
+ }
+
+ dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs);
+
+ trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), num_vfs);
+ for (i = 0; i < num_vfs; i++) {
+ dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn,
+ dev->exp.sriov_pf.vfname, i);
+ if (!dev->exp.sriov_pf.vf[i]) {
+ num_vfs = i;
+ break;
+ }
+ devfn += vf_stride;
+ }
+ dev->exp.sriov_pf.num_vfs = num_vfs;
+}
+
+static void unregister_vfs(PCIDevice *dev)
+{
+ uint16_t num_vfs = dev->exp.sriov_pf.num_vfs;
+ uint16_t i;
+
+ trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), num_vfs);
+ for (i = 0; i < num_vfs; i++) {
+ Error *err = NULL;
+ PCIDevice *vf = dev->exp.sriov_pf.vf[i];
+ if (!object_property_set_bool(OBJECT(vf), "realized", false, &err)) {
+ error_reportf_err(err, "Failed to unplug: ");
+ }
+ object_unparent(OBJECT(vf));
+ object_unref(OBJECT(vf));
+ }
+ g_free(dev->exp.sriov_pf.vf);
+ dev->exp.sriov_pf.vf = NULL;
+ dev->exp.sriov_pf.num_vfs = 0;
+}
+
+void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
+ uint32_t val, int len)
+{
+ uint32_t off;
+ uint16_t sriov_cap = dev->exp.sriov_cap;
+
+ if (!sriov_cap || address < sriov_cap) {
+ return;
+ }
+ off = address - sriov_cap;
+ if (off >= PCI_EXT_CAP_SRIOV_SIZEOF) {
+ return;
+ }
+
+ trace_sriov_config_write(dev->name, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), off, val, len);
+
+ if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
+ if (dev->exp.sriov_pf.num_vfs) {
+ if (!(val & PCI_SRIOV_CTRL_VFE)) {
+ unregister_vfs(dev);
+ }
+ } else {
+ if (val & PCI_SRIOV_CTRL_VFE) {
+ register_vfs(dev);
+ }
+ }
+ }
+}
+
+
+/* Reset SR/IOV */
+void pcie_sriov_pf_reset(PCIDevice *dev)
+{
+ uint16_t sriov_cap = dev->exp.sriov_cap;
+ if (!sriov_cap) {
+ return;
+ }
+
+ pci_set_word(dev->config + sriov_cap + PCI_SRIOV_CTRL, 0);
+ unregister_vfs(dev);
+
+ pci_set_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF, 0);
+
+ /*
+ * Default is to use 4K pages, software can modify it
+ * to any of the supported bits
+ */
+ pci_set_word(dev->config + sriov_cap + PCI_SRIOV_SYS_PGSIZE, 0x1);
+
+ for (uint16_t i = 0; i < PCI_NUM_REGIONS; i++) {
+ pci_set_quad(dev->config + sriov_cap + PCI_SRIOV_BAR + i * 4,
+ dev->exp.sriov_pf.vf_bar_type[i]);
+ }
+}
+
+/* Add optional supported page sizes to the mask of supported page sizes */
+void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize)
+{
+ uint8_t *cfg = dev->config + dev->exp.sriov_cap;
+ uint8_t *wmask = dev->wmask + dev->exp.sriov_cap;
+
+ uint16_t sup_pgsize = pci_get_word(cfg + PCI_SRIOV_SUP_PGSIZE);
+
+ sup_pgsize |= opt_sup_pgsize;
+
+ /*
+ * Make sure the new bits are set, and that system page size
+ * also can be set to any of the new values according to spec:
+ */
+ pci_set_word(cfg + PCI_SRIOV_SUP_PGSIZE, sup_pgsize);
+ pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, sup_pgsize);
+}
+
+
+uint16_t pcie_sriov_vf_number(PCIDevice *dev)
+{
+ assert(pci_is_vf(dev));
+ return dev->exp.sriov_vf.vf_number;
+}
+
+PCIDevice *pcie_sriov_get_pf(PCIDevice *dev)
+{
+ return dev->exp.sriov_vf.pf;
+}
+
+PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n)
+{
+ assert(!pci_is_vf(dev));
+ if (n < dev->exp.sriov_pf.num_vfs) {
+ return dev->exp.sriov_pf.vf[n];
+ }
+ return NULL;
+}
+
+uint16_t pcie_sriov_num_vfs(PCIDevice *dev)
+{
+ return dev->exp.sriov_pf.num_vfs;
+}
diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c
index 28e62174c4..aac6f2d034 100644
--- a/hw/pci/shpc.c
+++ b/hw/pci/shpc.c
@@ -8,6 +8,7 @@
#include "hw/pci/pci.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/msi.h"
+#include "trace.h"
/* TODO: model power only and disabled slot states. */
/* TODO: handle SERR and wakeups */
@@ -123,10 +124,41 @@
#define SHPC_PCI_TO_IDX(pci_slot) ((pci_slot) - 1)
#define SHPC_IDX_TO_PHYSICAL(slot) ((slot) + 1)
-static uint16_t shpc_get_status(SHPCDevice *shpc, int slot, uint16_t msk)
+static const char *shpc_led_state_to_str(uint8_t value)
+{
+ switch (value) {
+ case SHPC_LED_ON:
+ return "on";
+ case SHPC_LED_BLINK:
+ return "blink";
+ case SHPC_LED_OFF:
+ return "off";
+ default:
+ return "invalid";
+ }
+}
+
+static const char *shpc_slot_state_to_str(uint8_t value)
+{
+ switch (value) {
+ case SHPC_STATE_PWRONLY:
+ return "power-only";
+ case SHPC_STATE_ENABLED:
+ return "enabled";
+ case SHPC_STATE_DISABLED:
+ return "disabled";
+ default:
+ return "invalid";
+ }
+}
+
+static uint8_t shpc_get_status(SHPCDevice *shpc, int slot, uint16_t msk)
{
uint8_t *status = shpc->config + SHPC_SLOT_STATUS(slot);
- return (pci_get_word(status) & msk) >> ctz32(msk);
+ uint16_t result = (pci_get_word(status) & msk) >> ctz32(msk);
+
+ assert(result <= UINT8_MAX);
+ return result;
}
static void shpc_set_status(SHPCDevice *shpc,
@@ -223,6 +255,7 @@ void shpc_reset(PCIDevice *d)
SHPC_SLOT_STATUS_PRSNT_MASK);
shpc_set_status(shpc, i, SHPC_LED_OFF, SHPC_SLOT_PWR_LED_MASK);
}
+ shpc_set_status(shpc, i, SHPC_LED_OFF, SHPC_SLOT_ATTN_LED_MASK);
shpc_set_status(shpc, i, 0, SHPC_SLOT_STATUS_66);
}
shpc_set_sec_bus_speed(shpc, SHPC_SEC_BUS_33);
@@ -254,60 +287,83 @@ static void shpc_free_devices_in_slot(SHPCDevice *shpc, int slot)
}
}
-static void shpc_slot_command(SHPCDevice *shpc, uint8_t target,
+static bool shpc_slot_is_off(uint8_t state, uint8_t power, uint8_t attn)
+{
+ return state == SHPC_STATE_DISABLED && power == SHPC_LED_OFF;
+}
+
+static void shpc_slot_command(PCIDevice *d, uint8_t target,
uint8_t state, uint8_t power, uint8_t attn)
{
- uint8_t current_state;
+ SHPCDevice *shpc = d->shpc;
int slot = SHPC_LOGICAL_TO_IDX(target);
+ uint8_t old_state = shpc_get_status(shpc, slot, SHPC_SLOT_STATE_MASK);
+ uint8_t old_power = shpc_get_status(shpc, slot, SHPC_SLOT_PWR_LED_MASK);
+ uint8_t old_attn = shpc_get_status(shpc, slot, SHPC_SLOT_ATTN_LED_MASK);
+
if (target < SHPC_CMD_TRGT_MIN || slot >= shpc->nslots) {
shpc_invalid_command(shpc);
return;
}
- current_state = shpc_get_status(shpc, slot, SHPC_SLOT_STATE_MASK);
- if (current_state == SHPC_STATE_ENABLED && state == SHPC_STATE_PWRONLY) {
+
+ if (old_state == SHPC_STATE_ENABLED && state == SHPC_STATE_PWRONLY) {
shpc_invalid_command(shpc);
return;
}
- switch (power) {
- case SHPC_LED_NO:
- break;
- default:
+ if (power == SHPC_LED_NO) {
+ power = old_power;
+ } else {
/* TODO: send event to monitor */
shpc_set_status(shpc, slot, power, SHPC_SLOT_PWR_LED_MASK);
}
- switch (attn) {
- case SHPC_LED_NO:
- break;
- default:
+
+ if (attn == SHPC_LED_NO) {
+ attn = old_attn;
+ } else {
/* TODO: send event to monitor */
shpc_set_status(shpc, slot, attn, SHPC_SLOT_ATTN_LED_MASK);
}
- if ((current_state == SHPC_STATE_DISABLED && state == SHPC_STATE_PWRONLY) ||
- (current_state == SHPC_STATE_DISABLED && state == SHPC_STATE_ENABLED)) {
- shpc_set_status(shpc, slot, state, SHPC_SLOT_STATE_MASK);
- } else if ((current_state == SHPC_STATE_ENABLED ||
- current_state == SHPC_STATE_PWRONLY) &&
- state == SHPC_STATE_DISABLED) {
+ if (state == SHPC_STATE_NO) {
+ state = old_state;
+ } else {
shpc_set_status(shpc, slot, state, SHPC_SLOT_STATE_MASK);
- power = shpc_get_status(shpc, slot, SHPC_SLOT_PWR_LED_MASK);
- /* TODO: track what monitor requested. */
- /* Look at LED to figure out whether it's ok to remove the device. */
- if (power == SHPC_LED_OFF) {
- shpc_free_devices_in_slot(shpc, slot);
- shpc_set_status(shpc, slot, 1, SHPC_SLOT_STATUS_MRL_OPEN);
- shpc_set_status(shpc, slot, SHPC_SLOT_STATUS_PRSNT_EMPTY,
- SHPC_SLOT_STATUS_PRSNT_MASK);
- shpc->config[SHPC_SLOT_EVENT_LATCH(slot)] |=
- SHPC_SLOT_EVENT_MRL |
- SHPC_SLOT_EVENT_PRESENCE;
- }
+ }
+
+ if (trace_event_get_state_backends(TRACE_SHPC_SLOT_COMMAND)) {
+ DeviceState *parent = DEVICE(d);
+ int pci_slot = SHPC_IDX_TO_PCI(slot);
+ DeviceState *child =
+ DEVICE(shpc->sec_bus->devices[PCI_DEVFN(pci_slot, 0)]);
+
+ trace_shpc_slot_command(
+ parent->canonical_path, pci_slot,
+ child ? child->canonical_path : "no-child",
+ shpc_led_state_to_str(old_power),
+ shpc_led_state_to_str(power),
+ shpc_led_state_to_str(old_attn),
+ shpc_led_state_to_str(attn),
+ shpc_slot_state_to_str(old_state),
+ shpc_slot_state_to_str(state));
+ }
+
+ if (!shpc_slot_is_off(old_state, old_power, old_attn) &&
+ shpc_slot_is_off(state, power, attn))
+ {
+ shpc_free_devices_in_slot(shpc, slot);
+ shpc_set_status(shpc, slot, 1, SHPC_SLOT_STATUS_MRL_OPEN);
+ shpc_set_status(shpc, slot, SHPC_SLOT_STATUS_PRSNT_EMPTY,
+ SHPC_SLOT_STATUS_PRSNT_MASK);
+ shpc->config[SHPC_SLOT_EVENT_LATCH(slot)] |=
+ SHPC_SLOT_EVENT_MRL |
+ SHPC_SLOT_EVENT_PRESENCE;
}
}
-static void shpc_command(SHPCDevice *shpc)
+static void shpc_command(PCIDevice *d)
{
+ SHPCDevice *shpc = d->shpc;
uint8_t code = pci_get_byte(shpc->config + SHPC_CMD_CODE);
uint8_t speed;
uint8_t target;
@@ -328,7 +384,7 @@ static void shpc_command(SHPCDevice *shpc)
state = (code & SHPC_SLOT_STATE_MASK) >> SHPC_SLOT_STATE_SHIFT;
power = (code & SHPC_SLOT_PWR_LED_MASK) >> SHPC_SLOT_PWR_LED_SHIFT;
attn = (code & SHPC_SLOT_ATTN_LED_MASK) >> SHPC_SLOT_ATTN_LED_SHIFT;
- shpc_slot_command(shpc, target, state, power, attn);
+ shpc_slot_command(d, target, state, power, attn);
break;
case 0x40 ... 0x47:
speed = code & SHPC_SEC_BUS_MASK;
@@ -346,10 +402,10 @@ static void shpc_command(SHPCDevice *shpc)
}
for (i = 0; i < shpc->nslots; ++i) {
if (!(shpc_get_status(shpc, i, SHPC_SLOT_STATUS_MRL_OPEN))) {
- shpc_slot_command(shpc, i + SHPC_CMD_TRGT_MIN,
+ shpc_slot_command(d, i + SHPC_CMD_TRGT_MIN,
SHPC_STATE_PWRONLY, SHPC_LED_ON, SHPC_LED_NO);
} else {
- shpc_slot_command(shpc, i + SHPC_CMD_TRGT_MIN,
+ shpc_slot_command(d, i + SHPC_CMD_TRGT_MIN,
SHPC_STATE_NO, SHPC_LED_OFF, SHPC_LED_NO);
}
}
@@ -367,10 +423,10 @@ static void shpc_command(SHPCDevice *shpc)
}
for (i = 0; i < shpc->nslots; ++i) {
if (!(shpc_get_status(shpc, i, SHPC_SLOT_STATUS_MRL_OPEN))) {
- shpc_slot_command(shpc, i + SHPC_CMD_TRGT_MIN,
+ shpc_slot_command(d, i + SHPC_CMD_TRGT_MIN,
SHPC_STATE_ENABLED, SHPC_LED_ON, SHPC_LED_NO);
} else {
- shpc_slot_command(shpc, i + SHPC_CMD_TRGT_MIN,
+ shpc_slot_command(d, i + SHPC_CMD_TRGT_MIN,
SHPC_STATE_NO, SHPC_LED_OFF, SHPC_LED_NO);
}
}
@@ -402,7 +458,7 @@ static void shpc_write(PCIDevice *d, unsigned addr, uint64_t val, int l)
shpc->config[a] &= ~(val & w1cmask); /* W1C: Write 1 to Clear */
}
if (ranges_overlap(addr, l, SHPC_CMD_CODE, 2)) {
- shpc_command(shpc);
+ shpc_command(d);
}
shpc_interrupt_update(d);
}
@@ -456,7 +512,7 @@ static int shpc_cap_add_config(PCIDevice *d, Error **errp)
pci_set_byte(config + SHPC_CAP_CxP, 0);
pci_set_long(config + SHPC_CAP_DWORD_DATA, 0);
d->shpc->cap = config_offset;
- /* Make dword select and data writeable. */
+ /* Make dword select and data writable. */
pci_set_byte(d->wmask + config_offset + SHPC_CAP_DWORD_SELECT, 0xff);
pci_set_long(d->wmask + config_offset + SHPC_CAP_DWORD_DATA, 0xffffffff);
return 0;
@@ -480,13 +536,15 @@ static const MemoryRegionOps shpc_mmio_ops = {
.endianness = DEVICE_LITTLE_ENDIAN,
.valid = {
/* SHPC ECN requires dword accesses, but the original 1.0 spec doesn't.
- * It's easier to suppport all sizes than worry about it. */
+ * It's easier to support all sizes than worry about it.
+ */
.min_access_size = 1,
.max_access_size = 4,
},
};
-static void shpc_device_plug_common(PCIDevice *affected_dev, int *slot,
- SHPCDevice *shpc, Error **errp)
+
+static bool shpc_device_get_slot(PCIDevice *affected_dev, int *slot,
+ SHPCDevice *shpc, Error **errp)
{
int pci_slot = PCI_SLOT(affected_dev->devfn);
*slot = SHPC_PCI_TO_IDX(pci_slot);
@@ -496,21 +554,20 @@ static void shpc_device_plug_common(PCIDevice *affected_dev, int *slot,
"controller. Valid slots are between %d and %d.",
pci_slot, SHPC_IDX_TO_PCI(0),
SHPC_IDX_TO_PCI(shpc->nslots) - 1);
- return;
+ return false;
}
+
+ return true;
}
void shpc_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp)
{
- Error *local_err = NULL;
PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev);
SHPCDevice *shpc = pci_hotplug_dev->shpc;
int slot;
- shpc_device_plug_common(PCI_DEVICE(dev), &slot, shpc, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ if (!shpc_device_get_slot(PCI_DEVICE(dev), &slot, shpc, errp)) {
return;
}
@@ -552,21 +609,25 @@ void shpc_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
void shpc_device_unplug_request_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
- Error *local_err = NULL;
PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev);
SHPCDevice *shpc = pci_hotplug_dev->shpc;
uint8_t state;
uint8_t led;
int slot;
- shpc_device_plug_common(PCI_DEVICE(dev), &slot, shpc, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ if (!shpc_device_get_slot(PCI_DEVICE(dev), &slot, shpc, errp)) {
return;
}
state = shpc_get_status(shpc, slot, SHPC_SLOT_STATE_MASK);
led = shpc_get_status(shpc, slot, SHPC_SLOT_PWR_LED_MASK);
+
+ if (led == SHPC_LED_BLINK) {
+ error_setg(errp, "Hot-unplug failed: "
+ "guest is busy (power indicator blinking)");
+ return;
+ }
+
if (state == SHPC_STATE_DISABLED && led == SHPC_LED_OFF) {
shpc_free_devices_in_slot(shpc, slot);
shpc_set_status(shpc, slot, 1, SHPC_SLOT_STATUS_MRL_OPEN);
@@ -600,7 +661,7 @@ int shpc_init(PCIDevice *d, PCIBus *sec_bus, MemoryRegion *bar,
}
if (nslots > SHPC_MAX_SLOTS ||
SHPC_IDX_TO_PCI(nslots) > PCI_SLOT_MAX) {
- /* TODO: report an error mesage that makes sense. */
+ /* TODO: report an error message that makes sense. */
return -EINVAL;
}
shpc->nslots = nslots;
@@ -721,7 +782,7 @@ static int shpc_load(QEMUFile *f, void *pv, size_t size,
return 0;
}
-VMStateInfo shpc_vmstate_info = {
+const VMStateInfo shpc_vmstate_info = {
.name = "shpc",
.get = shpc_load,
.put = shpc_save,
diff --git a/hw/pci/slotid_cap.c b/hw/pci/slotid_cap.c
index 36d021b4a6..8372d05d9e 100644
--- a/hw/pci/slotid_cap.c
+++ b/hw/pci/slotid_cap.c
@@ -1,6 +1,6 @@
#include "qemu/osdep.h"
#include "hw/pci/slotid_cap.h"
-#include "hw/pci/pci.h"
+#include "hw/pci/pci_device.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
diff --git a/hw/pci/trace-events b/hw/pci/trace-events
index fc777d0b5e..19643aa8c6 100644
--- a/hw/pci/trace-events
+++ b/hw/pci/trace-events
@@ -1,12 +1,24 @@
# See docs/devel/tracing.rst for syntax documentation.
# pci.c
-pci_update_mappings_del(void *d, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "d=%p %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
-pci_update_mappings_add(void *d, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "d=%p %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
+pci_update_mappings_del(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
+pci_update_mappings_add(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
+pci_route_irq(int dev_irq, const char *dev_path, int parent_irq, const char *parent_path) "IRQ %d @%s -> IRQ %d @%s"
# pci_host.c
-pci_cfg_read(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x -> 0x%x"
-pci_cfg_write(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x <- 0x%x"
+pci_cfg_read(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, unsigned offs, unsigned val) "%s %02x:%02x.%x @0x%x -> 0x%x"
+pci_cfg_write(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, unsigned offs, unsigned val) "%s %02x:%02x.%x @0x%x <- 0x%x"
# msix.c
msix_write_config(char *name, bool enabled, bool masked) "dev %s enabled %d masked %d"
+
+# hw/pci/pcie_sriov.c
+sriov_register_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: creating %d vf devs"
+sriov_unregister_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: Unregistering %d vf devs"
+sriov_config_write(const char *name, int slot, int fun, uint32_t offset, uint32_t val, uint32_t len) "%s %02x:%x: sriov offset 0x%x val 0x%x len %d"
+
+# pcie.c
+pcie_cap_slot_write_config(const char *parent, const char *child, const char *pds, const char *old_pic, const char *new_pic, const char *old_aic, const char *new_aic, const char *old_power, const char *new_power) "%s > %s: pds: %s, pic: %s->%s, aic: %s->%s, power: %s->%s"
+
+# shpc.c
+shpc_slot_command(const char *parent, int pci_slot, const char *child, const char *old_pic, const char *new_pic, const char *old_aic, const char *new_aic, const char *old_state, const char *new_state) "%s[%d] > %s: pic: %s->%s, aic: %s->%s, state: %s->%s"