diff options
Diffstat (limited to 'hw/pci')
-rw-r--r-- | hw/pci/Kconfig | 3 | ||||
-rw-r--r-- | hw/pci/meson.build | 11 | ||||
-rw-r--r-- | hw/pci/msi.c | 62 | ||||
-rw-r--r-- | hw/pci/msix.c | 59 | ||||
-rw-r--r-- | hw/pci/pci-hmp-cmds.c | 239 | ||||
-rw-r--r-- | hw/pci/pci-internal.h | 24 | ||||
-rw-r--r-- | hw/pci/pci-qmp-cmds.c | 199 | ||||
-rw-r--r-- | hw/pci/pci-stub.c | 9 | ||||
-rw-r--r-- | hw/pci/pci.c | 918 | ||||
-rw-r--r-- | hw/pci/pci_bridge.c | 63 | ||||
-rw-r--r-- | hw/pci/pci_host.c | 37 | ||||
-rw-r--r-- | hw/pci/pcie.c | 213 | ||||
-rw-r--r-- | hw/pci/pcie_aer.c | 144 | ||||
-rw-r--r-- | hw/pci/pcie_doe.c | 367 | ||||
-rw-r--r-- | hw/pci/pcie_host.c | 2 | ||||
-rw-r--r-- | hw/pci/pcie_port.c | 74 | ||||
-rw-r--r-- | hw/pci/pcie_sriov.c | 318 | ||||
-rw-r--r-- | hw/pci/shpc.c | 169 | ||||
-rw-r--r-- | hw/pci/slotid_cap.c | 2 | ||||
-rw-r--r-- | hw/pci/trace-events | 20 |
20 files changed, 2198 insertions, 735 deletions
diff --git a/hw/pci/Kconfig b/hw/pci/Kconfig index 77f8b005ff..fe70902cd8 100644 --- a/hw/pci/Kconfig +++ b/hw/pci/Kconfig @@ -8,6 +8,9 @@ config PCI_EXPRESS config PCI_DEVICES bool +config PCIE_DEVICES + bool + config MSI_NONBROKEN # selected by interrupt controllers that do not support MSI, # or support it and have a good implementation. See commit diff --git a/hw/pci/meson.build b/hw/pci/meson.build index 5c4bbac817..b9c34b2acf 100644 --- a/hw/pci/meson.build +++ b/hw/pci/meson.build @@ -5,6 +5,9 @@ pci_ss.add(files( 'pci.c', 'pci_bridge.c', 'pci_host.c', + 'pci-hmp-cmds.c', + 'pci-qmp-cmds.c', + 'pcie_sriov.c', 'shpc.c', 'slotid_cap.c' )) @@ -12,8 +15,8 @@ pci_ss.add(files( # allow plugging PCIe devices into PCI buses, include them even if # CONFIG_PCI_EXPRESS=n. pci_ss.add(files('pcie.c', 'pcie_aer.c')) -softmmu_ss.add(when: 'CONFIG_PCI_EXPRESS', if_true: files('pcie_port.c', 'pcie_host.c')) -softmmu_ss.add_all(when: 'CONFIG_PCI', if_true: pci_ss) +pci_ss.add(files('pcie_doe.c')) +system_ss.add(when: 'CONFIG_PCI_EXPRESS', if_true: files('pcie_port.c', 'pcie_host.c')) +system_ss.add_all(when: 'CONFIG_PCI', if_true: pci_ss) -softmmu_ss.add(when: 'CONFIG_PCI', if_false: files('pci-stub.c')) -softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('pci-stub.c')) +system_ss.add(when: 'CONFIG_PCI', if_false: files('pci-stub.c')) diff --git a/hw/pci/msi.c b/hw/pci/msi.c index 47d2b0f33c..8104ac1d91 100644 --- a/hw/pci/msi.c +++ b/hw/pci/msi.c @@ -23,6 +23,9 @@ #include "hw/xen/xen.h" #include "qemu/range.h" #include "qapi/error.h" +#include "sysemu/xen.h" + +#include "hw/i386/kvm/xen_evtchn.h" /* PCI_MSI_ADDRESS_LO */ #define PCI_MSI_ADDRESS_LO_MASK (~0x3) @@ -134,7 +137,7 @@ void msi_set_message(PCIDevice *dev, MSIMessage msg) pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data); } -MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector) +static MSIMessage msi_prepare_message(PCIDevice *dev, unsigned int vector) { uint16_t flags = 
pci_get_word(dev->config + msi_flags_off(dev)); bool msi64bit = flags & PCI_MSI_FLAGS_64BIT; @@ -159,6 +162,11 @@ MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector) return msg; } +MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector) +{ + return dev->msi_prepare_message(dev, vector); +} + bool msi_enabled(const PCIDevice *dev) { return msi_present(dev) && @@ -241,6 +249,8 @@ int msi_init(struct PCIDevice *dev, uint8_t offset, 0xffffffff >> (PCI_MSI_VECTORS_MAX - nr_vectors)); } + dev->msi_prepare_message = msi_prepare_message; + return 0; } @@ -256,6 +266,7 @@ void msi_uninit(struct PCIDevice *dev) cap_size = msi_cap_sizeof(flags); pci_del_capability(dev, PCI_CAP_ID_MSI, cap_size); dev->cap_present &= ~QEMU_PCI_CAP_MSI; + dev->msi_prepare_message = NULL; MSI_DEV_PRINTF(dev, "uninit\n"); } @@ -298,7 +309,7 @@ bool msi_is_masked(const PCIDevice *dev, unsigned int vector) } data = pci_get_word(dev->config + msi_data_off(dev, msi64bit)); - if (xen_is_pirq_msi(data)) { + if (xen_enabled() && xen_is_pirq_msi(data)) { return false; } @@ -307,6 +318,38 @@ bool msi_is_masked(const PCIDevice *dev, unsigned int vector) return mask & (1U << vector); } +void msi_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp) +{ + uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); + bool msi64bit = flags & PCI_MSI_FLAGS_64BIT; + uint32_t irq_state, vector_mask, pending; + + if (vector >= PCI_MSI_VECTORS_MAX) { + error_setg(errp, "msi: vector %d not allocated. 
max vector is %d", + vector, (PCI_MSI_VECTORS_MAX - 1)); + return; + } + + vector_mask = (1U << vector); + + irq_state = pci_get_long(dev->config + msi_mask_off(dev, msi64bit)); + + if (mask) { + irq_state |= vector_mask; + } else { + irq_state &= ~vector_mask; + } + + pci_set_long(dev->config + msi_mask_off(dev, msi64bit), irq_state); + + pending = pci_get_long(dev->config + msi_pending_off(dev, msi64bit)); + if (!mask && (pending & vector_mask)) { + pending &= ~vector_mask; + pci_set_long(dev->config + msi_pending_off(dev, msi64bit), pending); + msi_notify(dev, vector); + } +} + void msi_notify(PCIDevice *dev, unsigned int vector) { uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); @@ -334,11 +377,7 @@ void msi_notify(PCIDevice *dev, unsigned int vector) void msi_send_message(PCIDevice *dev, MSIMessage msg) { - MemTxAttrs attrs = {}; - - attrs.requester_id = pci_requester_id(dev); - address_space_stl_le(&dev->bus_master_as, msg.address, msg.data, - attrs, NULL); + dev->msi_trigger(dev, msg); } /* Normally called by pci_default_write_config(). 
*/ @@ -378,6 +417,15 @@ void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len) fprintf(stderr, "\n"); #endif + if (xen_mode == XEN_EMULATE) { + for (vector = 0; vector < msi_nr_vectors(flags); vector++) { + MSIMessage msg = msi_prepare_message(dev, vector); + + xen_evtchn_snoop_msi(dev, false, vector, msg.address, msg.data, + msi_is_masked(dev, vector)); + } + } + if (!(flags & PCI_MSI_FLAGS_ENABLE)) { return; } diff --git a/hw/pci/msix.c b/hw/pci/msix.c index ae9331cd0b..487e49834e 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -26,12 +26,14 @@ #include "qapi/error.h" #include "trace.h" +#include "hw/i386/kvm/xen_evtchn.h" + /* MSI enable bit and maskall bit are in byte 1 in FLAGS register */ #define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1) #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8) #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8) -MSIMessage msix_get_message(PCIDevice *dev, unsigned vector) +static MSIMessage msix_prepare_message(PCIDevice *dev, unsigned vector) { uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE; MSIMessage msg; @@ -41,6 +43,11 @@ MSIMessage msix_get_message(PCIDevice *dev, unsigned vector) return msg; } +MSIMessage msix_get_message(PCIDevice *dev, unsigned vector) +{ + return dev->msix_prepare_message(dev, vector); +} + /* * Special API for POWER to configure the vectors through * a side channel. Should never be used by devices. 
@@ -119,6 +126,13 @@ static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked) { bool is_masked = msix_is_masked(dev, vector); + if (xen_mode == XEN_EMULATE) { + MSIMessage msg = msix_prepare_message(dev, vector); + + xen_evtchn_snoop_msi(dev, true, vector, msg.address, msg.data, + is_masked); + } + if (is_masked == was_masked) { return; } @@ -131,6 +145,26 @@ static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked) } } +void msix_set_mask(PCIDevice *dev, int vector, bool mask) +{ + unsigned offset; + bool was_masked; + + assert(vector < dev->msix_entries_nr); + + offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; + + was_masked = msix_is_masked(dev, vector); + + if (mask) { + dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT; + } else { + dev->msix_table[offset] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; + } + + msix_handle_mask_update(dev, vector, was_masked); +} + static bool msix_masked(PCIDevice *dev) { return dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK; @@ -344,6 +378,8 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries, "msix-pba", pba_size); memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio); + dev->msix_prepare_message = msix_prepare_message; + return 0; } @@ -429,6 +465,7 @@ void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar) g_free(dev->msix_entry_used); dev->msix_entry_used = NULL; dev->cap_present &= ~QEMU_PCI_CAP_MSIX; + dev->msix_prepare_message = NULL; } void msix_uninit_exclusive_bar(PCIDevice *dev) @@ -489,7 +526,9 @@ void msix_notify(PCIDevice *dev, unsigned vector) { MSIMessage msg; - if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) { + assert(vector < dev->msix_entries_nr); + + if (!dev->msix_entry_used[vector]) { return; } @@ -525,20 +564,17 @@ void msix_reset(PCIDevice *dev) * don't want to follow the spec suggestion can declare all vectors as used. */ /* Mark vector as used. 
*/ -int msix_vector_use(PCIDevice *dev, unsigned vector) +void msix_vector_use(PCIDevice *dev, unsigned vector) { - if (vector >= dev->msix_entries_nr) { - return -EINVAL; - } - + assert(vector < dev->msix_entries_nr); dev->msix_entry_used[vector]++; - return 0; } /* Mark vector as unused. */ void msix_vector_unuse(PCIDevice *dev, unsigned vector) { - if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) { + assert(vector < dev->msix_entries_nr); + if (!dev->msix_entry_used[vector]) { return; } if (--dev->msix_entry_used[vector]) { @@ -612,6 +648,7 @@ undo: } dev->msix_vector_use_notifier = NULL; dev->msix_vector_release_notifier = NULL; + dev->msix_vector_poll_notifier = NULL; return ret; } @@ -648,7 +685,7 @@ static int get_msix_state(QEMUFile *f, void *pv, size_t size, return 0; } -static VMStateInfo vmstate_info_msix = { +static const VMStateInfo vmstate_info_msix = { .name = "msix state", .get = get_msix_state, .put = put_msix_state, @@ -656,7 +693,7 @@ static VMStateInfo vmstate_info_msix = { const VMStateDescription vmstate_msix = { .name = "msix", - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { { .name = "msix", .version_id = 0, diff --git a/hw/pci/pci-hmp-cmds.c b/hw/pci/pci-hmp-cmds.c new file mode 100644 index 0000000000..b09fce9377 --- /dev/null +++ b/hw/pci/pci-hmp-cmds.c @@ -0,0 +1,239 @@ +/* + * HMP commands related to PCI + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. 
+ */ + +#include "qemu/osdep.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_device.h" +#include "monitor/hmp.h" +#include "monitor/monitor.h" +#include "pci-internal.h" +#include "qapi/error.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qapi-commands-pci.h" +#include "qemu/cutils.h" + +static void hmp_info_pci_device(Monitor *mon, const PciDeviceInfo *dev) +{ + PciMemoryRegionList *region; + + monitor_printf(mon, " Bus %2" PRId64 ", ", dev->bus); + monitor_printf(mon, "device %3" PRId64 ", function %" PRId64 ":\n", + dev->slot, dev->function); + monitor_printf(mon, " "); + + if (dev->class_info->desc) { + monitor_puts(mon, dev->class_info->desc); + } else { + monitor_printf(mon, "Class %04" PRId64, dev->class_info->q_class); + } + + monitor_printf(mon, ": PCI device %04" PRIx64 ":%04" PRIx64 "\n", + dev->id->vendor, dev->id->device); + if (dev->id->has_subsystem_vendor && dev->id->has_subsystem) { + monitor_printf(mon, " PCI subsystem %04" PRIx64 ":%04" PRIx64 "\n", + dev->id->subsystem_vendor, dev->id->subsystem); + } + + if (dev->has_irq) { + monitor_printf(mon, " IRQ %" PRId64 ", pin %c\n", + dev->irq, (char)('A' + dev->irq_pin - 1)); + } + + if (dev->pci_bridge) { + monitor_printf(mon, " BUS %" PRId64 ".\n", + dev->pci_bridge->bus->number); + monitor_printf(mon, " secondary bus %" PRId64 ".\n", + dev->pci_bridge->bus->secondary); + monitor_printf(mon, " subordinate bus %" PRId64 ".\n", + dev->pci_bridge->bus->subordinate); + + monitor_printf(mon, " IO range [0x%04"PRIx64", 0x%04"PRIx64"]\n", + dev->pci_bridge->bus->io_range->base, + dev->pci_bridge->bus->io_range->limit); + + monitor_printf(mon, + " memory range [0x%08"PRIx64", 0x%08"PRIx64"]\n", + dev->pci_bridge->bus->memory_range->base, + dev->pci_bridge->bus->memory_range->limit); + + monitor_printf(mon, " prefetchable memory range " + "[0x%08"PRIx64", 0x%08"PRIx64"]\n", + dev->pci_bridge->bus->prefetchable_range->base, + dev->pci_bridge->bus->prefetchable_range->limit); + } + + for (region = 
dev->regions; region; region = region->next) { + uint64_t addr, size; + + addr = region->value->address; + size = region->value->size; + + monitor_printf(mon, " BAR%" PRId64 ": ", region->value->bar); + + if (!strcmp(region->value->type, "io")) { + monitor_printf(mon, "I/O at 0x%04" PRIx64 + " [0x%04" PRIx64 "].\n", + addr, addr + size - 1); + } else { + monitor_printf(mon, "%d bit%s memory at 0x%08" PRIx64 + " [0x%08" PRIx64 "].\n", + region->value->mem_type_64 ? 64 : 32, + region->value->prefetch ? " prefetchable" : "", + addr, addr + size - 1); + } + } + + monitor_printf(mon, " id \"%s\"\n", dev->qdev_id); + + if (dev->pci_bridge) { + if (dev->pci_bridge->has_devices) { + PciDeviceInfoList *cdev; + for (cdev = dev->pci_bridge->devices; cdev; cdev = cdev->next) { + hmp_info_pci_device(mon, cdev->value); + } + } + } +} + +void hmp_info_pci(Monitor *mon, const QDict *qdict) +{ + PciInfoList *info_list, *info; + + info_list = qmp_query_pci(&error_abort); + + for (info = info_list; info; info = info->next) { + PciDeviceInfoList *dev; + + for (dev = info->value->devices; dev; dev = dev->next) { + hmp_info_pci_device(mon, dev->value); + } + } + + qapi_free_PciInfoList(info_list); +} + +void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent) +{ + PCIDevice *d = (PCIDevice *)dev; + int class = pci_get_word(d->config + PCI_CLASS_DEVICE); + const pci_class_desc *desc = get_class_desc(class); + char ctxt[64]; + PCIIORegion *r; + int i; + + if (desc->desc) { + snprintf(ctxt, sizeof(ctxt), "%s", desc->desc); + } else { + snprintf(ctxt, sizeof(ctxt), "Class %04x", class); + } + + monitor_printf(mon, "%*sclass %s, addr %02x:%02x.%x, " + "pci id %04x:%04x (sub %04x:%04x)\n", + indent, "", ctxt, pci_dev_bus_num(d), + PCI_SLOT(d->devfn), PCI_FUNC(d->devfn), + pci_get_word(d->config + PCI_VENDOR_ID), + pci_get_word(d->config + PCI_DEVICE_ID), + pci_get_word(d->config + PCI_SUBSYSTEM_VENDOR_ID), + pci_get_word(d->config + PCI_SUBSYSTEM_ID)); + for (i = 0; i < 
PCI_NUM_REGIONS; i++) { + r = &d->io_regions[i]; + if (!r->size) { + continue; + } + monitor_printf(mon, "%*sbar %d: %s at 0x%"FMT_PCIBUS + " [0x%"FMT_PCIBUS"]\n", + indent, "", + i, r->type & PCI_BASE_ADDRESS_SPACE_IO ? "i/o" : "mem", + r->addr, r->addr + r->size - 1); + } +} + +void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + const char *id = qdict_get_str(qdict, "id"); + const char *error_name; + uint32_t error_status; + unsigned int num; + bool correctable; + PCIDevice *dev; + PCIEAERErr aer_err; + int ret; + + ret = pci_qdev_find_device(id, &dev); + if (ret == -ENODEV) { + error_setg(&err, "device '%s' not found", id); + goto out; + } + if (ret < 0 || !pci_is_express(dev)) { + error_setg(&err, "device '%s' is not a PCIe device", id); + goto out; + } + + error_name = qdict_get_str(qdict, "error_status"); + if (pcie_aer_parse_error_string(error_name, &error_status, &correctable)) { + if (qemu_strtoui(error_name, NULL, 0, &num) < 0) { + error_setg(&err, "invalid error status value '%s'", error_name); + goto out; + } + error_status = num; + correctable = qdict_get_try_bool(qdict, "correctable", false); + } else { + if (qdict_haskey(qdict, "correctable")) { + error_setg(&err, "-c is only valid with numeric error status"); + goto out; + } + } + aer_err.status = error_status; + aer_err.source_id = pci_requester_id(dev); + + aer_err.flags = 0; + if (correctable) { + aer_err.flags |= PCIE_AER_ERR_IS_CORRECTABLE; + } + if (qdict_get_try_bool(qdict, "advisory_non_fatal", false)) { + aer_err.flags |= PCIE_AER_ERR_MAYBE_ADVISORY; + } + if (qdict_haskey(qdict, "header0")) { + aer_err.flags |= PCIE_AER_ERR_HEADER_VALID; + } + if (qdict_haskey(qdict, "prefix0")) { + aer_err.flags |= PCIE_AER_ERR_TLP_PREFIX_PRESENT; + } + + aer_err.header[0] = qdict_get_try_int(qdict, "header0", 0); + aer_err.header[1] = qdict_get_try_int(qdict, "header1", 0); + aer_err.header[2] = qdict_get_try_int(qdict, "header2", 0); + aer_err.header[3] = 
qdict_get_try_int(qdict, "header3", 0); + + aer_err.prefix[0] = qdict_get_try_int(qdict, "prefix0", 0); + aer_err.prefix[1] = qdict_get_try_int(qdict, "prefix1", 0); + aer_err.prefix[2] = qdict_get_try_int(qdict, "prefix2", 0); + aer_err.prefix[3] = qdict_get_try_int(qdict, "prefix3", 0); + + ret = pcie_aer_inject_error(dev, &aer_err); + if (ret < 0) { + error_setg_errno(&err, -ret, "failed to inject error"); + goto out; + } + + + monitor_printf(mon, "OK id: %s root bus: %s, bus: %x devfn: %x.%x\n", + id, pci_root_bus_path(dev), pci_dev_bus_num(dev), + PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); + +out: + hmp_handle_error(mon, err); +} diff --git a/hw/pci/pci-internal.h b/hw/pci/pci-internal.h new file mode 100644 index 0000000000..a7d6d8a732 --- /dev/null +++ b/hw/pci/pci-internal.h @@ -0,0 +1,24 @@ +#ifndef HW_PCI_PCI_INTERNAL_H +#define HW_PCI_PCI_INTERNAL_H + +#include "qemu/queue.h" + +typedef struct { + uint16_t class; + const char *desc; + const char *fw_name; + uint16_t fw_ign_bits; +} pci_class_desc; + +typedef QLIST_HEAD(, PCIHostState) PCIHostStateList; + +extern PCIHostStateList pci_host_bridges; + +const pci_class_desc *get_class_desc(int class); +PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num); +void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent); + +int pcie_aer_parse_error_string(const char *error_name, + uint32_t *status, bool *correctable); + +#endif diff --git a/hw/pci/pci-qmp-cmds.c b/hw/pci/pci-qmp-cmds.c new file mode 100644 index 0000000000..5d9f4817f5 --- /dev/null +++ b/hw/pci/pci-qmp-cmds.c @@ -0,0 +1,199 @@ +/* + * QMP commands related to PCI + * + * Copyright (c) 2004 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the 
Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_bridge.h" +#include "pci-internal.h" +#include "qapi/qapi-commands-pci.h" + +static PciDeviceInfoList *qmp_query_pci_devices(PCIBus *bus, int bus_num); + +static PciMemoryRegionList *qmp_query_pci_regions(const PCIDevice *dev) +{ + PciMemoryRegionList *head = NULL, **tail = &head; + int i; + + for (i = 0; i < PCI_NUM_REGIONS; i++) { + const PCIIORegion *r = &dev->io_regions[i]; + PciMemoryRegion *region; + + if (!r->size) { + continue; + } + + region = g_malloc0(sizeof(*region)); + + if (r->type & PCI_BASE_ADDRESS_SPACE_IO) { + region->type = g_strdup("io"); + } else { + region->type = g_strdup("memory"); + region->has_prefetch = true; + region->prefetch = !!(r->type & PCI_BASE_ADDRESS_MEM_PREFETCH); + region->has_mem_type_64 = true; + region->mem_type_64 = !!(r->type & PCI_BASE_ADDRESS_MEM_TYPE_64); + } + + region->bar = i; + region->address = r->addr; + region->size = r->size; + + QAPI_LIST_APPEND(tail, region); + } + + return head; +} + +static PciBridgeInfo *qmp_query_pci_bridge(PCIDevice *dev, PCIBus *bus, + int bus_num) +{ + PciBridgeInfo *info; + PciMemoryRange *range; + + info = g_new0(PciBridgeInfo, 1); + + info->bus = g_new0(PciBusInfo, 1); + 
info->bus->number = dev->config[PCI_PRIMARY_BUS]; + info->bus->secondary = dev->config[PCI_SECONDARY_BUS]; + info->bus->subordinate = dev->config[PCI_SUBORDINATE_BUS]; + + range = info->bus->io_range = g_new0(PciMemoryRange, 1); + range->base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_IO); + range->limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_IO); + + range = info->bus->memory_range = g_new0(PciMemoryRange, 1); + range->base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_MEMORY); + range->limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_MEMORY); + + range = info->bus->prefetchable_range = g_new0(PciMemoryRange, 1); + range->base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_MEM_PREFETCH); + range->limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_MEM_PREFETCH); + + if (dev->config[PCI_SECONDARY_BUS] != 0) { + PCIBus *child_bus = pci_find_bus_nr(bus, + dev->config[PCI_SECONDARY_BUS]); + if (child_bus) { + info->has_devices = true; + info->devices = qmp_query_pci_devices(child_bus, + dev->config[PCI_SECONDARY_BUS]); + } + } + + return info; +} + +static PciDeviceInfo *qmp_query_pci_device(PCIDevice *dev, PCIBus *bus, + int bus_num) +{ + const pci_class_desc *desc; + PciDeviceInfo *info; + uint8_t type; + int class; + + info = g_new0(PciDeviceInfo, 1); + info->bus = bus_num; + info->slot = PCI_SLOT(dev->devfn); + info->function = PCI_FUNC(dev->devfn); + + info->class_info = g_new0(PciDeviceClass, 1); + class = pci_get_word(dev->config + PCI_CLASS_DEVICE); + info->class_info->q_class = class; + desc = get_class_desc(class); + if (desc->desc) { + info->class_info->desc = g_strdup(desc->desc); + } + + info->id = g_new0(PciDeviceId, 1); + info->id->vendor = pci_get_word(dev->config + PCI_VENDOR_ID); + info->id->device = pci_get_word(dev->config + PCI_DEVICE_ID); + info->regions = qmp_query_pci_regions(dev); + info->qdev_id = g_strdup(dev->qdev.id ? 
dev->qdev.id : ""); + + info->irq_pin = dev->config[PCI_INTERRUPT_PIN]; + if (dev->config[PCI_INTERRUPT_PIN] != 0) { + info->has_irq = true; + info->irq = dev->config[PCI_INTERRUPT_LINE]; + } + + type = dev->config[PCI_HEADER_TYPE] & ~PCI_HEADER_TYPE_MULTI_FUNCTION; + if (type == PCI_HEADER_TYPE_BRIDGE) { + info->pci_bridge = qmp_query_pci_bridge(dev, bus, bus_num); + } else if (type == PCI_HEADER_TYPE_NORMAL) { + info->id->has_subsystem = info->id->has_subsystem_vendor = true; + info->id->subsystem = pci_get_word(dev->config + PCI_SUBSYSTEM_ID); + info->id->subsystem_vendor = + pci_get_word(dev->config + PCI_SUBSYSTEM_VENDOR_ID); + } else if (type == PCI_HEADER_TYPE_CARDBUS) { + info->id->has_subsystem = info->id->has_subsystem_vendor = true; + info->id->subsystem = pci_get_word(dev->config + PCI_CB_SUBSYSTEM_ID); + info->id->subsystem_vendor = + pci_get_word(dev->config + PCI_CB_SUBSYSTEM_VENDOR_ID); + } + + return info; +} + +static PciDeviceInfoList *qmp_query_pci_devices(PCIBus *bus, int bus_num) +{ + PciDeviceInfoList *head = NULL, **tail = &head; + PCIDevice *dev; + int devfn; + + for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) { + dev = bus->devices[devfn]; + if (dev) { + QAPI_LIST_APPEND(tail, qmp_query_pci_device(dev, bus, bus_num)); + } + } + + return head; +} + +static PciInfo *qmp_query_pci_bus(PCIBus *bus, int bus_num) +{ + PciInfo *info = NULL; + + bus = pci_find_bus_nr(bus, bus_num); + if (bus) { + info = g_malloc0(sizeof(*info)); + info->bus = bus_num; + info->devices = qmp_query_pci_devices(bus, bus_num); + } + + return info; +} + +PciInfoList *qmp_query_pci(Error **errp) +{ + PciInfoList *head = NULL, **tail = &head; + PCIHostState *host_bridge; + + QLIST_FOREACH(host_bridge, &pci_host_bridges, next) { + QAPI_LIST_APPEND(tail, + qmp_query_pci_bus(host_bridge->bus, + pci_bus_num(host_bridge->bus))); + } + + return head; +} diff --git a/hw/pci/pci-stub.c b/hw/pci/pci-stub.c index 3a027c42e4..f0508682d2 100644 --- a/hw/pci/pci-stub.c +++ 
b/hw/pci/pci-stub.c @@ -19,11 +19,9 @@ */ #include "qemu/osdep.h" -#include "sysemu/sysemu.h" #include "monitor/monitor.h" -#include "qapi/error.h" +#include "monitor/hmp.h" #include "qapi/qapi-commands-pci.h" -#include "qapi/qmp/qerror.h" #include "hw/pci/pci.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" @@ -33,10 +31,13 @@ bool pci_available; PciInfoList *qmp_query_pci(Error **errp) { - error_setg(errp, QERR_UNSUPPORTED); return NULL; } +void hmp_info_pci(Monitor *mon, const QDict *qdict) +{ +} + void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict) { monitor_printf(mon, "PCI devices not supported\n"); diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 23d2ae2ab2..e7a39cb203 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -23,7 +23,6 @@ */ #include "qemu/osdep.h" -#include "qemu-common.h" #include "qemu/datadir.h" #include "qemu/units.h" #include "hw/irq.h" @@ -35,9 +34,9 @@ #include "hw/qdev-properties-system.h" #include "migration/qemu-file-types.h" #include "migration/vmstate.h" -#include "monitor/monitor.h" #include "net/net.h" #include "sysemu/numa.h" +#include "sysemu/runstate.h" #include "sysemu/sysemu.h" #include "hw/loader.h" #include "qemu/error-report.h" @@ -48,8 +47,11 @@ #include "hw/hotplug.h" #include "hw/boards.h" #include "qapi/error.h" -#include "qapi/qapi-commands-pci.h" #include "qemu/cutils.h" +#include "pci-internal.h" + +#include "hw/xen/xen.h" +#include "hw/i386/kvm/xen_evtchn.h" //#define DEBUG_PCI #ifdef DEBUG_PCI @@ -60,10 +62,10 @@ bool pci_available = true; -static void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent); static char *pcibus_get_dev_path(DeviceState *dev); static char *pcibus_get_fw_dev_path(DeviceState *dev); -static void pcibus_reset(BusState *qbus); +static void pcibus_reset_hold(Object *obj); +static bool pcie_has_upstream_port(PCIDevice *dev); static Property pci_props[] = { DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1), @@ -79,6 +81,10 @@ static Property pci_props[] = { 
DEFINE_PROP_STRING("failover_pair_id", PCIDevice, failover_pair_id), DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0), + DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present, + QEMU_PCIE_ERR_UNC_MASK_BITNR, true), + DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present, + QEMU_PCIE_ARI_NEXTFN_1_BITNR, false), DEFINE_PROP_END_OF_LIST() }; @@ -86,7 +92,7 @@ static const VMStateDescription vmstate_pcibus = { .name = "PCIBUS", .version_id = 1, .minimum_version_id = 1, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_INT32_EQUAL(nirq, PCIBus, NULL), VMSTATE_VARRAY_INT32(irq_count, PCIBus, nirq, 0, vmstate_info_int32, @@ -95,6 +101,21 @@ static const VMStateDescription vmstate_pcibus = { } }; +static gint g_cmp_uint32(gconstpointer a, gconstpointer b, gpointer user_data) +{ + return a - b; +} + +static GSequence *pci_acpi_index_list(void) +{ + static GSequence *used_acpi_index_list; + + if (!used_acpi_index_list) { + used_acpi_index_list = g_sequence_new(NULL); + } + return used_acpi_index_list; +} + static void pci_init_bus_master(PCIDevice *pci_dev) { AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev); @@ -126,7 +147,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp) bus->machine_done.notify = pcibus_machine_done; qemu_add_machine_init_done_notifier(&bus->machine_done); - vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_pcibus, bus); + vmstate_register_any(NULL, &vmstate_pcibus, bus); } static void pcie_bus_realize(BusState *qbus, Error **errp) @@ -181,13 +202,15 @@ static void pci_bus_class_init(ObjectClass *klass, void *data) { BusClass *k = BUS_CLASS(klass); PCIBusClass *pbc = PCI_BUS_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); k->print_dev = pcibus_dev_print; k->get_dev_path = pcibus_get_dev_path; k->get_fw_dev_path = pcibus_get_fw_dev_path; k->realize = pci_bus_realize; k->unrealize = pci_bus_unrealize; - k->reset = pcibus_reset; + + rc->phases.hold = 
pcibus_reset_hold; pbc->bus_num = pcibus_num; pbc->numa_node = pcibus_numa_node; @@ -201,6 +224,11 @@ static const TypeInfo pci_bus_info = { .class_init = pci_bus_class_init, }; +static const TypeInfo cxl_interface_info = { + .name = INTERFACE_CXL_DEVICE, + .parent = TYPE_INTERFACE, +}; + static const TypeInfo pcie_interface_info = { .name = INTERFACE_PCIE_DEVICE, .parent = TYPE_INTERFACE, @@ -224,7 +252,12 @@ static const TypeInfo pcie_bus_info = { .class_init = pcie_bus_class_init, }; -static PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num); +static const TypeInfo cxl_bus_info = { + .name = TYPE_CXL_BUS, + .parent = TYPE_PCIE_BUS, + .class_init = pcie_bus_class_init, +}; + static void pci_update_mappings(PCIDevice *d); static void pci_irq_handler(void *opaque, int irq_num, int level); static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, Error **); @@ -233,12 +266,15 @@ static void pci_del_option_rom(PCIDevice *pdev); static uint16_t pci_default_sub_vendor_id = PCI_SUBVENDOR_ID_REDHAT_QUMRANET; static uint16_t pci_default_sub_device_id = PCI_SUBDEVICE_ID_QEMU; -static QLIST_HEAD(, PCIHostState) pci_host_bridges; +PCIHostStateList pci_host_bridges; int pci_bar(PCIDevice *d, int reg) { uint8_t type; + /* PCIe virtual functions do not have their own BARs */ + assert(!pci_is_vf(d)); + if (reg != PCI_ROM_SLOT) return PCI_BASE_ADDRESS_0 + reg * 4; @@ -269,8 +305,13 @@ static void pci_change_irq_level(PCIDevice *pci_dev, int irq_num, int change) { PCIBus *bus; for (;;) { + int dev_irq = irq_num; bus = pci_get_bus(pci_dev); + assert(bus->map_irq); irq_num = bus->map_irq(pci_dev, irq_num); + trace_pci_route_irq(dev_irq, DEVICE(pci_dev)->canonical_path, irq_num, + pci_bus_is_root(bus) ? 
"root-complex" + : DEVICE(bus->parent_dev)->canonical_path); if (bus->set_irq) break; pci_dev = bus->parent_dev; @@ -304,10 +345,50 @@ void pci_device_deassert_intx(PCIDevice *dev) } } -static void pci_do_device_reset(PCIDevice *dev) +static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg) +{ + MemTxAttrs attrs = {}; + + /* + * Xen uses the high bits of the address to contain some of the bits + * of the PIRQ#. Therefore we can't just send the write cycle and + * trust that it's caught by the APIC at 0xfee00000 because the + * target of the write might be e.g. 0x1000fee46000 for PIRQ#4166. + * So we intercept the delivery here instead of in kvm_send_msi(). + */ + if (xen_mode == XEN_EMULATE && + xen_evtchn_deliver_pirq_msi(msg.address, msg.data)) { + return; + } + attrs.requester_id = pci_requester_id(dev); + address_space_stl_le(&dev->bus_master_as, msg.address, msg.data, + attrs, NULL); +} + +static void pci_reset_regions(PCIDevice *dev) { int r; + if (pci_is_vf(dev)) { + return; + } + + for (r = 0; r < PCI_NUM_REGIONS; ++r) { + PCIIORegion *region = &dev->io_regions[r]; + if (!region->size) { + continue; + } + if (!(region->type & PCI_BASE_ADDRESS_SPACE_IO) && + region->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { + pci_set_quad(dev->config + pci_bar(dev, r), region->type); + } else { + pci_set_long(dev->config + pci_bar(dev, r), region->type); + } + } +} + +static void pci_do_device_reset(PCIDevice *dev) +{ pci_device_deassert_intx(dev); assert(dev->irq_state == 0); @@ -323,23 +404,12 @@ static void pci_do_device_reset(PCIDevice *dev) pci_get_word(dev->wmask + PCI_INTERRUPT_LINE) | pci_get_word(dev->w1cmask + PCI_INTERRUPT_LINE)); dev->config[PCI_CACHE_LINE_SIZE] = 0x0; - for (r = 0; r < PCI_NUM_REGIONS; ++r) { - PCIIORegion *region = &dev->io_regions[r]; - if (!region->size) { - continue; - } - - if (!(region->type & PCI_BASE_ADDRESS_SPACE_IO) && - region->type & PCI_BASE_ADDRESS_MEM_TYPE_64) { - pci_set_quad(dev->config + pci_bar(dev, r), region->type); - } 
else { - pci_set_long(dev->config + pci_bar(dev, r), region->type); - } - } + pci_reset_regions(dev); pci_update_mappings(dev); msi_reset(dev); msix_reset(dev); + pcie_sriov_pf_reset(dev); } /* @@ -348,18 +418,18 @@ static void pci_do_device_reset(PCIDevice *dev) */ void pci_device_reset(PCIDevice *dev) { - qdev_reset_all(&dev->qdev); + device_cold_reset(&dev->qdev); pci_do_device_reset(dev); } /* * Trigger pci bus reset under a given bus. - * Called via qbus_reset_all on RST# assert, after the devices - * have been reset qdev_reset_all-ed already. + * Called via bus_cold_reset on RST# assert, after the devices + * have been reset device_cold_reset-ed already. */ -static void pcibus_reset(BusState *qbus) +static void pcibus_reset_hold(Object *obj) { - PCIBus *bus = DO_UPCAST(PCIBus, qbus, qbus); + PCIBus *bus = PCI_BUS(obj); int i; for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) { @@ -432,16 +502,15 @@ bool pci_bus_bypass_iommu(PCIBus *bus) return host_bridge->bypass_iommu; } -static void pci_root_bus_init(PCIBus *bus, DeviceState *parent, - MemoryRegion *address_space_mem, - MemoryRegion *address_space_io, - uint8_t devfn_min) +static void pci_root_bus_internal_init(PCIBus *bus, DeviceState *parent, + MemoryRegion *mem, MemoryRegion *io, + uint8_t devfn_min) { assert(PCI_FUNC(devfn_min) == 0); bus->devfn_min = devfn_min; bus->slot_reserved_mask = 0x0; - bus->address_space_mem = address_space_mem; - bus->address_space_io = address_space_io; + bus->address_space_mem = mem; + bus->address_space_io = io; bus->flags |= PCI_BUS_IS_ROOT; /* host bridge */ @@ -455,32 +524,28 @@ static void pci_bus_uninit(PCIBus *bus) pci_host_bus_unregister(BUS(bus)->parent); } -bool pci_bus_is_express(PCIBus *bus) +bool pci_bus_is_express(const PCIBus *bus) { return object_dynamic_cast(OBJECT(bus), TYPE_PCIE_BUS); } -void pci_root_bus_new_inplace(PCIBus *bus, size_t bus_size, DeviceState *parent, - const char *name, - MemoryRegion *address_space_mem, - MemoryRegion *address_space_io, - 
uint8_t devfn_min, const char *typename) +void pci_root_bus_init(PCIBus *bus, size_t bus_size, DeviceState *parent, + const char *name, + MemoryRegion *mem, MemoryRegion *io, + uint8_t devfn_min, const char *typename) { - qbus_create_inplace(bus, bus_size, typename, parent, name); - pci_root_bus_init(bus, parent, address_space_mem, address_space_io, - devfn_min); + qbus_init(bus, bus_size, typename, parent, name); + pci_root_bus_internal_init(bus, parent, mem, io, devfn_min); } PCIBus *pci_root_bus_new(DeviceState *parent, const char *name, - MemoryRegion *address_space_mem, - MemoryRegion *address_space_io, + MemoryRegion *mem, MemoryRegion *io, uint8_t devfn_min, const char *typename) { PCIBus *bus; - bus = PCI_BUS(qbus_create(typename, parent, name)); - pci_root_bus_init(bus, parent, address_space_mem, address_space_io, - devfn_min); + bus = PCI_BUS(qbus_new(typename, parent, name)); + pci_root_bus_internal_init(bus, parent, mem, io, devfn_min); return bus; } @@ -491,16 +556,21 @@ void pci_root_bus_cleanup(PCIBus *bus) qbus_unrealize(BUS(bus)); } -void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq, +void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, void *irq_opaque, int nirq) { bus->set_irq = set_irq; - bus->map_irq = map_irq; bus->irq_opaque = irq_opaque; bus->nirq = nirq; + g_free(bus->irq_count); bus->irq_count = g_malloc0(nirq * sizeof(bus->irq_count[0])); } +void pci_bus_map_irqs(PCIBus *bus, pci_map_irq_fn map_irq) +{ + bus->map_irq = map_irq; +} + void pci_bus_irqs_cleanup(PCIBus *bus) { bus->set_irq = NULL; @@ -508,21 +578,21 @@ void pci_bus_irqs_cleanup(PCIBus *bus) bus->irq_opaque = NULL; bus->nirq = 0; g_free(bus->irq_count); + bus->irq_count = NULL; } PCIBus *pci_register_root_bus(DeviceState *parent, const char *name, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq, void *irq_opaque, - MemoryRegion *address_space_mem, - MemoryRegion *address_space_io, + MemoryRegion *mem, MemoryRegion *io, uint8_t devfn_min, int nirq, 
const char *typename) { PCIBus *bus; - bus = pci_root_bus_new(parent, name, address_space_mem, - address_space_io, devfn_min, typename); - pci_bus_irqs(bus, set_irq, map_irq, irq_opaque, nirq); + bus = pci_root_bus_new(parent, name, mem, io, devfn_min, typename); + pci_bus_irqs(bus, set_irq, irq_opaque, nirq); + pci_bus_map_irqs(bus, map_irq); return bus; } @@ -546,7 +616,7 @@ void pci_bus_range(PCIBus *bus, int *min_bus, int *max_bus) for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) { PCIDevice *dev = bus->devices[i]; - if (dev && PCI_DEVICE_GET_CLASS(dev)->is_bridge) { + if (dev && IS_PCI_BRIDGE(dev)) { *min_bus = MIN(*min_bus, dev->config[PCI_SECONDARY_BUS]); *max_bus = MAX(*max_bus, dev->config[PCI_SUBORDINATE_BUS]); } @@ -562,7 +632,6 @@ static int get_pci_config_device(QEMUFile *f, void *pv, size_t size, const VMStateField *field) { PCIDevice *s = container_of(pv, PCIDevice, config); - PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(s); uint8_t *config; int i; @@ -584,9 +653,8 @@ static int get_pci_config_device(QEMUFile *f, void *pv, size_t size, memcpy(s->config, config, size); pci_update_mappings(s); - if (pc->is_bridge) { - PCIBridge *b = PCI_BRIDGE(s); - pci_bridge_update_mappings(b); + if (IS_PCI_BRIDGE(s)) { + pci_bridge_update_mappings(PCI_BRIDGE(s)); } memory_region_set_enabled(&s->bus_master_enable_region, @@ -608,7 +676,7 @@ static int put_pci_config_device(QEMUFile *f, void *pv, size_t size, return 0; } -static VMStateInfo vmstate_info_pci_config = { +static const VMStateInfo vmstate_info_pci_config = { .name = "pci config", .get = get_pci_config_device, .put = put_pci_config_device, @@ -649,7 +717,7 @@ static int put_pci_irq_state(QEMUFile *f, void *pv, size_t size, return 0; } -static VMStateInfo vmstate_info_pci_irq_state = { +static const VMStateInfo vmstate_info_pci_irq_state = { .name = "pci irq state", .get = get_pci_irq_state, .put = put_pci_irq_state, @@ -669,7 +737,7 @@ const VMStateDescription vmstate_pci_device = { .name = "PCIDevice", 
.version_id = 2, .minimum_version_id = 1, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_INT32_POSITIVE_LE(version_id, PCIDevice), VMSTATE_BUFFER_UNSAFE_INFO_TEST(config, PCIDevice, migrate_is_not_pcie, @@ -821,7 +889,7 @@ static void pci_init_w1cmask(PCIDevice *dev) static void pci_init_mask_bridge(PCIDevice *d) { /* PCI_PRIMARY_BUS, PCI_SECONDARY_BUS, PCI_SUBORDINATE_BUS and - PCI_SEC_LETENCY_TIMER */ + PCI_SEC_LATENCY_TIMER */ memset(d->wmask + PCI_PRIMARY_BUS, 0xff, 4); /* base and limit */ @@ -885,6 +953,16 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp) } /* + * With SR/IOV and ARI, a device at function 0 need not be a multifunction + * device, as it may just be a VF that ended up with function 0 in + * the legacy PCI interpretation. Avoid failing in such cases: + */ + if (pci_is_vf(dev) && + dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { + return; + } + + /* * multifunction bit is interpreted in two ways as follows. * - all functions must set the bit to 1. 
* Example: Intel X53 @@ -947,6 +1025,9 @@ static void do_pci_unregister_device(PCIDevice *pci_dev) pci_get_bus(pci_dev)->devices[pci_dev->devfn] = NULL; pci_config_free(pci_dev); + if (xen_mode == XEN_EMULATE) { + xen_evtchn_remove_pci_device(pci_dev); + } if (memory_region_is_mapped(&pci_dev->bus_master_enable_region)) { memory_region_del_subregion(&pci_dev->bus_master_container_region, &pci_dev->bus_master_enable_region); @@ -1039,6 +1120,21 @@ static bool pci_bus_devfn_reserved(PCIBus *bus, int devfn) return bus->slot_reserved_mask & (1UL << PCI_SLOT(devfn)); } +uint32_t pci_bus_get_slot_reserved_mask(PCIBus *bus) +{ + return bus->slot_reserved_mask; +} + +void pci_bus_set_slot_reserved_mask(PCIBus *bus, uint32_t mask) +{ + bus->slot_reserved_mask |= mask; +} + +void pci_bus_clear_slot_reserved_mask(PCIBus *bus, uint32_t mask) +{ + bus->slot_reserved_mask &= ~mask; +} + /* -1 for devfn means auto assign */ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, const char *name, int devfn, @@ -1050,9 +1146,10 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, Error *local_err = NULL; DeviceState *dev = DEVICE(pci_dev); PCIBus *bus = pci_get_bus(pci_dev); + bool is_bridge = IS_PCI_BRIDGE(pci_dev); /* Only pci bridges can be attached to extra PCI root buses */ - if (pci_bus_is_root(bus) && bus->parent_dev && !pc->is_bridge) { + if (pci_bus_is_root(bus) && bus->parent_dev && !is_bridge) { error_setg(errp, "PCI: Only PCI/PCIe bridges can be plugged into %s", bus->parent_dev->name); @@ -1078,12 +1175,18 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, return NULL; } else if (!pci_bus_devfn_available(bus, devfn)) { error_setg(errp, "PCI: slot %d function %d not available for %s," - " in use by %s", + " in use by %s,id=%s", PCI_SLOT(devfn), PCI_FUNC(devfn), name, - bus->devices[devfn]->name); + bus->devices[devfn]->name, bus->devices[devfn]->qdev.id); return NULL; - } else if (dev->hotplugged && - pci_get_function_0(pci_dev)) { + } /* + 
* Populating function 0 triggers a scan from the guest that + * exposes other non-zero functions. Hence we need to ensure that + * function 0 wasn't added yet. + */ + else if (dev->hotplugged && + !pci_is_vf(pci_dev) && + pci_get_function_0(pci_dev)) { error_setg(errp, "PCI: slot %d function 0 already occupied by %s," " new func %s cannot be exposed to guest.", PCI_SLOT(pci_get_function_0(pci_dev)->devfn), @@ -1113,7 +1216,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, pci_config_set_revision(pci_dev->config, pc->revision); pci_config_set_class(pci_dev->config, pc->class_id); - if (!pc->is_bridge) { + if (!is_bridge) { if (pc->subsystem_vendor_id || pc->subsystem_id) { pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID, pc->subsystem_vendor_id); @@ -1130,7 +1233,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, pci_init_cmask(pci_dev); pci_init_wmask(pci_dev); pci_init_w1cmask(pci_dev); - if (pc->is_bridge) { + if (is_bridge) { pci_init_mask_bridge(pci_dev); } pci_init_multifunction(bus, pci_dev, &local_err); @@ -1180,6 +1283,19 @@ static void pci_qdev_unrealize(DeviceState *dev) pci_device_deassert_intx(pci_dev); do_pci_unregister_device(pci_dev); + + pci_dev->msi_trigger = NULL; + + /* + * clean up acpi-index so it could reused by another device + */ + if (pci_dev->acpi_index) { + GSequence *used_indexes = pci_acpi_index_list(); + + g_sequence_remove(g_sequence_lookup(used_indexes, + GINT_TO_POINTER(pci_dev->acpi_index), + g_cmp_uint32, NULL)); + } } void pci_register_bar(PCIDevice *pci_dev, int region_num, @@ -1191,6 +1307,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, pcibus_t size = memory_region_size(memory); uint8_t hdr_type; + assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */ assert(region_num >= 0); assert(region_num < PCI_NUM_REGIONS); assert(is_power_of_2(size)); @@ -1294,22 +1411,54 @@ pcibus_t pci_get_bar_addr(PCIDevice *pci_dev, int region_num) return 
pci_dev->io_regions[region_num].addr; } -static pcibus_t pci_bar_address(PCIDevice *d, - int reg, uint8_t type, pcibus_t size) +static pcibus_t pci_config_get_bar_addr(PCIDevice *d, int reg, + uint8_t type, pcibus_t size) +{ + pcibus_t new_addr; + if (!pci_is_vf(d)) { + int bar = pci_bar(d, reg); + if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) { + new_addr = pci_get_quad(d->config + bar); + } else { + new_addr = pci_get_long(d->config + bar); + } + } else { + PCIDevice *pf = d->exp.sriov_vf.pf; + uint16_t sriov_cap = pf->exp.sriov_cap; + int bar = sriov_cap + PCI_SRIOV_BAR + reg * 4; + uint16_t vf_offset = + pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_OFFSET); + uint16_t vf_stride = + pci_get_word(pf->config + sriov_cap + PCI_SRIOV_VF_STRIDE); + uint32_t vf_num = (d->devfn - (pf->devfn + vf_offset)) / vf_stride; + + if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) { + new_addr = pci_get_quad(pf->config + bar); + } else { + new_addr = pci_get_long(pf->config + bar); + } + new_addr += vf_num * size; + } + /* The ROM slot has a specific enable bit, keep it intact */ + if (reg != PCI_ROM_SLOT) { + new_addr &= ~(size - 1); + } + return new_addr; +} + +pcibus_t pci_bar_address(PCIDevice *d, + int reg, uint8_t type, pcibus_t size) { pcibus_t new_addr, last_addr; - int bar = pci_bar(d, reg); uint16_t cmd = pci_get_word(d->config + PCI_COMMAND); - Object *machine = qdev_get_machine(); - ObjectClass *oc = object_get_class(machine); - MachineClass *mc = MACHINE_CLASS(oc); + MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); bool allow_0_address = mc->pci_allow_0_address; if (type & PCI_BASE_ADDRESS_SPACE_IO) { if (!(cmd & PCI_COMMAND_IO)) { return PCI_BAR_UNMAPPED; } - new_addr = pci_get_long(d->config + bar) & ~(size - 1); + new_addr = pci_config_get_bar_addr(d, reg, type, size); last_addr = new_addr + size - 1; /* Check if 32 bit BAR wraps around explicitly. * TODO: make priorities correct and remove this work around. 
@@ -1324,11 +1473,7 @@ static pcibus_t pci_bar_address(PCIDevice *d, if (!(cmd & PCI_COMMAND_MEMORY)) { return PCI_BAR_UNMAPPED; } - if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) { - new_addr = pci_get_quad(d->config + bar); - } else { - new_addr = pci_get_long(d->config + bar); - } + new_addr = pci_config_get_bar_addr(d, reg, type, size); /* the ROM slot has a specific enable bit */ if (reg == PCI_ROM_SLOT && !(new_addr & PCI_ROM_ADDRESS_ENABLE)) { return PCI_BAR_UNMAPPED; @@ -1380,6 +1525,9 @@ static void pci_update_mappings(PCIDevice *d) continue; new_addr = pci_bar_address(d, i, r->type, r->size); + if (!d->has_power) { + new_addr = PCI_BAR_UNMAPPED; + } /* This bar isn't changed */ if (new_addr == r->addr) @@ -1387,7 +1535,7 @@ static void pci_update_mappings(PCIDevice *d) /* now do the real mapping */ if (r->addr != PCI_BAR_UNMAPPED) { - trace_pci_update_mappings_del(d, pci_dev_bus_num(d), + trace_pci_update_mappings_del(d->name, pci_dev_bus_num(d), PCI_SLOT(d->devfn), PCI_FUNC(d->devfn), i, r->addr, r->size); @@ -1395,7 +1543,7 @@ static void pci_update_mappings(PCIDevice *d) } r->addr = new_addr; if (r->addr != PCI_BAR_UNMAPPED) { - trace_pci_update_mappings_add(d, pci_dev_bus_num(d), + trace_pci_update_mappings_add(d->name, pci_dev_bus_num(d), PCI_SLOT(d->devfn), PCI_FUNC(d->devfn), i, r->addr, r->size); @@ -1461,15 +1609,16 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int range_covers_byte(addr, l, PCI_COMMAND)) pci_update_mappings(d); - if (range_covers_byte(addr, l, PCI_COMMAND)) { + if (ranges_overlap(addr, l, PCI_COMMAND, 2)) { pci_update_irq_disabled(d, was_irq_disabled); memory_region_set_enabled(&d->bus_master_enable_region, - pci_get_word(d->config + PCI_COMMAND) - & PCI_COMMAND_MASTER); + (pci_get_word(d->config + PCI_COMMAND) + & PCI_COMMAND_MASTER) && d->has_power); } msi_write_config(d, addr, val_in, l); msix_write_config(d, addr, val_in, l); + pcie_sriov_config_write(d, addr, val_in, l); } 
/***********************************************************/ @@ -1494,11 +1643,6 @@ static void pci_irq_handler(void *opaque, int irq_num, int level) pci_change_irq_level(pci_dev, irq_num, change); } -static inline int pci_intx(PCIDevice *pci_dev) -{ - return pci_get_byte(pci_dev->config + PCI_INTERRUPT_PIN) - 1; -} - qemu_irq pci_allocate_irq(PCIDevice *pci_dev) { int intx = pci_intx(pci_dev); @@ -1525,8 +1669,12 @@ PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin) PCIBus *bus; do { + int dev_irq = pin; bus = pci_get_bus(dev); pin = bus->map_irq(dev, pin); + trace_pci_route_irq(dev_irq, DEVICE(dev)->canonical_path, pin, + pci_bus_is_root(bus) ? "root-complex" + : DEVICE(bus->parent_dev)->canonical_path); dev = bus->parent_dev; } while (dev); @@ -1573,7 +1721,7 @@ void pci_device_set_intx_routing_notifier(PCIDevice *dev, * 9.1: Interrupt routing. Table 9-1 * * the PCI Express Base Specification, Revision 2.1 - * 2.2.8.1: INTx interrutp signaling - Rules + * 2.2.8.1: INTx interrupt signaling - Rules * the Implementation Note * Table 2-20 */ @@ -1589,13 +1737,6 @@ int pci_swizzle_map_irq_fn(PCIDevice *pci_dev, int pin) /***********************************************************/ /* monitor info on PCI */ -typedef struct { - uint16_t class; - const char *desc; - const char *fw_name; - uint16_t fw_ign_bits; -} pci_class_desc; - static const pci_class_desc pci_class_descriptions[] = { { 0x0001, "VGA controller", "display"}, @@ -1645,7 +1786,7 @@ static const pci_class_desc pci_class_descriptions[] = { 0x0902, "Mouse", "mouse"}, { 0x0A00, "Dock station", "dock", 0x00ff}, { 0x0B00, "i386 cpu", "cpu", 0x00ff}, - { 0x0c00, "Fireware contorller", "fireware"}, + { 0x0c00, "Firewire controller", "firewire"}, { 0x0c01, "Access bus controller", "access-bus"}, { 0x0c02, "SSA controller", "ssa"}, { 0x0c03, "USB controller", "usb"}, @@ -1654,11 +1795,9 @@ static const pci_class_desc pci_class_descriptions[] = { 0, NULL} }; -static void 
pci_for_each_device_under_bus_reverse(PCIBus *bus, - void (*fn)(PCIBus *b, - PCIDevice *d, - void *opaque), - void *opaque) +void pci_for_each_device_under_bus_reverse(PCIBus *bus, + pci_bus_dev_fn fn, + void *opaque) { PCIDevice *d; int devfn; @@ -1672,8 +1811,7 @@ static void pci_for_each_device_under_bus_reverse(PCIBus *bus, } void pci_for_each_device_reverse(PCIBus *bus, int bus_num, - void (*fn)(PCIBus *b, PCIDevice *d, void *opaque), - void *opaque) + pci_bus_dev_fn fn, void *opaque) { bus = pci_find_bus_nr(bus, bus_num); @@ -1682,10 +1820,8 @@ void pci_for_each_device_reverse(PCIBus *bus, int bus_num, } } -static void pci_for_each_device_under_bus(PCIBus *bus, - void (*fn)(PCIBus *b, PCIDevice *d, - void *opaque), - void *opaque) +void pci_for_each_device_under_bus(PCIBus *bus, + pci_bus_dev_fn fn, void *opaque) { PCIDevice *d; int devfn; @@ -1699,8 +1835,7 @@ static void pci_for_each_device_under_bus(PCIBus *bus, } void pci_for_each_device(PCIBus *bus, int bus_num, - void (*fn)(PCIBus *b, PCIDevice *d, void *opaque), - void *opaque) + pci_bus_dev_fn fn, void *opaque) { bus = pci_find_bus_nr(bus, bus_num); @@ -1709,7 +1844,7 @@ void pci_for_each_device(PCIBus *bus, int bus_num, } } -static const pci_class_desc *get_class_desc(int class) +const pci_class_desc *get_class_desc(int class) { const pci_class_desc *desc; @@ -1721,277 +1856,54 @@ static const pci_class_desc *get_class_desc(int class) return desc; } -static PciDeviceInfoList *qmp_query_pci_devices(PCIBus *bus, int bus_num); - -static PciMemoryRegionList *qmp_query_pci_regions(const PCIDevice *dev) +void pci_init_nic_devices(PCIBus *bus, const char *default_model) { - PciMemoryRegionList *head = NULL, **tail = &head; - int i; - - for (i = 0; i < PCI_NUM_REGIONS; i++) { - const PCIIORegion *r = &dev->io_regions[i]; - PciMemoryRegion *region; - - if (!r->size) { - continue; - } - - region = g_malloc0(sizeof(*region)); - - if (r->type & PCI_BASE_ADDRESS_SPACE_IO) { - region->type = g_strdup("io"); - } 
else { - region->type = g_strdup("memory"); - region->has_prefetch = true; - region->prefetch = !!(r->type & PCI_BASE_ADDRESS_MEM_PREFETCH); - region->has_mem_type_64 = true; - region->mem_type_64 = !!(r->type & PCI_BASE_ADDRESS_MEM_TYPE_64); - } - - region->bar = i; - region->address = r->addr; - region->size = r->size; - - QAPI_LIST_APPEND(tail, region); - } - - return head; -} - -static PciBridgeInfo *qmp_query_pci_bridge(PCIDevice *dev, PCIBus *bus, - int bus_num) -{ - PciBridgeInfo *info; - PciMemoryRange *range; - - info = g_new0(PciBridgeInfo, 1); - - info->bus = g_new0(PciBusInfo, 1); - info->bus->number = dev->config[PCI_PRIMARY_BUS]; - info->bus->secondary = dev->config[PCI_SECONDARY_BUS]; - info->bus->subordinate = dev->config[PCI_SUBORDINATE_BUS]; - - range = info->bus->io_range = g_new0(PciMemoryRange, 1); - range->base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_IO); - range->limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_IO); - - range = info->bus->memory_range = g_new0(PciMemoryRange, 1); - range->base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_MEMORY); - range->limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_MEMORY); - - range = info->bus->prefetchable_range = g_new0(PciMemoryRange, 1); - range->base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_MEM_PREFETCH); - range->limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_MEM_PREFETCH); - - if (dev->config[PCI_SECONDARY_BUS] != 0) { - PCIBus *child_bus = pci_find_bus_nr(bus, dev->config[PCI_SECONDARY_BUS]); - if (child_bus) { - info->has_devices = true; - info->devices = qmp_query_pci_devices(child_bus, dev->config[PCI_SECONDARY_BUS]); - } - } - - return info; + qemu_create_nic_bus_devices(&bus->qbus, TYPE_PCI_DEVICE, default_model, + "virtio", "virtio-net-pci"); } -static PciDeviceInfo *qmp_query_pci_device(PCIDevice *dev, PCIBus *bus, - int bus_num) +bool pci_init_nic_in_slot(PCIBus *rootbus, const char *model, + const char *alias, const char *devaddr) { - const 
pci_class_desc *desc; - PciDeviceInfo *info; - uint8_t type; - int class; - - info = g_new0(PciDeviceInfo, 1); - info->bus = bus_num; - info->slot = PCI_SLOT(dev->devfn); - info->function = PCI_FUNC(dev->devfn); - - info->class_info = g_new0(PciDeviceClass, 1); - class = pci_get_word(dev->config + PCI_CLASS_DEVICE); - info->class_info->q_class = class; - desc = get_class_desc(class); - if (desc->desc) { - info->class_info->has_desc = true; - info->class_info->desc = g_strdup(desc->desc); - } - - info->id = g_new0(PciDeviceId, 1); - info->id->vendor = pci_get_word(dev->config + PCI_VENDOR_ID); - info->id->device = pci_get_word(dev->config + PCI_DEVICE_ID); - info->regions = qmp_query_pci_regions(dev); - info->qdev_id = g_strdup(dev->qdev.id ? dev->qdev.id : ""); - - info->irq_pin = dev->config[PCI_INTERRUPT_PIN]; - if (dev->config[PCI_INTERRUPT_PIN] != 0) { - info->has_irq = true; - info->irq = dev->config[PCI_INTERRUPT_LINE]; - } - - type = dev->config[PCI_HEADER_TYPE] & ~PCI_HEADER_TYPE_MULTI_FUNCTION; - if (type == PCI_HEADER_TYPE_BRIDGE) { - info->has_pci_bridge = true; - info->pci_bridge = qmp_query_pci_bridge(dev, bus, bus_num); - } else if (type == PCI_HEADER_TYPE_NORMAL) { - info->id->has_subsystem = info->id->has_subsystem_vendor = true; - info->id->subsystem = pci_get_word(dev->config + PCI_SUBSYSTEM_ID); - info->id->subsystem_vendor = - pci_get_word(dev->config + PCI_SUBSYSTEM_VENDOR_ID); - } else if (type == PCI_HEADER_TYPE_CARDBUS) { - info->id->has_subsystem = info->id->has_subsystem_vendor = true; - info->id->subsystem = pci_get_word(dev->config + PCI_CB_SUBSYSTEM_ID); - info->id->subsystem_vendor = - pci_get_word(dev->config + PCI_CB_SUBSYSTEM_VENDOR_ID); - } - - return info; -} - -static PciDeviceInfoList *qmp_query_pci_devices(PCIBus *bus, int bus_num) -{ - PciDeviceInfoList *head = NULL, **tail = &head; - PCIDevice *dev; - int devfn; - - for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) { - dev = bus->devices[devfn]; - if (dev) { - 
QAPI_LIST_APPEND(tail, qmp_query_pci_device(dev, bus, bus_num)); - } - } - - return head; -} - -static PciInfo *qmp_query_pci_bus(PCIBus *bus, int bus_num) -{ - PciInfo *info = NULL; - - bus = pci_find_bus_nr(bus, bus_num); - if (bus) { - info = g_malloc0(sizeof(*info)); - info->bus = bus_num; - info->devices = qmp_query_pci_devices(bus, bus_num); - } - - return info; -} - -PciInfoList *qmp_query_pci(Error **errp) -{ - PciInfoList *head = NULL, **tail = &head; - PCIHostState *host_bridge; - - QLIST_FOREACH(host_bridge, &pci_host_bridges, next) { - QAPI_LIST_APPEND(tail, - qmp_query_pci_bus(host_bridge->bus, - pci_bus_num(host_bridge->bus))); - } - - return head; -} - -/* Initialize a PCI NIC. */ -PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus, - const char *default_model, - const char *default_devaddr) -{ - const char *devaddr = nd->devaddr ? nd->devaddr : default_devaddr; - GSList *list; - GPtrArray *pci_nic_models; - PCIBus *bus; + NICInfo *nd = qemu_find_nic_info(model, true, alias); + int dom, busnr, devfn; PCIDevice *pci_dev; - DeviceState *dev; - int devfn; - int i; - int dom, busnr; unsigned slot; + PCIBus *bus; - if (nd->model && !strcmp(nd->model, "virtio")) { - g_free(nd->model); - nd->model = g_strdup("virtio-net-pci"); - } - - list = object_class_get_list_sorted(TYPE_PCI_DEVICE, false); - pci_nic_models = g_ptr_array_new(); - while (list) { - DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, list->data, - TYPE_DEVICE); - GSList *next; - if (test_bit(DEVICE_CATEGORY_NETWORK, dc->categories) && - dc->user_creatable) { - const char *name = object_class_get_name(list->data); - /* - * A network device might also be something else than a NIC, see - * e.g. the "rocker" device. Thus we have to look for the "netdev" - * property, too. Unfortunately, some devices like virtio-net only - * create this property during instance_init, so we have to create - * a temporary instance here to be able to check it. 
- */ - Object *obj = object_new_with_class(OBJECT_CLASS(dc)); - if (object_property_find(obj, "netdev")) { - g_ptr_array_add(pci_nic_models, (gpointer)name); - } - object_unref(obj); - } - next = list->next; - g_slist_free_1(list); - list = next; - } - g_ptr_array_add(pci_nic_models, NULL); - - if (qemu_show_nic_models(nd->model, (const char **)pci_nic_models->pdata)) { - exit(0); + if (!nd) { + return false; } - i = qemu_find_nic_model(nd, (const char **)pci_nic_models->pdata, - default_model); - if (i < 0) { + if (!devaddr || pci_parse_devaddr(devaddr, &dom, &busnr, &slot, NULL) < 0) { + error_report("Invalid PCI device address %s for device %s", + devaddr, model); exit(1); } - if (!rootbus) { - error_report("No primary PCI bus"); + if (dom != 0) { + error_report("No support for non-zero PCI domains"); exit(1); } - assert(!rootbus->parent_dev); - - if (!devaddr) { - devfn = -1; - busnr = 0; - } else { - if (pci_parse_devaddr(devaddr, &dom, &busnr, &slot, NULL) < 0) { - error_report("Invalid PCI device address %s for device %s", - devaddr, nd->model); - exit(1); - } - - if (dom != 0) { - error_report("No support for non-zero PCI domains"); - exit(1); - } - - devfn = PCI_DEVFN(slot, 0); - } + devfn = PCI_DEVFN(slot, 0); bus = pci_find_bus_nr(rootbus, busnr); if (!bus) { error_report("Invalid PCI device address %s for device %s", - devaddr, nd->model); + devaddr, model); exit(1); } - pci_dev = pci_new(devfn, nd->model); - dev = &pci_dev->qdev; - qdev_set_nic_properties(dev, nd); + pci_dev = pci_new(devfn, model); + qdev_set_nic_properties(&pci_dev->qdev, nd); pci_realize_and_unref(pci_dev, bus, &error_fatal); - g_ptr_array_free(pci_nic_models, true); - return pci_dev; + return true; } PCIDevice *pci_vga_init(PCIBus *bus) { + vga_interface_created = true; switch (vga_interface_type) { case VGA_CIRRUS: return pci_create_simple(bus, -1, "cirrus-vga"); @@ -2028,7 +1940,7 @@ static bool pci_root_bus_in_range(PCIBus *bus, int bus_num) for (i = 0; i < 
ARRAY_SIZE(bus->devices); ++i) { PCIDevice *dev = bus->devices[i]; - if (dev && PCI_DEVICE_GET_CLASS(dev)->is_bridge) { + if (dev && IS_PCI_BRIDGE(dev)) { if (pci_secondary_bus_in_range(dev, bus_num)) { return true; } @@ -2038,7 +1950,7 @@ static bool pci_root_bus_in_range(PCIBus *bus, int bus_num) return false; } -static PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num) +PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num) { PCIBus *sec; @@ -2078,10 +1990,8 @@ static PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num) return NULL; } -void pci_for_each_bus_depth_first(PCIBus *bus, - void *(*begin)(PCIBus *bus, void *parent_state), - void (*end)(PCIBus *bus, void *state), - void *parent_state) +void pci_for_each_bus_depth_first(PCIBus *bus, pci_bus_ret_fn begin, + pci_bus_fn end, void *parent_state) { PCIBus *sec; void *state; @@ -2116,6 +2026,8 @@ PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn) return bus->devices[devfn]; } +#define ONBOARD_INDEX_MAX (16 * 1024 - 1) + static void pci_qdev_realize(DeviceState *qdev, Error **errp) { PCIDevice *pci_dev = (PCIDevice *)qdev; @@ -2125,6 +2037,35 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) bool is_default_rom; uint16_t class_id; + /* + * capped by systemd (see: udev-builtin-net_id.c) + * as it's the only known user honor it to avoid users + * misconfigure QEMU and then wonder why acpi-index doesn't work + */ + if (pci_dev->acpi_index > ONBOARD_INDEX_MAX) { + error_setg(errp, "acpi-index should be less or equal to %u", + ONBOARD_INDEX_MAX); + return; + } + + /* + * make sure that acpi-index is unique across all present PCI devices + */ + if (pci_dev->acpi_index) { + GSequence *used_indexes = pci_acpi_index_list(); + + if (g_sequence_lookup(used_indexes, + GINT_TO_POINTER(pci_dev->acpi_index), + g_cmp_uint32, NULL)) { + error_setg(errp, "a PCI device with acpi-index = %" PRIu32 + " already exist", pci_dev->acpi_index); + return; + } + g_sequence_insert_sorted(used_indexes, + 
GINT_TO_POINTER(pci_dev->acpi_index), + g_cmp_uint32, NULL); + } + if (pci_dev->romsize != -1 && !is_power_of_2(pci_dev->romsize)) { error_setg(errp, "ROM size %u is not a power of two", pci_dev->romsize); return; @@ -2138,6 +2079,10 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; } + if (object_class_dynamic_cast(klass, INTERFACE_CXL_DEVICE)) { + pci_dev->cap_present |= QEMU_PCIE_CAP_CXL; + } + pci_dev = do_pci_register_device(pci_dev, object_get_typename(OBJECT(qdev)), pci_dev->devfn, errp); @@ -2153,6 +2098,25 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) } } + /* + * A PCIe Downstream Port that do not have ARI Forwarding enabled must + * associate only Device 0 with the device attached to the bus + * representing the Link from the Port (PCIe base spec rev 4.0 ver 0.3, + * sec 7.3.1). + * With ARI, PCI_SLOT() can return non-zero value as the traditional + * 5-bit Device Number and 3-bit Function Number fields in its associated + * Routing IDs, Requester IDs and Completer IDs are interpreted as a + * single 8-bit Function Number. Hence, ignore ARI capable devices. 
+ */ + if (pci_is_express(pci_dev) && + !pcie_find_capability(pci_dev, PCI_EXT_CAP_ID_ARI) && + pcie_has_upstream_port(pci_dev) && + PCI_SLOT(pci_dev->devfn)) { + warn_report("PCI: slot %d is not valid for %s," + " parent device only allows plugging into slot 0.", + PCI_SLOT(pci_dev->devfn), pci_dev->name); + } + if (pci_dev->failover_pair_id) { if (!pci_bus_is_express(pci_get_bus(pci_dev))) { error_setg(errp, "failover primary device must be on " @@ -2190,10 +2154,14 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) pci_qdev_unrealize(DEVICE(pci_dev)); return; } + + pci_set_power(pci_dev, true); + + pci_dev->msi_trigger = pci_msi_trigger; } -PCIDevice *pci_new_multifunction(int devfn, bool multifunction, - const char *name) +static PCIDevice *pci_new_internal(int devfn, bool multifunction, + const char *name) { DeviceState *dev; @@ -2203,9 +2171,14 @@ PCIDevice *pci_new_multifunction(int devfn, bool multifunction, return PCI_DEVICE(dev); } +PCIDevice *pci_new_multifunction(int devfn, const char *name) +{ + return pci_new_internal(devfn, true, name); +} + PCIDevice *pci_new(int devfn, const char *name) { - return pci_new_multifunction(devfn, false, name); + return pci_new_internal(devfn, false, name); } bool pci_realize_and_unref(PCIDevice *dev, PCIBus *bus, Error **errp) @@ -2214,17 +2187,18 @@ bool pci_realize_and_unref(PCIDevice *dev, PCIBus *bus, Error **errp) } PCIDevice *pci_create_simple_multifunction(PCIBus *bus, int devfn, - bool multifunction, const char *name) { - PCIDevice *dev = pci_new_multifunction(devfn, multifunction, name); + PCIDevice *dev = pci_new_multifunction(devfn, name); pci_realize_and_unref(dev, bus, &error_fatal); return dev; } PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name) { - return pci_create_simple_multifunction(bus, devfn, false, name); + PCIDevice *dev = pci_new(devfn, name); + pci_realize_and_unref(dev, bus, &error_fatal); + return dev; } static uint8_t pci_find_space(PCIDevice *pdev, uint8_t 
size) @@ -2337,16 +2311,21 @@ static void pci_patch_ids(PCIDevice *pdev, uint8_t *ptr, uint32_t size) static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, Error **errp) { - int64_t size; - char *path; - void *ptr; + int64_t size = 0; + g_autofree char *path = NULL; char name[32]; const VMStateDescription *vmsd; - if (!pdev->romfile) - return; - if (strlen(pdev->romfile) == 0) + /* + * In case of incoming migration ROM will come with migration stream, no + * reason to load the file. Neither we want to fail if local ROM file + * mismatches with specified romsize. + */ + bool load_file = !runstate_check(RUN_STATE_INMIGRATE); + + if (!pdev->romfile || !strlen(pdev->romfile)) { return; + } if (!pdev->rom_bar) { /* @@ -2373,57 +2352,57 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, return; } - path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile); - if (path == NULL) { - path = g_strdup(pdev->romfile); - } + if (load_file || pdev->romsize == -1) { + path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile); + if (path == NULL) { + path = g_strdup(pdev->romfile); + } - size = get_image_size(path); - if (size < 0) { - error_setg(errp, "failed to find romfile \"%s\"", pdev->romfile); - g_free(path); - return; - } else if (size == 0) { - error_setg(errp, "romfile \"%s\" is empty", pdev->romfile); - g_free(path); - return; - } else if (size > 2 * GiB) { - error_setg(errp, "romfile \"%s\" too large (size cannot exceed 2 GiB)", - pdev->romfile); - g_free(path); - return; - } - if (pdev->romsize != -1) { - if (size > pdev->romsize) { - error_setg(errp, "romfile \"%s\" (%u bytes) is too large for ROM size %u", - pdev->romfile, (uint32_t)size, pdev->romsize); - g_free(path); + size = get_image_size(path); + if (size < 0) { + error_setg(errp, "failed to find romfile \"%s\"", pdev->romfile); + return; + } else if (size == 0) { + error_setg(errp, "romfile \"%s\" is empty", pdev->romfile); + return; + } else if (size > 2 * GiB) { + 
error_setg(errp, + "romfile \"%s\" too large (size cannot exceed 2 GiB)", + pdev->romfile); return; } - } else { - pdev->romsize = pow2ceil(size); + if (pdev->romsize != -1) { + if (size > pdev->romsize) { + error_setg(errp, "romfile \"%s\" (%u bytes) " + "is too large for ROM size %u", + pdev->romfile, (uint32_t)size, pdev->romsize); + return; + } + } else { + pdev->romsize = pow2ceil(size); + } } vmsd = qdev_get_vmsd(DEVICE(pdev)); + snprintf(name, sizeof(name), "%s.rom", + vmsd ? vmsd->name : object_get_typename(OBJECT(pdev))); - if (vmsd) { - snprintf(name, sizeof(name), "%s.rom", vmsd->name); - } else { - snprintf(name, sizeof(name), "%s.rom", object_get_typename(OBJECT(pdev))); - } pdev->has_rom = true; - memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, pdev->romsize, &error_fatal); - ptr = memory_region_get_ram_ptr(&pdev->rom); - if (load_image_size(path, ptr, size) < 0) { - error_setg(errp, "failed to load romfile \"%s\"", pdev->romfile); - g_free(path); - return; - } - g_free(path); + memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, pdev->romsize, + &error_fatal); - if (is_default_rom) { - /* Only the default rom images will be patched (if needed). */ - pci_patch_ids(pdev, ptr, size); + if (load_file) { + void *ptr = memory_region_get_ram_ptr(&pdev->rom); + + if (load_image_size(path, ptr, size) < 0) { + error_setg(errp, "failed to load romfile \"%s\"", pdev->romfile); + return; + } + + if (is_default_rom) { + /* Only the default rom images will be patched (if needed). 
*/ + pci_patch_ids(pdev, ptr, size); + } } pci_register_bar(pdev, PCI_ROM_SLOT, 0, &pdev->rom); @@ -2510,44 +2489,6 @@ uint8_t pci_find_capability(PCIDevice *pdev, uint8_t cap_id) return pci_find_capability_list(pdev, cap_id, NULL); } -static void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent) -{ - PCIDevice *d = (PCIDevice *)dev; - const pci_class_desc *desc; - char ctxt[64]; - PCIIORegion *r; - int i, class; - - class = pci_get_word(d->config + PCI_CLASS_DEVICE); - desc = pci_class_descriptions; - while (desc->desc && class != desc->class) - desc++; - if (desc->desc) { - snprintf(ctxt, sizeof(ctxt), "%s", desc->desc); - } else { - snprintf(ctxt, sizeof(ctxt), "Class %04x", class); - } - - monitor_printf(mon, "%*sclass %s, addr %02x:%02x.%x, " - "pci id %04x:%04x (sub %04x:%04x)\n", - indent, "", ctxt, pci_dev_bus_num(d), - PCI_SLOT(d->devfn), PCI_FUNC(d->devfn), - pci_get_word(d->config + PCI_VENDOR_ID), - pci_get_word(d->config + PCI_DEVICE_ID), - pci_get_word(d->config + PCI_SUBSYSTEM_VENDOR_ID), - pci_get_word(d->config + PCI_SUBSYSTEM_ID)); - for (i = 0; i < PCI_NUM_REGIONS; i++) { - r = &d->io_regions[i]; - if (!r->size) - continue; - monitor_printf(mon, "%*sbar %d: %s at 0x%"FMT_PCIBUS - " [0x%"FMT_PCIBUS"]\n", - indent, "", - i, r->type & PCI_BASE_ADDRESS_SPACE_IO ? 
"i/o" : "mem", - r->addr, r->addr + r->size - 1); - } -} - static char *pci_dev_fw_name(DeviceState *dev, char *buf, int len) { PCIDevice *d = (PCIDevice *)dev; @@ -2579,15 +2520,15 @@ static char *pci_dev_fw_name(DeviceState *dev, char *buf, int len) static char *pcibus_get_fw_dev_path(DeviceState *dev) { PCIDevice *d = (PCIDevice *)dev; - char path[50], name[33]; - int off; + char name[33]; + int has_func = !!PCI_FUNC(d->devfn); - off = snprintf(path, sizeof(path), "%s@%x", - pci_dev_fw_name(dev, name, sizeof name), - PCI_SLOT(d->devfn)); - if (PCI_FUNC(d->devfn)) - snprintf(path + off, sizeof(path) + off, ",%x", PCI_FUNC(d->devfn)); - return g_strdup(path); + return g_strdup_printf("%s@%x%s%.*x", + pci_dev_fw_name(dev, name, sizeof(name)), + PCI_SLOT(d->devfn), + has_func ? "," : "", + has_func, + PCI_FUNC(d->devfn)); } static char *pcibus_get_dev_path(DeviceState *dev) @@ -2701,7 +2642,9 @@ static void pci_device_class_base_init(ObjectClass *klass, void *data) object_class_dynamic_cast(klass, INTERFACE_CONVENTIONAL_PCI_DEVICE); ObjectClass *pcie = object_class_dynamic_cast(klass, INTERFACE_PCIE_DEVICE); - assert(conventional || pcie); + ObjectClass *cxl = + object_class_dynamic_cast(klass, INTERFACE_CXL_DEVICE); + assert(conventional || pcie || cxl); } } @@ -2711,7 +2654,7 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) PCIBus *iommu_bus = bus; uint8_t devfn = dev->devfn; - while (iommu_bus && !iommu_bus->iommu_fn && iommu_bus->parent_dev) { + while (iommu_bus && !iommu_bus->iommu_ops && iommu_bus->parent_dev) { PCIBus *parent_bus = pci_get_bus(iommu_bus->parent_dev); /* @@ -2750,22 +2693,29 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) iommu_bus = parent_bus; } - if (!pci_bus_bypass_iommu(bus) && iommu_bus && iommu_bus->iommu_fn) { - return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, devfn); + if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) { + return iommu_bus->iommu_ops->get_address_space(bus, + 
iommu_bus->iommu_opaque, devfn); } return &address_space_memory; } -void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque) +void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque) { - bus->iommu_fn = fn; + /* + * If called, pci_setup_iommu() should provide a minimum set of + * useful callbacks for the bus. + */ + assert(ops); + assert(ops->get_address_space); + + bus->iommu_ops = ops; bus->iommu_opaque = opaque; } static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque) { Range *range = opaque; - PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); uint16_t cmd = pci_get_word(dev->config + PCI_COMMAND); int i; @@ -2773,7 +2723,7 @@ static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque) return; } - if (pc->is_bridge) { + if (IS_PCI_BRIDGE(dev)) { pcibus_t base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_MEM_PREFETCH); pcibus_t limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_MEM_PREFETCH); @@ -2861,6 +2811,22 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector) return msg; } +void pci_set_power(PCIDevice *d, bool state) +{ + if (d->has_power == state) { + return; + } + + d->has_power = state; + pci_update_mappings(d); + memory_region_set_enabled(&d->bus_master_enable_region, + (pci_get_word(d->config + PCI_COMMAND) + & PCI_COMMAND_MASTER) && d->has_power); + if (!d->has_power) { + pci_device_reset(d); + } +} + static const TypeInfo pci_device_type_info = { .name = TYPE_PCI_DEVICE, .parent = TYPE_DEVICE, @@ -2875,7 +2841,9 @@ static void pci_register_types(void) { type_register_static(&pci_bus_info); type_register_static(&pcie_bus_info); + type_register_static(&cxl_bus_info); type_register_static(&conventional_pci_interface_info); + type_register_static(&cxl_interface_info); type_register_static(&pcie_interface_info); type_register_static(&pci_device_type_info); } diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c index 3789c17edc..6a4e38856d 100644 --- a/hw/pci/pci_bridge.c +++ b/hw/pci/pci_bridge.c @@ 
-36,6 +36,9 @@ #include "qemu/module.h" #include "qemu/range.h" #include "qapi/error.h" +#include "hw/acpi/acpi_aml_interface.h" +#include "hw/acpi/pci.h" +#include "hw/qdev-properties.h" /* PCI bridge subsystem vendor ID helper functions */ #define PCI_SSVID_SIZEOF 8 @@ -182,11 +185,11 @@ static void pci_bridge_init_vga_aliases(PCIBridge *br, PCIBus *parent, } } -static PCIBridgeWindows *pci_bridge_region_init(PCIBridge *br) +static void pci_bridge_region_init(PCIBridge *br) { PCIDevice *pd = PCI_DEVICE(br); PCIBus *parent = pci_get_bus(pd); - PCIBridgeWindows *w = g_new(PCIBridgeWindows, 1); + PCIBridgeWindows *w = &br->windows; uint16_t cmd = pci_get_word(pd->config + PCI_COMMAND); pci_bridge_init_alias(br, &w->alias_pref_mem, @@ -209,8 +212,6 @@ static PCIBridgeWindows *pci_bridge_region_init(PCIBridge *br) cmd & PCI_COMMAND_IO); pci_bridge_init_vga_aliases(br, parent, w->alias_vga); - - return w; } static void pci_bridge_region_del(PCIBridge *br, PCIBridgeWindows *w) @@ -232,19 +233,18 @@ static void pci_bridge_region_cleanup(PCIBridge *br, PCIBridgeWindows *w) object_unparent(OBJECT(&w->alias_vga[QEMU_PCI_VGA_IO_LO])); object_unparent(OBJECT(&w->alias_vga[QEMU_PCI_VGA_IO_HI])); object_unparent(OBJECT(&w->alias_vga[QEMU_PCI_VGA_MEM])); - g_free(w); } void pci_bridge_update_mappings(PCIBridge *br) { - PCIBridgeWindows *w = br->windows; + PCIBridgeWindows *w = &br->windows; /* Make updates atomic to: handle the case of one VCPU updating the bridge * while another accesses an unaffected region. */ memory_region_transaction_begin(); - pci_bridge_region_del(br, br->windows); + pci_bridge_region_del(br, w); pci_bridge_region_cleanup(br, w); - br->windows = pci_bridge_region_init(br); + pci_bridge_region_init(br); memory_region_transaction_commit(); } @@ -275,7 +275,7 @@ void pci_bridge_write_config(PCIDevice *d, newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL); if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) { /* Trigger hot reset on 0->1 transition. 
*/ - qbus_reset_all(BUS(&s->sec_bus)); + bus_cold_reset(BUS(&s->sec_bus)); } } @@ -374,8 +374,8 @@ void pci_bridge_initfn(PCIDevice *dev, const char *typename) br->bus_name = dev->qdev.id; } - qbus_create_inplace(sec_bus, sizeof(br->sec_bus), typename, DEVICE(dev), - br->bus_name); + qbus_init(sec_bus, sizeof(br->sec_bus), typename, DEVICE(dev), + br->bus_name); sec_bus->parent_dev = dev; sec_bus->map_irq = br->map_irq ? br->map_irq : pci_swizzle_map_irq_fn; sec_bus->address_space_mem = &br->address_space_mem; @@ -383,9 +383,14 @@ void pci_bridge_initfn(PCIDevice *dev, const char *typename) sec_bus->address_space_io = &br->address_space_io; memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", 4 * GiB); - br->windows = pci_bridge_region_init(br); + pci_bridge_region_init(br); QLIST_INIT(&sec_bus->child); QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling); + + /* For express secondary buses, secondary latency timer is RO 0 */ + if (pci_bus_is_express(sec_bus) && !br->pcie_writeable_slt_bug) { + dev->wmask[PCI_SEC_LATENCY_TIMER] = 0; + } } /* default qdev clean up function for PCI-to-PCI bridge */ @@ -394,8 +399,8 @@ void pci_bridge_exitfn(PCIDevice *pci_dev) PCIBridge *s = PCI_BRIDGE(pci_dev); assert(QLIST_EMPTY(&s->sec_bus.child)); QLIST_REMOVE(&s->sec_bus, sibling); - pci_bridge_region_del(s, s->windows); - pci_bridge_region_cleanup(s, s->windows); + pci_bridge_region_del(s, &s->windows); + pci_bridge_region_cleanup(s, &s->windows); /* object_unparent() is called automatically during device deletion */ } @@ -448,11 +453,11 @@ int pci_bridge_qemu_reserve_cap_init(PCIDevice *dev, int cap_offset, PCIBridgeQemuCap cap = { .len = cap_len, .type = REDHAT_PCI_CAP_RESOURCE_RESERVE, - .bus_res = res_reserve.bus, - .io = res_reserve.io, - .mem = res_reserve.mem_non_pref, - .mem_pref_32 = res_reserve.mem_pref_32, - .mem_pref_64 = res_reserve.mem_pref_64 + .bus_res = cpu_to_le32(res_reserve.bus), + .io = cpu_to_le64(res_reserve.io), + .mem = 
cpu_to_le32(res_reserve.mem_non_pref), + .mem_pref_32 = cpu_to_le32(res_reserve.mem_pref_32), + .mem_pref_64 = cpu_to_le64(res_reserve.mem_pref_64) }; int offset = pci_add_capability(dev, PCI_CAP_ID_VNDR, @@ -467,11 +472,31 @@ int pci_bridge_qemu_reserve_cap_init(PCIDevice *dev, int cap_offset, return 0; } +static Property pci_bridge_properties[] = { + DEFINE_PROP_BOOL("x-pci-express-writeable-slt-bug", PCIBridge, + pcie_writeable_slt_bug, false), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pci_bridge_class_init(ObjectClass *klass, void *data) +{ + AcpiDevAmlIfClass *adevc = ACPI_DEV_AML_IF_CLASS(klass); + DeviceClass *k = DEVICE_CLASS(klass); + + device_class_set_props(k, pci_bridge_properties); + adevc->build_dev_aml = build_pci_bridge_aml; +} + static const TypeInfo pci_bridge_type_info = { .name = TYPE_PCI_BRIDGE, .parent = TYPE_PCI_DEVICE, .instance_size = sizeof(PCIBridge), + .class_init = pci_bridge_class_init, .abstract = true, + .interfaces = (InterfaceInfo[]) { + { TYPE_ACPI_DEV_AML_IF }, + { }, + }, }; static void pci_bridge_register_types(void) diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c index cf02f0d6a5..dfe6fe6184 100644 --- a/hw/pci/pci_host.c +++ b/hw/pci/pci_host.c @@ -62,6 +62,17 @@ static void pci_adjust_config_limit(PCIBus *bus, uint32_t *limit) } } +static bool is_pci_dev_ejected(PCIDevice *pci_dev) +{ + /* + * device unplug was requested and the guest acked it, + * so we stop responding config accesses even if the + * device is not deleted (failover flow) + */ + return pci_dev && pci_dev->partially_hotplugged && + !pci_dev->qdev.pending_deleted_event; +} + void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr, uint32_t limit, uint32_t val, uint32_t len) { @@ -74,11 +85,13 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr, /* non-zero functions are only exposed when function 0 is present, * allowing direct removal of unexposed functions. 
*/ - if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) { + if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || + !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) { return; } - trace_pci_cfg_write(pci_dev->name, PCI_SLOT(pci_dev->devfn), + trace_pci_cfg_write(pci_dev->name, pci_dev_bus_num(pci_dev), + PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn), addr, val); pci_dev->config_write(pci_dev, addr, val, MIN(len, limit - addr)); } @@ -97,12 +110,14 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr, /* non-zero functions are only exposed when function 0 is present, * allowing direct removal of unexposed functions. */ - if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) { + if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || + !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) { return ~0x0; } ret = pci_dev->config_read(pci_dev, addr, MIN(len, limit - addr)); - trace_pci_cfg_read(pci_dev->name, PCI_SLOT(pci_dev->devfn), + trace_pci_cfg_read(pci_dev->name, pci_dev_bus_num(pci_dev), + PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn), addr, ret); return ret; @@ -114,6 +129,9 @@ void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, unsigned len) uint32_t config_addr = addr & (PCI_CONFIG_SPACE_SIZE - 1); if (!pci_dev) { + trace_pci_cfg_write("empty", extract32(addr, 16, 8), + extract32(addr, 11, 5), extract32(addr, 8, 3), + config_addr, val); return; } @@ -127,6 +145,9 @@ uint32_t pci_data_read(PCIBus *s, uint32_t addr, unsigned len) uint32_t config_addr = addr & (PCI_CONFIG_SPACE_SIZE - 1); if (!pci_dev) { + trace_pci_cfg_read("empty", extract32(addr, 16, 8), + extract32(addr, 11, 5), extract32(addr, 8, 3), + config_addr, ~0x0); return ~0x0; } @@ -139,7 +160,7 @@ static void pci_host_config_write(void *opaque, hwaddr addr, { PCIHostState *s = opaque; - PCI_DPRINTF("%s addr " TARGET_FMT_plx " len %d val %"PRIx64"\n", + PCI_DPRINTF("%s addr " HWADDR_FMT_plx " len %d val %"PRIx64"\n", 
__func__, addr, len, val); if (addr != 0 || len != 4) { return; @@ -153,7 +174,7 @@ static uint64_t pci_host_config_read(void *opaque, hwaddr addr, PCIHostState *s = opaque; uint32_t val = s->config_reg; - PCI_DPRINTF("%s addr " TARGET_FMT_plx " len %d val %"PRIx32"\n", + PCI_DPRINTF("%s addr " HWADDR_FMT_plx " len %d val %"PRIx32"\n", __func__, addr, len, val); return val; } @@ -213,7 +234,7 @@ const VMStateDescription vmstate_pcihost = { .needed = pci_host_needed, .version_id = 1, .minimum_version_id = 1, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT32(config_reg, PCIHostState), VMSTATE_END_OF_LIST() } @@ -222,7 +243,7 @@ const VMStateDescription vmstate_pcihost = { static Property pci_host_properties_common[] = { DEFINE_PROP_BOOL("x-config-reg-migration-enabled", PCIHostState, mig_enabled, true), - DEFINE_PROP_BOOL("bypass-iommu", PCIHostState, bypass_iommu, false), + DEFINE_PROP_BOOL(PCI_HOST_BYPASS_IOMMU, PCIHostState, bypass_iommu, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 6e95d82903..4b2f0805c6 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -28,6 +28,7 @@ #include "hw/pci/pcie_regs.h" #include "hw/pci/pcie_port.h" #include "qemu/range.h" +#include "trace.h" //#define DEBUG_PCIE #ifdef DEBUG_PCIE @@ -39,6 +40,28 @@ #define PCIE_DEV_PRINTF(dev, fmt, ...) 
\ PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__) +static bool pcie_sltctl_powered_off(uint16_t sltctl) +{ + return (sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_OFF + && (sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_OFF; +} + +static const char *pcie_led_state_to_str(uint16_t value) +{ + switch (value) { + case PCI_EXP_SLTCTL_PWR_IND_ON: + case PCI_EXP_SLTCTL_ATTN_IND_ON: + return "on"; + case PCI_EXP_SLTCTL_PWR_IND_BLINK: + case PCI_EXP_SLTCTL_ATTN_IND_BLINK: + return "blink"; + case PCI_EXP_SLTCTL_PWR_IND_OFF: + case PCI_EXP_SLTCTL_ATTN_IND_OFF: + return "off"; + default: + return "invalid"; + } +} /*************************************************************************** * pci express capability helper functions @@ -148,6 +171,14 @@ static void pcie_cap_fill_slot_lnk(PCIDevice *dev) pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2, PCI_EXP_LNKCAP2_SLS_16_0GB); } + if (s->speed > QEMU_PCI_EXP_LNK_16GT) { + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2, + PCI_EXP_LNKCAP2_SLS_32_0GB); + } + if (s->speed > QEMU_PCI_EXP_LNK_32GT) { + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2, + PCI_EXP_LNKCAP2_SLS_64_0GB); + } } } @@ -269,6 +300,13 @@ uint8_t pcie_cap_get_type(const PCIDevice *dev) PCI_EXP_FLAGS_TYPE) >> PCI_EXP_FLAGS_TYPE_SHIFT; } +uint8_t pcie_cap_get_version(const PCIDevice *dev) +{ + uint32_t pos = dev->exp.exp_cap; + assert(pos > 0); + return pci_get_word(dev->config + pos + PCI_EXP_FLAGS) & PCI_EXP_FLAGS_VERS; +} + /* MSI/MSI-X */ /* pci express interrupt message number */ /* 7.8.2 PCI Express Capabilities Register: Interrupt Message Number */ @@ -353,7 +391,7 @@ static void hotplug_event_notify(PCIDevice *dev) msix_notify(dev, pcie_cap_flags_get_vector(dev)); } else if (msi_enabled(dev)) { msi_notify(dev, pcie_cap_flags_get_vector(dev)); - } else { + } else if (pci_intx(dev) != -1) { pci_set_irq(dev, dev->exp.hpev_notified); } } @@ -361,11 +399,47 @@ static void hotplug_event_notify(PCIDevice *dev) 
static void hotplug_event_clear(PCIDevice *dev) { hotplug_event_update_event_status(dev); - if (!msix_enabled(dev) && !msi_enabled(dev) && !dev->exp.hpev_notified) { + if (!msix_enabled(dev) && !msi_enabled(dev) && pci_intx(dev) != -1 && + !dev->exp.hpev_notified) { pci_irq_deassert(dev); } } +void pcie_cap_slot_enable_power(PCIDevice *dev) +{ + uint8_t *exp_cap = dev->config + dev->exp.exp_cap; + uint32_t sltcap = pci_get_long(exp_cap + PCI_EXP_SLTCAP); + + if (sltcap & PCI_EXP_SLTCAP_PCP) { + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_PCC); + } +} + +static void pcie_set_power_device(PCIBus *bus, PCIDevice *dev, void *opaque) +{ + bool *power = opaque; + + pci_set_power(dev, *power); +} + +static void pcie_cap_update_power(PCIDevice *hotplug_dev) +{ + uint8_t *exp_cap = hotplug_dev->config + hotplug_dev->exp.exp_cap; + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(hotplug_dev)); + uint32_t sltcap = pci_get_long(exp_cap + PCI_EXP_SLTCAP); + uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL); + bool power = true; + + if (sltcap & PCI_EXP_SLTCAP_PCP) { + power = (sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_ON; + /* Don't we need to check also (sltctl & PCI_EXP_SLTCTL_PIC) ? */ + } + + pci_for_each_device(sec_bus, pci_bus_num(sec_bus), + pcie_set_power_device, &power); +} + /* * A PCI Express Hot-Plug Event has occurred, so update slot status register * and notify OS of the event if necessary. @@ -423,6 +497,11 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, PCIDevice *pci_dev = PCI_DEVICE(dev); uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP); + if (pci_is_vf(pci_dev)) { + /* Virtual function cannot be physically disconnected */ + return; + } + /* Don't send event when device is enabled during qemu machine creation: * it is present on boot, no hotplug event is necessary. We do send an * event when the device is disabled later. 
*/ @@ -434,6 +513,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_DLLLA); } + pcie_cap_update_power(hotplug_pdev); return; } @@ -451,6 +531,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, } pcie_cap_slot_event(hotplug_pdev, PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP); + pcie_cap_update_power(hotplug_pdev); } } @@ -472,6 +553,25 @@ static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque) object_unparent(OBJECT(dev)); } +static void pcie_cap_slot_do_unplug(PCIDevice *dev) +{ + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); + uint8_t *exp_cap = dev->config + dev->exp.exp_cap; + uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP); + + pci_for_each_device_under_bus(sec_bus, pcie_unplug_device, NULL); + + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDS); + if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA || + (lnkcap & PCI_EXP_LNKCAP_DLLLARC)) { + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_DLLLA); + } + pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDC); +} + void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -481,6 +581,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); + uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL); /* Check if hot-unplug is disabled on the slot */ if ((sltcap & PCI_EXP_SLTCAP_HPC) == 0) { @@ -496,7 +597,15 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, return; } + if ((sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_BLINK) { + error_setg(errp, "Hot-unplug failed: " + "guest is busy (power indicator blinking)"); + return; + } + 
dev->pending_deleted_event = true; + dev->pending_deleted_expires_ms = + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 5000; /* 5 secs */ /* In case user cancel the operation of multi-function hot-add, * remove the function that is unexposed to guest individually, @@ -509,6 +618,15 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, return; } + if (pcie_sltctl_powered_off(sltctl)) { + /* slot is powered off -> unplug without round-trip to the guest */ + pcie_cap_slot_do_unplug(hotplug_pdev); + hotplug_event_notify(hotplug_pdev); + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_ABP); + return; + } + pcie_cap_slot_push_attention_button(hotplug_pdev); } @@ -531,11 +649,11 @@ void pcie_cap_slot_init(PCIDevice *dev, PCIESlot *s) PCI_EXP_SLTCAP_ABP); /* - * Enable native hot-plug on all hot-plugged bridges unless - * hot-plug is disabled on the slot. + * Expose native hot-plug on all bridges if hot-plug is enabled on the slot. + * (unless broken 6.1 ABI is enforced for compat reasons) */ if (s->hotplug && - (s->native_hotplug || DEVICE(dev)->hotplugged)) { + (!s->hide_native_hotplug_cap || DEVICE(dev)->hotplugged)) { pci_long_test_and_set_mask(dev->config + pos + PCI_EXP_SLTCAP, PCI_EXP_SLTCAP_HPS | PCI_EXP_SLTCAP_HPC); @@ -554,8 +672,8 @@ void pcie_cap_slot_init(PCIDevice *dev, PCIESlot *s) PCI_EXP_SLTCTL_PIC | PCI_EXP_SLTCTL_AIC); pci_word_test_and_set_mask(dev->config + pos + PCI_EXP_SLTCTL, - PCI_EXP_SLTCTL_PIC_OFF | - PCI_EXP_SLTCTL_AIC_OFF); + PCI_EXP_SLTCTL_PWR_IND_OFF | + PCI_EXP_SLTCTL_ATTN_IND_OFF); pci_word_test_and_set_mask(dev->wmask + pos + PCI_EXP_SLTCTL, PCI_EXP_SLTCTL_PIC | PCI_EXP_SLTCTL_AIC | @@ -574,6 +692,10 @@ void pcie_cap_slot_init(PCIDevice *dev, PCIESlot *s) pci_word_test_and_set_mask(dev->w1cmask + pos + PCI_EXP_SLTSTA, PCI_EXP_HP_EV_SUPPORTED); + /* Avoid migration abortion when this device hot-removed by guest */ + pci_word_test_and_clear_mask(dev->cmask + pos + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDS); + 
dev->exp.hpev_notified = false; qbus_set_hotplug_handler(BUS(pci_bridge_get_sec_bus(PCI_BRIDGE(dev))), @@ -599,7 +721,8 @@ void pcie_cap_slot_reset(PCIDevice *dev) PCI_EXP_SLTCTL_PDCE | PCI_EXP_SLTCTL_ABPE); pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL, - PCI_EXP_SLTCTL_AIC_OFF); + PCI_EXP_SLTCTL_PWR_IND_OFF | + PCI_EXP_SLTCTL_ATTN_IND_OFF); if (dev->cap_present & QEMU_PCIE_SLTCAP_PCP) { /* Downstream ports enforce device number 0. */ @@ -614,7 +737,8 @@ void pcie_cap_slot_reset(PCIDevice *dev) PCI_EXP_SLTCTL_PCC); } - pic = populated ? PCI_EXP_SLTCTL_PIC_ON : PCI_EXP_SLTCTL_PIC_OFF; + pic = populated ? + PCI_EXP_SLTCTL_PWR_IND_ON : PCI_EXP_SLTCTL_PWR_IND_OFF; pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL, pic); } @@ -625,6 +749,7 @@ void pcie_cap_slot_reset(PCIDevice *dev) PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_ABP); + pcie_cap_update_power(dev); hotplug_event_update_event_status(dev); } @@ -636,6 +761,28 @@ void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slt_ctl, uint16_t *slt_sta) *slt_sta = pci_get_word(exp_cap + PCI_EXP_SLTSTA); } +static void find_child_fn(PCIBus *bus, PCIDevice *dev, void *opaque) +{ + PCIDevice **child = opaque; + + if (!*child) { + *child = dev; + } +} + +/* + * Returns the plugged device or first function of multifunction plugged device + */ +static PCIDevice *pcie_cap_slot_find_child(PCIDevice *dev) +{ + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); + PCIDevice *child = NULL; + + pci_for_each_device(sec_bus, pci_bus_num(sec_bus), find_child_fn, &child); + + return child; +} + void pcie_cap_slot_write_config(PCIDevice *dev, uint16_t old_slt_ctl, uint16_t old_slt_sta, uint32_t addr, uint32_t val, int len) @@ -643,7 +790,6 @@ void pcie_cap_slot_write_config(PCIDevice *dev, uint32_t pos = dev->exp.exp_cap; uint8_t *exp_cap = dev->config + pos; uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA); - uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP); if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) { 
/* @@ -681,6 +827,22 @@ void pcie_cap_slot_write_config(PCIDevice *dev, sltsta); } + if (trace_event_get_state_backends(TRACE_PCIE_CAP_SLOT_WRITE_CONFIG)) { + DeviceState *parent = DEVICE(dev); + DeviceState *child = DEVICE(pcie_cap_slot_find_child(dev)); + + trace_pcie_cap_slot_write_config( + parent->canonical_path, + child ? child->canonical_path : "no-child", + (sltsta & PCI_EXP_SLTSTA_PDS) ? "present" : "not present", + pcie_led_state_to_str(old_slt_ctl & PCI_EXP_SLTCTL_PIC), + pcie_led_state_to_str(val & PCI_EXP_SLTCTL_PIC), + pcie_led_state_to_str(old_slt_ctl & PCI_EXP_SLTCTL_AIC), + pcie_led_state_to_str(val & PCI_EXP_SLTCTL_AIC), + (old_slt_ctl & PCI_EXP_SLTCTL_PWR_OFF) ? "off" : "on", + (val & PCI_EXP_SLTCTL_PWR_OFF) ? "off" : "on"); + } + /* * If the slot is populated, power indicator is off and power * controller is off, it is safe to detach the devices. @@ -689,24 +851,12 @@ void pcie_cap_slot_write_config(PCIDevice *dev, * this is a work around for guests that overwrite * control of powered off slots before powering them on. 
*/ - if ((sltsta & PCI_EXP_SLTSTA_PDS) && (val & PCI_EXP_SLTCTL_PCC) && - (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF && - (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) || - (old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) { - PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); - pci_for_each_device(sec_bus, pci_bus_num(sec_bus), - pcie_unplug_device, NULL); - - pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_PDS); - if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA || - (lnkcap & PCI_EXP_LNKCAP_DLLLARC)) { - pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, - PCI_EXP_LNKSTA_DLLLA); - } - pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_PDC); + if ((sltsta & PCI_EXP_SLTSTA_PDS) && pcie_sltctl_powered_off(val) && + !pcie_sltctl_powered_off(old_slt_ctl)) + { + pcie_cap_slot_do_unplug(dev); } + pcie_cap_update_power(dev); hotplug_event_notify(dev); @@ -733,6 +883,7 @@ int pcie_cap_slot_post_load(void *opaque, int version_id) { PCIDevice *dev = opaque; hotplug_event_update_event_status(dev); + pcie_cap_update_power(dev); return 0; } @@ -876,8 +1027,8 @@ void pcie_add_capability(PCIDevice *dev, uint16_t offset, uint16_t size) { assert(offset >= PCI_CONFIG_SPACE_SIZE); - assert(offset < offset + size); - assert(offset + size <= PCIE_CONFIG_SPACE_SIZE); + assert(offset < (uint16_t)(offset + size)); + assert((uint16_t)(offset + size) <= PCIE_CONFIG_SPACE_SIZE); assert(size >= 8); assert(pci_is_express(dev)); @@ -952,8 +1103,10 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev) */ /* ARI */ -void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn) +void pcie_ari_init(PCIDevice *dev, uint16_t offset) { + uint16_t nextfn = dev->cap_present & QEMU_PCIE_ARI_NEXTFN_1 ? 
1 : 0; + pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER, offset, PCI_ARI_SIZEOF); pci_set_long(dev->config + offset + PCI_ARI_CAP, (nextfn & 0xff) << 8); diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c index 27f9cc56af..2c85a78fcd 100644 --- a/hw/pci/pcie_aer.c +++ b/hw/pci/pcie_aer.c @@ -19,17 +19,14 @@ */ #include "qemu/osdep.h" -#include "sysemu/sysemu.h" -#include "qapi/qmp/qdict.h" #include "migration/vmstate.h" -#include "monitor/monitor.h" #include "hw/pci/pci_bridge.h" #include "hw/pci/pcie.h" #include "hw/pci/msix.h" #include "hw/pci/msi.h" #include "hw/pci/pci_bus.h" #include "hw/pci/pcie_regs.h" -#include "qapi/error.h" +#include "pci-internal.h" //#define DEBUG_PCIE #ifdef DEBUG_PCIE @@ -44,13 +41,6 @@ #define PCI_ERR_SRC_COR_OFFS 0 #define PCI_ERR_SRC_UNCOR_OFFS 2 -typedef struct PCIEErrorDetails { - const char *id; - const char *root_bus; - int bus; - int devfn; -} PCIEErrorDetails; - /* From 6.2.7 Error Listing and Rules. Table 6-2, 6-3 and 6-4 */ static uint32_t pcie_aer_uncor_default_severity(uint32_t status) { @@ -123,6 +113,13 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset, pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS, PCI_ERR_UNC_SUPPORTED); + if (dev->cap_present & QEMU_PCIE_ERR_UNC_MASK) { + pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, + PCI_ERR_UNC_MASK_DEFAULT); + pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, + PCI_ERR_UNC_SUPPORTED); + } + pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER, PCI_ERR_UNC_SEVERITY_DEFAULT); pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_SEVER, @@ -198,8 +195,16 @@ static void pcie_aer_update_uncor_status(PCIDevice *dev) static bool pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg) { + uint16_t devctl = pci_get_word(dev->config + dev->exp.exp_cap + + PCI_EXP_DEVCTL); if (!(pcie_aer_msg_is_uncor(msg) && - (pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR))) { + (pci_get_word(dev->config + PCI_COMMAND) & 
PCI_COMMAND_SERR)) && + !((msg->severity == PCI_ERR_ROOT_CMD_NONFATAL_EN) && + (devctl & PCI_EXP_DEVCTL_NFERE)) && + !((msg->severity == PCI_ERR_ROOT_CMD_COR_EN) && + (devctl & PCI_EXP_DEVCTL_CERE)) && + !((msg->severity == PCI_ERR_ROOT_CMD_FATAL_EN) && + (devctl & PCI_EXP_DEVCTL_FERE))) { return false; } @@ -290,7 +295,7 @@ static void pcie_aer_root_notify(PCIDevice *dev) msix_notify(dev, pcie_aer_root_get_vector(dev)); } else if (msi_enabled(dev)) { msi_notify(dev, pcie_aer_root_get_vector(dev)); - } else { + } else if (pci_intx(dev) != -1) { pci_irq_assert(dev); } } @@ -319,11 +324,11 @@ static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg) * it isn't implemented in qemu right now. * So just discard the error for now. * OS which cares of aer would receive errors via - * native aer mechanims, so this wouldn't matter. + * native aer mechanisms, so this wouldn't matter. */ } - /* Errro Message Received: Root Error Status register */ + /* Error Message Received: Root Error Status register */ switch (msg->severity) { case PCI_ERR_ROOT_CMD_COR_EN: if (root_status & PCI_ERR_ROOT_COR_RCV) { @@ -631,7 +636,7 @@ static bool pcie_aer_inject_uncor_error(PCIEAERInject *inj, bool is_fatal) * Figure 6-2: Flowchart Showing Sequence of Device Error Signaling and Logging * Operations */ -static int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err) +int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err) { uint8_t *aer_cap = NULL; uint16_t devctl = 0; @@ -774,7 +779,9 @@ void pcie_aer_root_write_config(PCIDevice *dev, uint32_t root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND); /* 6.2.4.1.2 Interrupt Generation */ if (!msix_enabled(dev) && !msi_enabled(dev)) { - pci_set_irq(dev, !!(root_cmd & enabled_cmd)); + if (pci_intx(dev) != -1) { + pci_set_irq(dev, !!(root_cmd & enabled_cmd)); + } return; } @@ -790,7 +797,7 @@ static const VMStateDescription vmstate_pcie_aer_err = { .name = "PCIE_AER_ERROR", .version_id = 1, .minimum_version_id = 
1, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT32(status, PCIEAERErr), VMSTATE_UINT16(source_id, PCIEAERErr), VMSTATE_UINT16(flags, PCIEAERErr), @@ -811,7 +818,7 @@ const VMStateDescription vmstate_pcie_aer_log = { .name = "PCIE_AER_ERROR_LOG", .version_id = 1, .minimum_version_id = 1, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT16(log_num, PCIEAERLog), VMSTATE_UINT16_EQUAL(log_max, PCIEAERLog, NULL), VMSTATE_VALIDATE("log_num <= log_max", pcie_aer_state_log_num_valid), @@ -931,8 +938,8 @@ static const struct PCIEAERErrorName pcie_aer_error_list[] = { }, }; -static int pcie_aer_parse_error_string(const char *error_name, - uint32_t *status, bool *correctable) +int pcie_aer_parse_error_string(const char *error_name, + uint32_t *status, bool *correctable) { int i; @@ -948,98 +955,3 @@ static int pcie_aer_parse_error_string(const char *error_name, } return -EINVAL; } - -/* - * Inject an error described by @qdict. - * On success, set @details to show where error was sent. - * Return negative errno if injection failed and a message was emitted. - */ -static int do_pcie_aer_inject_error(Monitor *mon, - const QDict *qdict, - PCIEErrorDetails *details) -{ - const char *id = qdict_get_str(qdict, "id"); - const char *error_name; - uint32_t error_status; - bool correctable; - PCIDevice *dev; - PCIEAERErr err; - int ret; - - ret = pci_qdev_find_device(id, &dev); - if (ret < 0) { - monitor_printf(mon, - "id or pci device path is invalid or device not " - "found. %s\n", id); - return ret; - } - if (!pci_is_express(dev)) { - monitor_printf(mon, "the device doesn't support pci express. 
%s\n", - id); - return -ENOSYS; - } - - error_name = qdict_get_str(qdict, "error_status"); - if (pcie_aer_parse_error_string(error_name, &error_status, &correctable)) { - char *e = NULL; - error_status = strtoul(error_name, &e, 0); - correctable = qdict_get_try_bool(qdict, "correctable", false); - if (!e || *e != '\0') { - monitor_printf(mon, "invalid error status value. \"%s\"", - error_name); - return -EINVAL; - } - } - err.status = error_status; - err.source_id = pci_requester_id(dev); - - err.flags = 0; - if (correctable) { - err.flags |= PCIE_AER_ERR_IS_CORRECTABLE; - } - if (qdict_get_try_bool(qdict, "advisory_non_fatal", false)) { - err.flags |= PCIE_AER_ERR_MAYBE_ADVISORY; - } - if (qdict_haskey(qdict, "header0")) { - err.flags |= PCIE_AER_ERR_HEADER_VALID; - } - if (qdict_haskey(qdict, "prefix0")) { - err.flags |= PCIE_AER_ERR_TLP_PREFIX_PRESENT; - } - - err.header[0] = qdict_get_try_int(qdict, "header0", 0); - err.header[1] = qdict_get_try_int(qdict, "header1", 0); - err.header[2] = qdict_get_try_int(qdict, "header2", 0); - err.header[3] = qdict_get_try_int(qdict, "header3", 0); - - err.prefix[0] = qdict_get_try_int(qdict, "prefix0", 0); - err.prefix[1] = qdict_get_try_int(qdict, "prefix1", 0); - err.prefix[2] = qdict_get_try_int(qdict, "prefix2", 0); - err.prefix[3] = qdict_get_try_int(qdict, "prefix3", 0); - - ret = pcie_aer_inject_error(dev, &err); - if (ret < 0) { - monitor_printf(mon, "failed to inject error: %s\n", - strerror(-ret)); - return ret; - } - details->id = id; - details->root_bus = pci_root_bus_path(dev); - details->bus = pci_dev_bus_num(dev); - details->devfn = dev->devfn; - - return 0; -} - -void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict) -{ - PCIEErrorDetails data; - - if (do_pcie_aer_inject_error(mon, qdict, &data) < 0) { - return; - } - - monitor_printf(mon, "OK id: %s root bus: %s, bus: %x devfn: %x.%x\n", - data.id, data.root_bus, data.bus, - PCI_SLOT(data.devfn), PCI_FUNC(data.devfn)); -} diff --git 
a/hw/pci/pcie_doe.c b/hw/pci/pcie_doe.c new file mode 100644 index 0000000000..2210f86968 --- /dev/null +++ b/hw/pci/pcie_doe.c @@ -0,0 +1,367 @@ +/* + * PCIe Data Object Exchange + * + * Copyright (C) 2021 Avery Design Systems, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qemu/range.h" +#include "hw/pci/pci.h" +#include "hw/pci/pcie.h" +#include "hw/pci/pcie_doe.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" + +#define DWORD_BYTE 4 + +typedef struct DoeDiscoveryReq { + DOEHeader header; + uint8_t index; + uint8_t reserved[3]; +} QEMU_PACKED DoeDiscoveryReq; + +typedef struct DoeDiscoveryRsp { + DOEHeader header; + uint16_t vendor_id; + uint8_t data_obj_type; + uint8_t next_index; +} QEMU_PACKED DoeDiscoveryRsp; + +static bool pcie_doe_discovery(DOECap *doe_cap) +{ + DoeDiscoveryReq *req = pcie_doe_get_write_mbox_ptr(doe_cap); + DoeDiscoveryRsp rsp; + uint8_t index = req->index; + DOEProtocol *prot; + + /* Discard request if length does not match DoeDiscoveryReq */ + if (pcie_doe_get_obj_len(req) < + DIV_ROUND_UP(sizeof(DoeDiscoveryReq), DWORD_BYTE)) { + return false; + } + + rsp.header = (DOEHeader) { + .vendor_id = PCI_VENDOR_ID_PCI_SIG, + .data_obj_type = PCI_SIG_DOE_DISCOVERY, + .length = DIV_ROUND_UP(sizeof(DoeDiscoveryRsp), DWORD_BYTE), + }; + + /* Point to the requested protocol, index 0 must be Discovery */ + if (index == 0) { + rsp.vendor_id = PCI_VENDOR_ID_PCI_SIG; + rsp.data_obj_type = PCI_SIG_DOE_DISCOVERY; + } else { + if (index < doe_cap->protocol_num) { + prot = &doe_cap->protocols[index - 1]; + rsp.vendor_id = prot->vendor_id; + rsp.data_obj_type = prot->data_obj_type; + } else { + rsp.vendor_id = 0xFFFF; + rsp.data_obj_type = 0xFF; + } + } + + if (index + 1 == doe_cap->protocol_num) { + rsp.next_index = 0; + } else { + 
rsp.next_index = index + 1; + } + + pcie_doe_set_rsp(doe_cap, &rsp); + + return true; +} + +static void pcie_doe_reset_mbox(DOECap *st) +{ + st->read_mbox_idx = 0; + st->read_mbox_len = 0; + st->write_mbox_len = 0; + + memset(st->read_mbox, 0, PCI_DOE_DW_SIZE_MAX * DWORD_BYTE); + memset(st->write_mbox, 0, PCI_DOE_DW_SIZE_MAX * DWORD_BYTE); +} + +void pcie_doe_init(PCIDevice *dev, DOECap *doe_cap, uint16_t offset, + DOEProtocol *protocols, bool intr, uint16_t vec) +{ + pcie_add_capability(dev, PCI_EXT_CAP_ID_DOE, 0x1, offset, + PCI_DOE_SIZEOF); + + doe_cap->pdev = dev; + doe_cap->offset = offset; + + if (intr && (msi_present(dev) || msix_present(dev))) { + doe_cap->cap.intr = intr; + doe_cap->cap.vec = vec; + } + + doe_cap->write_mbox = g_malloc0(PCI_DOE_DW_SIZE_MAX * DWORD_BYTE); + doe_cap->read_mbox = g_malloc0(PCI_DOE_DW_SIZE_MAX * DWORD_BYTE); + + pcie_doe_reset_mbox(doe_cap); + + doe_cap->protocols = protocols; + for (; protocols->vendor_id; protocols++) { + doe_cap->protocol_num++; + } + assert(doe_cap->protocol_num < PCI_DOE_PROTOCOL_NUM_MAX); + + /* Increment to allow for the discovery protocol */ + doe_cap->protocol_num++; +} + +void pcie_doe_fini(DOECap *doe_cap) +{ + g_free(doe_cap->read_mbox); + g_free(doe_cap->write_mbox); + g_free(doe_cap); +} + +uint32_t pcie_doe_build_protocol(DOEProtocol *p) +{ + return DATA_OBJ_BUILD_HEADER1(p->vendor_id, p->data_obj_type); +} + +void *pcie_doe_get_write_mbox_ptr(DOECap *doe_cap) +{ + return doe_cap->write_mbox; +} + +/* + * Copy the response to read mailbox buffer + * This might be called in self-defined handle_request() if a DOE response is + * required in the corresponding protocol + */ +void pcie_doe_set_rsp(DOECap *doe_cap, void *rsp) +{ + uint32_t len = pcie_doe_get_obj_len(rsp); + + memcpy(doe_cap->read_mbox + doe_cap->read_mbox_len, rsp, len * DWORD_BYTE); + doe_cap->read_mbox_len += len; +} + +uint32_t pcie_doe_get_obj_len(void *obj) +{ + uint32_t len; + + if (!obj) { + return 0; + } + + /* Only lower 18 
bits are valid */ + len = DATA_OBJ_LEN_MASK(((DOEHeader *)obj)->length); + + /* PCIe r6.0 Table 6.29: a value of 00000h indicates 2^18 DW */ + return (len) ? len : PCI_DOE_DW_SIZE_MAX; +} + +static void pcie_doe_irq_assert(DOECap *doe_cap) +{ + PCIDevice *dev = doe_cap->pdev; + + if (doe_cap->cap.intr && doe_cap->ctrl.intr) { + if (doe_cap->status.intr) { + return; + } + doe_cap->status.intr = 1; + + if (msix_enabled(dev)) { + msix_notify(dev, doe_cap->cap.vec); + } else if (msi_enabled(dev)) { + msi_notify(dev, doe_cap->cap.vec); + } + } +} + +static void pcie_doe_set_ready(DOECap *doe_cap, bool rdy) +{ + doe_cap->status.ready = rdy; + + if (rdy) { + pcie_doe_irq_assert(doe_cap); + } +} + +static void pcie_doe_set_error(DOECap *doe_cap, bool err) +{ + doe_cap->status.error = err; + + if (err) { + pcie_doe_irq_assert(doe_cap); + } +} + +/* + * Check incoming request in write_mbox for protocol format + */ +static void pcie_doe_prepare_rsp(DOECap *doe_cap) +{ + bool success = false; + int p; + bool (*handle_request)(DOECap *) = NULL; + + if (doe_cap->status.error) { + return; + } + + if (doe_cap->write_mbox[0] == + DATA_OBJ_BUILD_HEADER1(PCI_VENDOR_ID_PCI_SIG, PCI_SIG_DOE_DISCOVERY)) { + handle_request = pcie_doe_discovery; + } else { + for (p = 0; p < doe_cap->protocol_num - 1; p++) { + if (doe_cap->write_mbox[0] == + pcie_doe_build_protocol(&doe_cap->protocols[p])) { + handle_request = doe_cap->protocols[p].handle_request; + break; + } + } + } + + /* + * PCIe r6 DOE 6.30.1: + * If the number of DW transferred does not match the + * indicated Length for a data object, then the + * data object must be silently discarded. + */ + if (handle_request && (doe_cap->write_mbox_len == + pcie_doe_get_obj_len(pcie_doe_get_write_mbox_ptr(doe_cap)))) { + success = handle_request(doe_cap); + } + + if (success) { + pcie_doe_set_ready(doe_cap, 1); + } else { + pcie_doe_reset_mbox(doe_cap); + } +} + +/* + * Read from DOE config space. 
+ * Return false if the address not within DOE_CAP range. + */ +bool pcie_doe_read_config(DOECap *doe_cap, uint32_t addr, int size, + uint32_t *buf) +{ + uint32_t shift; + uint16_t doe_offset = doe_cap->offset; + + if (!range_covers_byte(doe_offset + PCI_EXP_DOE_CAP, + PCI_DOE_SIZEOF - 4, addr)) { + return false; + } + + addr -= doe_offset; + *buf = 0; + + if (range_covers_byte(PCI_EXP_DOE_CAP, DWORD_BYTE, addr)) { + *buf = FIELD_DP32(*buf, PCI_DOE_CAP_REG, INTR_SUPP, + doe_cap->cap.intr); + *buf = FIELD_DP32(*buf, PCI_DOE_CAP_REG, DOE_INTR_MSG_NUM, + doe_cap->cap.vec); + } else if (range_covers_byte(PCI_EXP_DOE_CTRL, DWORD_BYTE, addr)) { + /* Must return ABORT=0 and GO=0 */ + *buf = FIELD_DP32(*buf, PCI_DOE_CAP_CONTROL, DOE_INTR_EN, + doe_cap->ctrl.intr); + } else if (range_covers_byte(PCI_EXP_DOE_STATUS, DWORD_BYTE, addr)) { + *buf = FIELD_DP32(*buf, PCI_DOE_CAP_STATUS, DOE_BUSY, + doe_cap->status.busy); + *buf = FIELD_DP32(*buf, PCI_DOE_CAP_STATUS, DOE_INTR_STATUS, + doe_cap->status.intr); + *buf = FIELD_DP32(*buf, PCI_DOE_CAP_STATUS, DOE_ERROR, + doe_cap->status.error); + *buf = FIELD_DP32(*buf, PCI_DOE_CAP_STATUS, DATA_OBJ_RDY, + doe_cap->status.ready); + /* Mailbox should be DW accessed */ + } else if (addr == PCI_EXP_DOE_RD_DATA_MBOX && size == DWORD_BYTE) { + if (doe_cap->status.ready && !doe_cap->status.error) { + *buf = doe_cap->read_mbox[doe_cap->read_mbox_idx]; + } + } + + /* Process Alignment */ + shift = addr % DWORD_BYTE; + *buf = extract32(*buf, shift * 8, size * 8); + + return true; +} + +/* + * Write to DOE config space. 
+ * Return if the address not within DOE_CAP range or receives an abort + */ +void pcie_doe_write_config(DOECap *doe_cap, + uint32_t addr, uint32_t val, int size) +{ + uint16_t doe_offset = doe_cap->offset; + uint32_t shift; + + if (!range_covers_byte(doe_offset + PCI_EXP_DOE_CAP, + PCI_DOE_SIZEOF - 4, addr)) { + return; + } + + /* Process Alignment */ + shift = addr % DWORD_BYTE; + addr -= (doe_offset + shift); + val = deposit32(val, shift * 8, size * 8, val); + + switch (addr) { + case PCI_EXP_DOE_CTRL: + if (FIELD_EX32(val, PCI_DOE_CAP_CONTROL, DOE_ABORT)) { + pcie_doe_set_ready(doe_cap, 0); + pcie_doe_set_error(doe_cap, 0); + pcie_doe_reset_mbox(doe_cap); + return; + } + + if (FIELD_EX32(val, PCI_DOE_CAP_CONTROL, DOE_GO)) { + pcie_doe_prepare_rsp(doe_cap); + } + + if (FIELD_EX32(val, PCI_DOE_CAP_CONTROL, DOE_INTR_EN)) { + doe_cap->ctrl.intr = 1; + /* Clear interrupt bit located within the first byte */ + } else if (shift == 0) { + doe_cap->ctrl.intr = 0; + } + break; + case PCI_EXP_DOE_STATUS: + if (FIELD_EX32(val, PCI_DOE_CAP_STATUS, DOE_INTR_STATUS)) { + doe_cap->status.intr = 0; + } + break; + case PCI_EXP_DOE_RD_DATA_MBOX: + /* Mailbox should be DW accessed */ + if (size != DWORD_BYTE) { + return; + } + doe_cap->read_mbox_idx++; + if (doe_cap->read_mbox_idx == doe_cap->read_mbox_len) { + pcie_doe_reset_mbox(doe_cap); + pcie_doe_set_ready(doe_cap, 0); + } else if (doe_cap->read_mbox_idx > doe_cap->read_mbox_len) { + /* Underflow */ + pcie_doe_set_error(doe_cap, 1); + } + break; + case PCI_EXP_DOE_WR_DATA_MBOX: + /* Mailbox should be DW accessed */ + if (size != DWORD_BYTE) { + return; + } + doe_cap->write_mbox[doe_cap->write_mbox_len] = val; + doe_cap->write_mbox_len++; + break; + case PCI_EXP_DOE_CAP: + /* fallthrough */ + default: + break; + } +} diff --git a/hw/pci/pcie_host.c b/hw/pci/pcie_host.c index 5abbe83220..3717e1a086 100644 --- a/hw/pci/pcie_host.c +++ b/hw/pci/pcie_host.c @@ -20,7 +20,7 @@ */ #include "qemu/osdep.h" -#include "hw/pci/pci.h" 
+#include "hw/pci/pci_device.h" #include "hw/pci/pcie_host.h" #include "qemu/module.h" diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c index da850e8dde..20ff2b39e8 100644 --- a/hw/pci/pcie_port.c +++ b/hw/pci/pcie_port.c @@ -136,6 +136,76 @@ static void pcie_port_class_init(ObjectClass *oc, void *data) device_class_set_props(dc, pcie_port_props); } +PCIDevice *pcie_find_port_by_pn(PCIBus *bus, uint8_t pn) +{ + int devfn; + + for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) { + PCIDevice *d = bus->devices[devfn]; + PCIEPort *port; + + if (!d || !pci_is_express(d) || !d->exp.exp_cap) { + continue; + } + + if (!object_dynamic_cast(OBJECT(d), TYPE_PCIE_PORT)) { + continue; + } + + port = PCIE_PORT(d); + if (port->port == pn) { + return d; + } + } + + return NULL; +} + +/* Find first port in devfn number order */ +PCIDevice *pcie_find_port_first(PCIBus *bus) +{ + int devfn; + + for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) { + PCIDevice *d = bus->devices[devfn]; + + if (!d || !pci_is_express(d) || !d->exp.exp_cap) { + continue; + } + + if (object_dynamic_cast(OBJECT(d), TYPE_PCIE_PORT)) { + return d; + } + } + + return NULL; +} + +int pcie_count_ds_ports(PCIBus *bus) +{ + int dsp_count = 0; + int devfn; + + for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) { + PCIDevice *d = bus->devices[devfn]; + + if (!d || !pci_is_express(d) || !d->exp.exp_cap) { + continue; + } + if (object_dynamic_cast(OBJECT(d), TYPE_PCIE_PORT)) { + dsp_count++; + } + } + return dsp_count; +} + +static bool pcie_slot_is_hotpluggbale_bus(HotplugHandler *plug_handler, + BusState *bus) +{ + PCIESlot *s = PCIE_SLOT(bus->parent); + return s->hotplug; +} + static const TypeInfo pcie_port_type_info = { .name = TYPE_PCIE_PORT, .parent = TYPE_PCI_BRIDGE, @@ -148,7 +218,8 @@ static Property pcie_slot_props[] = { DEFINE_PROP_UINT8("chassis", PCIESlot, chassis, 0), DEFINE_PROP_UINT16("slot", PCIESlot, slot, 0), DEFINE_PROP_BOOL("hotplug", PCIESlot, hotplug, true), - 
DEFINE_PROP_BOOL("native-hotplug", PCIESlot, native_hotplug, true), + DEFINE_PROP_BOOL("x-do-not-expose-native-hotplug-cap", PCIESlot, + hide_native_hotplug_cap, false), DEFINE_PROP_END_OF_LIST() }; @@ -162,6 +233,7 @@ static void pcie_slot_class_init(ObjectClass *oc, void *data) hc->plug = pcie_cap_slot_plug_cb; hc->unplug = pcie_cap_slot_unplug_cb; hc->unplug_request = pcie_cap_slot_unplug_request_cb; + hc->is_hotpluggable_bus = pcie_slot_is_hotpluggbale_bus; } static const TypeInfo pcie_slot_type_info = { diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c new file mode 100644 index 0000000000..e9b23221d7 --- /dev/null +++ b/hw/pci/pcie_sriov.c @@ -0,0 +1,318 @@ +/* + * pcie_sriov.c: + * + * Implementation of SR/IOV emulation support. + * + * Copyright (c) 2015-2017 Knut Omang <knut.omang@oracle.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "hw/pci/pci_device.h" +#include "hw/pci/pcie.h" +#include "hw/pci/pci_bus.h" +#include "hw/qdev-properties.h" +#include "qemu/error-report.h" +#include "qemu/range.h" +#include "qapi/error.h" +#include "trace.h" + +static PCIDevice *register_vf(PCIDevice *pf, int devfn, + const char *name, uint16_t vf_num); +static void unregister_vfs(PCIDevice *dev); + +void pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, + const char *vfname, uint16_t vf_dev_id, + uint16_t init_vfs, uint16_t total_vfs, + uint16_t vf_offset, uint16_t vf_stride) +{ + uint8_t *cfg = dev->config + offset; + uint8_t *wmask; + + pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1, + offset, PCI_EXT_CAP_SRIOV_SIZEOF); + dev->exp.sriov_cap = offset; + dev->exp.sriov_pf.num_vfs = 0; + dev->exp.sriov_pf.vfname = g_strdup(vfname); + dev->exp.sriov_pf.vf = NULL; + + pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset); + pci_set_word(cfg + PCI_SRIOV_VF_STRIDE, vf_stride); + + /* + * Mandatory page sizes to support. 
+ * Device implementations can call pcie_sriov_pf_add_sup_pgsize() + * to set more bits: + */ + pci_set_word(cfg + PCI_SRIOV_SUP_PGSIZE, SRIOV_SUP_PGSIZE_MINREQ); + + /* + * Default is to use 4K pages, software can modify it + * to any of the supported bits + */ + pci_set_word(cfg + PCI_SRIOV_SYS_PGSIZE, 0x1); + + /* Set up device ID and initial/total number of VFs available */ + pci_set_word(cfg + PCI_SRIOV_VF_DID, vf_dev_id); + pci_set_word(cfg + PCI_SRIOV_INITIAL_VF, init_vfs); + pci_set_word(cfg + PCI_SRIOV_TOTAL_VF, total_vfs); + pci_set_word(cfg + PCI_SRIOV_NUM_VF, 0); + + /* Write enable control bits */ + wmask = dev->wmask + offset; + pci_set_word(wmask + PCI_SRIOV_CTRL, + PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI); + pci_set_word(wmask + PCI_SRIOV_NUM_VF, 0xffff); + pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, 0x553); + + qdev_prop_set_bit(&dev->qdev, "multifunction", true); +} + +void pcie_sriov_pf_exit(PCIDevice *dev) +{ + unregister_vfs(dev); + g_free((char *)dev->exp.sriov_pf.vfname); + dev->exp.sriov_pf.vfname = NULL; +} + +void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num, + uint8_t type, dma_addr_t size) +{ + uint32_t addr; + uint64_t wmask; + uint16_t sriov_cap = dev->exp.sriov_cap; + + assert(sriov_cap > 0); + assert(region_num >= 0); + assert(region_num < PCI_NUM_REGIONS); + assert(region_num != PCI_ROM_SLOT); + + wmask = ~(size - 1); + addr = sriov_cap + PCI_SRIOV_BAR + region_num * 4; + + pci_set_long(dev->config + addr, type); + if (!(type & PCI_BASE_ADDRESS_SPACE_IO) && + type & PCI_BASE_ADDRESS_MEM_TYPE_64) { + pci_set_quad(dev->wmask + addr, wmask); + pci_set_quad(dev->cmask + addr, ~0ULL); + } else { + pci_set_long(dev->wmask + addr, wmask & 0xffffffff); + pci_set_long(dev->cmask + addr, 0xffffffff); + } + dev->exp.sriov_pf.vf_bar_type[region_num] = type; +} + +void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, + MemoryRegion *memory) +{ + PCIIORegion *r; + PCIBus *bus = pci_get_bus(dev); + uint8_t 
type; + pcibus_t size = memory_region_size(memory); + + assert(pci_is_vf(dev)); /* PFs must use pci_register_bar */ + assert(region_num >= 0); + assert(region_num < PCI_NUM_REGIONS); + type = dev->exp.sriov_vf.pf->exp.sriov_pf.vf_bar_type[region_num]; + + if (!is_power_of_2(size)) { + error_report("%s: PCI region size must be a power" + " of two - type=0x%x, size=0x%"FMT_PCIBUS, + __func__, type, size); + exit(1); + } + + r = &dev->io_regions[region_num]; + r->memory = memory; + r->address_space = + type & PCI_BASE_ADDRESS_SPACE_IO + ? bus->address_space_io + : bus->address_space_mem; + r->size = size; + r->type = type; + + r->addr = pci_bar_address(dev, region_num, r->type, r->size); + if (r->addr != PCI_BAR_UNMAPPED) { + memory_region_add_subregion_overlap(r->address_space, + r->addr, r->memory, 1); + } +} + +static PCIDevice *register_vf(PCIDevice *pf, int devfn, const char *name, + uint16_t vf_num) +{ + PCIDevice *dev = pci_new(devfn, name); + dev->exp.sriov_vf.pf = pf; + dev->exp.sriov_vf.vf_number = vf_num; + PCIBus *bus = pci_get_bus(pf); + Error *local_err = NULL; + + qdev_realize(&dev->qdev, &bus->qbus, &local_err); + if (local_err) { + error_report_err(local_err); + return NULL; + } + + /* set vid/did according to sr/iov spec - they are not used */ + pci_config_set_vendor_id(dev->config, 0xffff); + pci_config_set_device_id(dev->config, 0xffff); + + return dev; +} + +static void register_vfs(PCIDevice *dev) +{ + uint16_t num_vfs; + uint16_t i; + uint16_t sriov_cap = dev->exp.sriov_cap; + uint16_t vf_offset = + pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_OFFSET); + uint16_t vf_stride = + pci_get_word(dev->config + sriov_cap + PCI_SRIOV_VF_STRIDE); + int32_t devfn = dev->devfn + vf_offset; + + assert(sriov_cap > 0); + num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); + if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) { + return; + } + + dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs); + + 
trace_sriov_register_vfs(dev->name, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), num_vfs); + for (i = 0; i < num_vfs; i++) { + dev->exp.sriov_pf.vf[i] = register_vf(dev, devfn, + dev->exp.sriov_pf.vfname, i); + if (!dev->exp.sriov_pf.vf[i]) { + num_vfs = i; + break; + } + devfn += vf_stride; + } + dev->exp.sriov_pf.num_vfs = num_vfs; +} + +static void unregister_vfs(PCIDevice *dev) +{ + uint16_t num_vfs = dev->exp.sriov_pf.num_vfs; + uint16_t i; + + trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), num_vfs); + for (i = 0; i < num_vfs; i++) { + Error *err = NULL; + PCIDevice *vf = dev->exp.sriov_pf.vf[i]; + if (!object_property_set_bool(OBJECT(vf), "realized", false, &err)) { + error_reportf_err(err, "Failed to unplug: "); + } + object_unparent(OBJECT(vf)); + object_unref(OBJECT(vf)); + } + g_free(dev->exp.sriov_pf.vf); + dev->exp.sriov_pf.vf = NULL; + dev->exp.sriov_pf.num_vfs = 0; +} + +void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, + uint32_t val, int len) +{ + uint32_t off; + uint16_t sriov_cap = dev->exp.sriov_cap; + + if (!sriov_cap || address < sriov_cap) { + return; + } + off = address - sriov_cap; + if (off >= PCI_EXT_CAP_SRIOV_SIZEOF) { + return; + } + + trace_sriov_config_write(dev->name, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), off, val, len); + + if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) { + if (dev->exp.sriov_pf.num_vfs) { + if (!(val & PCI_SRIOV_CTRL_VFE)) { + unregister_vfs(dev); + } + } else { + if (val & PCI_SRIOV_CTRL_VFE) { + register_vfs(dev); + } + } + } +} + + +/* Reset SR/IOV */ +void pcie_sriov_pf_reset(PCIDevice *dev) +{ + uint16_t sriov_cap = dev->exp.sriov_cap; + if (!sriov_cap) { + return; + } + + pci_set_word(dev->config + sriov_cap + PCI_SRIOV_CTRL, 0); + unregister_vfs(dev); + + pci_set_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF, 0); + + /* + * Default is to use 4K pages, software can modify it + * to any of the supported bits + */ + pci_set_word(dev->config + 
sriov_cap + PCI_SRIOV_SYS_PGSIZE, 0x1); + + for (uint16_t i = 0; i < PCI_NUM_REGIONS; i++) { + pci_set_quad(dev->config + sriov_cap + PCI_SRIOV_BAR + i * 4, + dev->exp.sriov_pf.vf_bar_type[i]); + } +} + +/* Add optional supported page sizes to the mask of supported page sizes */ +void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev, uint16_t opt_sup_pgsize) +{ + uint8_t *cfg = dev->config + dev->exp.sriov_cap; + uint8_t *wmask = dev->wmask + dev->exp.sriov_cap; + + uint16_t sup_pgsize = pci_get_word(cfg + PCI_SRIOV_SUP_PGSIZE); + + sup_pgsize |= opt_sup_pgsize; + + /* + * Make sure the new bits are set, and that system page size + * also can be set to any of the new values according to spec: + */ + pci_set_word(cfg + PCI_SRIOV_SUP_PGSIZE, sup_pgsize); + pci_set_word(wmask + PCI_SRIOV_SYS_PGSIZE, sup_pgsize); +} + + +uint16_t pcie_sriov_vf_number(PCIDevice *dev) +{ + assert(pci_is_vf(dev)); + return dev->exp.sriov_vf.vf_number; +} + +PCIDevice *pcie_sriov_get_pf(PCIDevice *dev) +{ + return dev->exp.sriov_vf.pf; +} + +PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n) +{ + assert(!pci_is_vf(dev)); + if (n < dev->exp.sriov_pf.num_vfs) { + return dev->exp.sriov_pf.vf[n]; + } + return NULL; +} + +uint16_t pcie_sriov_num_vfs(PCIDevice *dev) +{ + return dev->exp.sriov_pf.num_vfs; +} diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c index 28e62174c4..aac6f2d034 100644 --- a/hw/pci/shpc.c +++ b/hw/pci/shpc.c @@ -8,6 +8,7 @@ #include "hw/pci/pci.h" #include "hw/pci/pci_bus.h" #include "hw/pci/msi.h" +#include "trace.h" /* TODO: model power only and disabled slot states. 
*/ /* TODO: handle SERR and wakeups */ @@ -123,10 +124,41 @@ #define SHPC_PCI_TO_IDX(pci_slot) ((pci_slot) - 1) #define SHPC_IDX_TO_PHYSICAL(slot) ((slot) + 1) -static uint16_t shpc_get_status(SHPCDevice *shpc, int slot, uint16_t msk) +static const char *shpc_led_state_to_str(uint8_t value) +{ + switch (value) { + case SHPC_LED_ON: + return "on"; + case SHPC_LED_BLINK: + return "blink"; + case SHPC_LED_OFF: + return "off"; + default: + return "invalid"; + } +} + +static const char *shpc_slot_state_to_str(uint8_t value) +{ + switch (value) { + case SHPC_STATE_PWRONLY: + return "power-only"; + case SHPC_STATE_ENABLED: + return "enabled"; + case SHPC_STATE_DISABLED: + return "disabled"; + default: + return "invalid"; + } +} + +static uint8_t shpc_get_status(SHPCDevice *shpc, int slot, uint16_t msk) { uint8_t *status = shpc->config + SHPC_SLOT_STATUS(slot); - return (pci_get_word(status) & msk) >> ctz32(msk); + uint16_t result = (pci_get_word(status) & msk) >> ctz32(msk); + + assert(result <= UINT8_MAX); + return result; } static void shpc_set_status(SHPCDevice *shpc, @@ -223,6 +255,7 @@ void shpc_reset(PCIDevice *d) SHPC_SLOT_STATUS_PRSNT_MASK); shpc_set_status(shpc, i, SHPC_LED_OFF, SHPC_SLOT_PWR_LED_MASK); } + shpc_set_status(shpc, i, SHPC_LED_OFF, SHPC_SLOT_ATTN_LED_MASK); shpc_set_status(shpc, i, 0, SHPC_SLOT_STATUS_66); } shpc_set_sec_bus_speed(shpc, SHPC_SEC_BUS_33); @@ -254,60 +287,83 @@ static void shpc_free_devices_in_slot(SHPCDevice *shpc, int slot) } } -static void shpc_slot_command(SHPCDevice *shpc, uint8_t target, +static bool shpc_slot_is_off(uint8_t state, uint8_t power, uint8_t attn) +{ + return state == SHPC_STATE_DISABLED && power == SHPC_LED_OFF; +} + +static void shpc_slot_command(PCIDevice *d, uint8_t target, uint8_t state, uint8_t power, uint8_t attn) { - uint8_t current_state; + SHPCDevice *shpc = d->shpc; int slot = SHPC_LOGICAL_TO_IDX(target); + uint8_t old_state = shpc_get_status(shpc, slot, SHPC_SLOT_STATE_MASK); + uint8_t old_power = 
shpc_get_status(shpc, slot, SHPC_SLOT_PWR_LED_MASK); + uint8_t old_attn = shpc_get_status(shpc, slot, SHPC_SLOT_ATTN_LED_MASK); + if (target < SHPC_CMD_TRGT_MIN || slot >= shpc->nslots) { shpc_invalid_command(shpc); return; } - current_state = shpc_get_status(shpc, slot, SHPC_SLOT_STATE_MASK); - if (current_state == SHPC_STATE_ENABLED && state == SHPC_STATE_PWRONLY) { + + if (old_state == SHPC_STATE_ENABLED && state == SHPC_STATE_PWRONLY) { shpc_invalid_command(shpc); return; } - switch (power) { - case SHPC_LED_NO: - break; - default: + if (power == SHPC_LED_NO) { + power = old_power; + } else { /* TODO: send event to monitor */ shpc_set_status(shpc, slot, power, SHPC_SLOT_PWR_LED_MASK); } - switch (attn) { - case SHPC_LED_NO: - break; - default: + + if (attn == SHPC_LED_NO) { + attn = old_attn; + } else { /* TODO: send event to monitor */ shpc_set_status(shpc, slot, attn, SHPC_SLOT_ATTN_LED_MASK); } - if ((current_state == SHPC_STATE_DISABLED && state == SHPC_STATE_PWRONLY) || - (current_state == SHPC_STATE_DISABLED && state == SHPC_STATE_ENABLED)) { - shpc_set_status(shpc, slot, state, SHPC_SLOT_STATE_MASK); - } else if ((current_state == SHPC_STATE_ENABLED || - current_state == SHPC_STATE_PWRONLY) && - state == SHPC_STATE_DISABLED) { + if (state == SHPC_STATE_NO) { + state = old_state; + } else { shpc_set_status(shpc, slot, state, SHPC_SLOT_STATE_MASK); - power = shpc_get_status(shpc, slot, SHPC_SLOT_PWR_LED_MASK); - /* TODO: track what monitor requested. */ - /* Look at LED to figure out whether it's ok to remove the device. 
*/ - if (power == SHPC_LED_OFF) { - shpc_free_devices_in_slot(shpc, slot); - shpc_set_status(shpc, slot, 1, SHPC_SLOT_STATUS_MRL_OPEN); - shpc_set_status(shpc, slot, SHPC_SLOT_STATUS_PRSNT_EMPTY, - SHPC_SLOT_STATUS_PRSNT_MASK); - shpc->config[SHPC_SLOT_EVENT_LATCH(slot)] |= - SHPC_SLOT_EVENT_MRL | - SHPC_SLOT_EVENT_PRESENCE; - } + } + + if (trace_event_get_state_backends(TRACE_SHPC_SLOT_COMMAND)) { + DeviceState *parent = DEVICE(d); + int pci_slot = SHPC_IDX_TO_PCI(slot); + DeviceState *child = + DEVICE(shpc->sec_bus->devices[PCI_DEVFN(pci_slot, 0)]); + + trace_shpc_slot_command( + parent->canonical_path, pci_slot, + child ? child->canonical_path : "no-child", + shpc_led_state_to_str(old_power), + shpc_led_state_to_str(power), + shpc_led_state_to_str(old_attn), + shpc_led_state_to_str(attn), + shpc_slot_state_to_str(old_state), + shpc_slot_state_to_str(state)); + } + + if (!shpc_slot_is_off(old_state, old_power, old_attn) && + shpc_slot_is_off(state, power, attn)) + { + shpc_free_devices_in_slot(shpc, slot); + shpc_set_status(shpc, slot, 1, SHPC_SLOT_STATUS_MRL_OPEN); + shpc_set_status(shpc, slot, SHPC_SLOT_STATUS_PRSNT_EMPTY, + SHPC_SLOT_STATUS_PRSNT_MASK); + shpc->config[SHPC_SLOT_EVENT_LATCH(slot)] |= + SHPC_SLOT_EVENT_MRL | + SHPC_SLOT_EVENT_PRESENCE; } } -static void shpc_command(SHPCDevice *shpc) +static void shpc_command(PCIDevice *d) { + SHPCDevice *shpc = d->shpc; uint8_t code = pci_get_byte(shpc->config + SHPC_CMD_CODE); uint8_t speed; uint8_t target; @@ -328,7 +384,7 @@ static void shpc_command(SHPCDevice *shpc) state = (code & SHPC_SLOT_STATE_MASK) >> SHPC_SLOT_STATE_SHIFT; power = (code & SHPC_SLOT_PWR_LED_MASK) >> SHPC_SLOT_PWR_LED_SHIFT; attn = (code & SHPC_SLOT_ATTN_LED_MASK) >> SHPC_SLOT_ATTN_LED_SHIFT; - shpc_slot_command(shpc, target, state, power, attn); + shpc_slot_command(d, target, state, power, attn); break; case 0x40 ... 
0x47: speed = code & SHPC_SEC_BUS_MASK; @@ -346,10 +402,10 @@ static void shpc_command(SHPCDevice *shpc) } for (i = 0; i < shpc->nslots; ++i) { if (!(shpc_get_status(shpc, i, SHPC_SLOT_STATUS_MRL_OPEN))) { - shpc_slot_command(shpc, i + SHPC_CMD_TRGT_MIN, + shpc_slot_command(d, i + SHPC_CMD_TRGT_MIN, SHPC_STATE_PWRONLY, SHPC_LED_ON, SHPC_LED_NO); } else { - shpc_slot_command(shpc, i + SHPC_CMD_TRGT_MIN, + shpc_slot_command(d, i + SHPC_CMD_TRGT_MIN, SHPC_STATE_NO, SHPC_LED_OFF, SHPC_LED_NO); } } @@ -367,10 +423,10 @@ static void shpc_command(SHPCDevice *shpc) } for (i = 0; i < shpc->nslots; ++i) { if (!(shpc_get_status(shpc, i, SHPC_SLOT_STATUS_MRL_OPEN))) { - shpc_slot_command(shpc, i + SHPC_CMD_TRGT_MIN, + shpc_slot_command(d, i + SHPC_CMD_TRGT_MIN, SHPC_STATE_ENABLED, SHPC_LED_ON, SHPC_LED_NO); } else { - shpc_slot_command(shpc, i + SHPC_CMD_TRGT_MIN, + shpc_slot_command(d, i + SHPC_CMD_TRGT_MIN, SHPC_STATE_NO, SHPC_LED_OFF, SHPC_LED_NO); } } @@ -402,7 +458,7 @@ static void shpc_write(PCIDevice *d, unsigned addr, uint64_t val, int l) shpc->config[a] &= ~(val & w1cmask); /* W1C: Write 1 to Clear */ } if (ranges_overlap(addr, l, SHPC_CMD_CODE, 2)) { - shpc_command(shpc); + shpc_command(d); } shpc_interrupt_update(d); } @@ -456,7 +512,7 @@ static int shpc_cap_add_config(PCIDevice *d, Error **errp) pci_set_byte(config + SHPC_CAP_CxP, 0); pci_set_long(config + SHPC_CAP_DWORD_DATA, 0); d->shpc->cap = config_offset; - /* Make dword select and data writeable. */ + /* Make dword select and data writable. */ pci_set_byte(d->wmask + config_offset + SHPC_CAP_DWORD_SELECT, 0xff); pci_set_long(d->wmask + config_offset + SHPC_CAP_DWORD_DATA, 0xffffffff); return 0; @@ -480,13 +536,15 @@ static const MemoryRegionOps shpc_mmio_ops = { .endianness = DEVICE_LITTLE_ENDIAN, .valid = { /* SHPC ECN requires dword accesses, but the original 1.0 spec doesn't. - * It's easier to suppport all sizes than worry about it. */ + * It's easier to support all sizes than worry about it. 
+ */ .min_access_size = 1, .max_access_size = 4, }, }; -static void shpc_device_plug_common(PCIDevice *affected_dev, int *slot, - SHPCDevice *shpc, Error **errp) + +static bool shpc_device_get_slot(PCIDevice *affected_dev, int *slot, + SHPCDevice *shpc, Error **errp) { int pci_slot = PCI_SLOT(affected_dev->devfn); *slot = SHPC_PCI_TO_IDX(pci_slot); @@ -496,21 +554,20 @@ static void shpc_device_plug_common(PCIDevice *affected_dev, int *slot, "controller. Valid slots are between %d and %d.", pci_slot, SHPC_IDX_TO_PCI(0), SHPC_IDX_TO_PCI(shpc->nslots) - 1); - return; + return false; } + + return true; } void shpc_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { - Error *local_err = NULL; PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); SHPCDevice *shpc = pci_hotplug_dev->shpc; int slot; - shpc_device_plug_common(PCI_DEVICE(dev), &slot, shpc, &local_err); - if (local_err) { - error_propagate(errp, local_err); + if (!shpc_device_get_slot(PCI_DEVICE(dev), &slot, shpc, errp)) { return; } @@ -552,21 +609,25 @@ void shpc_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, void shpc_device_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { - Error *local_err = NULL; PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); SHPCDevice *shpc = pci_hotplug_dev->shpc; uint8_t state; uint8_t led; int slot; - shpc_device_plug_common(PCI_DEVICE(dev), &slot, shpc, &local_err); - if (local_err) { - error_propagate(errp, local_err); + if (!shpc_device_get_slot(PCI_DEVICE(dev), &slot, shpc, errp)) { return; } state = shpc_get_status(shpc, slot, SHPC_SLOT_STATE_MASK); led = shpc_get_status(shpc, slot, SHPC_SLOT_PWR_LED_MASK); + + if (led == SHPC_LED_BLINK) { + error_setg(errp, "Hot-unplug failed: " + "guest is busy (power indicator blinking)"); + return; + } + if (state == SHPC_STATE_DISABLED && led == SHPC_LED_OFF) { shpc_free_devices_in_slot(shpc, slot); shpc_set_status(shpc, slot, 1, SHPC_SLOT_STATUS_MRL_OPEN); 
@@ -600,7 +661,7 @@ int shpc_init(PCIDevice *d, PCIBus *sec_bus, MemoryRegion *bar, } if (nslots > SHPC_MAX_SLOTS || SHPC_IDX_TO_PCI(nslots) > PCI_SLOT_MAX) { - /* TODO: report an error mesage that makes sense. */ + /* TODO: report an error message that makes sense. */ return -EINVAL; } shpc->nslots = nslots; @@ -721,7 +782,7 @@ static int shpc_load(QEMUFile *f, void *pv, size_t size, return 0; } -VMStateInfo shpc_vmstate_info = { +const VMStateInfo shpc_vmstate_info = { .name = "shpc", .get = shpc_load, .put = shpc_save, diff --git a/hw/pci/slotid_cap.c b/hw/pci/slotid_cap.c index 36d021b4a6..8372d05d9e 100644 --- a/hw/pci/slotid_cap.c +++ b/hw/pci/slotid_cap.c @@ -1,6 +1,6 @@ #include "qemu/osdep.h" #include "hw/pci/slotid_cap.h" -#include "hw/pci/pci.h" +#include "hw/pci/pci_device.h" #include "qemu/error-report.h" #include "qapi/error.h" diff --git a/hw/pci/trace-events b/hw/pci/trace-events index fc777d0b5e..19643aa8c6 100644 --- a/hw/pci/trace-events +++ b/hw/pci/trace-events @@ -1,12 +1,24 @@ # See docs/devel/tracing.rst for syntax documentation. 
# pci.c -pci_update_mappings_del(void *d, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "d=%p %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64 -pci_update_mappings_add(void *d, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "d=%p %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64 +pci_update_mappings_del(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64 +pci_update_mappings_add(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64 +pci_route_irq(int dev_irq, const char *dev_path, int parent_irq, const char *parent_path) "IRQ %d @%s -> IRQ %d @%s" # pci_host.c -pci_cfg_read(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x -> 0x%x" -pci_cfg_write(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x <- 0x%x" +pci_cfg_read(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, unsigned offs, unsigned val) "%s %02x:%02x.%x @0x%x -> 0x%x" +pci_cfg_write(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, unsigned offs, unsigned val) "%s %02x:%02x.%x @0x%x <- 0x%x" # msix.c msix_write_config(char *name, bool enabled, bool masked) "dev %s enabled %d masked %d" + +# hw/pci/pcie_sriov.c +sriov_register_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: creating %d vf devs" +sriov_unregister_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: Unregistering %d vf devs" +sriov_config_write(const char *name, int slot, int fun, uint32_t offset, uint32_t val, uint32_t len) "%s %02x:%x: sriov offset 0x%x val 0x%x len %d" + +# pcie.c +pcie_cap_slot_write_config(const char *parent, const char *child, const char *pds, const char *old_pic, const char *new_pic, const char *old_aic, const char *new_aic, const char 
*old_power, const char *new_power) "%s > %s: pds: %s, pic: %s->%s, aic: %s->%s, power: %s->%s" + +# shpc.c +shpc_slot_command(const char *parent, int pci_slot, const char *child, const char *old_pic, const char *new_pic, const char *old_aic, const char *new_aic, const char *old_state, const char *new_state) "%s[%d] > %s: pic: %s->%s, aic: %s->%s, state: %s->%s" |