diff options
Diffstat (limited to 'hw/ppc/spapr_pci.c')
-rw-r--r-- | hw/ppc/spapr_pci.c | 1125 |
1 files changed, 681 insertions, 444 deletions
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index b74f2632ec..25e0295d6f 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -22,33 +22,36 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ + #include "qemu/osdep.h" #include "qapi/error.h" -#include "qemu-common.h" -#include "cpu.h" -#include "hw/hw.h" +#include "hw/irq.h" #include "hw/sysbus.h" +#include "migration/vmstate.h" #include "hw/pci/pci.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" #include "hw/pci/pci_host.h" #include "hw/ppc/spapr.h" #include "hw/pci-host/spapr.h" -#include "exec/address-spaces.h" #include "exec/ram_addr.h" #include <libfdt.h> #include "trace.h" #include "qemu/error-report.h" +#include "qemu/module.h" #include "qapi/qmp/qerror.h" #include "hw/ppc/fdt.h" #include "hw/pci/pci_bridge.h" #include "hw/pci/pci_bus.h" #include "hw/pci/pci_ids.h" #include "hw/ppc/spapr_drc.h" +#include "hw/qdev-properties.h" #include "sysemu/device_tree.h" #include "sysemu/kvm.h" #include "sysemu/hostmem.h" #include "sysemu/numa.h" +#include "hw/ppc/spapr_numa.h" +#include "qemu/log.h" /* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */ #define RTAS_QUERY_FN 0 @@ -61,9 +64,9 @@ #define RTAS_TYPE_MSI 1 #define RTAS_TYPE_MSIX 2 -sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState *spapr, uint64_t buid) +SpaprPhbState *spapr_pci_find_phb(SpaprMachineState *spapr, uint64_t buid) { - sPAPRPHBState *sphb; + SpaprPhbState *sphb; QLIST_FOREACH(sphb, &spapr->phbs, list) { if (sphb->buid != buid) { @@ -75,10 +78,10 @@ sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState *spapr, uint64_t buid) return NULL; } -PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid, +PCIDevice *spapr_pci_find_dev(SpaprMachineState *spapr, uint64_t buid, uint32_t config_addr) { - sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid); + SpaprPhbState *sphb = spapr_pci_find_phb(spapr, buid); PCIHostState *phb = PCI_HOST_BRIDGE(sphb); int bus_num = (config_addr >> 16) & 0xFF; int devfn = (config_addr >> 8) & 0xFF; @@ -96,7 +99,7 @@ static uint32_t rtas_pci_cfgaddr(uint32_t arg) return ((arg >> 20) & 0xf00) | (arg & 0xff); } -static void finish_read_pci_config(sPAPRMachineState *spapr, uint64_t buid, +static void finish_read_pci_config(SpaprMachineState *spapr, uint64_t buid, uint32_t addr, uint32_t size, target_ulong rets) { @@ -126,7 +129,7 @@ static void finish_read_pci_config(sPAPRMachineState *spapr, uint64_t buid, rtas_st(rets, 1, val); } -static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr, +static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -146,7 +149,7 @@ static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr, finish_read_pci_config(spapr, buid, addr, size, rets); } -static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr, +static void rtas_read_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -164,7 +167,7 @@ static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr, finish_read_pci_config(spapr, 0, addr, size, rets); } -static void finish_write_pci_config(sPAPRMachineState *spapr, uint64_t buid, +static void finish_write_pci_config(SpaprMachineState *spapr, uint64_t buid, uint32_t addr, uint32_t size, uint32_t val, target_ulong rets) { @@ -192,7 +195,7 @@ static void finish_write_pci_config(sPAPRMachineState *spapr, uint64_t buid, rtas_st(rets, 0, RTAS_OUT_SUCCESS); } -static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr, +static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -213,7 +216,7 @@ static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr, finish_write_pci_config(spapr, buid, addr, size, val, rets); } -static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPRMachineState *spapr, +static void rtas_write_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) @@ -262,12 +265,12 @@ static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix, } } -static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, +static void rtas_ibm_change_msi(PowerPCCPU *cpu, SpaprMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); uint32_t config_addr = rtas_ld(args, 0); uint64_t buid = rtas_ldq(args, 1); unsigned int func = rtas_ld(args, 3); @@ -275,14 +278,14 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, unsigned int seq_num = rtas_ld(args, 5); unsigned int ret_intr_type; unsigned int irq, max_irqs = 0; - sPAPRPHBState *phb = NULL; + SpaprPhbState *phb = NULL; PCIDevice *pdev = NULL; - spapr_pci_msi *msi; + SpaprPciMsi *msi; int *config_addr_key; Error *err = NULL; int i; - /* Fins sPAPRPHBState */ + /* Fins SpaprPhbState */ phb = spapr_pci_find_phb(spapr, buid); if (phb) { pdev = spapr_pci_find_dev(spapr, buid, config_addr); @@ -325,7 +328,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, return; } - msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr); + msi = (SpaprPciMsi *) g_hash_table_lookup(phb->msi, &config_addr); /* Releasing MSIs */ if (!req_num) { @@ -335,10 +338,6 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, return; } - if (!smc->legacy_irq_allocation) { - spapr_irq_msi_free(spapr, msi->first_irq, msi->num); - } - spapr_irq_free(spapr, msi->first_irq, msi->num); if (msi_present(pdev)) { spapr_msi_setmsg(pdev, 0, false, 0, 0); } @@ -393,6 +392,12 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, for (i = 0; i < req_num; i++) { spapr_irq_claim(spapr, irq + i, false, &err); if (err) { + if (i) { + spapr_irq_free(spapr, irq, i); + } + if (!smc->legacy_irq_allocation) { + spapr_irq_msi_free(spapr, irq, req_num); + } error_reportf_err(err, "Can't allocate MSIs for device %x: ", config_addr); rtas_st(rets, 0, RTAS_OUT_HW_ERROR); @@ -402,10 +407,6 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, /* Release previous MSIs */ if (msi) { - if (!smc->legacy_irq_allocation) { - spapr_irq_msi_free(spapr, msi->first_irq, msi->num); - } - spapr_irq_free(spapr, msi->first_irq, msi->num); g_hash_table_remove(phb->msi, &config_addr); } @@ -414,7 +415,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, irq, req_num); /* Add MSI device to cache */ - msi = g_new(spapr_pci_msi, 1); + msi = g_new(SpaprPciMsi, 1); msi->first_irq = irq; msi->num = req_num; config_addr_key = g_new(int, 1); @@ -433,7 +434,7 @@ out: } static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu, - sPAPRMachineState *spapr, + SpaprMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, @@ -443,11 +444,11 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu, uint32_t config_addr = rtas_ld(args, 0); uint64_t buid = rtas_ldq(args, 1); unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3); - sPAPRPHBState *phb = NULL; + SpaprPhbState *phb = NULL; PCIDevice *pdev = NULL; - spapr_pci_msi *msi; + SpaprPciMsi *msi; - /* Find sPAPRPHBState */ + /* Find SpaprPhbState */ phb = spapr_pci_find_phb(spapr, buid); if (phb) { pdev = spapr_pci_find_dev(spapr, buid, config_addr); @@ -458,7 +459,7 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu, } /* Find device descriptor and start IRQ */ - msi = (spapr_pci_msi *) g_hash_table_lookup(phb->msi, &config_addr); + msi = (SpaprPciMsi *) g_hash_table_lookup(phb->msi, &config_addr); if (!msi || !msi->first_irq || !msi->num || (ioa_intr_num >= msi->num)) { trace_spapr_pci_msi("Failed to return vector", config_addr); rtas_st(rets, 0, RTAS_OUT_HW_ERROR); @@ -474,12 +475,12 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu, } static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu, - sPAPRMachineState *spapr, + SpaprMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - sPAPRPHBState *sphb; + SpaprPhbState *sphb; uint32_t addr, option; uint64_t buid; int ret; @@ -510,12 +511,12 @@ param_error_exit: } static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu, - sPAPRMachineState *spapr, + SpaprMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - sPAPRPHBState *sphb; + SpaprPhbState *sphb; PCIDevice *pdev; uint32_t addr, option; uint64_t buid; @@ -564,12 +565,12 @@ param_error_exit: } static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu, - sPAPRMachineState *spapr, + SpaprMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - sPAPRPHBState *sphb; + SpaprPhbState *sphb; uint64_t buid; int state, ret; @@ -606,12 +607,12 @@ param_error_exit: } static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu, - sPAPRMachineState *spapr, + SpaprMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - sPAPRPHBState *sphb; + SpaprPhbState *sphb; uint32_t option; uint64_t buid; int ret; @@ -640,12 +641,12 @@ param_error_exit: } static void rtas_ibm_configure_pe(PowerPCCPU *cpu, - sPAPRMachineState *spapr, + SpaprMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - sPAPRPHBState *sphb; + SpaprPhbState *sphb; uint64_t buid; int ret; @@ -673,12 +674,12 @@ param_error_exit: /* To support it later */ static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu, - sPAPRMachineState *spapr, + SpaprMachineState *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - sPAPRPHBState *sphb; + SpaprPhbState *sphb; int option; uint64_t buid; @@ -713,37 +714,22 @@ param_error_exit: rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); } -static int pci_spapr_swizzle(int slot, int pin) -{ - return (slot + pin) % PCI_NUM_PINS; -} - -static int pci_spapr_map_irq(PCIDevice *pci_dev, int irq_num) -{ - /* - * Here we need to convert pci_dev + irq_num to some unique value - * which is less than number of IRQs on the specific bus (4). We - * use standard PCI swizzling, that is (slot number + pin number) - * % 4. - */ - return pci_spapr_swizzle(PCI_SLOT(pci_dev->devfn), irq_num); -} - static void pci_spapr_set_irq(void *opaque, int irq_num, int level) { /* - * Here we use the number returned by pci_spapr_map_irq to find a + * Here we use the number returned by pci_swizzle_map_irq_fn to find a * corresponding qemu_irq. */ - sPAPRPHBState *phb = opaque; + SpaprPhbState *phb = opaque; + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); trace_spapr_pci_lsi_set(phb->dtbusname, irq_num, phb->lsi_table[irq_num].irq); - qemu_set_irq(spapr_phb_lsi_qirq(phb, irq_num), level); + qemu_set_irq(spapr_qirq(spapr, phb->lsi_table[irq_num].irq), level); } static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin) { - sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(opaque); + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(opaque); PCIINTxRoute route; route.mode = PCI_INTX_ENABLED; @@ -752,6 +738,12 @@ static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin) return route; } +static uint64_t spapr_msi_read(void *opaque, hwaddr addr, unsigned size) +{ + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid access\n", __func__); + return 0; +} + /* * MSI/MSIX memory region implementation. * The handler handles both MSI and MSIX. @@ -760,7 +752,7 @@ static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin) static void spapr_msi_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) { - sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + SpaprMachineState *spapr = opaque; uint32_t irq = data; trace_spapr_pci_msi_write(addr, data, irq); @@ -769,8 +761,11 @@ static void spapr_msi_write(void *opaque, hwaddr addr, } static const MemoryRegionOps spapr_msi_ops = { - /* There is no .read as the read result is undefined by PCI spec */ - .read = NULL, + /* + * .read result is undefined by PCI spec. + * define .read method to avoid assert failure in memory_region_init_io + */ + .read = spapr_msi_read, .write = spapr_msi_write, .endianness = DEVICE_LITTLE_ENDIAN }; @@ -780,43 +775,44 @@ static const MemoryRegionOps spapr_msi_ops = { */ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) { - sPAPRPHBState *phb = opaque; + SpaprPhbState *phb = opaque; return &phb->iommu_as; } -static char *spapr_phb_vfio_get_loc_code(sPAPRPHBState *sphb, PCIDevice *pdev) +static const PCIIOMMUOps spapr_iommu_ops = { + .get_address_space = spapr_pci_dma_iommu, +}; + +static char *spapr_phb_vfio_get_loc_code(SpaprPhbState *sphb, PCIDevice *pdev) { - char *path = NULL, *buf = NULL, *host = NULL; + g_autofree char *path = NULL; + g_autofree char *host = NULL; + g_autofree char *devspec = NULL; + char *buf = NULL; /* Get the PCI VFIO host id */ host = object_property_get_str(OBJECT(pdev), "host", NULL); if (!host) { - goto err_out; + return NULL; } /* Construct the path of the file that will give us the DT location */ path = g_strdup_printf("/sys/bus/pci/devices/%s/devspec", host); - g_free(host); - if (!g_file_get_contents(path, &buf, NULL, NULL)) { - goto err_out; + if (!g_file_get_contents(path, &devspec, NULL, NULL)) { + return NULL; } - g_free(path); /* Construct and read from host device tree the loc-code */ - path = g_strdup_printf("/proc/device-tree%s/ibm,loc-code", buf); - g_free(buf); + g_free(path); + path = g_strdup_printf("/proc/device-tree%s/ibm,loc-code", devspec); if (!g_file_get_contents(path, &buf, NULL, NULL)) { - goto err_out; + return NULL; } return buf; - -err_out: - g_free(path); - return NULL; } -static char *spapr_phb_get_loc_code(sPAPRPHBState *sphb, PCIDevice *pdev) +static char *spapr_phb_get_loc_code(SpaprPhbState *sphb, PCIDevice *pdev) { char *buf; const char *devtype = "qemu"; @@ -850,7 +846,7 @@ static char *spapr_phb_get_loc_code(sPAPRPHBState *sphb, PCIDevice *pdev) #define b_fff(x) b_x((x), 8, 3) /* function number */ #define b_rrrrrrrr(x) b_x((x), 0, 8) /* register number */ -/* for 'reg'/'assigned-addresses' OF properties */ +/* for 'reg' OF properties */ #define RESOURCE_CELLS_SIZE 2 #define RESOURCE_CELLS_ADDRESS 3 @@ -864,17 +860,14 @@ typedef struct ResourceFields { typedef struct ResourceProps { ResourceFields reg[8]; - ResourceFields assigned[7]; uint32_t reg_len; - uint32_t assigned_len; } ResourceProps; -/* fill in the 'reg'/'assigned-resources' OF properties for +/* fill in the 'reg' OF properties for * a PCI device. 'reg' describes resource requirements for a - * device's IO/MEM regions, 'assigned-addresses' describes the - * actual resource assignments. + * device's IO/MEM regions. * - * the properties are arrays of ('phys-addr', 'size') pairs describing + * the property is an array of ('phys-addr', 'size') pairs describing * the addressable regions of the PCI device, where 'phys-addr' is a * RESOURCE_CELLS_ADDRESS-tuple of 32-bit integers corresponding to * (phys.hi, phys.mid, phys.lo), and 'size' is a @@ -903,18 +896,7 @@ typedef struct ResourceProps { * phys.mid and phys.lo correspond respectively to the hi/lo portions * of the actual address of the region. * - * how the phys-addr/size values are used differ slightly between - * 'reg' and 'assigned-addresses' properties. namely, 'reg' has - * an additional description for the config space region of the - * device, and in the case of QEMU has n=0 and phys.mid=phys.lo=0 - * to describe the region as relocatable, with an address-mapping - * that corresponds directly to the PHB's address space for the - * resource. 'assigned-addresses' always has n=1 set with an absolute - * address assigned for the resource. in general, 'assigned-addresses' - * won't be populated, since addresses for PCI devices are generally - * unmapped initially and left to the guest to assign. - * - * note also that addresses defined in these properties are, at least + * note also that addresses defined in this property are, at least * for PAPR guests, relative to the PHBs IO/MEM windows, and * correspond directly to the addresses in the BARs. * @@ -928,8 +910,8 @@ static void populate_resource_props(PCIDevice *d, ResourceProps *rp) uint32_t dev_id = (b_bbbbbbbb(bus_num) | b_ddddd(PCI_SLOT(d->devfn)) | b_fff(PCI_FUNC(d->devfn))); - ResourceFields *reg, *assigned; - int i, reg_idx = 0, assigned_idx = 0; + ResourceFields *reg; + int i, reg_idx = 0; /* config space region */ reg = &rp->reg[reg_idx++]; @@ -958,21 +940,9 @@ static void populate_resource_props(PCIDevice *d, ResourceProps *rp) reg->phys_lo = 0; reg->size_hi = cpu_to_be32(d->io_regions[i].size >> 32); reg->size_lo = cpu_to_be32(d->io_regions[i].size); - - if (d->io_regions[i].addr == PCI_BAR_UNMAPPED) { - continue; - } - - assigned = &rp->assigned[assigned_idx++]; - assigned->phys_hi = cpu_to_be32(reg->phys_hi | b_n(1)); - assigned->phys_mid = cpu_to_be32(d->io_regions[i].addr >> 32); - assigned->phys_lo = cpu_to_be32(d->io_regions[i].addr); - assigned->size_hi = reg->size_hi; - assigned->size_lo = reg->size_lo; } rp->reg_len = reg_idx * sizeof(ResourceFields); - rp->assigned_len = assigned_idx * sizeof(ResourceFields); } typedef struct PCIClass PCIClass; @@ -1183,8 +1153,8 @@ static const PCIClass pci_classes[] = { { "data-processing-controller", spc_subclass }, }; -static const char *pci_find_device_name(uint8_t class, uint8_t subclass, - uint8_t iface) +static const char *dt_name_from_class(uint8_t class, uint8_t subclass, + uint8_t iface) { const PCIClass *pclass; const PCISubClass *psubclass; @@ -1226,77 +1196,215 @@ static const char *pci_find_device_name(uint8_t class, uint8_t subclass, return name; } -static gchar *pci_get_node_name(PCIDevice *dev) +/* + * DRC helper functions + */ + +static uint32_t drc_id_from_devfn(SpaprPhbState *phb, + uint8_t chassis, int32_t devfn) +{ + return (phb->index << 16) | (chassis << 8) | devfn; +} + +static SpaprDrc *drc_from_devfn(SpaprPhbState *phb, + uint8_t chassis, int32_t devfn) +{ + return spapr_drc_by_id(TYPE_SPAPR_DRC_PCI, + drc_id_from_devfn(phb, chassis, devfn)); +} + +static uint8_t chassis_from_bus(PCIBus *bus) +{ + if (pci_bus_is_root(bus)) { + return 0; + } else { + PCIDevice *bridge = pci_bridge_get_device(bus); + + return object_property_get_uint(OBJECT(bridge), "chassis_nr", + &error_abort); + } +} + +static SpaprDrc *drc_from_dev(SpaprPhbState *phb, PCIDevice *dev) +{ + uint8_t chassis = chassis_from_bus(pci_get_bus(dev)); + + return drc_from_devfn(phb, chassis, dev->devfn); +} + +static void add_drcs(SpaprPhbState *phb, PCIBus *bus) +{ + Object *owner; + int i; + uint8_t chassis; + + if (!phb->dr_enabled) { + return; + } + + chassis = chassis_from_bus(bus); + + if (pci_bus_is_root(bus)) { + owner = OBJECT(phb); + } else { + owner = OBJECT(pci_bridge_get_device(bus)); + } + + for (i = 0; i < PCI_SLOT_MAX * PCI_FUNC_MAX; i++) { + spapr_dr_connector_new(owner, TYPE_SPAPR_DRC_PCI, + drc_id_from_devfn(phb, chassis, i)); + } +} + +static void remove_drcs(SpaprPhbState *phb, PCIBus *bus) +{ + int i; + uint8_t chassis; + + if (!phb->dr_enabled) { + return; + } + + chassis = chassis_from_bus(bus); + + for (i = PCI_SLOT_MAX * PCI_FUNC_MAX - 1; i >= 0; i--) { + SpaprDrc *drc = drc_from_devfn(phb, chassis, i); + + if (drc) { + object_unparent(OBJECT(drc)); + } + } +} + +typedef struct PciWalkFdt { + void *fdt; + int offset; + SpaprPhbState *sphb; + int err; +} PciWalkFdt; + +static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev, + void *fdt, int parent_offset); + +static void spapr_dt_pci_device_cb(PCIBus *bus, PCIDevice *pdev, + void *opaque) +{ + PciWalkFdt *p = opaque; + int err; + + if (p->err) { + /* Something's already broken, don't keep going */ + return; + } + + err = spapr_dt_pci_device(p->sphb, pdev, p->fdt, p->offset); + if (err < 0) { + p->err = err; + } +} + +/* Augment PCI device node with bridge specific information */ +static int spapr_dt_pci_bus(SpaprPhbState *sphb, PCIBus *bus, + void *fdt, int offset) { + Object *owner; + PciWalkFdt cbinfo = { + .fdt = fdt, + .offset = offset, + .sphb = sphb, + .err = 0, + }; + int ret; + + _FDT(fdt_setprop_cell(fdt, offset, "#address-cells", + RESOURCE_CELLS_ADDRESS)); + _FDT(fdt_setprop_cell(fdt, offset, "#size-cells", + RESOURCE_CELLS_SIZE)); + + assert(bus); + pci_for_each_device_under_bus_reverse(bus, spapr_dt_pci_device_cb, &cbinfo); + if (cbinfo.err) { + return cbinfo.err; + } + + if (pci_bus_is_root(bus)) { + owner = OBJECT(sphb); + } else { + owner = OBJECT(pci_bridge_get_device(bus)); + } + + ret = spapr_dt_drc(fdt, offset, owner, + SPAPR_DR_CONNECTOR_TYPE_PCI); + if (ret) { + return ret; + } + + return offset; +} + +char *spapr_pci_fw_dev_name(PCIDevice *dev) +{ + const gchar *basename; int slot = PCI_SLOT(dev->devfn); int func = PCI_FUNC(dev->devfn); uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3); - const char *name; - name = pci_find_device_name((ccode >> 16) & 0xff, (ccode >> 8) & 0xff, - ccode & 0xff); + basename = dt_name_from_class((ccode >> 16) & 0xff, (ccode >> 8) & 0xff, + ccode & 0xff); if (func != 0) { - return g_strdup_printf("%s@%x,%x", name, slot, func); + return g_strdup_printf("%s@%x,%x", basename, slot, func); } else { - return g_strdup_printf("%s@%x", name, slot); + return g_strdup_printf("%s@%x", basename, slot); } } -static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb, - PCIDevice *pdev); - -static void spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, - sPAPRPHBState *sphb) +/* create OF node for pci device and required OF DT properties */ +static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev, + void *fdt, int parent_offset) { + int offset; + g_autofree gchar *nodename = spapr_pci_fw_dev_name(dev); ResourceProps rp; - bool is_bridge = false; - int pci_status; - char *buf = NULL; - uint32_t drc_index = spapr_phb_get_pci_drc_index(sphb, dev); + SpaprDrc *drc = drc_from_dev(sphb, dev); + uint32_t vendor_id = pci_default_read_config(dev, PCI_VENDOR_ID, 2); + uint32_t device_id = pci_default_read_config(dev, PCI_DEVICE_ID, 2); + uint32_t revision_id = pci_default_read_config(dev, PCI_REVISION_ID, 1); uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3); - uint32_t max_msi, max_msix; + uint32_t irq_pin = pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1); + uint32_t subsystem_id = pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2); + uint32_t subsystem_vendor_id = + pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2); + uint32_t cache_line_size = + pci_default_read_config(dev, PCI_CACHE_LINE_SIZE, 1); + uint32_t pci_status = pci_default_read_config(dev, PCI_STATUS, 2); + gchar *loc_code; - if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) == - PCI_HEADER_TYPE_BRIDGE) { - is_bridge = true; - } + _FDT(offset = fdt_add_subnode(fdt, parent_offset, nodename)); /* in accordance with PAPR+ v2.7 13.6.3, Table 181 */ - _FDT(fdt_setprop_cell(fdt, offset, "vendor-id", - pci_default_read_config(dev, PCI_VENDOR_ID, 2))); - _FDT(fdt_setprop_cell(fdt, offset, "device-id", - pci_default_read_config(dev, PCI_DEVICE_ID, 2))); - _FDT(fdt_setprop_cell(fdt, offset, "revision-id", - pci_default_read_config(dev, PCI_REVISION_ID, 1))); - _FDT(fdt_setprop_cell(fdt, offset, "class-code", ccode)); - if (pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)) { - _FDT(fdt_setprop_cell(fdt, offset, "interrupts", - pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1))); - } + _FDT(fdt_setprop_cell(fdt, offset, "vendor-id", vendor_id)); + _FDT(fdt_setprop_cell(fdt, offset, "device-id", device_id)); + _FDT(fdt_setprop_cell(fdt, offset, "revision-id", revision_id)); - if (!is_bridge) { - _FDT(fdt_setprop_cell(fdt, offset, "min-grant", - pci_default_read_config(dev, PCI_MIN_GNT, 1))); - _FDT(fdt_setprop_cell(fdt, offset, "max-latency", - pci_default_read_config(dev, PCI_MAX_LAT, 1))); + _FDT(fdt_setprop_cell(fdt, offset, "class-code", ccode)); + if (irq_pin) { + _FDT(fdt_setprop_cell(fdt, offset, "interrupts", irq_pin)); } - if (pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2)) { - _FDT(fdt_setprop_cell(fdt, offset, "subsystem-id", - pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2))); + if (subsystem_id) { + _FDT(fdt_setprop_cell(fdt, offset, "subsystem-id", subsystem_id)); } - if (pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2)) { + if (subsystem_vendor_id) { _FDT(fdt_setprop_cell(fdt, offset, "subsystem-vendor-id", - pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2))); + subsystem_vendor_id)); } - _FDT(fdt_setprop_cell(fdt, offset, "cache-line-size", - pci_default_read_config(dev, PCI_CACHE_LINE_SIZE, 1))); + _FDT(fdt_setprop_cell(fdt, offset, "cache-line-size", cache_line_size)); + /* the following fdt cells are masked off the pci status register */ - pci_status = pci_default_read_config(dev, PCI_STATUS, 2); _FDT(fdt_setprop_cell(fdt, offset, "devsel-speed", PCI_STATUS_DEVSEL_MASK & pci_status)); @@ -1310,32 +1418,23 @@ static void spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, _FDT(fdt_setprop(fdt, offset, "udf-supported", NULL, 0)); } - _FDT(fdt_setprop_string(fdt, offset, "name", - pci_find_device_name((ccode >> 16) & 0xff, - (ccode >> 8) & 0xff, - ccode & 0xff))); - - buf = spapr_phb_get_loc_code(sphb, dev); - _FDT(fdt_setprop_string(fdt, offset, "ibm,loc-code", buf)); - g_free(buf); + loc_code = spapr_phb_get_loc_code(sphb, dev); + _FDT(fdt_setprop_string(fdt, offset, "ibm,loc-code", loc_code)); + g_free(loc_code); - if (drc_index) { - _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)); + if (drc) { + _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", + spapr_drc_index(drc))); } - _FDT(fdt_setprop_cell(fdt, offset, "#address-cells", - RESOURCE_CELLS_ADDRESS)); - _FDT(fdt_setprop_cell(fdt, offset, "#size-cells", - RESOURCE_CELLS_SIZE)); - if (msi_present(dev)) { - max_msi = msi_nr_vectors_allocated(dev); + uint32_t max_msi = msi_nr_vectors_allocated(dev); if (max_msi) { _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi", max_msi)); } } if (msix_present(dev)) { - max_msix = dev->msix_entries_nr; + uint32_t max_msix = dev->msix_entries_nr; if (max_msix) { _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi-x", max_msix)); } @@ -1343,28 +1442,23 @@ static void spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, populate_resource_props(dev, &rp); _FDT(fdt_setprop(fdt, offset, "reg", (uint8_t *)rp.reg, rp.reg_len)); - _FDT(fdt_setprop(fdt, offset, "assigned-addresses", - (uint8_t *)rp.assigned, rp.assigned_len)); if (sphb->pcie_ecs && pci_is_express(dev)) { _FDT(fdt_setprop_cell(fdt, offset, "ibm,pci-config-space-type", 0x1)); } -} -/* create OF node for pci device and required OF DT properties */ -static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev, - void *fdt, int node_offset) -{ - int offset; - gchar *nodename; - - nodename = pci_get_node_name(dev); - _FDT(offset = fdt_add_subnode(fdt, node_offset, nodename)); - g_free(nodename); - - spapr_populate_pci_child_dt(dev, fdt, offset, phb); + if (!IS_PCI_BRIDGE(dev)) { + /* Properties only for non-bridges */ + uint32_t min_grant = pci_default_read_config(dev, PCI_MIN_GNT, 1); + uint32_t max_latency = pci_default_read_config(dev, PCI_MAX_LAT, 1); + _FDT(fdt_setprop_cell(fdt, offset, "min-grant", min_grant)); + _FDT(fdt_setprop_cell(fdt, offset, "max-latency", max_latency)); + return offset; + } else { + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); - return offset; + return spapr_dt_pci_bus(sphb, sec_bus, fdt, offset); + } } /* Callback to be called during DRC release. */ @@ -1373,62 +1467,104 @@ void spapr_phb_remove_pci_device_cb(DeviceState *dev) HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev); hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); + object_unparent(OBJECT(dev)); } -static sPAPRDRConnector *spapr_phb_get_pci_func_drc(sPAPRPHBState *phb, - uint32_t busnr, - int32_t devfn) +int spapr_pci_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp) { - return spapr_drc_by_id(TYPE_SPAPR_DRC_PCI, - (phb->index << 16) | (busnr << 8) | devfn); + HotplugHandler *plug_handler = qdev_get_hotplug_handler(drc->dev); + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(plug_handler); + PCIDevice *pdev = PCI_DEVICE(drc->dev); + + *fdt_start_offset = spapr_dt_pci_device(sphb, pdev, fdt, 0); + return 0; } -static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb, - PCIDevice *pdev) +static void spapr_pci_bridge_plug(SpaprPhbState *phb, + PCIBridge *bridge) { - uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)))); - return spapr_phb_get_pci_func_drc(phb, busnr, pdev->devfn); + PCIBus *bus = pci_bridge_get_sec_bus(bridge); + + add_drcs(phb, bus); } -static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb, - PCIDevice *pdev) +/* Returns non-zero if the value of "chassis_nr" is already in use */ +static int check_chassis_nr(Object *obj, void *opaque) { - sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev); + int new_chassis_nr = + object_property_get_uint(opaque, "chassis_nr", &error_abort); + int chassis_nr = + object_property_get_uint(obj, "chassis_nr", NULL); + + if (!object_dynamic_cast(obj, TYPE_PCI_BRIDGE)) { + return 0; + } - if (!drc) { + /* Skip unsupported bridge types */ + if (!chassis_nr) { return 0; } - return spapr_drc_index(drc); + /* Skip self */ + if (obj == opaque) { + return 0; + } + + return chassis_nr == new_chassis_nr; } -static void spapr_pci_plug(HotplugHandler *plug_handler, - DeviceState *plugged_dev, Error **errp) +static bool bridge_has_valid_chassis_nr(Object *bridge, Error **errp) { - sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler)); + int chassis_nr = + object_property_get_uint(bridge, "chassis_nr", NULL); + + /* + * slotid_cap_init() already ensures that "chassis_nr" isn't null for + * standard PCI bridges, so this really tells if "chassis_nr" is present + * or not. + */ + if (!chassis_nr) { + error_setg(errp, "PCI Bridge lacks a \"chassis_nr\" property"); + error_append_hint(errp, "Try -device pci-bridge instead.\n"); + return false; + } + + /* We want unique values for "chassis_nr" */ + if (object_child_foreach_recursive(object_get_root(), check_chassis_nr, + bridge)) { + error_setg(errp, "Bridge chassis %d already in use", chassis_nr); + return false; + } + + return true; +} + +static void spapr_pci_pre_plug(HotplugHandler *plug_handler, + DeviceState *plugged_dev, Error **errp) +{ + SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler)); PCIDevice *pdev = PCI_DEVICE(plugged_dev); - sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev); - Error *local_err = NULL; + SpaprDrc *drc = drc_from_dev(phb, pdev); PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))); uint32_t slotnr = PCI_SLOT(pdev->devfn); - void *fdt = NULL; - int fdt_start_offset, fdt_size; - /* if DR is disabled we don't need to do anything in the case of - * hotplug or coldplug callbacks - */ if (!phb->dr_enabled) { /* if this is a hotplug operation initiated by the user * we need to let them know it's not enabled */ if (plugged_dev->hotplugged) { - error_setg(&local_err, QERR_BUS_NO_HOTPLUG, - object_get_typename(OBJECT(phb))); + error_setg(errp, QERR_BUS_NO_HOTPLUG, + phb->parent_obj.bus->qbus.name); + return; } - goto out; } - g_assert(drc); + if (IS_PCI_BRIDGE(plugged_dev)) { + if (!bridge_has_valid_chassis_nr(OBJECT(plugged_dev), errp)) { + return; + } + } /* Following the QEMU convention used for PCIe multifunction * hotplug, we do not allow functions to be hotplugged to a @@ -1436,20 +1572,43 @@ static void spapr_pci_plug(HotplugHandler *plug_handler, */ if (plugged_dev->hotplugged && bus->devices[PCI_DEVFN(slotnr, 0)] && PCI_FUNC(pdev->devfn) != 0) { - error_setg(&local_err, "PCI: slot %d function 0 already ocuppied by %s," + error_setg(errp, "PCI: slot %d function 0 already occupied by %s," " additional functions can no longer be exposed to guest.", slotnr, bus->devices[PCI_DEVFN(slotnr, 0)]->name); - goto out; } - fdt = create_device_tree(&fdt_size); - fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0); + if (drc && drc->dev) { + error_setg(errp, "PCI: slot %d already occupied by %s", slotnr, + pci_get_function_0(PCI_DEVICE(drc->dev))->name); + return; + } +} - spapr_drc_attach(drc, DEVICE(pdev), fdt, fdt_start_offset, &local_err); - if (local_err) { - goto out; +static void spapr_pci_plug(HotplugHandler *plug_handler, + DeviceState *plugged_dev, Error **errp) +{ + SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler)); + PCIDevice *pdev = PCI_DEVICE(plugged_dev); + SpaprDrc *drc = drc_from_dev(phb, pdev); + uint32_t slotnr = PCI_SLOT(pdev->devfn); + + /* + * If DR is disabled we don't need to do anything in the case of + * hotplug or coldplug callbacks. + */ + if (!phb->dr_enabled) { + return; } + g_assert(drc); + + if (IS_PCI_BRIDGE(plugged_dev)) { + spapr_pci_bridge_plug(phb, PCI_BRIDGE(plugged_dev)); + } + + /* spapr_pci_pre_plug() already checked the DRC is attachable */ + spapr_drc_attach(drc, DEVICE(pdev)); + /* If this is function 0, signal hotplug for all the device functions. * Otherwise defer sending the hotplug event. */ @@ -1457,14 +1616,14 @@ static void spapr_pci_plug(HotplugHandler *plug_handler, spapr_drc_reset(drc); } else if (PCI_FUNC(pdev->devfn) == 0) { int i; + uint8_t chassis = chassis_from_bus(pci_get_bus(pdev)); for (i = 0; i < 8; i++) { - sPAPRDRConnector *func_drc; - sPAPRDRConnectorClass *func_drck; - sPAPRDREntitySense state; + SpaprDrc *func_drc; + SpaprDrcClass *func_drck; + SpaprDREntitySense state; - func_drc = spapr_phb_get_pci_func_drc(phb, pci_bus_num(bus), - PCI_DEVFN(slotnr, i)); + func_drc = drc_from_devfn(phb, chassis, PCI_DEVFN(slotnr, i)); func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc); state = func_drck->dr_entity_sense(func_drc); @@ -1473,17 +1632,21 @@ static void spapr_pci_plug(HotplugHandler *plug_handler, } } } +} -out: - if (local_err) { - error_propagate(errp, local_err); - g_free(fdt); - } +static void spapr_pci_bridge_unplug(SpaprPhbState *phb, + PCIBridge *bridge) +{ + PCIBus *bus = pci_bridge_get_sec_bus(bridge); + + remove_drcs(phb, bus); } static void spapr_pci_unplug(HotplugHandler *plug_handler, DeviceState *plugged_dev, Error **errp) { + SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler)); + /* some version guests do not wait for completion of a device * cleanup (generally done asynchronously by the kernel) before * signaling to QEMU that the device is safe, but instead sleep @@ -1495,19 +1658,25 @@ static void spapr_pci_unplug(HotplugHandler *plug_handler, * an 'idle' state, as the device cleanup code expects. */ pci_device_reset(PCI_DEVICE(plugged_dev)); - object_unparent(OBJECT(plugged_dev)); + + if (IS_PCI_BRIDGE(plugged_dev)) { + spapr_pci_bridge_unplug(phb, PCI_BRIDGE(plugged_dev)); + return; + } + + qdev_unrealize(plugged_dev); } static void spapr_pci_unplug_request(HotplugHandler *plug_handler, DeviceState *plugged_dev, Error **errp) { - sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler)); + SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler)); PCIDevice *pdev = PCI_DEVICE(plugged_dev); - sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev); + SpaprDrc *drc = drc_from_dev(phb, pdev); if (!phb->dr_enabled) { error_setg(errp, QERR_BUS_NO_HOTPLUG, - object_get_typename(OBJECT(phb))); + phb->parent_obj.bus->qbus.name); return; } @@ -1515,40 +1684,49 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler, g_assert(drc->dev == plugged_dev); if (!spapr_drc_unplug_requested(drc)) { - PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))); uint32_t slotnr = PCI_SLOT(pdev->devfn); - sPAPRDRConnector *func_drc; - sPAPRDRConnectorClass *func_drck; - sPAPRDREntitySense state; + SpaprDrc *func_drc; + SpaprDrcClass *func_drck; + SpaprDREntitySense state; int i; + uint8_t chassis = chassis_from_bus(pci_get_bus(pdev)); + + if (IS_PCI_BRIDGE(plugged_dev)) { + error_setg(errp, "PCI: Hot unplug of PCI bridges not supported"); + return; + } + if (object_property_get_uint(OBJECT(pdev), "nvlink2-tgt", NULL)) { + error_setg(errp, "PCI: Cannot unplug NVLink2 devices"); + return; + } /* ensure any other present functions are pending unplug */ if (PCI_FUNC(pdev->devfn) == 0) { for (i = 1; i < 8; i++) { - func_drc = spapr_phb_get_pci_func_drc(phb, pci_bus_num(bus), - PCI_DEVFN(slotnr, i)); + func_drc = drc_from_devfn(phb, chassis, PCI_DEVFN(slotnr, i)); func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc); state = func_drck->dr_entity_sense(func_drc); if (state == SPAPR_DR_ENTITY_SENSE_PRESENT && !spapr_drc_unplug_requested(func_drc)) { - error_setg(errp, - "PCI: slot %d, function %d still present. " - "Must unplug all non-0 functions first.", - slotnr, i); - return; + /* + * Attempting to remove function 0 of a multifunction + * device will will cascade into removing all child + * functions, even if their unplug weren't requested + * beforehand. + */ + spapr_drc_unplug_request(func_drc); } } } - spapr_drc_detach(drc); + spapr_drc_unplug_request(drc); /* if this isn't func 0, defer unplug event. otherwise signal removal * for all present functions */ if (PCI_FUNC(pdev->devfn) == 0) { for (i = 7; i >= 0; i--) { - func_drc = spapr_phb_get_pci_func_drc(phb, pci_bus_num(bus), - PCI_DEVFN(slotnr, i)); + func_drc = drc_from_devfn(phb, chassis, PCI_DEVFN(slotnr, i)); func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc); state = func_drck->dr_entity_sense(func_drc); if (state == SPAPR_DR_ENTITY_SENSE_PRESENT) { @@ -1556,49 +1734,119 @@ static void spapr_pci_unplug_request(HotplugHandler *plug_handler, } } } + } else { + error_setg(errp, + "PCI device unplug already in progress for device %s", + drc->dev->id); } } +static void spapr_phb_finalizefn(Object *obj) +{ + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(obj); + + g_free(sphb->dtbusname); + sphb->dtbusname = NULL; +} + +static void spapr_phb_unrealize(DeviceState *dev) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + SysBusDevice *s = SYS_BUS_DEVICE(dev); + PCIHostState *phb = PCI_HOST_BRIDGE(s); + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(phb); + SpaprTceTable *tcet; + int i; + const unsigned windows_supported = spapr_phb_windows_supported(sphb); + + if (sphb->msi) { + g_hash_table_unref(sphb->msi); + sphb->msi = NULL; + } + + /* + * Remove IO/MMIO subregions and aliases, rest should get cleaned + * via PHB's unrealize->object_finalize + */ + for (i = windows_supported - 1; i >= 0; i--) { + tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[i]); + if (tcet) { + memory_region_del_subregion(&sphb->iommu_root, + spapr_tce_get_iommu(tcet)); + } + } + + remove_drcs(sphb, phb->bus); + + for (i = PCI_NUM_PINS - 1; i >= 0; i--) { + if (sphb->lsi_table[i].irq) { + spapr_irq_free(spapr, sphb->lsi_table[i].irq, 1); + sphb->lsi_table[i].irq = 0; + } + } + + QLIST_REMOVE(sphb, list); + + memory_region_del_subregion(&sphb->iommu_root, &sphb->msiwindow); + + /* + * An attached PCI device may have memory listeners, eg. VFIO PCI. We have + * unmapped all sections. Remove the listeners now, before destroying the + * address space. + */ + address_space_remove_listeners(&sphb->iommu_as); + address_space_destroy(&sphb->iommu_as); + + qbus_set_hotplug_handler(BUS(phb->bus), NULL); + pci_unregister_root_bus(phb->bus); + + memory_region_del_subregion(get_system_memory(), &sphb->iowindow); + if (sphb->mem64_win_pciaddr != (hwaddr)-1) { + memory_region_del_subregion(get_system_memory(), &sphb->mem64window); + } + memory_region_del_subregion(get_system_memory(), &sphb->mem32window); +} + +static void spapr_phb_destroy_msi(gpointer opaque) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + SpaprPciMsi *msi = opaque; + + if (!smc->legacy_irq_allocation) { + spapr_irq_msi_free(spapr, msi->first_irq, msi->num); + } + spapr_irq_free(spapr, msi->first_irq, msi->num); + g_free(msi); +} + static void spapr_phb_realize(DeviceState *dev, Error **errp) { + ERRP_GUARD(); /* We don't use SPAPR_MACHINE() in order to exit gracefully if the user * tries to add a sPAPR PHB to a non-pseries machine. */ - sPAPRMachineState *spapr = - (sPAPRMachineState *) object_dynamic_cast(qdev_get_machine(), + SpaprMachineState *spapr = + (SpaprMachineState *) object_dynamic_cast(qdev_get_machine(), TYPE_SPAPR_MACHINE); - sPAPRMachineClass *smc = spapr ? SPAPR_MACHINE_GET_CLASS(spapr) : NULL; - SysBusDevice *s = SYS_BUS_DEVICE(dev); - sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s); - PCIHostState *phb = PCI_HOST_BRIDGE(s); + SpaprMachineClass *smc = spapr ? SPAPR_MACHINE_GET_CLASS(spapr) : NULL; + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(sbd); + PCIHostState *phb = PCI_HOST_BRIDGE(sbd); + MachineState *ms = MACHINE(spapr); char *namebuf; int i; PCIBus *bus; uint64_t msi_window_size = 4096; - sPAPRTCETable *tcet; - const unsigned windows_supported = - sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1; + SpaprTceTable *tcet; + const unsigned windows_supported = spapr_phb_windows_supported(sphb); if (!spapr) { error_setg(errp, TYPE_SPAPR_PCI_HOST_BRIDGE " needs a pseries machine"); return; } - if (sphb->index != (uint32_t)-1) { - Error *local_err = NULL; - - smc->phb_placement(spapr, sphb->index, - &sphb->buid, &sphb->io_win_addr, - &sphb->mem_win_addr, &sphb->mem64_win_addr, - windows_supported, sphb->dma_liobn, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - } else { - error_setg(errp, "\"index\" for PAPR PHB is mandatory"); - return; - } + assert(sphb->index != (uint32_t)-1); /* checked in spapr_phb_pre_plug() */ if (sphb->mem64_win_size != 0) { if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) { @@ -1625,12 +1873,20 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) } if (spapr_pci_find_phb(spapr, sphb->buid)) { - error_setg(errp, "PCI host bridges must have unique BUIDs"); + SpaprPhbState *s; + + error_setg(errp, "PCI host bridges must have unique indexes"); + error_append_hint(errp, "The following indexes are already in use:"); + QLIST_FOREACH(s, &spapr->phbs, list) { + error_append_hint(errp, " %d", s->index); + } + error_append_hint(errp, "\nTry another value for the index property\n"); return; } if (sphb->numa_node != -1 && - (sphb->numa_node >= MAX_NODES || !numa_info[sphb->numa_node].present)) { + (sphb->numa_node >= MAX_NODES || + !ms->numa_state->nodes[sphb->numa_node].present)) { error_setg(errp, "Invalid NUMA node ID for PCI host bridge"); return; } @@ -1676,11 +1932,21 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) &sphb->iowindow); bus = pci_register_root_bus(dev, NULL, - pci_spapr_set_irq, pci_spapr_map_irq, sphb, + pci_spapr_set_irq, pci_swizzle_map_irq_fn, sphb, &sphb->memspace, &sphb->iospace, - PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS); + PCI_DEVFN(0, 0), PCI_NUM_PINS, + TYPE_PCI_BUS); + + /* + * Despite resembling a vanilla PCI bus in most ways, the PAPR + * para-virtualized PCI bus *does* permit PCI-E extended config + * space access + */ + if (sphb->pcie_ecs) { + bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE; + } phb->bus = bus; - qbus_set_hotplug_handler(BUS(phb->bus), DEVICE(sphb), NULL); + qbus_set_hotplug_handler(BUS(phb->bus), OBJECT(sphb)); /* * Initialize PHB address space. @@ -1707,18 +1973,16 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) * For KVM we want to ensure that this memory is a full page so that * our memory slot is of page size granularity. */ -#ifdef CONFIG_KVM if (kvm_enabled()) { - msi_window_size = getpagesize(); + msi_window_size = qemu_real_host_page_size(); } -#endif memory_region_init_io(&sphb->msiwindow, OBJECT(sphb), &spapr_msi_ops, spapr, "msi", msi_window_size); memory_region_add_subregion(&sphb->iommu_root, SPAPR_PCI_MSI_WINDOW, &sphb->msiwindow); - pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb); + pci_setup_iommu(bus, &spapr_iommu_ops, sphb); pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq); @@ -1726,34 +1990,30 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) /* Initialize the LSI table */ for (i = 0; i < PCI_NUM_PINS; i++) { - uint32_t irq = SPAPR_IRQ_PCI_LSI + sphb->index * PCI_NUM_PINS + i; - Error *local_err = NULL; + int irq = SPAPR_IRQ_PCI_LSI + sphb->index * PCI_NUM_PINS + i; if (smc->legacy_irq_allocation) { - irq = spapr_irq_findone(spapr, &local_err); - if (local_err) { - error_propagate_prepend(errp, local_err, - "can't allocate LSIs: "); + irq = spapr_irq_findone(spapr, errp); + if (irq < 0) { + error_prepend(errp, "can't allocate LSIs: "); + /* + * Older machines will never support PHB hotplug, ie, this is an + * init only path and QEMU will terminate. No need to rollback. + */ return; } } - spapr_irq_claim(spapr, irq, true, &local_err); - if (local_err) { - error_propagate_prepend(errp, local_err, "can't allocate LSIs: "); - return; + if (spapr_irq_claim(spapr, irq, true, errp) < 0) { + error_prepend(errp, "can't allocate LSIs: "); + goto unrealize; } sphb->lsi_table[i].irq = irq; } /* allocate connectors for child PCI devices */ - if (sphb->dr_enabled) { - for (i = 0; i < PCI_SLOT_MAX * 8; i++) { - spapr_dr_connector_new(OBJECT(phb), TYPE_SPAPR_DRC_PCI, - (sphb->index << 16) | i); - } - } + add_drcs(sphb, phb->bus); /* DMA setup */ for (i = 0; i < windows_supported; ++i) { @@ -1761,13 +2021,18 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) if (!tcet) { error_setg(errp, "Creating window#%d failed for %s", i, sphb->dtbusname); - return; + goto unrealize; } memory_region_add_subregion(&sphb->iommu_root, 0, spapr_tce_get_iommu(tcet)); } - sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free); + sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, + spapr_phb_destroy_msi); + return; + +unrealize: + spapr_phb_unrealize(dev); } static int spapr_phb_children_reset(Object *child, void *opaque) @@ -1775,16 +2040,16 @@ static int spapr_phb_children_reset(Object *child, void *opaque) DeviceState *dev = (DeviceState *) object_dynamic_cast(child, TYPE_DEVICE); if (dev) { - device_reset(dev); + device_cold_reset(dev); } return 0; } -void spapr_phb_dma_reset(sPAPRPHBState *sphb) +void spapr_phb_dma_reset(SpaprPhbState *sphb) { int i; - sPAPRTCETable *tcet; + SpaprTceTable *tcet; for (i = 0; i < SPAPR_PCI_DMA_MAX_WINDOWS; ++i) { tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[i]); @@ -1798,11 +2063,12 @@ void spapr_phb_dma_reset(sPAPRPHBState *sphb) tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[0]); spapr_tce_table_enable(tcet, SPAPR_TCE_PAGE_SHIFT, sphb->dma_win_addr, sphb->dma_win_size >> SPAPR_TCE_PAGE_SHIFT); + tcet->def_win = true; } static void spapr_phb_reset(DeviceState *qdev) { - sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(qdev); + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(qdev); spapr_phb_dma_reset(sphb); @@ -1812,31 +2078,36 @@ static void spapr_phb_reset(DeviceState *qdev) if (spapr_phb_eeh_available(SPAPR_PCI_HOST_BRIDGE(qdev))) { spapr_phb_vfio_reset(qdev); } + + g_hash_table_remove_all(sphb->msi); } static Property spapr_phb_properties[] = { - DEFINE_PROP_UINT32("index", sPAPRPHBState, index, -1), - DEFINE_PROP_UINT64("mem_win_size", sPAPRPHBState, mem_win_size, + DEFINE_PROP_UINT32("index", SpaprPhbState, index, -1), + DEFINE_PROP_UINT64("mem_win_size", SpaprPhbState, mem_win_size, SPAPR_PCI_MEM32_WIN_SIZE), - DEFINE_PROP_UINT64("mem64_win_size", sPAPRPHBState, mem64_win_size, + DEFINE_PROP_UINT64("mem64_win_size", SpaprPhbState, mem64_win_size, SPAPR_PCI_MEM64_WIN_SIZE), - DEFINE_PROP_UINT64("io_win_size", sPAPRPHBState, io_win_size, + DEFINE_PROP_UINT64("io_win_size", SpaprPhbState, io_win_size, SPAPR_PCI_IO_WIN_SIZE), - DEFINE_PROP_BOOL("dynamic-reconfiguration", sPAPRPHBState, dr_enabled, + DEFINE_PROP_BOOL("dynamic-reconfiguration", SpaprPhbState, dr_enabled, true), /* Default DMA window is 0..1GB */ - DEFINE_PROP_UINT64("dma_win_addr", sPAPRPHBState, dma_win_addr, 0), - DEFINE_PROP_UINT64("dma_win_size", sPAPRPHBState, dma_win_size, 0x40000000), - DEFINE_PROP_UINT64("dma64_win_addr", sPAPRPHBState, dma64_win_addr, + DEFINE_PROP_UINT64("dma_win_addr", SpaprPhbState, dma_win_addr, 0), + DEFINE_PROP_UINT64("dma_win_size", SpaprPhbState, dma_win_size, 0x40000000), + DEFINE_PROP_UINT64("dma64_win_addr", SpaprPhbState, dma64_win_addr, 0x800000000000000ULL), - DEFINE_PROP_BOOL("ddw", sPAPRPHBState, ddw_enabled, true), - DEFINE_PROP_UINT64("pgsz", sPAPRPHBState, page_size_mask, - (1ULL << 12) | (1ULL << 16)), - DEFINE_PROP_UINT32("numa_node", sPAPRPHBState, numa_node, -1), - DEFINE_PROP_BOOL("pre-2.8-migration", sPAPRPHBState, + DEFINE_PROP_BOOL("ddw", SpaprPhbState, ddw_enabled, true), + DEFINE_PROP_UINT64("pgsz", SpaprPhbState, page_size_mask, + (1ULL << 12) | (1ULL << 16) + | (1ULL << 21) | (1ULL << 24)), + DEFINE_PROP_UINT32("numa_node", SpaprPhbState, numa_node, -1), + DEFINE_PROP_BOOL("pre-2.8-migration", SpaprPhbState, pre_2_8_migration, false), - DEFINE_PROP_BOOL("pcie-extended-configuration-space", sPAPRPHBState, + DEFINE_PROP_BOOL("pcie-extended-configuration-space", SpaprPhbState, pcie_ecs, true), + DEFINE_PROP_BOOL("pre-5.1-associativity", SpaprPhbState, + pre_5_1_assoc, false), DEFINE_PROP_END_OF_LIST(), }; @@ -1844,8 +2115,8 @@ static const VMStateDescription vmstate_spapr_pci_lsi = { .name = "spapr_pci/lsi", .version_id = 1, .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_UINT32_EQUAL(irq, struct spapr_pci_lsi, NULL), + .fields = (const VMStateField[]) { + VMSTATE_UINT32_EQUAL(irq, SpaprPciLsi, NULL), VMSTATE_END_OF_LIST() }, @@ -1855,17 +2126,17 @@ static const VMStateDescription vmstate_spapr_pci_msi = { .name = "spapr_pci/msi", .version_id = 1, .minimum_version_id = 1, - .fields = (VMStateField []) { - VMSTATE_UINT32(key, spapr_pci_msi_mig), - VMSTATE_UINT32(value.first_irq, spapr_pci_msi_mig), - VMSTATE_UINT32(value.num, spapr_pci_msi_mig), + .fields = (const VMStateField []) { + VMSTATE_UINT32(key, SpaprPciMsiMig), + VMSTATE_UINT32(value.first_irq, SpaprPciMsiMig), + VMSTATE_UINT32(value.num, SpaprPciMsiMig), VMSTATE_END_OF_LIST() }, }; static int spapr_pci_pre_save(void *opaque) { - sPAPRPHBState *sphb = opaque; + SpaprPhbState *sphb = opaque; GHashTableIter iter; gpointer key, value; int i; @@ -1890,20 +2161,30 @@ static int spapr_pci_pre_save(void *opaque) if (!sphb->msi_devs_num) { return 0; } - sphb->msi_devs = g_new(spapr_pci_msi_mig, sphb->msi_devs_num); + sphb->msi_devs = g_new(SpaprPciMsiMig, sphb->msi_devs_num); g_hash_table_iter_init(&iter, sphb->msi); for (i = 0; g_hash_table_iter_next(&iter, &key, &value); ++i) { sphb->msi_devs[i].key = *(uint32_t *) key; - sphb->msi_devs[i].value = *(spapr_pci_msi *) value; + sphb->msi_devs[i].value = *(SpaprPciMsi *) value; } return 0; } +static int spapr_pci_post_save(void *opaque) +{ + SpaprPhbState *sphb = opaque; + + g_free(sphb->msi_devs); + sphb->msi_devs = NULL; + sphb->msi_devs_num = 0; + return 0; +} + static int spapr_pci_post_load(void *opaque, int version_id) { - sPAPRPHBState *sphb = opaque; + SpaprPhbState *sphb = opaque; gpointer key, value; int i; @@ -1923,7 +2204,7 @@ static int spapr_pci_post_load(void *opaque, int version_id) static bool pre_2_8_migration(void *opaque, int version_id) { - sPAPRPHBState *sphb = opaque; + SpaprPhbState *sphb = opaque; return sphb->pre_2_8_migration; } @@ -1933,19 +2214,20 @@ static const VMStateDescription vmstate_spapr_pci = { .version_id = 2, .minimum_version_id = 2, .pre_save = spapr_pci_pre_save, + .post_save = spapr_pci_post_save, .post_load = spapr_pci_post_load, - .fields = (VMStateField[]) { - VMSTATE_UINT64_EQUAL(buid, sPAPRPHBState, NULL), - VMSTATE_UINT32_TEST(mig_liobn, sPAPRPHBState, pre_2_8_migration), - VMSTATE_UINT64_TEST(mig_mem_win_addr, sPAPRPHBState, pre_2_8_migration), - VMSTATE_UINT64_TEST(mig_mem_win_size, sPAPRPHBState, pre_2_8_migration), - VMSTATE_UINT64_TEST(mig_io_win_addr, sPAPRPHBState, pre_2_8_migration), - VMSTATE_UINT64_TEST(mig_io_win_size, sPAPRPHBState, pre_2_8_migration), - VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0, - vmstate_spapr_pci_lsi, struct spapr_pci_lsi), - VMSTATE_INT32(msi_devs_num, sPAPRPHBState), - VMSTATE_STRUCT_VARRAY_ALLOC(msi_devs, sPAPRPHBState, msi_devs_num, 0, - vmstate_spapr_pci_msi, spapr_pci_msi_mig), + .fields = (const VMStateField[]) { + VMSTATE_UINT64_EQUAL(buid, SpaprPhbState, NULL), + VMSTATE_UINT32_TEST(mig_liobn, SpaprPhbState, pre_2_8_migration), + VMSTATE_UINT64_TEST(mig_mem_win_addr, SpaprPhbState, pre_2_8_migration), + VMSTATE_UINT64_TEST(mig_mem_win_size, SpaprPhbState, pre_2_8_migration), + VMSTATE_UINT64_TEST(mig_io_win_addr, SpaprPhbState, pre_2_8_migration), + VMSTATE_UINT64_TEST(mig_io_win_size, SpaprPhbState, pre_2_8_migration), + VMSTATE_STRUCT_ARRAY(lsi_table, SpaprPhbState, PCI_NUM_PINS, 0, + vmstate_spapr_pci_lsi, SpaprPciLsi), + VMSTATE_INT32(msi_devs_num, SpaprPhbState), + VMSTATE_STRUCT_VARRAY_ALLOC(msi_devs, SpaprPhbState, msi_devs_num, 0, + vmstate_spapr_pci_msi, SpaprPciMsiMig), VMSTATE_END_OF_LIST() }, }; @@ -1953,7 +2235,7 @@ static const VMStateDescription vmstate_spapr_pci = { static const char *spapr_phb_root_bus_path(PCIHostState *host_bridge, PCIBus *rootbus) { - sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(host_bridge); + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(host_bridge); return sphb->dtbusname; } @@ -1966,12 +2248,14 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data) hc->root_bus_path = spapr_phb_root_bus_path; dc->realize = spapr_phb_realize; - dc->props = spapr_phb_properties; + dc->unrealize = spapr_phb_unrealize; + device_class_set_props(dc, spapr_phb_properties); dc->reset = spapr_phb_reset; dc->vmsd = &vmstate_spapr_pci; /* Supported by TYPE_SPAPR_MACHINE */ dc->user_creatable = true; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); + hp->pre_plug = spapr_pci_pre_plug; hp->plug = spapr_pci_plug; hp->unplug = spapr_pci_unplug; hp->unplug_request = spapr_pci_unplug_request; @@ -1980,7 +2264,8 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data) static const TypeInfo spapr_phb_info = { .name = TYPE_SPAPR_PCI_HOST_BRIDGE, .parent = TYPE_PCI_HOST_BRIDGE, - .instance_size = sizeof(sPAPRPHBState), + .instance_size = sizeof(SpaprPhbState), + .instance_finalize = spapr_phb_finalizefn, .class_init = spapr_phb_class_init, .interfaces = (InterfaceInfo[]) { { TYPE_HOTPLUG_HANDLER }, @@ -1988,50 +2273,10 @@ static const TypeInfo spapr_phb_info = { } }; -typedef struct sPAPRFDT { - void *fdt; - int node_off; - sPAPRPHBState *sphb; -} sPAPRFDT; - -static void spapr_populate_pci_devices_dt(PCIBus *bus, PCIDevice *pdev, - void *opaque) -{ - PCIBus *sec_bus; - sPAPRFDT *p = opaque; - int offset; - sPAPRFDT s_fdt; - - offset = spapr_create_pci_child_dt(p->sphb, pdev, p->fdt, p->node_off); - if (!offset) { - error_report("Failed to create pci child device tree node"); - return; - } - - if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) != - PCI_HEADER_TYPE_BRIDGE)) { - return; - } - - sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev)); - if (!sec_bus) { - return; - } - - s_fdt.fdt = p->fdt; - s_fdt.node_off = offset; - s_fdt.sphb = p->sphb; - pci_for_each_device_reverse(sec_bus, pci_bus_num(sec_bus), - spapr_populate_pci_devices_dt, - &s_fdt); -} - static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev, void *opaque) { unsigned int *bus_no = opaque; - unsigned int primary = *bus_no; - unsigned int subordinate = 0xff; PCIBus *sec_bus = NULL; if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) != @@ -2040,7 +2285,7 @@ static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev, } (*bus_no)++; - pci_default_write_config(pdev, PCI_PRIMARY_BUS, primary, 1); + pci_default_write_config(pdev, PCI_PRIMARY_BUS, pci_dev_bus_num(pdev), 1); pci_default_write_config(pdev, PCI_SECONDARY_BUS, *bus_no, 1); pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1); @@ -2049,28 +2294,25 @@ static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev, return; } - pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, subordinate, 1); - pci_for_each_device(sec_bus, pci_bus_num(sec_bus), - spapr_phb_pci_enumerate_bridge, bus_no); + pci_for_each_device_under_bus(sec_bus, spapr_phb_pci_enumerate_bridge, + bus_no); pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1); } -static void spapr_phb_pci_enumerate(sPAPRPHBState *phb) +static void spapr_phb_pci_enumerate(SpaprPhbState *phb) { PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus; unsigned int bus_no = 0; - pci_for_each_device(bus, pci_bus_num(bus), - spapr_phb_pci_enumerate_bridge, - &bus_no); + pci_for_each_device_under_bus(bus, spapr_phb_pci_enumerate_bridge, + &bus_no); } -int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t xics_phandle, void *fdt, - uint32_t nr_msis) +int spapr_dt_phb(SpaprMachineState *spapr, SpaprPhbState *phb, + uint32_t intc_phandle, void *fdt, int *node_offset) { int bus_off, i, j, ret; - gchar *nodename; uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) }; struct { uint32_t hi; @@ -2106,35 +2348,30 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t xics_phandle, void *fdt, cpu_to_be32(RTAS_IBM_REMOVE_PE_DMA_WINDOW) }; uint32_t ddw_extensions[] = { - cpu_to_be32(1), - cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW) + cpu_to_be32(2), + cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW), + cpu_to_be32(1), /* 1: ibm,query-pe-dma-window 6 outputs, PAPR 2.8 */ }; - uint32_t associativity[] = {cpu_to_be32(0x4), - cpu_to_be32(0x0), - cpu_to_be32(0x0), - cpu_to_be32(0x0), - cpu_to_be32(phb->numa_node)}; - sPAPRTCETable *tcet; - PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus; - sPAPRFDT s_fdt; + SpaprTceTable *tcet; + SpaprDrc *drc; /* Start populating the FDT */ - nodename = g_strdup_printf("pci@%" PRIx64, phb->buid); - _FDT(bus_off = fdt_add_subnode(fdt, 0, nodename)); - g_free(nodename); + _FDT(bus_off = fdt_add_subnode(fdt, 0, phb->dtbusname)); + if (node_offset) { + *node_offset = bus_off; + } /* Write PHB properties */ _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci")); _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB")); - _FDT(fdt_setprop_cell(fdt, bus_off, "#address-cells", 0x3)); - _FDT(fdt_setprop_cell(fdt, bus_off, "#size-cells", 0x2)); _FDT(fdt_setprop_cell(fdt, bus_off, "#interrupt-cells", 0x1)); _FDT(fdt_setprop(fdt, bus_off, "used-by-rtas", NULL, 0)); _FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range, sizeof(bus_range))); _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof_ranges)); _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg))); _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1)); - _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", nr_msis)); + _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", + spapr_irq_nr_msis(spapr))); /* Dynamic DMA window */ if (phb->ddw_enabled) { @@ -2146,26 +2383,25 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t xics_phandle, void *fdt, /* Advertise NUMA via ibm,associativity */ if (phb->numa_node != -1) { - _FDT(fdt_setprop(fdt, bus_off, "ibm,associativity", associativity, - sizeof(associativity))); + spapr_numa_write_associativity_dt(spapr, fdt, bus_off, phb->numa_node); } /* Build the interrupt-map, this must matches what is done - * in pci_spapr_map_irq + * in pci_swizzle_map_irq_fn */ _FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask", &interrupt_map_mask, sizeof(interrupt_map_mask))); for (i = 0; i < PCI_SLOT_MAX; i++) { for (j = 0; j < PCI_NUM_PINS; j++) { uint32_t *irqmap = interrupt_map[i*PCI_NUM_PINS + j]; - int lsi_num = pci_spapr_swizzle(i, j); + int lsi_num = pci_swizzle(i, j); irqmap[0] = cpu_to_be32(b_ddddd(i)|b_fff(0)); irqmap[1] = 0; irqmap[2] = 0; irqmap[3] = cpu_to_be32(j+1); - irqmap[4] = cpu_to_be32(xics_phandle); - spapr_dt_xics_irq(&irqmap[5], phb->lsi_table[lsi_num].irq, true); + irqmap[4] = cpu_to_be32(intc_phandle); + spapr_dt_irq(&irqmap[5], phb->lsi_table[lsi_num].irq, true); } } /* Write interrupt map */ @@ -2180,21 +2416,21 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t xics_phandle, void *fdt, tcet->liobn, tcet->bus_offset, tcet->nb_table << tcet->page_shift); + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, phb->index); + if (drc) { + uint32_t drc_index = cpu_to_be32(spapr_drc_index(drc)); + + _FDT(fdt_setprop(fdt, bus_off, "ibm,my-drc-index", &drc_index, + sizeof(drc_index))); + } + /* Walk the bridges and program the bus numbers*/ spapr_phb_pci_enumerate(phb); _FDT(fdt_setprop_cell(fdt, bus_off, "qemu,phb-enumerated", 0x1)); - /* Populate tree nodes with PCI devices attached */ - s_fdt.fdt = fdt; - s_fdt.node_off = bus_off; - s_fdt.sphb = phb; - pci_for_each_device_reverse(bus, pci_bus_num(bus), - spapr_populate_pci_devices_dt, - &s_fdt); - - ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb), - SPAPR_DR_CONNECTOR_TYPE_PCI); - if (ret) { + /* Walk the bridge and subordinate buses */ + ret = spapr_dt_pci_bus(phb, PCI_HOST_BRIDGE(phb)->bus, fdt, bus_off); + if (ret < 0) { return ret; } @@ -2251,17 +2487,18 @@ static int spapr_switch_one_vga(DeviceState *dev, void *opaque) bool be = *(bool *)opaque; if (object_dynamic_cast(OBJECT(dev), "VGA") - || object_dynamic_cast(OBJECT(dev), "secondary-vga")) { - object_property_set_bool(OBJECT(dev), be, "big-endian-framebuffer", + || object_dynamic_cast(OBJECT(dev), "secondary-vga") + || object_dynamic_cast(OBJECT(dev), "bochs-display") + || object_dynamic_cast(OBJECT(dev), "virtio-vga")) { + object_property_set_bool(OBJECT(dev), "big-endian-framebuffer", be, &error_abort); } return 0; } -void spapr_pci_switch_vga(bool big_endian) +void spapr_pci_switch_vga(SpaprMachineState *spapr, bool big_endian) { - sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); - sPAPRPHBState *sphb; + SpaprPhbState *sphb; /* * For backward compatibility with existing guests, we switch |