aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/platforms/powernv
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/powernv')
-rw-r--r-- arch/powerpc/platforms/powernv/Kconfig | 2
-rw-r--r-- arch/powerpc/platforms/powernv/eeh-powernv.c | 8
-rw-r--r-- arch/powerpc/platforms/powernv/npu-dma.c | 564
-rw-r--r-- arch/powerpc/platforms/powernv/opal-power.c | 3
-rw-r--r-- arch/powerpc/platforms/powernv/opal.c | 5
-rw-r--r-- arch/powerpc/platforms/powernv/pci-ioda-tce.c | 5
-rw-r--r-- arch/powerpc/platforms/powernv/pci-ioda.c | 265
-rw-r--r-- arch/powerpc/platforms/powernv/pci.c | 49
-rw-r--r-- arch/powerpc/platforms/powernv/pci.h | 36
-rw-r--r-- arch/powerpc/platforms/powernv/vas-debug.c | 28
10 files changed, 641 insertions, 324 deletions
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 99083fe992d5..850eee860cf2 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -7,7 +7,7 @@ config PPC_POWERNV
select PPC_ICP_NATIVE
select PPC_XIVE_NATIVE
select PPC_P7_NAP
- select PCI
+ select FORCE_PCI
select PCI_MSI
select EPAPR_BOOT
select PPC_INDIRECT_PIO
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index abc0be7507c8..f38078976c5d 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -564,8 +564,8 @@ static void pnv_eeh_get_phb_diag(struct eeh_pe *pe)
static int pnv_eeh_get_phb_state(struct eeh_pe *pe)
{
struct pnv_phb *phb = pe->phb->private_data;
- u8 fstate;
- __be16 pcierr;
+ u8 fstate = 0;
+ __be16 pcierr = 0;
s64 rc;
int result = 0;
@@ -603,8 +603,8 @@ static int pnv_eeh_get_phb_state(struct eeh_pe *pe)
static int pnv_eeh_get_pe_state(struct eeh_pe *pe)
{
struct pnv_phb *phb = pe->phb->private_data;
- u8 fstate;
- __be16 pcierr;
+ u8 fstate = 0;
+ __be16 pcierr = 0;
s64 rc;
int result;
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 75b935252981..d7f742ed48ba 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -9,32 +9,19 @@
* License as published by the Free Software Foundation.
*/
-#include <linux/slab.h>
#include <linux/mmu_notifier.h>
#include <linux/mmu_context.h>
#include <linux/of.h>
-#include <linux/export.h>
#include <linux/pci.h>
#include <linux/memblock.h>
-#include <linux/iommu.h>
#include <linux/sizes.h>
#include <asm/debugfs.h>
-#include <asm/tlb.h>
#include <asm/powernv.h>
-#include <asm/reg.h>
-#include <asm/opal.h>
-#include <asm/io.h>
-#include <asm/iommu.h>
-#include <asm/pnv-pci.h>
-#include <asm/msi_bitmap.h>
#include <asm/opal.h>
-#include "powernv.h"
#include "pci.h"
-#define npu_to_phb(x) container_of(x, struct pnv_phb, npu)
-
/*
* spinlock to protect initialisation of an npu_context for a particular
* mm_struct.
@@ -133,15 +120,25 @@ static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe,
return pe;
}
-long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num,
+static long pnv_npu_unset_window(struct iommu_table_group *table_group,
+ int num);
+
+static long pnv_npu_set_window(struct iommu_table_group *table_group, int num,
struct iommu_table *tbl)
{
+ struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
+ table_group);
struct pnv_phb *phb = npe->phb;
int64_t rc;
const unsigned long size = tbl->it_indirect_levels ?
tbl->it_level_size : tbl->it_size;
const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
const __u64 win_size = tbl->it_size << tbl->it_page_shift;
+ int num2 = (num == 0) ? 1 : 0;
+
+ /* NPU has just one TVE so if there is another table, remove it first */
+ if (npe->table_group.tables[num2])
+ pnv_npu_unset_window(&npe->table_group, num2);
pe_info(npe, "Setting up window %llx..%llx pg=%lx\n",
start_addr, start_addr + win_size - 1,
@@ -167,11 +164,16 @@ long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num,
return 0;
}
-long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num)
+static long pnv_npu_unset_window(struct iommu_table_group *table_group, int num)
{
+ struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
+ table_group);
struct pnv_phb *phb = npe->phb;
int64_t rc;
+ if (!npe->table_group.tables[num])
+ return 0;
+
pe_info(npe, "Removing DMA window\n");
rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
@@ -210,7 +212,8 @@ static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe)
if (!gpe)
return;
- rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]);
+ rc = pnv_npu_set_window(&npe->table_group, 0,
+ gpe->table_group.tables[0]);
/*
* NVLink devices use the same TCE table configuration as
@@ -235,7 +238,7 @@ static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe)
if (phb->type != PNV_PHB_NPU_NVLINK || !npe->pdev)
return -EINVAL;
- rc = pnv_npu_unset_window(npe, 0);
+ rc = pnv_npu_unset_window(&npe->table_group, 0);
if (rc != OPAL_SUCCESS)
return rc;
@@ -288,11 +291,15 @@ void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass)
}
}
+#ifdef CONFIG_IOMMU_API
/* Switch ownership from platform code to external user (e.g. VFIO) */
-void pnv_npu_take_ownership(struct pnv_ioda_pe *npe)
+static void pnv_npu_take_ownership(struct iommu_table_group *table_group)
{
+ struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
+ table_group);
struct pnv_phb *phb = npe->phb;
int64_t rc;
+ struct pci_dev *gpdev = NULL;
/*
* Note: NPU has just a single TVE in the hardware which means that
@@ -301,7 +308,7 @@ void pnv_npu_take_ownership(struct pnv_ioda_pe *npe)
* if it was enabled at the moment of ownership change.
*/
if (npe->table_group.tables[0]) {
- pnv_npu_unset_window(npe, 0);
+ pnv_npu_unset_window(&npe->table_group, 0);
return;
}
@@ -314,30 +321,315 @@ void pnv_npu_take_ownership(struct pnv_ioda_pe *npe)
return;
}
pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false);
+
+ get_gpu_pci_dev_and_pe(npe, &gpdev);
+ if (gpdev)
+ pnv_npu2_unmap_lpar_dev(gpdev);
}
-struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe)
+/*
+ * Hand the NPU PE back from an external user to the platform code: if a GPU
+ * device is linked to this NPU PE, map it to LPAR 0 again with the
+ * DR/PR/HV MSR bits. The result of the mapping call is not checked.
+ */
+static void pnv_npu_release_ownership(struct iommu_table_group *table_group)
{
- struct pnv_phb *phb = npe->phb;
- struct pci_bus *pbus = phb->hose->bus;
- struct pci_dev *npdev, *gpdev = NULL, *gptmp;
- struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
+	struct pci_dev *gpu_dev = NULL;
+	struct pnv_ioda_pe *npu_pe;
+
+	npu_pe = container_of(table_group, struct pnv_ioda_pe, table_group);
+	get_gpu_pci_dev_and_pe(npu_pe, &gpu_dev);
+	if (!gpu_dev)
+		return;
+
+	pnv_npu2_map_lpar_dev(gpu_dev, 0, MSR_DR | MSR_PR | MSR_HV);
+}
+
+/* Table group callbacks assigned to an NVLink bridge (NPU) PE */
+static struct iommu_table_group_ops pnv_pci_npu_ops = {
+	.take_ownership		= pnv_npu_take_ownership,
+	.release_ownership	= pnv_npu_release_ownership,
+	.set_window		= pnv_npu_set_window,
+	.unset_window		= pnv_npu_unset_window,
+};
+#endif /* CONFIG_IOMMU_API */
+
+/*
+ * NPU2 ATS
+ */
+/* Maximum possible number of ATSD MMIO registers per NPU */
+#define NV_NMMU_ATSD_REGS 8
+#define NV_NPU_MAX_PE_NUM 16
+
+/*
+ * A compound NPU IOMMU group which might consist of 1 GPU + 2xNPUs (POWER8) or
+ * up to 3 x (GPU + 2xNPUs) (POWER9).
+ */
+struct npu_comp {
+ /* Embedded group; callbacks map it back to the npu_comp via container_of() */
+ struct iommu_table_group table_group;
+ /* Number of entries of pe[] currently in use */
+ int pe_num;
+ /* Member PEs, appended in order by pnv_comp_attach_table_group() */
+ struct pnv_ioda_pe *pe[NV_NPU_MAX_PE_NUM];
+};
+
+/* An NPU descriptor, valid for POWER9 only */
+struct npu {
+ /* Index assigned by pnv_npu2_init() from its static counter */
+ int index;
+ /* ATSD registers ioremap()ed from the "ibm,mmio-atsd" property */
+ __be64 *mmio_atsd_regs[NV_NMMU_ATSD_REGS];
+ /* Number of entries of mmio_atsd_regs[] that were filled in */
+ unsigned int mmio_atsd_count;
+
+ /* Bitmask for MMIO register usage */
+ unsigned long mmio_atsd_usage;
+
+ /* Do we need to explicitly flush the nest mmu? */
+ bool nmmu_flush;
+
+ /* Compound table group used by pnv_try_setup_npu_table_group() when hose->npu is set */
+ struct npu_comp npucomp;
+};
+
+#ifdef CONFIG_IOMMU_API
+/*
+ * Create a TCE table for the compound group by delegating to the
+ * create_table callback of the first attached PE.
+ *
+ * Returns the callback's result, or -EFAULT when the compound has no PE or
+ * the first PE provides no create_table callback.
+ */
+static long pnv_npu_peers_create_table_userspace(
+		struct iommu_table_group *table_group,
+		int num, __u32 page_shift, __u64 window_size, __u32 levels,
+		struct iommu_table **ptbl)
+{
+	struct npu_comp *comp = container_of(table_group, struct npu_comp,
+			table_group);
+	struct iommu_table_group *first;
+
+	if (!comp->pe_num || !comp->pe[0])
+		return -EFAULT;
+
+	first = &comp->pe[0]->table_group;
+	if (!first->ops || !first->ops->create_table)
+		return -EFAULT;
+
+	return first->ops->create_table(first, num, page_shift, window_size,
+			levels, ptbl);
+}
+
+/*
+ * Program window @num with @tbl on every PE of the compound group.
+ *
+ * On success a reference to @tbl is taken and stored in the compound's own
+ * tables[] slot. If any PE fails, the PEs that were already programmed are
+ * rolled back (best effort) and the error of the failing set_window call is
+ * returned; a successful rollback must not turn the failure into success.
+ */
+static long pnv_npu_peers_set_window(struct iommu_table_group *table_group,
+		int num, struct iommu_table *tbl)
+{
+	int i, j;
+	long ret = 0;
+	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
+			table_group);
+
+	for (i = 0; i < npucomp->pe_num; ++i) {
+		struct pnv_ioda_pe *pe = npucomp->pe[i];
+
+		if (!pe->table_group.ops->set_window)
+			continue;
+
+		ret = pe->table_group.ops->set_window(&pe->table_group,
+				num, tbl);
+		if (ret)
+			break;
+	}
+
+	if (!ret) {
+		table_group->tables[num] = iommu_tce_table_get(tbl);
+		return 0;
+	}
+
+	/* Undo PEs [0, i); keep the original error code in ret */
+	for (j = 0; j < i; ++j) {
+		struct pnv_ioda_pe *pe = npucomp->pe[j];
+
+		if (!pe->table_group.ops->unset_window)
+			continue;
+
+		/* Stop at the first rollback failure, as before */
+		if (pe->table_group.ops->unset_window(&pe->table_group, num))
+			break;
+	}
+
+	return ret;
+}
- if (!gpe || !gpdev)
+/*
+ * Remove window @num from every PE of the compound group.
+ *
+ * On success the compound's reference to the table is dropped and its
+ * tables[] slot is cleared. If any PE fails, the PEs that were already
+ * unset get the table programmed back (best effort) and the error of the
+ * failing unset_window call is returned; a successful rollback must not
+ * turn the failure into success.
+ */
+static long pnv_npu_peers_unset_window(struct iommu_table_group *table_group,
+		int num)
+{
+	int i, j;
+	long ret = 0;
+	struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
+			table_group);
+
+	for (i = 0; i < npucomp->pe_num; ++i) {
+		struct pnv_ioda_pe *pe = npucomp->pe[i];
+
+		WARN_ON(npucomp->table_group.tables[num] !=
+				table_group->tables[num]);
+		if (!npucomp->table_group.tables[num])
+			continue;
+
+		if (!pe->table_group.ops->unset_window)
+			continue;
+
+		ret = pe->table_group.ops->unset_window(&pe->table_group, num);
+		if (ret)
+			break;
+	}
+
+	if (!ret) {
+		if (table_group->tables[num]) {
+			iommu_tce_table_put(table_group->tables[num]);
+			table_group->tables[num] = NULL;
+		}
+		return 0;
+	}
+
+	/* Restore PEs [0, i); keep the original error code in ret */
+	for (j = 0; j < i; ++j) {
+		struct pnv_ioda_pe *pe = npucomp->pe[j];
+
+		if (!npucomp->table_group.tables[num])
+			continue;
+
+		if (!pe->table_group.ops->set_window)
+			continue;
+
+		/* Stop at the first rollback failure, as before */
+		if (pe->table_group.ops->set_window(&pe->table_group,
+				num, table_group->tables[num]))
+			break;
+	}
+
+	return ret;
+}
+
+/*
+ * Forward take_ownership to every PE of the compound group that
+ * implements the callback.
+ */
+static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group)
+{
+	struct npu_comp *comp = container_of(table_group, struct npu_comp,
+			table_group);
+	int idx;
+
+	for (idx = 0; idx < comp->pe_num; ++idx) {
+		struct iommu_table_group *member = &comp->pe[idx]->table_group;
+
+		if (member->ops->take_ownership)
+			member->ops->take_ownership(member);
+	}
+}
+
+/*
+ * Forward release_ownership to every PE of the compound group that
+ * implements the callback.
+ */
+static void pnv_npu_peers_release_ownership(
+		struct iommu_table_group *table_group)
+{
+	struct npu_comp *comp = container_of(table_group, struct npu_comp,
+			table_group);
+	int idx;
+
+	for (idx = 0; idx < comp->pe_num; ++idx) {
+		struct iommu_table_group *member = &comp->pe[idx]->table_group;
+
+		if (member->ops->release_ownership)
+			member->ops->release_ownership(member);
+	}
+}
+
+/* Callbacks of the compound group; most fan out to the member PEs */
+static struct iommu_table_group_ops pnv_npu_peers_ops = {
+	.take_ownership		= pnv_npu_peers_take_ownership,
+	.release_ownership	= pnv_npu_peers_release_ownership,
+	.get_table_size		= pnv_pci_ioda2_get_table_size,
+	.create_table		= pnv_npu_peers_create_table_userspace,
+	.set_window		= pnv_npu_peers_set_window,
+	.unset_window		= pnv_npu_peers_unset_window,
+};
+
+/*
+ * Append @pe to the compound's PE array. Warns and leaves the compound
+ * untouched when all NV_NPU_MAX_PE_NUM slots are already taken.
+ */
+static void pnv_comp_attach_table_group(struct npu_comp *npucomp,
+		struct pnv_ioda_pe *pe)
+{
+	if (WARN_ON(npucomp->pe_num == NV_NPU_MAX_PE_NUM))
+		return;
+
+	npucomp->pe[npucomp->pe_num++] = pe;
+}
+
+/*
+ * Find or create the compound NPU table group for the GPU PE @pe and attach
+ * @pe to it.
+ *
+ * The devices on the PE's bus are scanned for one with a linked NPU device.
+ * When the NPU's host controller carries an NPU descriptor (hose->npu), the
+ * compound embedded in that descriptor is used and registered on first use;
+ * otherwise a per-PE compound is allocated.
+ *
+ * Returns the compound's table group, or NULL when no device on the bus has
+ * a linked NPU or when the per-PE compound cannot be allocated.
+ */
+struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
+{
+	struct iommu_table_group *table_group;
+	struct npu_comp *npucomp;
+	struct pci_dev *gpdev = NULL;
+	struct pci_controller *hose;
+	struct pci_dev *npdev = NULL;
+
+	list_for_each_entry(gpdev, &pe->pbus->devices, bus_list) {
+		npdev = pnv_pci_get_npu_dev(gpdev, 0);
+		if (npdev)
+			break;
+	}
+
+	if (!npdev)
+		/* It is not an NPU attached device, skip */
+		return NULL;
+
+	hose = pci_bus_to_host(npdev->bus);
+
+	if (hose->npu) {
+		table_group = &hose->npu->npucomp.table_group;
+
+		if (!table_group->group) {
+			table_group->ops = &pnv_npu_peers_ops;
+			iommu_register_group(table_group,
+					hose->global_number,
+					pe->pe_number);
+		}
+	} else {
+		/*
+		 * Create a group for 1 GPU and attached NPUs for POWER8.
+		 * Allocate the whole structure: sizeof(pe->npucomp) would
+		 * only be the size of the pointer.
+		 */
+		pe->npucomp = kzalloc(sizeof(*pe->npucomp), GFP_KERNEL);
+		if (WARN_ON(!pe->npucomp))
+			/* NOTE(review): caller sees this as "no NPU"; confirm */
+			return NULL;
+
+		table_group = &pe->npucomp->table_group;
+		table_group->ops = &pnv_npu_peers_ops;
+		iommu_register_group(table_group, hose->global_number,
+				pe->pe_number);
+	}
+
+	/* Steal capabilities from a GPU PE */
+	table_group->max_dynamic_windows_supported =
+		pe->table_group.max_dynamic_windows_supported;
+	table_group->tce32_start = pe->table_group.tce32_start;
+	table_group->tce32_size = pe->table_group.tce32_size;
+	table_group->max_levels = pe->table_group.max_levels;
+	/* Keep page sizes that were already set on the compound */
+	if (!table_group->pgsizes)
+		table_group->pgsizes = pe->table_group.pgsizes;
+
+	npucomp = container_of(table_group, struct npu_comp, table_group);
+	pnv_comp_attach_table_group(npucomp, pe);
+
+	return table_group;
+}
+
+/*
+ * Attach the NVLink bridge PE @pe to the compound table group of the GPU it
+ * is linked to, and add every NPU device on the bridge's root bus that links
+ * to the same GPU to that group.
+ *
+ * Returns the compound's table group, or NULL when no GPU PE is found or
+ * the GPU device has no IOMMU group.
+ */
+struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe)
+{
+	struct iommu_table_group *table_group;
+	struct iommu_group *grp;
+	struct npu_comp *npucomp;
+	struct pci_dev *gpdev = NULL;
+	struct pci_dev *npdev;
+	struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(pe, &gpdev);
+
+	WARN_ON(!(pe->flags & PNV_IODA_PE_DEV));
+	if (!gpe)
return NULL;
- list_for_each_entry(npdev, &pbus->devices, bus_list) {
- gptmp = pnv_pci_get_gpu_dev(npdev);
+	/*
+	 * IODA2 bridges get this set up from pci_controller_ops::setup_bridge
+	 * but NPU bridges do not have this hook defined so we do it here.
+	 * We do not setup other table group parameters as they won't be used
+	 * anyway - NVLink bridges are subordinate PEs.
+	 */
+	pe->table_group.ops = &pnv_pci_npu_ops;
+
+	/*
+	 * iommu_group_get() takes a reference on the group; drop it once the
+	 * group data has been looked up so the reference is not leaked.
+	 */
+	grp = iommu_group_get(&gpdev->dev);
+	if (WARN_ON(!grp))
+		return NULL;
+
+	table_group = iommu_group_get_iommudata(grp);
+	iommu_group_put(grp);
+
+	/*
+	 * On P9 NPU PHB and PCI PHB support different page sizes,
+	 * keep only the matching ones. We expect here that the NVLink
+	 * bridge PE pgsizes is initialized by the caller.
+	 */
+	table_group->pgsizes &= pe->table_group.pgsizes;
+	npucomp = container_of(table_group, struct npu_comp, table_group);
+	pnv_comp_attach_table_group(npucomp, pe);
+
+	list_for_each_entry(npdev, &pe->phb->hose->bus->devices, bus_list) {
+		struct pci_dev *gpdevtmp = pnv_pci_get_gpu_dev(npdev);
- if (gptmp != gpdev)
+		if (gpdevtmp != gpdev)
continue;
- pe_info(gpe, "Attached NPU %s\n", dev_name(&npdev->dev));
- iommu_group_add_device(gpe->table_group.group, &npdev->dev);
+		iommu_add_device(table_group, &npdev->dev);
}
- return gpe;
+	return table_group;
}
+#endif /* CONFIG_IOMMU_API */
/* Maximum number of nvlinks per npu */
#define NV_MAX_LINKS 6
@@ -490,7 +782,6 @@ static void acquire_atsd_reg(struct npu_context *npu_context,
int i, j;
struct npu *npu;
struct pci_dev *npdev;
- struct pnv_phb *nphb;
for (i = 0; i <= max_npu2_index; i++) {
mmio_atsd_reg[i].reg = -1;
@@ -505,8 +796,10 @@ static void acquire_atsd_reg(struct npu_context *npu_context,
if (!npdev)
continue;
- nphb = pci_bus_to_host(npdev->bus)->private_data;
- npu = &nphb->npu;
+ npu = pci_bus_to_host(npdev->bus)->npu;
+ if (!npu)
+ continue;
+
mmio_atsd_reg[i].npu = npu;
mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
while (mmio_atsd_reg[i].reg < 0) {
@@ -671,9 +964,9 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
u32 nvlink_index;
struct device_node *nvlink_dn;
struct mm_struct *mm = current->mm;
- struct pnv_phb *nphb;
struct npu *npu;
struct npu_context *npu_context;
+ struct pci_controller *hose;
/*
* At present we don't support GPUs connected to multiple NPUs and I'm
@@ -681,13 +974,14 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
*/
struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
- if (!firmware_has_feature(FW_FEATURE_OPAL))
- return ERR_PTR(-ENODEV);
-
if (!npdev)
/* No nvlink associated with this GPU device */
return ERR_PTR(-ENODEV);
+ /* We only support DR/PR/HV in pnv_npu2_map_lpar_dev() */
+ if (flags & ~(MSR_DR | MSR_PR | MSR_HV))
+ return ERR_PTR(-EINVAL);
+
nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
&nvlink_index)))
@@ -701,20 +995,10 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
return ERR_PTR(-EINVAL);
}
- nphb = pci_bus_to_host(npdev->bus)->private_data;
- npu = &nphb->npu;
-
- /*
- * Setup the NPU context table for a particular GPU. These need to be
- * per-GPU as we need the tables to filter ATSDs when there are no
- * active contexts on a particular GPU. It is safe for these to be
- * called concurrently with destroy as the OPAL call takes appropriate
- * locks and refcounts on init/destroy.
- */
- rc = opal_npu_init_context(nphb->opal_id, mm->context.id, flags,
- PCI_DEVID(gpdev->bus->number, gpdev->devfn));
- if (rc < 0)
- return ERR_PTR(-ENOSPC);
+ hose = pci_bus_to_host(npdev->bus);
+ npu = hose->npu;
+ if (!npu)
+ return ERR_PTR(-ENODEV);
/*
* We store the npu pci device so we can more easily get at the
@@ -726,9 +1010,6 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
if (npu_context->release_cb != cb ||
npu_context->priv != priv) {
spin_unlock(&npu_context_lock);
- opal_npu_destroy_context(nphb->opal_id, mm->context.id,
- PCI_DEVID(gpdev->bus->number,
- gpdev->devfn));
return ERR_PTR(-EINVAL);
}
@@ -754,9 +1035,6 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
if (rc) {
kfree(npu_context);
- opal_npu_destroy_context(nphb->opal_id, mm->context.id,
- PCI_DEVID(gpdev->bus->number,
- gpdev->devfn));
return ERR_PTR(rc);
}
@@ -776,7 +1054,7 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
*/
WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev);
- if (!nphb->npu.nmmu_flush) {
+ if (!npu->nmmu_flush) {
/*
* If we're not explicitly flushing ourselves we need to mark
* the thread for global flushes
@@ -809,27 +1087,24 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context,
struct pci_dev *gpdev)
{
int removed;
- struct pnv_phb *nphb;
struct npu *npu;
struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
struct device_node *nvlink_dn;
u32 nvlink_index;
+ struct pci_controller *hose;
if (WARN_ON(!npdev))
return;
- if (!firmware_has_feature(FW_FEATURE_OPAL))
+ hose = pci_bus_to_host(npdev->bus);
+ npu = hose->npu;
+ if (!npu)
return;
-
- nphb = pci_bus_to_host(npdev->bus)->private_data;
- npu = &nphb->npu;
nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
&nvlink_index)))
return;
WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
- opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
- PCI_DEVID(gpdev->bus->number, gpdev->devfn));
spin_lock(&npu_context_lock);
removed = kref_put(&npu_context->kref, pnv_npu2_release_context);
spin_unlock(&npu_context_lock);
@@ -857,13 +1132,12 @@ int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea,
u64 rc = 0, result = 0;
int i, is_write;
struct page *page[1];
+ const char __user *u;
+ char c;
/* mmap_sem should be held so the struct_mm must be present */
struct mm_struct *mm = context->mm;
- if (!firmware_has_feature(FW_FEATURE_OPAL))
- return -ENODEV;
-
WARN_ON(!rwsem_is_locked(&mm->mmap_sem));
for (i = 0; i < count; i++) {
@@ -872,18 +1146,17 @@ int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea,
is_write ? FOLL_WRITE : 0,
page, NULL, NULL);
- /*
- * To support virtualised environments we will have to do an
- * access to the page to ensure it gets faulted into the
- * hypervisor. For the moment virtualisation is not supported in
- * other areas so leave the access out.
- */
if (rc != 1) {
status[i] = rc;
result = -EFAULT;
continue;
}
+ /* Make sure partition scoped tree gets a pte */
+ u = page_address(page[0]);
+ if (__get_user(c, u))
+ result = -EFAULT;
+
status[i] = 0;
put_page(page[0]);
}
@@ -892,42 +1165,127 @@ int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea,
}
EXPORT_SYMBOL(pnv_npu2_handle_fault);
-int pnv_npu2_init(struct pnv_phb *phb)
+/*
+ * Allocate and initialise the NPU descriptor for @hose.
+ *
+ * Reads the "ibm,nmmu-flush" property, maps the registers listed in
+ * "ibm,mmio-atsd" (at most ARRAY_SIZE(npu->mmio_atsd_regs) of them) and
+ * assigns the next NPU index from a static counter. On success the
+ * descriptor is stored in hose->npu.
+ *
+ * Returns 0 on success, -ENOMEM when the descriptor cannot be allocated and
+ * -ENOSPC when the index would reach NV_MAX_NPUS; in the latter case the
+ * mapped registers are unmapped and the descriptor is freed.
+ */
+int pnv_npu2_init(struct pci_controller *hose)
{
unsigned int i;
u64 mmio_atsd;
- struct device_node *dn;
- struct pci_dev *gpdev;
static int npu_index;
- uint64_t rc = 0;
-
- phb->npu.nmmu_flush =
- of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush");
- for_each_child_of_node(phb->hose->dn, dn) {
- gpdev = pnv_pci_get_gpu_dev(get_pci_dev(dn));
- if (gpdev) {
- rc = opal_npu_map_lpar(phb->opal_id,
- PCI_DEVID(gpdev->bus->number, gpdev->devfn),
- 0, 0);
- if (rc)
- dev_err(&gpdev->dev,
- "Error %lld mapping device to LPAR\n",
- rc);
- }
- }
+ struct npu *npu;
+ int ret;
+
+ npu = kzalloc(sizeof(*npu), GFP_KERNEL);
+ if (!npu)
+ return -ENOMEM;
- for (i = 0; !of_property_read_u64_index(phb->hose->dn, "ibm,mmio-atsd",
- i, &mmio_atsd); i++)
- phb->npu.mmio_atsd_regs[i] = ioremap(mmio_atsd, 32);
+ npu->nmmu_flush = of_property_read_bool(hose->dn, "ibm,nmmu-flush");
- pr_info("NPU%lld: Found %d MMIO ATSD registers", phb->opal_id, i);
- phb->npu.mmio_atsd_count = i;
- phb->npu.mmio_atsd_usage = 0;
+ /* Stop at the first missing property entry or when the array is full */
+ for (i = 0; i < ARRAY_SIZE(npu->mmio_atsd_regs) &&
+ !of_property_read_u64_index(hose->dn, "ibm,mmio-atsd",
+ i, &mmio_atsd); i++)
+ npu->mmio_atsd_regs[i] = ioremap(mmio_atsd, 32);
+
+ pr_info("NPU%d: Found %d MMIO ATSD registers", hose->global_number, i);
+ npu->mmio_atsd_count = i;
+ npu->mmio_atsd_usage = 0;
npu_index++;
- if (WARN_ON(npu_index >= NV_MAX_NPUS))
- return -ENOSPC;
+ /* The counter is incremented before the check and stays so on failure */
+ if (WARN_ON(npu_index >= NV_MAX_NPUS)) {
+ ret = -ENOSPC;
+ goto fail_exit;
+ }
max_npu2_index = npu_index;
- phb->npu.index = npu_index;
+ npu->index = npu_index;
+ hose->npu = npu;
+
+ return 0;
+
+fail_exit:
+ /* mmio_atsd_count was set above, so only mapped registers are unmapped */
+ for (i = 0; i < npu->mmio_atsd_count; ++i)
+ iounmap(npu->mmio_atsd_regs[i]);
+
+ kfree(npu);
+
+ return ret;
+}
+
+/*
+ * Map the GPU device @gpdev to LPAR @lparid on the NPU it is linked to and
+ * initialise the NPU context with the MSR bits in @msr.
+ *
+ * Returns 0 on success, -ENODEV when @gpdev has no linked NPU device, or
+ * the error reported by the failing OPAL call. A failure of the context
+ * initialisation is propagated to the caller instead of being reported as
+ * success.
+ */
+int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid,
+		unsigned long msr)
+{
+	int ret;
+	struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
+	struct pci_controller *hose;
+	struct pnv_phb *nphb;
+
+	if (!npdev)
+		return -ENODEV;
+
+	hose = pci_bus_to_host(npdev->bus);
+	nphb = hose->private_data;
+
+	dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n",
+			nphb->opal_id, lparid);
+	/*
+	 * Currently we only support radix and non-zero LPCR only makes sense
+	 * for hash tables so skiboot expects the LPCR parameter to be a zero.
+	 */
+	ret = opal_npu_map_lpar(nphb->opal_id,
+			PCI_DEVID(gpdev->bus->number, gpdev->devfn), lparid,
+			0 /* LPCR bits */);
+	if (ret) {
+		dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret);
+		return ret;
+	}
+
+	dev_dbg(&gpdev->dev, "init context opalid=%llu msr=%lx\n",
+			nphb->opal_id, msr);
+	ret = opal_npu_init_context(nphb->opal_id, 0/*__unused*/, msr,
+			PCI_DEVID(gpdev->bus->number, gpdev->devfn));
+	/* Only negative values are errors; anything else counts as success */
+	if (ret < 0) {
+		dev_err(&gpdev->dev, "Failed to init context: %d\n", ret);
+		return ret;
+	}
+
return 0;
}
+EXPORT_SYMBOL_GPL(pnv_npu2_map_lpar_dev);
+
+/*
+ * Map every device on the bus of the GPU PE @gpe to LPAR 0 with the MSR
+ * bits in @msr. Per-device results are not checked.
+ */
+void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr)
+{
+	struct pci_dev *gpu_dev;
+
+	list_for_each_entry(gpu_dev, &gpe->pbus->devices, bus_list)
+		pnv_npu2_map_lpar_dev(gpu_dev, 0, msr);
+}
+
+/*
+ * Destroy the NPU context of the GPU device @gpdev and map the device back
+ * to LPAR 0.
+ *
+ * Returns 0 on success, -ENODEV when @gpdev has no linked NPU device, or
+ * the error reported by the failing OPAL call.
+ */
+int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev)
+{
+	struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
+	struct pnv_phb *nphb;
+	int rc;
+
+	if (!npdev)
+		return -ENODEV;
+
+	nphb = pci_bus_to_host(npdev->bus)->private_data;
+
+	dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n",
+			nphb->opal_id);
+	rc = opal_npu_destroy_context(nphb->opal_id, 0/*__unused*/,
+			PCI_DEVID(gpdev->bus->number, gpdev->devfn));
+	if (rc < 0) {
+		dev_err(&gpdev->dev, "Failed to destroy context: %d\n", rc);
+		return rc;
+	}
+
+	/* Set LPID to 0 anyway, just to be safe */
+	dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=0\n", nphb->opal_id);
+	rc = opal_npu_map_lpar(nphb->opal_id,
+			PCI_DEVID(gpdev->bus->number, gpdev->devfn), 0 /*LPID*/,
+			0 /* LPCR bits */);
+	if (!rc)
+		return 0;
+
+	dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", rc);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_npu2_unmap_lpar_dev);
diff --git a/arch/powerpc/platforms/powernv/opal-power.c b/arch/powerpc/platforms/powernv/opal-power.c
index 58dc3308237f..89ab1da57657 100644
--- a/arch/powerpc/platforms/powernv/opal-power.c
+++ b/arch/powerpc/platforms/powernv/opal-power.c
@@ -138,7 +138,7 @@ static struct notifier_block opal_power_control_nb = {
.priority = 0,
};
-static int __init opal_power_control_init(void)
+int __init opal_power_control_init(void)
{
int ret, supported = 0;
struct device_node *np;
@@ -176,4 +176,3 @@ static int __init opal_power_control_init(void)
return 0;
}
-machine_subsys_initcall(powernv, opal_power_control_init);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index beed86f4224b..79586f127521 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -877,7 +877,7 @@ static int __init opal_init(void)
consoles = of_find_node_by_path("/ibm,opal/consoles");
if (consoles) {
for_each_child_of_node(consoles, np) {
- if (strcmp(np->name, "serial"))
+ if (!of_node_name_eq(np, "serial"))
continue;
of_platform_device_create(np, NULL, NULL);
}
@@ -960,6 +960,9 @@ static int __init opal_init(void)
/* Initialise OPAL sensor groups */
opal_sensor_groups_init();
+ /* Initialise OPAL Power control interface */
+ opal_power_control_init();
+
return 0;
}
machine_subsys_initcall(powernv, opal_init);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
index fe9691040f54..697449afb3f7 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
@@ -299,7 +299,7 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
if (alloc_userspace_copy) {
offset = 0;
uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
- levels, tce_table_size, &offset,
+ tmplevels, tce_table_size, &offset,
&total_allocated_uas);
if (!uas)
goto free_tces_exit;
@@ -368,6 +368,7 @@ void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
found = false;
for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
if (table_group->tables[i] == tbl) {
+ iommu_tce_table_put(tbl);
table_group->tables[i] = NULL;
found = true;
break;
@@ -393,7 +394,7 @@ long pnv_pci_link_table_and_group(int node, int num,
tgl->table_group = table_group;
list_add_rcu(&tgl->next, &tbl->it_group_list);
- table_group->tables[num] = tbl;
+ table_group->tables[num] = iommu_tce_table_get(tbl);
return 0;
}
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index dd807446801e..1d6406a051f1 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -190,7 +190,8 @@ static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
unsigned int pe_num = pe->pe_number;
WARN_ON(pe->pdev);
-
+ WARN_ON(pe->npucomp); /* NPUs are not supposed to be freed */
+ kfree(pe->npucomp);
memset(pe, 0, sizeof(struct pnv_ioda_pe));
clear_bit(pe_num, phb->ioda.pe_alloc);
}
@@ -517,8 +518,6 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
phb->init_m64 = pnv_ioda1_init_m64;
else
phb->init_m64 = pnv_ioda2_init_m64;
- phb->reserve_m64_pe = pnv_ioda_reserve_m64_pe;
- phb->pick_m64_pe = pnv_ioda_pick_m64_pe;
}
static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no)
@@ -604,8 +603,8 @@ static int pnv_ioda_unfreeze_pe(struct pnv_phb *phb, int pe_no, int opt)
static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no)
{
struct pnv_ioda_pe *slave, *pe;
- u8 fstate, state;
- __be16 pcierr;
+ u8 fstate = 0, state;
+ __be16 pcierr = 0;
s64 rc;
/* Sanity check on PE number */
@@ -663,10 +662,6 @@ static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no)
return state;
}
-/* Currently those 2 are only used when MSIs are enabled, this will change
- * but in the meantime, we need to protect them to avoid warnings
- */
-#ifdef CONFIG_PCI_MSI
struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
{
struct pci_controller *hose = pci_bus_to_host(dev->bus);
@@ -679,7 +674,6 @@ struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
return NULL;
return &phb->ioda.pe_array[pdn->pe_number];
}
-#endif /* CONFIG_PCI_MSI */
static int pnv_ioda_set_one_peltv(struct pnv_phb *phb,
struct pnv_ioda_pe *parent,
@@ -1160,8 +1154,8 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
pe = &phb->ioda.pe_array[phb->ioda.root_pe_idx];
/* Check if PE is determined by M64 */
- if (!pe && phb->pick_m64_pe)
- pe = phb->pick_m64_pe(bus, all);
+ if (!pe)
+ pe = pnv_ioda_pick_m64_pe(bus, all);
/* The PE number isn't pinned by M64 */
if (!pe)
@@ -1273,19 +1267,20 @@ static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus)
static void pnv_pci_ioda_setup_PEs(void)
{
- struct pci_controller *hose, *tmp;
+ struct pci_controller *hose;
struct pnv_phb *phb;
struct pci_bus *bus;
struct pci_dev *pdev;
+ struct pnv_ioda_pe *pe;
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ list_for_each_entry(hose, &hose_list, list_node) {
phb = hose->private_data;
if (phb->type == PNV_PHB_NPU_NVLINK) {
/* PE#0 is needed for error reporting */
pnv_ioda_reserve_pe(phb, 0);
pnv_ioda_setup_npu_PEs(hose->bus);
if (phb->model == PNV_PHB_MODEL_NPU2)
- pnv_npu2_init(phb);
+ WARN_ON_ONCE(pnv_npu2_init(hose));
}
if (phb->type == PNV_PHB_NPU_OCAPI) {
bus = hose->bus;
@@ -1293,6 +1288,14 @@ static void pnv_pci_ioda_setup_PEs(void)
pnv_ioda_setup_dev_PE(pdev);
}
}
+ list_for_each_entry(hose, &hose_list, list_node) {
+ phb = hose->private_data;
+ if (phb->type != PNV_PHB_IODA2)
+ continue;
+
+ list_for_each_entry(pe, &phb->ioda.pe_list, list)
+ pnv_npu2_map_lpar(pe, MSR_DR | MSR_PR | MSR_HV);
+ }
}
#ifdef CONFIG_PCI_IOV
@@ -1531,6 +1534,11 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
struct pnv_ioda_pe *pe);
+#ifdef CONFIG_IOMMU_API
+static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe,
+ struct iommu_table_group *table_group, struct pci_bus *bus);
+
+#endif
static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
{
struct pci_bus *bus;
@@ -1584,6 +1592,9 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
mutex_unlock(&phb->ioda.pe_list_mutex);
pnv_pci_ioda2_setup_dma_pe(phb, pe);
+#ifdef CONFIG_IOMMU_API
+ pnv_ioda_setup_bus_iommu_group(pe, &pe->table_group, NULL);
+#endif
}
}
@@ -1923,21 +1934,16 @@ static u64 pnv_pci_ioda_dma_get_required_mask(struct pci_dev *pdev)
return mask;
}
-static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
- struct pci_bus *bus,
- bool add_to_group)
+/*
+ * Point every device on @bus at the PE's first TCE table and at the PE's
+ * bypass base as DMA offset. For PEs flagged PNV_IODA_PE_BUS_ALL the
+ * subordinate buses are walked recursively.
+ */
+static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
{
struct pci_dev *dev;
list_for_each_entry(dev, &bus->devices, bus_list) {
set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
set_dma_offset(&dev->dev, pe->tce_bypass_base);
- if (add_to_group)
- iommu_add_device(&dev->dev);
if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
- pnv_ioda_setup_bus_dma(pe, dev->subordinate,
- add_to_group);
+ pnv_ioda_setup_bus_dma(pe, dev->subordinate);
}
}
@@ -2366,16 +2372,8 @@ found:
pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift;
iommu_init_table(tbl, phb->hose->node);
- if (pe->flags & PNV_IODA_PE_DEV) {
- /*
- * Setting table base here only for carrying iommu_group
- * further down to let iommu_add_device() do the job.
- * pnv_pci_ioda_dma_dev_setup will override it later anyway.
- */
- set_iommu_table_base(&pe->pdev->dev, tbl);
- iommu_add_device(&pe->pdev->dev);
- } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
- pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
+ if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
+ pnv_ioda_setup_bus_dma(pe, pe->pbus);
return;
fail:
@@ -2527,14 +2525,6 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
if (!pnv_iommu_bypass_disabled)
pnv_pci_ioda2_set_bypass(pe, true);
- /*
- * Setting table base here only for carrying iommu_group
- * further down to let iommu_add_device() do the job.
- * pnv_pci_ioda_dma_dev_setup will override it later anyway.
- */
- if (pe->flags & PNV_IODA_PE_DEV)
- set_iommu_table_base(&pe->pdev->dev, tbl);
-
return 0;
}
@@ -2565,7 +2555,7 @@ static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
#endif
#ifdef CONFIG_IOMMU_API
-static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
+unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
__u64 window_size, __u32 levels)
{
unsigned long bytes = 0;
@@ -2616,7 +2606,7 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
pnv_pci_ioda2_set_bypass(pe, false);
pnv_pci_ioda2_unset_window(&pe->table_group, 0);
if (pe->pbus)
- pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
+ pnv_ioda_setup_bus_dma(pe, pe->pbus);
iommu_tce_table_put(tbl);
}
@@ -2627,7 +2617,7 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
pnv_pci_ioda2_setup_default_config(pe);
if (pe->pbus)
- pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
+ pnv_ioda_setup_bus_dma(pe, pe->pbus);
}
static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
@@ -2639,131 +2629,100 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
.release_ownership = pnv_ioda2_release_ownership,
};
-static int gpe_table_group_to_npe_cb(struct device *dev, void *opaque)
+static void pnv_ioda_setup_bus_iommu_group_add_devices(struct pnv_ioda_pe *pe,
+ struct iommu_table_group *table_group,
+ struct pci_bus *bus)
{
- struct pci_controller *hose;
- struct pnv_phb *phb;
- struct pnv_ioda_pe **ptmppe = opaque;
- struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
- struct pci_dn *pdn = pci_get_pdn(pdev);
-
- if (!pdn || pdn->pe_number == IODA_INVALID_PE)
- return 0;
-
- hose = pci_bus_to_host(pdev->bus);
- phb = hose->private_data;
- if (phb->type != PNV_PHB_NPU_NVLINK)
- return 0;
+ struct pci_dev *dev;
- *ptmppe = &phb->ioda.pe_array[pdn->pe_number];
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ iommu_add_device(table_group, &dev->dev);
- return 1;
+ if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
+ pnv_ioda_setup_bus_iommu_group_add_devices(pe,
+ table_group, dev->subordinate);
+ }
}
-/*
- * This returns PE of associated NPU.
- * This assumes that NPU is in the same IOMMU group with GPU and there is
- * no other PEs.
- */
-static struct pnv_ioda_pe *gpe_table_group_to_npe(
- struct iommu_table_group *table_group)
+static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe,
+ struct iommu_table_group *table_group, struct pci_bus *bus)
{
- struct pnv_ioda_pe *npe = NULL;
- int ret = iommu_group_for_each_dev(table_group->group, &npe,
- gpe_table_group_to_npe_cb);
- BUG_ON(!ret || !npe);
+ if (pe->flags & PNV_IODA_PE_DEV)
+ iommu_add_device(table_group, &pe->pdev->dev);
- return npe;
+ if ((pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) || bus)
+ pnv_ioda_setup_bus_iommu_group_add_devices(pe, table_group,
+ bus);
}
-static long pnv_pci_ioda2_npu_set_window(struct iommu_table_group *table_group,
- int num, struct iommu_table *tbl)
-{
- struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group);
- int num2 = (num == 0) ? 1 : 0;
- long ret = pnv_pci_ioda2_set_window(table_group, num, tbl);
-
- if (ret)
- return ret;
-
- if (table_group->tables[num2])
- pnv_npu_unset_window(npe, num2);
-
- ret = pnv_npu_set_window(npe, num, tbl);
- if (ret) {
- pnv_pci_ioda2_unset_window(table_group, num);
- if (table_group->tables[num2])
- pnv_npu_set_window(npe, num2,
- table_group->tables[num2]);
- }
-
- return ret;
-}
+static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb);
-static long pnv_pci_ioda2_npu_unset_window(
- struct iommu_table_group *table_group,
- int num)
+static void pnv_pci_ioda_setup_iommu_api(void)
{
- struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group);
- int num2 = (num == 0) ? 1 : 0;
- long ret = pnv_pci_ioda2_unset_window(table_group, num);
-
- if (ret)
- return ret;
-
- if (!npe->table_group.tables[num])
- return 0;
-
- ret = pnv_npu_unset_window(npe, num);
- if (ret)
- return ret;
-
- if (table_group->tables[num2])
- ret = pnv_npu_set_window(npe, num2, table_group->tables[num2]);
-
- return ret;
-}
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
+ struct pnv_ioda_pe *pe;
-static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group)
-{
/*
- * Detach NPU first as pnv_ioda2_take_ownership() will destroy
- * the iommu_table if 32bit DMA is enabled.
+ * There are 4 types of PEs:
+ * - PNV_IODA_PE_BUS: a downstream port with an adapter,
+ * created from pnv_pci_setup_bridge();
+ * - PNV_IODA_PE_BUS_ALL: a PCI-PCIX bridge with devices behind it,
+ * created from pnv_pci_setup_bridge();
+ * - PNV_IODA_PE_VF: a SRIOV virtual function,
+ * created from pnv_pcibios_sriov_enable();
+ * - PNV_IODA_PE_DEV: an NPU or OCAPI device,
+ * created from pnv_pci_ioda_fixup().
+ *
+ * Normally a PE is represented by an IOMMU group, however for
+ * devices with side channels the groups need to be more strict.
*/
- pnv_npu_take_ownership(gpe_table_group_to_npe(table_group));
- pnv_ioda2_take_ownership(table_group);
-}
+ list_for_each_entry(hose, &hose_list, list_node) {
+ phb = hose->private_data;
-static struct iommu_table_group_ops pnv_pci_ioda2_npu_ops = {
- .get_table_size = pnv_pci_ioda2_get_table_size,
- .create_table = pnv_pci_ioda2_create_table_userspace,
- .set_window = pnv_pci_ioda2_npu_set_window,
- .unset_window = pnv_pci_ioda2_npu_unset_window,
- .take_ownership = pnv_ioda2_npu_take_ownership,
- .release_ownership = pnv_ioda2_release_ownership,
-};
+ if (phb->type == PNV_PHB_NPU_NVLINK)
+ continue;
-static void pnv_pci_ioda_setup_iommu_api(void)
-{
- struct pci_controller *hose, *tmp;
- struct pnv_phb *phb;
- struct pnv_ioda_pe *pe, *gpe;
+ list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+ struct iommu_table_group *table_group;
+
+ table_group = pnv_try_setup_npu_table_group(pe);
+ if (!table_group) {
+ if (!pnv_pci_ioda_pe_dma_weight(pe))
+ continue;
+
+ table_group = &pe->table_group;
+ iommu_register_group(&pe->table_group,
+ pe->phb->hose->global_number,
+ pe->pe_number);
+ }
+ pnv_ioda_setup_bus_iommu_group(pe, table_group,
+ pe->pbus);
+ }
+ }
/*
* Now we have all PHBs discovered, time to add NPU devices to
* the corresponding IOMMU groups.
*/
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ list_for_each_entry(hose, &hose_list, list_node) {
+ unsigned long pgsizes;
+
phb = hose->private_data;
if (phb->type != PNV_PHB_NPU_NVLINK)
continue;
+ pgsizes = pnv_ioda_parse_tce_sizes(phb);
list_for_each_entry(pe, &phb->ioda.pe_list, list) {
- gpe = pnv_pci_npu_setup_iommu(pe);
- if (gpe)
- gpe->table_group.ops = &pnv_pci_ioda2_npu_ops;
+ /*
+ * IODA2 bridges get this set up from
+ * pci_controller_ops::setup_bridge but NPU bridges
+ * do not have this hook defined so we do it here.
+ */
+ pe->table_group.pgsizes = pgsizes;
+ pnv_npu_compound_attach(pe);
}
}
}
@@ -2810,9 +2769,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
/* TVE #1 is selected by PCI address bit 59 */
pe->tce_bypass_base = 1ull << 59;
- iommu_register_group(&pe->table_group, phb->hose->global_number,
- pe->pe_number);
-
/* The PE will reserve all possible 32-bits space */
pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
phb->ioda.m32_pci_base);
@@ -2833,10 +2789,9 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
return;
if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
- pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
+ pnv_ioda_setup_bus_dma(pe, pe->pbus);
}
-#ifdef CONFIG_PCI_MSI
int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq)
{
struct pnv_phb *phb = container_of(chip, struct pnv_phb,
@@ -2982,9 +2937,6 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
count, phb->msi_base);
}
-#else
-static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
-#endif /* CONFIG_PCI_MSI */
#ifdef CONFIG_PCI_IOV
static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
@@ -3402,8 +3354,7 @@ static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type)
return;
/* Reserve PEs according to used M64 resources */
- if (phb->reserve_m64_pe)
- phb->reserve_m64_pe(bus, NULL, all);
+ pnv_ioda_reserve_m64_pe(bus, NULL, all);
/*
* Assign PE. We might run here because of partial hotplug.
@@ -3687,6 +3638,15 @@ static void pnv_pci_release_device(struct pci_dev *pdev)
pnv_ioda_release_pe(pe);
}
+static void pnv_npu_disable_device(struct pci_dev *pdev)
+{
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+ struct eeh_pe *eehpe = edev ? edev->pe : NULL;
+
+ if (eehpe && eeh_ops && eeh_ops->reset)
+ eeh_ops->reset(eehpe, EEH_RESET_HOT);
+}
+
static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
{
struct pnv_phb *phb = hose->private_data;
@@ -3698,10 +3658,8 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
.dma_dev_setup = pnv_pci_dma_dev_setup,
.dma_bus_setup = pnv_pci_dma_bus_setup,
-#ifdef CONFIG_PCI_MSI
.setup_msi_irqs = pnv_setup_msi_irqs,
.teardown_msi_irqs = pnv_teardown_msi_irqs,
-#endif
.enable_device_hook = pnv_pci_enable_device_hook,
.release_device = pnv_pci_release_device,
.window_alignment = pnv_pci_window_alignment,
@@ -3722,15 +3680,14 @@ static int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask)
static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
.dma_dev_setup = pnv_pci_dma_dev_setup,
-#ifdef CONFIG_PCI_MSI
.setup_msi_irqs = pnv_setup_msi_irqs,
.teardown_msi_irqs = pnv_teardown_msi_irqs,
-#endif
.enable_device_hook = pnv_pci_enable_device_hook,
.window_alignment = pnv_pci_window_alignment,
.reset_secondary_bus = pnv_pci_reset_secondary_bus,
.dma_set_mask = pnv_npu_dma_set_mask,
.shutdown = pnv_pci_ioda_shutdown,
+ .disable_device = pnv_npu_disable_device,
};
static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 13aef2323bbc..45fb70b4bfa7 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -160,7 +160,6 @@ exit:
}
EXPORT_SYMBOL_GPL(pnv_pci_set_power_state);
-#ifdef CONFIG_PCI_MSI
int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
@@ -229,7 +228,6 @@ void pnv_teardown_msi_irqs(struct pci_dev *pdev)
msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, 1);
}
}
-#endif /* CONFIG_PCI_MSI */
/* Nicely print the contents of the PE State Tables (PEST). */
static void pnv_pci_dump_pest(__be64 pestA[], __be64 pestB[], int pest_size)
@@ -602,8 +600,8 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
{
struct pnv_phb *phb = pdn->phb->private_data;
- u8 fstate;
- __be16 pcierr;
+ u8 fstate = 0;
+ __be16 pcierr = 0;
unsigned int pe_no;
s64 rc;
@@ -1127,4 +1125,45 @@ void __init pnv_pci_init(void)
set_pci_dma_ops(&dma_iommu_ops);
}
-machine_subsys_initcall_sync(powernv, tce_iommu_bus_notifier_init);
+static int pnv_tce_iommu_bus_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+ struct pci_dev *pdev;
+ struct pci_dn *pdn;
+ struct pnv_ioda_pe *pe;
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
+
+ switch (action) {
+ case BUS_NOTIFY_ADD_DEVICE:
+ pdev = to_pci_dev(dev);
+ pdn = pci_get_pdn(pdev);
+ hose = pci_bus_to_host(pdev->bus);
+ phb = hose->private_data;
+
+ WARN_ON_ONCE(!phb);
+ if (!pdn || pdn->pe_number == IODA_INVALID_PE || !phb)
+ return 0;
+
+ pe = &phb->ioda.pe_array[pdn->pe_number];
+ iommu_add_device(&pe->table_group, dev);
+ return 0;
+ case BUS_NOTIFY_DEL_DEVICE:
+ iommu_del_device(dev);
+ return 0;
+ default:
+ return 0;
+ }
+}
+
+static struct notifier_block pnv_tce_iommu_bus_nb = {
+ .notifier_call = pnv_tce_iommu_bus_notifier,
+};
+
+static int __init pnv_tce_iommu_bus_notifier_init(void)
+{
+ bus_register_notifier(&pci_bus_type, &pnv_tce_iommu_bus_nb);
+ return 0;
+}
+machine_subsys_initcall_sync(powernv, pnv_tce_iommu_bus_notifier_init);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 8b37b28e3831..8e36da379252 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -8,9 +8,6 @@
struct pci_dn;
-/* Maximum possible number of ATSD MMIO registers per NPU */
-#define NV_NMMU_ATSD_REGS 8
-
enum pnv_phb_type {
PNV_PHB_IODA1 = 0,
PNV_PHB_IODA2 = 1,
@@ -65,6 +62,7 @@ struct pnv_ioda_pe {
/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
struct iommu_table_group table_group;
+ struct npu_comp *npucomp;
/* 64-bit TCE bypass region */
bool tce_bypass_enabled;
@@ -106,20 +104,14 @@ struct pnv_phb {
struct dentry *dbgfs;
#endif
-#ifdef CONFIG_PCI_MSI
unsigned int msi_base;
unsigned int msi32_support;
struct msi_bitmap msi_bmp;
-#endif
int (*msi_setup)(struct pnv_phb *phb, struct pci_dev *dev,
unsigned int hwirq, unsigned int virq,
unsigned int is_64, struct msi_msg *msg);
void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
- void (*fixup_phb)(struct pci_controller *hose);
int (*init_m64)(struct pnv_phb *phb);
- void (*reserve_m64_pe)(struct pci_bus *bus,
- unsigned long *pe_bitmap, bool all);
- struct pnv_ioda_pe *(*pick_m64_pe)(struct pci_bus *bus, bool all);
int (*get_pe_state)(struct pnv_phb *phb, int pe_no);
void (*freeze_pe)(struct pnv_phb *phb, int pe_no);
int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt);
@@ -180,19 +172,6 @@ struct pnv_phb {
unsigned int diag_data_size;
u8 *diag_data;
- /* Nvlink2 data */
- struct npu {
- int index;
- __be64 *mmio_atsd_regs[NV_NMMU_ATSD_REGS];
- unsigned int mmio_atsd_count;
-
- /* Bitmask for MMIO register usage */
- unsigned long mmio_atsd_usage;
-
- /* Do we need to explicitly flush the nest mmu? */
- bool nmmu_flush;
- } npu;
-
int p2p_target_count;
};
@@ -210,6 +189,7 @@ extern void pnv_pci_init_ioda_hub(struct device_node *np);
extern void pnv_pci_init_ioda2_phb(struct device_node *np);
extern void pnv_pci_init_npu_phb(struct device_node *np);
extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np);
+extern void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr);
extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
@@ -220,6 +200,8 @@ extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev);
extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq);
extern void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
+extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
+ __u64 window_size, __u32 levels);
extern int pnv_eeh_post_init(void);
extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
@@ -235,12 +217,10 @@ extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass);
extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm);
extern struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe);
-extern long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num,
- struct iommu_table *tbl);
-extern long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num);
-extern void pnv_npu_take_ownership(struct pnv_ioda_pe *npe);
-extern void pnv_npu_release_ownership(struct pnv_ioda_pe *npe);
-extern int pnv_npu2_init(struct pnv_phb *phb);
+extern struct iommu_table_group *pnv_try_setup_npu_table_group(
+ struct pnv_ioda_pe *pe);
+extern struct iommu_table_group *pnv_npu_compound_attach(
+ struct pnv_ioda_pe *pe);
/* pci-ioda-tce.c */
#define POWERNV_IOMMU_DEFAULT_LEVELS 1
diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c
index 4f7276ebdf9c..4d3929fbc08f 100644
--- a/arch/powerpc/platforms/powernv/vas-debug.c
+++ b/arch/powerpc/platforms/powernv/vas-debug.c
@@ -30,7 +30,7 @@ static char *cop_to_str(int cop)
}
}
-static int info_dbg_show(struct seq_file *s, void *private)
+static int info_show(struct seq_file *s, void *private)
{
struct vas_window *window = s->private;
@@ -49,17 +49,7 @@ unlock:
return 0;
}
-static int info_dbg_open(struct inode *inode, struct file *file)
-{
- return single_open(file, info_dbg_show, inode->i_private);
-}
-
-static const struct file_operations info_fops = {
- .open = info_dbg_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(info);
static inline void print_reg(struct seq_file *s, struct vas_window *win,
char *name, u32 reg)
@@ -67,7 +57,7 @@ static inline void print_reg(struct seq_file *s, struct vas_window *win,
seq_printf(s, "0x%016llx %s\n", read_hvwc_reg(win, name, reg), name);
}
-static int hvwc_dbg_show(struct seq_file *s, void *private)
+static int hvwc_show(struct seq_file *s, void *private)
{
struct vas_window *window = s->private;
@@ -115,17 +105,7 @@ unlock:
return 0;
}
-static int hvwc_dbg_open(struct inode *inode, struct file *file)
-{
- return single_open(file, hvwc_dbg_show, inode->i_private);
-}
-
-static const struct file_operations hvwc_fops = {
- .open = hvwc_dbg_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(hvwc);
void vas_window_free_dbgdir(struct vas_window *window)
{