aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/platforms
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c2
-rw-r--r--arch/powerpc/platforms/powernv/memtrace.c17
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c88
-rw-r--r--arch/powerpc/platforms/powernv/opal-nvram.c14
-rw-r--r--arch/powerpc/platforms/powernv/opal-rtc.c8
-rw-r--r--arch/powerpc/platforms/powernv/setup.c1
-rw-r--r--arch/powerpc/platforms/pseries/setup.c1
7 files changed, 93 insertions, 38 deletions
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 9033c8194eda..ccc421503363 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -1093,7 +1093,7 @@ static int show_spu_loadavg(struct seq_file *s, void *private)
LOAD_INT(c), LOAD_FRAC(c),
count_active_contexts(),
atomic_read(&nr_spu_contexts),
- idr_get_cursor(&task_active_pid_ns(current)->idr));
+ idr_get_cursor(&task_active_pid_ns(current)->idr) - 1);
return 0;
}
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index de470caf0784..fc222a0c2ac4 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -82,19 +82,6 @@ static const struct file_operations memtrace_fops = {
.open = simple_open,
};
-static void flush_memory_region(u64 base, u64 size)
-{
- unsigned long line_size = ppc64_caches.l1d.size;
- u64 end = base + size;
- u64 addr;
-
- base = round_down(base, line_size);
- end = round_up(end, line_size);
-
- for (addr = base; addr < end; addr += line_size)
- asm volatile("dcbf 0,%0" : "=r" (addr) :: "memory");
-}
-
static int check_memblock_online(struct memory_block *mem, void *arg)
{
if (mem->state != MEM_ONLINE)
@@ -132,10 +119,6 @@ static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE,
change_memblock_state);
- /* RCU grace period? */
- flush_memory_region((u64)__va(start_pfn << PAGE_SHIFT),
- nr_pages << PAGE_SHIFT);
-
lock_device_hotplug();
remove_memory(nid, start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
unlock_device_hotplug();
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 69a4f9e8bd55..525e966dce34 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -34,6 +34,19 @@
#define npu_to_phb(x) container_of(x, struct pnv_phb, npu)
/*
+ * spinlock to protect initialisation of an npu_context for a particular
+ * mm_struct.
+ */
+static DEFINE_SPINLOCK(npu_context_lock);
+
+/*
+ * When an address shootdown range exceeds this threshold we invalidate the
+ * entire TLB on the GPU for the given PID rather than each specific address in
+ * the range.
+ */
+#define ATSD_THRESHOLD (2*1024*1024)
+
+/*
* Other types of TCE cache invalidation are not functional in the
* hardware.
*/
@@ -401,7 +414,7 @@ struct npu_context {
bool nmmu_flush;
/* Callback to stop translation requests on a given GPU */
- struct npu_context *(*release_cb)(struct npu_context *, void *);
+ void (*release_cb)(struct npu_context *context, void *priv);
/*
* Private pointer passed to the above callback for usage by
@@ -671,11 +684,19 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
struct npu_context *npu_context = mn_to_npu_context(mn);
unsigned long address;
- for (address = start; address < end; address += PAGE_SIZE)
- mmio_invalidate(npu_context, 1, address, false);
+ if (end - start > ATSD_THRESHOLD) {
+ /*
+ * Just invalidate the entire PID if the address range is too
+ * large.
+ */
+ mmio_invalidate(npu_context, 0, 0, true);
+ } else {
+ for (address = start; address < end; address += PAGE_SIZE)
+ mmio_invalidate(npu_context, 1, address, false);
- /* Do the flush only on the final addess == end */
- mmio_invalidate(npu_context, 1, address, true);
+ /* Do the flush only on the final addess == end */
+ mmio_invalidate(npu_context, 1, address, true);
+ }
}
static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -696,11 +717,12 @@ static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
* Returns an error if there no contexts are currently available or a
* npu_context which should be passed to pnv_npu2_handle_fault().
*
- * mmap_sem must be held in write mode.
+ * mmap_sem must be held in write mode and must not be called from interrupt
+ * context.
*/
struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
unsigned long flags,
- struct npu_context *(*cb)(struct npu_context *, void *),
+ void (*cb)(struct npu_context *, void *),
void *priv)
{
int rc;
@@ -743,7 +765,9 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
/*
* Setup the NPU context table for a particular GPU. These need to be
* per-GPU as we need the tables to filter ATSDs when there are no
- * active contexts on a particular GPU.
+ * active contexts on a particular GPU. It is safe for these to be
+ * called concurrently with destroy as the OPAL call takes appropriate
+ * locks and refcounts on init/destroy.
*/
rc = opal_npu_init_context(nphb->opal_id, mm->context.id, flags,
PCI_DEVID(gpdev->bus->number, gpdev->devfn));
@@ -754,8 +778,29 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
* We store the npu pci device so we can more easily get at the
* associated npus.
*/
+ spin_lock(&npu_context_lock);
npu_context = mm->context.npu_context;
+ if (npu_context) {
+ if (npu_context->release_cb != cb ||
+ npu_context->priv != priv) {
+ spin_unlock(&npu_context_lock);
+ opal_npu_destroy_context(nphb->opal_id, mm->context.id,
+ PCI_DEVID(gpdev->bus->number,
+ gpdev->devfn));
+ return ERR_PTR(-EINVAL);
+ }
+
+ WARN_ON(!kref_get_unless_zero(&npu_context->kref));
+ }
+ spin_unlock(&npu_context_lock);
+
if (!npu_context) {
+ /*
+ * We can set up these fields without holding the
+ * npu_context_lock as the npu_context hasn't been returned to
+ * the caller meaning it can't be destroyed. Parallel allocation
+ * is protected against by mmap_sem.
+ */
rc = -ENOMEM;
npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL);
if (npu_context) {
@@ -774,8 +819,6 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
}
mm->context.npu_context = npu_context;
- } else {
- WARN_ON(!kref_get_unless_zero(&npu_context->kref));
}
npu_context->release_cb = cb;
@@ -814,15 +857,16 @@ static void pnv_npu2_release_context(struct kref *kref)
mm_context_remove_copro(npu_context->mm);
npu_context->mm->context.npu_context = NULL;
- mmu_notifier_unregister(&npu_context->mn,
- npu_context->mm);
-
- kfree(npu_context);
}
+/*
+ * Destroy a context on the given GPU. May free the npu_context if it is no
+ * longer active on any GPUs. Must not be called from interrupt context.
+ */
void pnv_npu2_destroy_context(struct npu_context *npu_context,
struct pci_dev *gpdev)
{
+ int removed;
struct pnv_phb *nphb;
struct npu *npu;
struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
@@ -844,7 +888,21 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context,
WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
PCI_DEVID(gpdev->bus->number, gpdev->devfn));
- kref_put(&npu_context->kref, pnv_npu2_release_context);
+ spin_lock(&npu_context_lock);
+ removed = kref_put(&npu_context->kref, pnv_npu2_release_context);
+ spin_unlock(&npu_context_lock);
+
+ /*
+ * We need to do this outside of pnv_npu2_release_context so that it is
+ * outside the spinlock as mmu_notifier_destroy uses SRCU.
+ */
+ if (removed) {
+ mmu_notifier_unregister(&npu_context->mn,
+ npu_context->mm);
+
+ kfree(npu_context);
+ }
+
}
EXPORT_SYMBOL(pnv_npu2_destroy_context);
diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c
index 1bceb95f422d..5584247f5029 100644
--- a/arch/powerpc/platforms/powernv/opal-nvram.c
+++ b/arch/powerpc/platforms/powernv/opal-nvram.c
@@ -44,6 +44,10 @@ static ssize_t opal_nvram_read(char *buf, size_t count, loff_t *index)
return count;
}
+/*
+ * This can be called in the panic path with interrupts off, so use
+ * mdelay in that case.
+ */
static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index)
{
s64 rc = OPAL_BUSY;
@@ -58,10 +62,16 @@ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index)
while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
rc = opal_write_nvram(__pa(buf), count, off);
if (rc == OPAL_BUSY_EVENT) {
- msleep(OPAL_BUSY_DELAY_MS);
+ if (in_interrupt() || irqs_disabled())
+ mdelay(OPAL_BUSY_DELAY_MS);
+ else
+ msleep(OPAL_BUSY_DELAY_MS);
opal_poll_events(NULL);
} else if (rc == OPAL_BUSY) {
- msleep(OPAL_BUSY_DELAY_MS);
+ if (in_interrupt() || irqs_disabled())
+ mdelay(OPAL_BUSY_DELAY_MS);
+ else
+ msleep(OPAL_BUSY_DELAY_MS);
}
}
diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c
index f8868864f373..aa2a5139462e 100644
--- a/arch/powerpc/platforms/powernv/opal-rtc.c
+++ b/arch/powerpc/platforms/powernv/opal-rtc.c
@@ -48,10 +48,12 @@ unsigned long __init opal_get_boot_time(void)
while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
- if (rc == OPAL_BUSY_EVENT)
+ if (rc == OPAL_BUSY_EVENT) {
+ mdelay(OPAL_BUSY_DELAY_MS);
opal_poll_events(NULL);
- else if (rc == OPAL_BUSY)
- mdelay(10);
+ } else if (rc == OPAL_BUSY) {
+ mdelay(OPAL_BUSY_DELAY_MS);
+ }
}
if (rc != OPAL_SUCCESS)
return 0;
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index ef8c9ce53a61..a6648ec99ca7 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -131,6 +131,7 @@ static void __init pnv_setup_arch(void)
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
pnv_setup_rfi_flush();
+ setup_stf_barrier();
/* Initialize SMP */
pnv_smp_init();
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index b55ad4286dc7..fdb32e056ef4 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -710,6 +710,7 @@ static void __init pSeries_setup_arch(void)
fwnmi_init();
pseries_setup_rfi_flush();
+ setup_stf_barrier();
/* By default, only probe PCI (can be overridden by rtas_pci) */
pci_add_flags(PCI_PROBE_ONLY);