aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/platforms/powernv
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/powernv')
-rw-r--r--arch/powerpc/platforms/powernv/Kconfig22
-rw-r--r--arch/powerpc/platforms/powernv/Makefile5
-rw-r--r--arch/powerpc/platforms/powernv/copy-paste.h46
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c29
-rw-r--r--arch/powerpc/platforms/powernv/idle.c34
-rw-r--r--arch/powerpc/platforms/powernv/memtrace.c282
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c22
-rw-r--r--arch/powerpc/platforms/powernv/opal-async.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal-flash.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-hmi.c22
-rw-r--r--arch/powerpc/platforms/powernv/opal-imc.c226
-rw-r--r--arch/powerpc/platforms/powernv/opal-powercap.c244
-rw-r--r--arch/powerpc/platforms/powernv/opal-prd.c13
-rw-r--r--arch/powerpc/platforms/powernv/opal-psr.c175
-rw-r--r--arch/powerpc/platforms/powernv/opal-sensor-groups.c212
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S11
-rw-r--r--arch/powerpc/platforms/powernv/opal-xscom.c8
-rw-r--r--arch/powerpc/platforms/powernv/opal.c152
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c18
-rw-r--r--arch/powerpc/platforms/powernv/pci.c75
-rw-r--r--arch/powerpc/platforms/powernv/pci.h5
-rw-r--r--arch/powerpc/platforms/powernv/powernv.h2
-rw-r--r--arch/powerpc/platforms/powernv/rng.c45
-rw-r--r--arch/powerpc/platforms/powernv/smp.c11
-rw-r--r--arch/powerpc/platforms/powernv/vas-window.c1134
-rw-r--r--arch/powerpc/platforms/powernv/vas.c151
-rw-r--r--arch/powerpc/platforms/powernv/vas.h467
27 files changed, 3271 insertions, 146 deletions
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 6a6f4ef46b9e..340cbe263b33 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -30,3 +30,25 @@ config OPAL_PRD
help
This enables the opal-prd driver, a facility to run processor
recovery diagnostics on OpenPower machines
+
+config PPC_MEMTRACE
+ bool "Enable removal of RAM from kernel mappings for tracing"
+ depends on PPC_POWERNV && MEMORY_HOTREMOVE
+ default n
+ help
+ Enabling this option allows for the removal of memory (RAM)
+ from the kernel mappings to be used for hardware tracing.
+
+config PPC_VAS
+ bool "IBM Virtual Accelerator Switchboard (VAS)"
+ depends on PPC_POWERNV && PPC_64K_PAGES
+ default y
+ help
+ This enables support for IBM Virtual Accelerator Switchboard (VAS).
+
+ VAS allows accelerators in co-processors like NX-GZIP and NX-842
+ to be accessible to kernel subsystems and user processes.
+
+ VAS adapters are found in POWER9 based systems.
+
+ If unsure, say N.
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index b5d98cb3f482..37d60f7dd86d 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o opal-async.o idle.o
obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
-obj-y += opal-kmsg.o
+obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o
obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o
@@ -12,3 +12,6 @@ obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o
obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o
obj-$(CONFIG_OPAL_PRD) += opal-prd.o
+obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
+obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o
+obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o
diff --git a/arch/powerpc/platforms/powernv/copy-paste.h b/arch/powerpc/platforms/powernv/copy-paste.h
new file mode 100644
index 000000000000..c9a503623431
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/copy-paste.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2016-17 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/ppc-opcode.h>
+
+#define CR0_SHIFT 28
+#define CR0_MASK 0xF
+/*
+ * Copy/paste instructions:
+ *
+ * copy RA,RB
+ * Copy contents of address (RA) + effective_address(RB)
+ * to internal copy-buffer.
+ *
+ * paste RA,RB
+ * Paste contents of internal copy-buffer to the address
+ * (RA) + effective_address(RB)
+ */
+static inline int vas_copy(void *crb, int offset)
+{
+ asm volatile(PPC_COPY(%0, %1)";"
+ :
+ : "b" (offset), "b" (crb)
+ : "memory");
+
+ return 0;
+}
+
+static inline int vas_paste(void *paste_address, int offset)
+{
+ u32 cr;
+
+ cr = 0;
+ asm volatile(PPC_PASTE(%1, %2)";"
+ "mfocrf %0, 0x80;"
+ : "=r" (cr)
+ : "b" (offset), "b" (paste_address)
+ : "memory", "cr0");
+
+ return (cr >> CR0_SHIFT) & CR0_MASK;
+}
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 3f48f6df1cf3..8864065eba22 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -113,7 +113,6 @@ static ssize_t pnv_eeh_ei_write(struct file *filp,
size_t count, loff_t *ppos)
{
struct pci_controller *hose = filp->private_data;
- struct eeh_dev *edev;
struct eeh_pe *pe;
int pe_no, type, func;
unsigned long addr, mask;
@@ -135,13 +134,7 @@ static ssize_t pnv_eeh_ei_write(struct file *filp,
return -EINVAL;
/* Retrieve PE */
- edev = kzalloc(sizeof(*edev), GFP_KERNEL);
- if (!edev)
- return -ENOMEM;
- edev->phb = hose;
- edev->pe_config_addr = pe_no;
- pe = eeh_pe_get(edev);
- kfree(edev);
+ pe = eeh_pe_get(hose, pe_no, 0);
if (!pe)
return -ENODEV;
@@ -359,6 +352,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
uint32_t pcie_flags;
int ret;
+ int config_addr = (pdn->busno << 8) | (pdn->devfn);
/*
* When probing the root bridge, which doesn't have any
@@ -393,8 +387,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
}
}
- edev->config_addr = (pdn->busno << 8) | (pdn->devfn);
- edev->pe_config_addr = phb->ioda.pe_rmap[edev->config_addr];
+ edev->pe_config_addr = phb->ioda.pe_rmap[config_addr];
/* Create PE */
ret = eeh_add_to_parent_pe(edev);
@@ -933,7 +926,6 @@ void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type,
int pos, u16 mask)
{
- struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
int i, status = 0;
/* Wait for Transaction Pending bit to be cleared */
@@ -947,7 +939,7 @@ static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type,
pr_warn("%s: Pending transaction while issuing %sFLR to %04x:%02x:%02x.%01x\n",
__func__, type,
- edev->phb->global_number, pdn->busno,
+ pdn->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
}
@@ -1381,7 +1373,6 @@ static int pnv_eeh_get_pe(struct pci_controller *hose,
struct pnv_phb *phb = hose->private_data;
struct pnv_ioda_pe *pnv_pe;
struct eeh_pe *dev_pe;
- struct eeh_dev edev;
/*
* If PHB supports compound PE, to fetch
@@ -1397,10 +1388,7 @@ static int pnv_eeh_get_pe(struct pci_controller *hose,
}
/* Find the PE according to PE# */
- memset(&edev, 0, sizeof(struct eeh_dev));
- edev.phb = hose;
- edev.pe_config_addr = pe_no;
- dev_pe = eeh_pe_get(&edev);
+ dev_pe = eeh_pe_get(hose, pe_no, 0);
if (!dev_pe)
return -EEXIST;
@@ -1711,6 +1699,7 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
struct pnv_phb *phb;
s64 ret;
+ int config_addr = (pdn->busno << 8) | (pdn->devfn);
if (!edev)
return -EEXIST;
@@ -1725,14 +1714,14 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
if (edev->physfn) {
ret = pnv_eeh_restore_vf_config(pdn);
} else {
- phb = edev->phb->private_data;
+ phb = pdn->phb->private_data;
ret = opal_pci_reinit(phb->opal_id,
- OPAL_REINIT_PCI_DEV, edev->config_addr);
+ OPAL_REINIT_PCI_DEV, config_addr);
}
if (ret) {
pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
- __func__, edev->config_addr, ret);
+ __func__, config_addr, ret);
return -EIO;
}
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index a553aeea7af6..9f59041a172b 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -69,7 +69,7 @@ static int pnv_save_sprs_for_deep_states(void)
* all cpus at boot. Get these reg values of current cpu and use the
* same across all cpus.
*/
- uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
+ uint64_t lpcr_val = mfspr(SPRN_LPCR);
uint64_t hid0_val = mfspr(SPRN_HID0);
uint64_t hid1_val = mfspr(SPRN_HID1);
uint64_t hid4_val = mfspr(SPRN_HID4);
@@ -388,6 +388,14 @@ void power9_idle(void)
}
#ifdef CONFIG_HOTPLUG_CPU
+static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
+{
+ u64 pir = get_hard_smp_processor_id(cpu);
+
+ mtspr(SPRN_LPCR, lpcr_val);
+ opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
+}
+
/*
* pnv_cpu_offline: A function that puts the CPU into the deepest
* available platform idle state on a CPU-Offline.
@@ -397,6 +405,20 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
{
unsigned long srr1;
u32 idle_states = pnv_get_supported_cpuidle_states();
+ u64 lpcr_val;
+
+ /*
+ * We don't want to take decrementer interrupts while we are
+ * offline, so clear LPCR:PECE1. We keep PECE2 (and
+ * LPCR_PECE_HVEE on P9) enabled as to let IPIs in.
+ *
+ * If the CPU gets woken up by a special wakeup, ensure that
+ * the SLW engine sets LPCR with decrementer bit cleared, else
+ * the CPU will come back to the kernel due to a spurious
+ * wakeup.
+ */
+ lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
+ pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
__ppc64_runlatch_off();
@@ -428,6 +450,16 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
__ppc64_runlatch_on();
+ /*
+ * Re-enable decrementer interrupts in LPCR.
+ *
+ * Further, we want stop states to be woken up by decrementer
+ * for non-hotplug cases. So program the LPCR via stop api as
+ * well.
+ */
+ lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1;
+ pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
+
return srr1;
}
#endif
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
new file mode 100644
index 000000000000..de470caf0784
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (C) IBM Corporation, 2014, 2017
+ * Anton Blanchard, Rashmica Gupta.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "memtrace: " fmt
+
+#include <linux/bitops.h>
+#include <linux/string.h>
+#include <linux/memblock.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <linux/memory.h>
+#include <linux/memory_hotplug.h>
+#include <asm/machdep.h>
+#include <asm/debugfs.h>
+
+/* This enables us to keep track of the memory removed from each node. */
+struct memtrace_entry {
+ void *mem;
+ u64 start;
+ u64 size;
+ u32 nid;
+ struct dentry *dir;
+ char name[16];
+};
+
+static u64 memtrace_size;
+
+static struct memtrace_entry *memtrace_array;
+static unsigned int memtrace_array_nr;
+
+
+static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ struct memtrace_entry *ent = filp->private_data;
+
+ return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size);
+}
+
+static bool valid_memtrace_range(struct memtrace_entry *dev,
+ unsigned long start, unsigned long size)
+{
+ if ((start >= dev->start) &&
+ ((start + size) <= (dev->start + dev->size)))
+ return true;
+
+ return false;
+}
+
+static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ unsigned long size = vma->vm_end - vma->vm_start;
+ struct memtrace_entry *dev = filp->private_data;
+
+ if (!valid_memtrace_range(dev, vma->vm_pgoff << PAGE_SHIFT, size))
+ return -EINVAL;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ if (remap_pfn_range(vma, vma->vm_start,
+ vma->vm_pgoff + (dev->start >> PAGE_SHIFT),
+ size, vma->vm_page_prot))
+ return -EAGAIN;
+
+ return 0;
+}
+
+static const struct file_operations memtrace_fops = {
+ .llseek = default_llseek,
+ .read = memtrace_read,
+ .mmap = memtrace_mmap,
+ .open = simple_open,
+};
+
+static void flush_memory_region(u64 base, u64 size)
+{
+ unsigned long line_size = ppc64_caches.l1d.size;
+ u64 end = base + size;
+ u64 addr;
+
+ base = round_down(base, line_size);
+ end = round_up(end, line_size);
+
+ for (addr = base; addr < end; addr += line_size)
+ asm volatile("dcbf 0,%0" : "=r" (addr) :: "memory");
+}
+
+static int check_memblock_online(struct memory_block *mem, void *arg)
+{
+ if (mem->state != MEM_ONLINE)
+ return -1;
+
+ return 0;
+}
+
+static int change_memblock_state(struct memory_block *mem, void *arg)
+{
+ unsigned long state = (unsigned long)arg;
+
+ mem->state = state;
+
+ return 0;
+}
+
+static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
+{
+ u64 end_pfn = start_pfn + nr_pages - 1;
+
+ if (walk_memory_range(start_pfn, end_pfn, NULL,
+ check_memblock_online))
+ return false;
+
+ walk_memory_range(start_pfn, end_pfn, (void *)MEM_GOING_OFFLINE,
+ change_memblock_state);
+
+ if (offline_pages(start_pfn, nr_pages)) {
+ walk_memory_range(start_pfn, end_pfn, (void *)MEM_ONLINE,
+ change_memblock_state);
+ return false;
+ }
+
+ walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE,
+ change_memblock_state);
+
+ /* RCU grace period? */
+ flush_memory_region((u64)__va(start_pfn << PAGE_SHIFT),
+ nr_pages << PAGE_SHIFT);
+
+ lock_device_hotplug();
+ remove_memory(nid, start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
+ unlock_device_hotplug();
+
+ return true;
+}
+
+static u64 memtrace_alloc_node(u32 nid, u64 size)
+{
+ u64 start_pfn, end_pfn, nr_pages;
+ u64 base_pfn;
+
+ if (!NODE_DATA(nid) || !node_spanned_pages(nid))
+ return 0;
+
+ start_pfn = node_start_pfn(nid);
+ end_pfn = node_end_pfn(nid);
+ nr_pages = size >> PAGE_SHIFT;
+
+ /* Trace memory needs to be aligned to the size */
+ end_pfn = round_down(end_pfn - nr_pages, nr_pages);
+
+ for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
+ if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true)
+ return base_pfn << PAGE_SHIFT;
+ }
+
+ return 0;
+}
+
+static int memtrace_init_regions_runtime(u64 size)
+{
+ u32 nid;
+ u64 m;
+
+ memtrace_array = kcalloc(num_online_nodes(),
+ sizeof(struct memtrace_entry), GFP_KERNEL);
+ if (!memtrace_array) {
+ pr_err("Failed to allocate memtrace_array\n");
+ return -EINVAL;
+ }
+
+ for_each_online_node(nid) {
+ m = memtrace_alloc_node(nid, size);
+
+ /*
+ * A node might not have any local memory, so warn but
+ * continue on.
+ */
+ if (!m) {
+ pr_err("Failed to allocate trace memory on node %d\n", nid);
+ continue;
+ }
+
+ pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m);
+
+ memtrace_array[memtrace_array_nr].start = m;
+ memtrace_array[memtrace_array_nr].size = size;
+ memtrace_array[memtrace_array_nr].nid = nid;
+ memtrace_array_nr++;
+ }
+
+ return 0;
+}
+
+static struct dentry *memtrace_debugfs_dir;
+
+static int memtrace_init_debugfs(void)
+{
+ int ret = 0;
+ int i;
+
+ for (i = 0; i < memtrace_array_nr; i++) {
+ struct dentry *dir;
+ struct memtrace_entry *ent = &memtrace_array[i];
+
+ ent->mem = ioremap(ent->start, ent->size);
+ /* Warn but continue on */
+ if (!ent->mem) {
+ pr_err("Failed to map trace memory at 0x%llx\n",
+ ent->start);
+ ret = -1;
+ continue;
+ }
+
+ snprintf(ent->name, 16, "%08x", ent->nid);
+ dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir);
+ if (!dir)
+ return -1;
+
+ ent->dir = dir;
+ debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops);
+ debugfs_create_x64("start", 0400, dir, &ent->start);
+ debugfs_create_x64("size", 0400, dir, &ent->size);
+ }
+
+ return ret;
+}
+
+static int memtrace_enable_set(void *data, u64 val)
+{
+ if (memtrace_size)
+ return -EINVAL;
+
+ if (!val)
+ return -EINVAL;
+
+ /* Make sure size is aligned to a memory block */
+ if (val & (memory_block_size_bytes() - 1))
+ return -EINVAL;
+
+ if (memtrace_init_regions_runtime(val))
+ return -EINVAL;
+
+ if (memtrace_init_debugfs())
+ return -EINVAL;
+
+ memtrace_size = val;
+
+ return 0;
+}
+
+static int memtrace_enable_get(void *data, u64 *val)
+{
+ *val = memtrace_size;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get,
+ memtrace_enable_set, "0x%016llx\n");
+
+static int memtrace_init(void)
+{
+ memtrace_debugfs_dir = debugfs_create_dir("memtrace",
+ powerpc_debugfs_root);
+ if (!memtrace_debugfs_dir)
+ return -1;
+
+ debugfs_create_file("enable", 0600, memtrace_debugfs_dir,
+ NULL, &memtrace_init_fops);
+
+ return 0;
+}
+machine_device_initcall(powernv, memtrace_init);
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index b5d960d6db3d..2cb6cbea4b3b 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -546,6 +546,12 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
unsigned long pid = npu_context->mm->context.id;
/*
+ * Unfortunately the nest mmu does not support flushing specific
+ * addresses so we have to flush the whole mm.
+ */
+ flush_tlb_mm(npu_context->mm);
+
+ /*
* Loop over all the NPUs this process is active on and launch
* an invalidate.
*/
@@ -576,12 +582,6 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
}
}
- /*
- * Unfortunately the nest mmu does not support flushing specific
- * addresses so we have to flush the whole mm.
- */
- flush_tlb_mm(npu_context->mm);
-
mmio_invalidate_wait(mmio_atsd_reg, flush);
if (flush)
/* Wait for the flush to complete */
@@ -614,15 +614,6 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
mmio_invalidate(npu_context, 1, address, true);
}
-static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long address)
-{
- struct npu_context *npu_context = mn_to_npu_context(mn);
-
- mmio_invalidate(npu_context, 1, address, true);
-}
-
static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end)
@@ -640,7 +631,6 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
.release = pnv_npu2_mn_release,
.change_pte = pnv_npu2_mn_change_pte,
- .invalidate_page = pnv_npu2_mn_invalidate_page,
.invalidate_range = pnv_npu2_mn_invalidate_range,
};
diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c
index 83bebeec0fea..cf33769a7b72 100644
--- a/arch/powerpc/platforms/powernv/opal-async.c
+++ b/arch/powerpc/platforms/powernv/opal-async.c
@@ -171,8 +171,8 @@ int __init opal_async_comp_init(void)
async = of_get_property(opal_node, "opal-msg-async-num", NULL);
if (!async) {
- pr_err("%s: %s has no opal-msg-async-num\n",
- __func__, opal_node->full_name);
+ pr_err("%s: %pOF has no opal-msg-async-num\n",
+ __func__, opal_node);
err = -ENOENT;
goto out_opal_node;
}
diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c
index 4ec6219287fc..2fa3ac80cb4e 100644
--- a/arch/powerpc/platforms/powernv/opal-flash.c
+++ b/arch/powerpc/platforms/powernv/opal-flash.c
@@ -520,7 +520,7 @@ out:
* update_flash : Flash new firmware image
*
*/
-static struct bin_attribute image_data_attr = {
+static const struct bin_attribute image_data_attr = {
.attr = {.name = "image", .mode = 0200},
.size = MAX_IMAGE_SIZE, /* Limit image size */
.write = image_data_write,
diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
index 88f3c61eec95..d78fed728cdf 100644
--- a/arch/powerpc/platforms/powernv/opal-hmi.c
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -30,6 +30,8 @@
#include <asm/cputable.h>
#include <asm/machdep.h>
+#include "powernv.h"
+
static int opal_hmi_handler_nb_init;
struct OpalHmiEvtNode {
struct list_head list;
@@ -267,8 +269,6 @@ static void hmi_event_handler(struct work_struct *work)
spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
if (unrecoverable) {
- int ret;
-
/* Pull all HMI events from OPAL before we panic. */
while (opal_get_msg(__pa(&msg), sizeof(msg)) == OPAL_SUCCESS) {
u32 type;
@@ -284,23 +284,7 @@ static void hmi_event_handler(struct work_struct *work)
print_hmi_event_info(hmi_evt);
}
- /*
- * Unrecoverable HMI exception. We need to inform BMC/OCC
- * about this error so that it can collect relevant data
- * for error analysis before rebooting.
- */
- ret = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR,
- "Unrecoverable HMI exception");
- if (ret == OPAL_UNSUPPORTED) {
- pr_emerg("Reboot type %d not supported\n",
- OPAL_REBOOT_PLATFORM_ERROR);
- }
-
- /*
- * Fall through and panic if opal_cec_reboot2() returns
- * OPAL_UNSUPPORTED.
- */
- panic("Unrecoverable HMI exception");
+ pnv_platform_error_reboot(NULL, "Unrecoverable HMI exception");
}
}
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
new file mode 100644
index 000000000000..21f6531fae20
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -0,0 +1,226 @@
+/*
+ * OPAL IMC interface detection driver
+ * Supported on POWERNV platform
+ *
+ * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
+ * (C) 2017 Anju T Sudhakar, IBM Corporation.
+ * (C) 2017 Hemant K Shaw, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or later version.
+ */
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/crash_dump.h>
+#include <asm/opal.h>
+#include <asm/io.h>
+#include <asm/imc-pmu.h>
+#include <asm/cputhreads.h>
+
+/*
+ * imc_get_mem_addr_nest: Function to get nest counter memory region
+ * for each chip
+ */
+static int imc_get_mem_addr_nest(struct device_node *node,
+ struct imc_pmu *pmu_ptr,
+ u32 offset)
+{
+ int nr_chips = 0, i;
+ u64 *base_addr_arr, baddr;
+ u32 *chipid_arr;
+
+ nr_chips = of_property_count_u32_elems(node, "chip-id");
+ if (nr_chips <= 0)
+ return -ENODEV;
+
+ base_addr_arr = kcalloc(nr_chips, sizeof(u64), GFP_KERNEL);
+ if (!base_addr_arr)
+ return -ENOMEM;
+
+ chipid_arr = kcalloc(nr_chips, sizeof(u32), GFP_KERNEL);
+ if (!chipid_arr)
+ return -ENOMEM;
+
+ if (of_property_read_u32_array(node, "chip-id", chipid_arr, nr_chips))
+ goto error;
+
+ if (of_property_read_u64_array(node, "base-addr", base_addr_arr,
+ nr_chips))
+ goto error;
+
+ pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(struct imc_mem_info),
+ GFP_KERNEL);
+ if (!pmu_ptr->mem_info)
+ goto error;
+
+ for (i = 0; i < nr_chips; i++) {
+ pmu_ptr->mem_info[i].id = chipid_arr[i];
+ baddr = base_addr_arr[i] + offset;
+ pmu_ptr->mem_info[i].vbase = phys_to_virt(baddr);
+ }
+
+ pmu_ptr->imc_counter_mmaped = true;
+ kfree(base_addr_arr);
+ kfree(chipid_arr);
+ return 0;
+
+error:
+ kfree(pmu_ptr->mem_info);
+ kfree(base_addr_arr);
+ kfree(chipid_arr);
+ return -1;
+}
+
+/*
+ * imc_pmu_create : Takes the parent device which is the pmu unit, pmu_index
+ * and domain as the inputs.
+ * Allocates memory for the struct imc_pmu, sets up its domain, size and offsets
+ */
+static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
+{
+ int ret = 0;
+ struct imc_pmu *pmu_ptr;
+ u32 offset;
+
+ /* memory for pmu */
+ pmu_ptr = kzalloc(sizeof(struct imc_pmu), GFP_KERNEL);
+ if (!pmu_ptr)
+ return -ENOMEM;
+
+ /* Set the domain */
+ pmu_ptr->domain = domain;
+
+ ret = of_property_read_u32(parent, "size", &pmu_ptr->counter_mem_size);
+ if (ret) {
+ ret = -EINVAL;
+ goto free_pmu;
+ }
+
+ if (!of_property_read_u32(parent, "offset", &offset)) {
+ if (imc_get_mem_addr_nest(parent, pmu_ptr, offset)) {
+ ret = -EINVAL;
+ goto free_pmu;
+ }
+ }
+
+ /* Function to register IMC pmu */
+ ret = init_imc_pmu(parent, pmu_ptr, pmu_index);
+ if (ret)
+ pr_err("IMC PMU %s Register failed\n", pmu_ptr->pmu.name);
+
+ return 0;
+
+free_pmu:
+ kfree(pmu_ptr);
+ return ret;
+}
+
+static void disable_nest_pmu_counters(void)
+{
+ int nid, cpu;
+ const struct cpumask *l_cpumask;
+
+ get_online_cpus();
+ for_each_online_node(nid) {
+ l_cpumask = cpumask_of_node(nid);
+ cpu = cpumask_first(l_cpumask);
+ opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
+ get_hard_smp_processor_id(cpu));
+ }
+ put_online_cpus();
+}
+
+static void disable_core_pmu_counters(void)
+{
+ cpumask_t cores_map;
+ int cpu, rc;
+
+ get_online_cpus();
+ /* Disable the IMC Core functions */
+ cores_map = cpu_online_cores_map();
+ for_each_cpu(cpu, &cores_map) {
+ rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+ get_hard_smp_processor_id(cpu));
+ if (rc)
+ pr_err("%s: Failed to stop Core (cpu = %d)\n",
+ __FUNCTION__, cpu);
+ }
+ put_online_cpus();
+}
+
+static int opal_imc_counters_probe(struct platform_device *pdev)
+{
+ struct device_node *imc_dev = pdev->dev.of_node;
+ int pmu_count = 0, domain;
+ u32 type;
+
+ /*
+ * Check whether this is kdump kernel. If yes, force the engines to
+ * stop and return.
+ */
+ if (is_kdump_kernel()) {
+ disable_nest_pmu_counters();
+ disable_core_pmu_counters();
+ return -ENODEV;
+ }
+
+ for_each_compatible_node(imc_dev, NULL, IMC_DTB_UNIT_COMPAT) {
+ if (of_property_read_u32(imc_dev, "type", &type)) {
+ pr_warn("IMC Device without type property\n");
+ continue;
+ }
+
+ switch (type) {
+ case IMC_TYPE_CHIP:
+ domain = IMC_DOMAIN_NEST;
+ break;
+ case IMC_TYPE_CORE:
+ domain =IMC_DOMAIN_CORE;
+ break;
+ case IMC_TYPE_THREAD:
+ domain = IMC_DOMAIN_THREAD;
+ break;
+ default:
+ pr_warn("IMC Unknown Device type \n");
+ domain = -1;
+ break;
+ }
+
+ if (!imc_pmu_create(imc_dev, pmu_count, domain))
+ pmu_count++;
+ }
+
+ return 0;
+}
+
+static void opal_imc_counters_shutdown(struct platform_device *pdev)
+{
+ /*
+ * Function only stops the engines which is bare minimum.
+ * TODO: Need to handle proper memory cleanup and pmu
+ * unregister.
+ */
+ disable_nest_pmu_counters();
+ disable_core_pmu_counters();
+}
+
+static const struct of_device_id opal_imc_match[] = {
+ { .compatible = IMC_DTB_COMPAT },
+ {},
+};
+
+static struct platform_driver opal_imc_driver = {
+ .driver = {
+ .name = "opal-imc-counters",
+ .of_match_table = opal_imc_match,
+ },
+ .probe = opal_imc_counters_probe,
+ .shutdown = opal_imc_counters_shutdown,
+};
+
+builtin_platform_driver(opal_imc_driver);
diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c
new file mode 100644
index 000000000000..badb29bde93f
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-powercap.c
@@ -0,0 +1,244 @@
+/*
+ * PowerNV OPAL Powercap interface
+ *
+ * Copyright 2017 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "opal-powercap: " fmt
+
+#include <linux/of.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+
+#include <asm/opal.h>
+
+DEFINE_MUTEX(powercap_mutex);
+
+static struct kobject *powercap_kobj;
+
+struct powercap_attr {
+ u32 handle;
+ struct kobj_attribute attr;
+};
+
+static struct pcap {
+ struct attribute_group pg;
+ struct powercap_attr *pattrs;
+} *pcaps;
+
+static ssize_t powercap_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct powercap_attr *pcap_attr = container_of(attr,
+ struct powercap_attr, attr);
+ struct opal_msg msg;
+ u32 pcap;
+ int ret, token;
+
+ token = opal_async_get_token_interruptible();
+ if (token < 0) {
+ pr_devel("Failed to get token\n");
+ return token;
+ }
+
+ ret = mutex_lock_interruptible(&powercap_mutex);
+ if (ret)
+ goto out_token;
+
+ ret = opal_get_powercap(pcap_attr->handle, token, (u32 *)__pa(&pcap));
+ switch (ret) {
+ case OPAL_ASYNC_COMPLETION:
+ ret = opal_async_wait_response(token, &msg);
+ if (ret) {
+ pr_devel("Failed to wait for the async response\n");
+ ret = -EIO;
+ goto out;
+ }
+ ret = opal_error_code(opal_get_async_rc(msg));
+ if (!ret) {
+ ret = sprintf(buf, "%u\n", be32_to_cpu(pcap));
+ if (ret < 0)
+ ret = -EIO;
+ }
+ break;
+ case OPAL_SUCCESS:
+ ret = sprintf(buf, "%u\n", be32_to_cpu(pcap));
+ if (ret < 0)
+ ret = -EIO;
+ break;
+ default:
+ ret = opal_error_code(ret);
+ }
+
+out:
+ mutex_unlock(&powercap_mutex);
+out_token:
+ opal_async_release_token(token);
+ return ret;
+}
+
+static ssize_t powercap_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct powercap_attr *pcap_attr = container_of(attr,
+ struct powercap_attr, attr);
+ struct opal_msg msg;
+ u32 pcap;
+ int ret, token;
+
+ ret = kstrtoint(buf, 0, &pcap);
+ if (ret)
+ return ret;
+
+ token = opal_async_get_token_interruptible();
+ if (token < 0) {
+ pr_devel("Failed to get token\n");
+ return token;
+ }
+
+ ret = mutex_lock_interruptible(&powercap_mutex);
+ if (ret)
+ goto out_token;
+
+ ret = opal_set_powercap(pcap_attr->handle, token, pcap);
+ switch (ret) {
+ case OPAL_ASYNC_COMPLETION:
+ ret = opal_async_wait_response(token, &msg);
+ if (ret) {
+ pr_devel("Failed to wait for the async response\n");
+ ret = -EIO;
+ goto out;
+ }
+ ret = opal_error_code(opal_get_async_rc(msg));
+ if (!ret)
+ ret = count;
+ break;
+ case OPAL_SUCCESS:
+ ret = count;
+ break;
+ default:
+ ret = opal_error_code(ret);
+ }
+
+out:
+ mutex_unlock(&powercap_mutex);
+out_token:
+ opal_async_release_token(token);
+ return ret;
+}
+
+static void powercap_add_attr(int handle, const char *name,
+ struct powercap_attr *attr)
+{
+ attr->handle = handle;
+ sysfs_attr_init(&attr->attr.attr);
+ attr->attr.attr.name = name;
+ attr->attr.attr.mode = 0444;
+ attr->attr.show = powercap_show;
+}
+
+void __init opal_powercap_init(void)
+{
+ struct device_node *powercap, *node;
+ int i = 0;
+
+ powercap = of_find_compatible_node(NULL, NULL, "ibm,opal-powercap");
+ if (!powercap) {
+ pr_devel("Powercap node not found\n");
+ return;
+ }
+
+ pcaps = kcalloc(of_get_child_count(powercap), sizeof(*pcaps),
+ GFP_KERNEL);
+ if (!pcaps)
+ return;
+
+ powercap_kobj = kobject_create_and_add("powercap", opal_kobj);
+ if (!powercap_kobj) {
+ pr_warn("Failed to create powercap kobject\n");
+ goto out_pcaps;
+ }
+
+ i = 0;
+ for_each_child_of_node(powercap, node) {
+ u32 cur, min, max;
+ int j = 0;
+ bool has_cur = false, has_min = false, has_max = false;
+
+ if (!of_property_read_u32(node, "powercap-min", &min)) {
+ j++;
+ has_min = true;
+ }
+
+ if (!of_property_read_u32(node, "powercap-max", &max)) {
+ j++;
+ has_max = true;
+ }
+
+ if (!of_property_read_u32(node, "powercap-current", &cur)) {
+ j++;
+ has_cur = true;
+ }
+
+ pcaps[i].pattrs = kcalloc(j, sizeof(struct powercap_attr),
+ GFP_KERNEL);
+ if (!pcaps[i].pattrs)
+ goto out_pcaps_pattrs;
+
+ pcaps[i].pg.attrs = kcalloc(j + 1, sizeof(struct attribute *),
+ GFP_KERNEL);
+ if (!pcaps[i].pg.attrs) {
+ kfree(pcaps[i].pattrs);
+ goto out_pcaps_pattrs;
+ }
+
+ j = 0;
+ pcaps[i].pg.name = node->name;
+ if (has_min) {
+ powercap_add_attr(min, "powercap-min",
+ &pcaps[i].pattrs[j]);
+ pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr;
+ j++;
+ }
+
+ if (has_max) {
+ powercap_add_attr(max, "powercap-max",
+ &pcaps[i].pattrs[j]);
+ pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr;
+ j++;
+ }
+
+ if (has_cur) {
+ powercap_add_attr(cur, "powercap-current",
+ &pcaps[i].pattrs[j]);
+ pcaps[i].pattrs[j].attr.attr.mode |= 0220;
+ pcaps[i].pattrs[j].attr.store = powercap_store;
+ pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr;
+ j++;
+ }
+
+ if (sysfs_create_group(powercap_kobj, &pcaps[i].pg)) {
+ pr_warn("Failed to create powercap attribute group %s\n",
+ pcaps[i].pg.name);
+ goto out_pcaps_pattrs;
+ }
+ i++;
+ }
+
+ return;
+
+out_pcaps_pattrs:
+ while (--i >= 0) {
+ kfree(pcaps[i].pattrs);
+ kfree(pcaps[i].pg.attrs);
+ }
+ kobject_put(powercap_kobj);
+out_pcaps:
+ kfree(pcaps);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
index 2d6ee1c5ad85..de4dd09f4a15 100644
--- a/arch/powerpc/platforms/powernv/opal-prd.c
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -241,15 +241,9 @@ static ssize_t opal_prd_write(struct file *file, const char __user *buf,
size = be16_to_cpu(hdr.size);
- msg = kmalloc(size, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- rc = copy_from_user(msg, buf, size);
- if (rc) {
- size = -EFAULT;
- goto out_free;
- }
+ msg = memdup_user(buf, size);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
rc = opal_prd_msg(msg);
if (rc) {
@@ -257,7 +251,6 @@ static ssize_t opal_prd_write(struct file *file, const char __user *buf,
size = -EIO;
}
-out_free:
kfree(msg);
return size;
diff --git a/arch/powerpc/platforms/powernv/opal-psr.c b/arch/powerpc/platforms/powernv/opal-psr.c
new file mode 100644
index 000000000000..7313b7fc9071
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-psr.c
@@ -0,0 +1,175 @@
+/*
+ * PowerNV OPAL Power-Shift-Ratio interface
+ *
+ * Copyright 2017 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "opal-psr: " fmt
+
+#include <linux/of.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+
+#include <asm/opal.h>
+
+DEFINE_MUTEX(psr_mutex);
+
+static struct kobject *psr_kobj;
+
+struct psr_attr {
+ u32 handle;
+ struct kobj_attribute attr;
+} *psr_attrs;
+
+static ssize_t psr_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct psr_attr *psr_attr = container_of(attr, struct psr_attr, attr);
+ struct opal_msg msg;
+ int psr, ret, token;
+
+ token = opal_async_get_token_interruptible();
+ if (token < 0) {
+ pr_devel("Failed to get token\n");
+ return token;
+ }
+
+ ret = mutex_lock_interruptible(&psr_mutex);
+ if (ret)
+ goto out_token;
+
+ ret = opal_get_power_shift_ratio(psr_attr->handle, token,
+ (u32 *)__pa(&psr));
+ switch (ret) {
+ case OPAL_ASYNC_COMPLETION:
+ ret = opal_async_wait_response(token, &msg);
+ if (ret) {
+ pr_devel("Failed to wait for the async response\n");
+ ret = -EIO;
+ goto out;
+ }
+ ret = opal_error_code(opal_get_async_rc(msg));
+ if (!ret) {
+ ret = sprintf(buf, "%u\n", be32_to_cpu(psr));
+ if (ret < 0)
+ ret = -EIO;
+ }
+ break;
+ case OPAL_SUCCESS:
+ ret = sprintf(buf, "%u\n", be32_to_cpu(psr));
+ if (ret < 0)
+ ret = -EIO;
+ break;
+ default:
+ ret = opal_error_code(ret);
+ }
+
+out:
+ mutex_unlock(&psr_mutex);
+out_token:
+ opal_async_release_token(token);
+ return ret;
+}
+
+static ssize_t psr_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct psr_attr *psr_attr = container_of(attr, struct psr_attr, attr);
+ struct opal_msg msg;
+ int psr, ret, token;
+
+ ret = kstrtoint(buf, 0, &psr);
+ if (ret)
+ return ret;
+
+ token = opal_async_get_token_interruptible();
+ if (token < 0) {
+ pr_devel("Failed to get token\n");
+ return token;
+ }
+
+ ret = mutex_lock_interruptible(&psr_mutex);
+ if (ret)
+ goto out_token;
+
+ ret = opal_set_power_shift_ratio(psr_attr->handle, token, psr);
+ switch (ret) {
+ case OPAL_ASYNC_COMPLETION:
+ ret = opal_async_wait_response(token, &msg);
+ if (ret) {
+ pr_devel("Failed to wait for the async response\n");
+ ret = -EIO;
+ goto out;
+ }
+ ret = opal_error_code(opal_get_async_rc(msg));
+ if (!ret)
+ ret = count;
+ break;
+ case OPAL_SUCCESS:
+ ret = count;
+ break;
+ default:
+ ret = opal_error_code(ret);
+ }
+
+out:
+ mutex_unlock(&psr_mutex);
+out_token:
+ opal_async_release_token(token);
+ return ret;
+}
+
+void __init opal_psr_init(void)
+{
+ struct device_node *psr, *node;
+ int i = 0;
+
+ psr = of_find_compatible_node(NULL, NULL,
+ "ibm,opal-power-shift-ratio");
+ if (!psr) {
+ pr_devel("Power-shift-ratio node not found\n");
+ return;
+ }
+
+ psr_attrs = kcalloc(of_get_child_count(psr), sizeof(struct psr_attr),
+ GFP_KERNEL);
+ if (!psr_attrs)
+ return;
+
+ psr_kobj = kobject_create_and_add("psr", opal_kobj);
+ if (!psr_kobj) {
+ pr_warn("Failed to create psr kobject\n");
+ goto out;
+ }
+
+ for_each_child_of_node(psr, node) {
+ if (of_property_read_u32(node, "handle",
+ &psr_attrs[i].handle))
+ goto out_kobj;
+
+ sysfs_attr_init(&psr_attrs[i].attr.attr);
+ if (of_property_read_string(node, "label",
+ &psr_attrs[i].attr.attr.name))
+ goto out_kobj;
+ psr_attrs[i].attr.attr.mode = 0664;
+ psr_attrs[i].attr.show = psr_show;
+ psr_attrs[i].attr.store = psr_store;
+ if (sysfs_create_file(psr_kobj, &psr_attrs[i].attr.attr)) {
+ pr_devel("Failed to create psr sysfs file %s\n",
+ psr_attrs[i].attr.attr.name);
+ goto out_kobj;
+ }
+ i++;
+ }
+
+ return;
+out_kobj:
+ kobject_put(psr_kobj);
+out:
+ kfree(psr_attrs);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
new file mode 100644
index 000000000000..7e5a235ebf76
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
@@ -0,0 +1,212 @@
+/*
+ * PowerNV OPAL Sensor-groups interface
+ *
+ * Copyright 2017 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "opal-sensor-groups: " fmt
+
+#include <linux/of.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+
+#include <asm/opal.h>
+
+DEFINE_MUTEX(sg_mutex);
+
+static struct kobject *sg_kobj;
+
+struct sg_attr {
+ u32 handle;
+ struct kobj_attribute attr;
+};
+
+static struct sensor_group {
+ char name[20];
+ struct attribute_group sg;
+ struct sg_attr *sgattrs;
+} *sgs;
+
+static ssize_t sg_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct sg_attr *sattr = container_of(attr, struct sg_attr, attr);
+ struct opal_msg msg;
+ u32 data;
+ int ret, token;
+
+ ret = kstrtoint(buf, 0, &data);
+ if (ret)
+ return ret;
+
+ if (data != 1)
+ return -EINVAL;
+
+ token = opal_async_get_token_interruptible();
+ if (token < 0) {
+ pr_devel("Failed to get token\n");
+ return token;
+ }
+
+ ret = mutex_lock_interruptible(&sg_mutex);
+ if (ret)
+ goto out_token;
+
+ ret = opal_sensor_group_clear(sattr->handle, token);
+ switch (ret) {
+ case OPAL_ASYNC_COMPLETION:
+ ret = opal_async_wait_response(token, &msg);
+ if (ret) {
+ pr_devel("Failed to wait for the async response\n");
+ ret = -EIO;
+ goto out;
+ }
+ ret = opal_error_code(opal_get_async_rc(msg));
+ if (!ret)
+ ret = count;
+ break;
+ case OPAL_SUCCESS:
+ ret = count;
+ break;
+ default:
+ ret = opal_error_code(ret);
+ }
+
+out:
+ mutex_unlock(&sg_mutex);
+out_token:
+ opal_async_release_token(token);
+ return ret;
+}
+
+static struct sg_ops_info {
+ int opal_no;
+ const char *attr_name;
+ ssize_t (*store)(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count);
+} ops_info[] = {
+ { OPAL_SENSOR_GROUP_CLEAR, "clear", sg_store },
+};
+
+static void add_attr(int handle, struct sg_attr *attr, int index)
+{
+ attr->handle = handle;
+ sysfs_attr_init(&attr->attr.attr);
+ attr->attr.attr.name = ops_info[index].attr_name;
+ attr->attr.attr.mode = 0220;
+ attr->attr.store = ops_info[index].store;
+}
+
+static int add_attr_group(const __be32 *ops, int len, struct sensor_group *sg,
+ u32 handle)
+{
+ int i, j;
+ int count = 0;
+
+ for (i = 0; i < len; i++)
+ for (j = 0; j < ARRAY_SIZE(ops_info); j++)
+ if (be32_to_cpu(ops[i]) == ops_info[j].opal_no) {
+ add_attr(handle, &sg->sgattrs[count], j);
+ sg->sg.attrs[count] =
+ &sg->sgattrs[count].attr.attr;
+ count++;
+ }
+
+ return sysfs_create_group(sg_kobj, &sg->sg);
+}
+
+static int get_nr_attrs(const __be32 *ops, int len)
+{
+ int i, j;
+ int nr_attrs = 0;
+
+ for (i = 0; i < len; i++)
+ for (j = 0; j < ARRAY_SIZE(ops_info); j++)
+ if (be32_to_cpu(ops[i]) == ops_info[j].opal_no)
+ nr_attrs++;
+
+ return nr_attrs;
+}
+
+void __init opal_sensor_groups_init(void)
+{
+ struct device_node *sg, *node;
+ int i = 0;
+
+ sg = of_find_compatible_node(NULL, NULL, "ibm,opal-sensor-group");
+ if (!sg) {
+ pr_devel("Sensor groups node not found\n");
+ return;
+ }
+
+ sgs = kcalloc(of_get_child_count(sg), sizeof(*sgs), GFP_KERNEL);
+ if (!sgs)
+ return;
+
+ sg_kobj = kobject_create_and_add("sensor_groups", opal_kobj);
+ if (!sg_kobj) {
+ pr_warn("Failed to create sensor group kobject\n");
+ goto out_sgs;
+ }
+
+ for_each_child_of_node(sg, node) {
+ const __be32 *ops;
+ u32 sgid, len, nr_attrs, chipid;
+
+ ops = of_get_property(node, "ops", &len);
+ if (!ops)
+ continue;
+
+ nr_attrs = get_nr_attrs(ops, len);
+ if (!nr_attrs)
+ continue;
+
+ sgs[i].sgattrs = kcalloc(nr_attrs, sizeof(struct sg_attr),
+ GFP_KERNEL);
+ if (!sgs[i].sgattrs)
+ goto out_sgs_sgattrs;
+
+ sgs[i].sg.attrs = kcalloc(nr_attrs + 1,
+ sizeof(struct attribute *),
+ GFP_KERNEL);
+
+ if (!sgs[i].sg.attrs) {
+ kfree(sgs[i].sgattrs);
+ goto out_sgs_sgattrs;
+ }
+
+ if (of_property_read_u32(node, "sensor-group-id", &sgid)) {
+ pr_warn("sensor-group-id property not found\n");
+ goto out_sgs_sgattrs;
+ }
+
+ if (!of_property_read_u32(node, "ibm,chip-id", &chipid))
+ sprintf(sgs[i].name, "%s%d", node->name, chipid);
+ else
+ sprintf(sgs[i].name, "%s", node->name);
+
+ sgs[i].sg.name = sgs[i].name;
+ if (add_attr_group(ops, len, &sgs[i], sgid)) {
+ pr_warn("Failed to create sensor attribute group %s\n",
+ sgs[i].sg.name);
+ goto out_sgs_sgattrs;
+ }
+ i++;
+ }
+
+ return;
+
+out_sgs_sgattrs:
+ while (--i >= 0) {
+ kfree(sgs[i].sgattrs);
+ kfree(sgs[i].sg.attrs);
+ }
+ kobject_put(sg_kobj);
+out_sgs:
+ kfree(sgs);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 4ca6c26a56d5..8c1ede2d3f7e 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -27,7 +27,7 @@
.globl opal_tracepoint_refcount
opal_tracepoint_refcount:
- .llong 0
+ .8byte 0
.section ".text"
@@ -310,3 +310,12 @@ OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP);
OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT);
OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT);
OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR);
+OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT);
+OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START);
+OPAL_CALL(opal_imc_counters_stop, OPAL_IMC_COUNTERS_STOP);
+OPAL_CALL(opal_pci_set_p2p, OPAL_PCI_SET_P2P);
+OPAL_CALL(opal_get_powercap, OPAL_GET_POWERCAP);
+OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP);
+OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO);
+OPAL_CALL(opal_set_power_shift_ratio, OPAL_SET_POWER_SHIFT_RATIO);
+OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR);
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
index 28651fb25417..81c0a943dea9 100644
--- a/arch/powerpc/platforms/powernv/opal-xscom.c
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -36,14 +36,14 @@ static scom_map_t opal_scom_map(struct device_node *dev, u64 reg, u64 count)
const __be32 *gcid;
if (!of_get_property(dev, "scom-controller", NULL)) {
- pr_err("%s: device %s is not a SCOM controller\n",
- __func__, dev->full_name);
+ pr_err("%s: device %pOF is not a SCOM controller\n",
+ __func__, dev);
return SCOM_MAP_INVALID;
}
gcid = of_get_property(dev, "ibm,chip-id", NULL);
if (!gcid) {
- pr_err("%s: device %s has no ibm,chip-id\n",
- __func__, dev->full_name);
+ pr_err("%s: device %pOF has no ibm,chip-id\n",
+ __func__, dev);
return SCOM_MAP_INVALID;
}
m = kmalloc(sizeof(struct opal_scom_map), GFP_KERNEL);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index cad6b57ce494..65c79ecf5a4d 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -16,6 +16,7 @@
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
+#include <linux/of_address.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/slab.h>
@@ -25,11 +26,17 @@
#include <linux/memblock.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
+#include <linux/printk.h>
+#include <linux/kmsg_dump.h>
+#include <linux/console.h>
+#include <linux/sched/debug.h>
#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/firmware.h>
#include <asm/mce.h>
+#include <asm/imc-pmu.h>
+#include <asm/bug.h>
#include "powernv.h"
@@ -162,12 +169,9 @@ int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
sizeof(struct mcheck_recoverable_range);
/*
- * Allocate a buffer to hold the MC recoverable ranges. We would be
- * accessing them in real mode, hence it needs to be within
- * RMO region.
+ * Allocate a buffer to hold the MC recoverable ranges.
*/
- mc_recoverable_range =__va(memblock_alloc_base(size, __alignof__(u64),
- ppc64_rma_size));
+ mc_recoverable_range =__va(memblock_alloc(size, __alignof__(u64)));
memset(mc_recoverable_range, 0, size);
for (i = 0; i < mc_recoverable_range_len; i++) {
@@ -422,24 +426,88 @@ static int opal_recover_mce(struct pt_regs *regs,
/* Fatal machine check */
pr_err("Machine check interrupt is fatal\n");
recovered = 0;
- } else if ((evt->severity == MCE_SEV_ERROR_SYNC) &&
- (user_mode(regs) && !is_global_init(current))) {
+ }
+
+ if (!recovered && evt->severity == MCE_SEV_ERROR_SYNC) {
/*
- * For now, kill the task if we have received exception when
- * in userspace.
+ * Try to kill processes if we get a synchronous machine check
+ * (e.g., one caused by execution of this instruction). This
+ * will devolve into a panic if we try to kill init or are in
+ * an interrupt etc.
*
* TODO: Queue up this address for hwpoisioning later.
+ * TODO: This is not quite right for d-side machine
+ * checks ->nip is not necessarily the important
+ * address.
*/
- _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
- recovered = 1;
+ if ((user_mode(regs))) {
+ _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+ recovered = 1;
+ } else if (die_will_crash()) {
+ /*
+ * die() would kill the kernel, so better to go via
+ * the platform reboot code that will log the
+ * machine check.
+ */
+ recovered = 0;
+ } else {
+ die("Machine check", regs, SIGBUS);
+ recovered = 1;
+ }
}
+
return recovered;
}
+void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
+{
+ /*
+ * This is mostly taken from kernel/panic.c, but tries to do
+ * relatively minimal work. Don't use delay functions (TB may
+ * be broken), don't crash dump (need to set a firmware log),
+ * don't run notifiers. We do want to get some information to
+ * Linux console.
+ */
+ console_verbose();
+ bust_spinlocks(1);
+ pr_emerg("Hardware platform error: %s\n", msg);
+ if (regs)
+ show_regs(regs);
+ smp_send_stop();
+ printk_safe_flush_on_panic();
+ kmsg_dump(KMSG_DUMP_PANIC);
+ bust_spinlocks(0);
+ debug_locks_off();
+ console_flush_on_panic();
+
+ /*
+ * Don't bother to shut things down because this will
+ * xstop the system.
+ */
+ if (opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, msg)
+ == OPAL_UNSUPPORTED) {
+ pr_emerg("Reboot type %d not supported for %s\n",
+ OPAL_REBOOT_PLATFORM_ERROR, msg);
+ }
+
+ /*
+ * We reached here. There can be three possibilities:
+ * 1. We are running on a firmware level that do not support
+ * opal_cec_reboot2()
+ * 2. We are running on a firmware level that do not support
+ * OPAL_REBOOT_PLATFORM_ERROR reboot type.
+ * 3. We are running on FSP based system that does not need
+ * opal to trigger checkstop explicitly for error analysis.
+ * The FSP PRD component would have already got notified
+ * about this error through other channels.
+ */
+
+ ppc_md.restart(NULL);
+}
+
int opal_machine_check(struct pt_regs *regs)
{
struct machine_check_event evt;
- int ret;
if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
return 0;
@@ -455,43 +523,7 @@ int opal_machine_check(struct pt_regs *regs)
if (opal_recover_mce(regs, &evt))
return 1;
- /*
- * Unrecovered machine check, we are heading to panic path.
- *
- * We may have hit this MCE in very early stage of kernel
- * initialization even before opal-prd has started running. If
- * this is the case then this MCE error may go un-noticed or
- * un-analyzed if we go down panic path. We need to inform
- * BMC/OCC about this error so that they can collect relevant
- * data for error analysis before rebooting.
- * Use opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR) to do so.
- * This function may not return on BMC based system.
- */
- ret = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR,
- "Unrecoverable Machine Check exception");
- if (ret == OPAL_UNSUPPORTED) {
- pr_emerg("Reboot type %d not supported\n",
- OPAL_REBOOT_PLATFORM_ERROR);
- }
-
- /*
- * We reached here. There can be three possibilities:
- * 1. We are running on a firmware level that do not support
- * opal_cec_reboot2()
- * 2. We are running on a firmware level that do not support
- * OPAL_REBOOT_PLATFORM_ERROR reboot type.
- * 3. We are running on FSP based system that does not need opal
- * to trigger checkstop explicitly for error analysis. The FSP
- * PRD component would have already got notified about this
- * error through other channels.
- *
- * If hardware marked this as an unrecoverable MCE, we are
- * going to panic anyway. Even if it didn't, it's not safe to
- * continue at this point, so we should explicitly panic.
- */
-
- panic("PowerNV Unrecovered Machine Check");
- return 0;
+ pnv_platform_error_reboot(regs, "Unrecoverable Machine Check exception");
}
/* Early hmi handler called in real mode. */
@@ -720,6 +752,15 @@ static void opal_pdev_init(const char *compatible)
of_platform_device_create(np, NULL, NULL);
}
+static void __init opal_imc_init_dev(void)
+{
+ struct device_node *np;
+
+ np = of_find_compatible_node(NULL, NULL, IMC_DTB_COMPAT);
+ if (np)
+ of_platform_device_create(np, NULL, NULL);
+}
+
static int kopald(void *unused)
{
unsigned long timeout = msecs_to_jiffies(opal_heartbeat) + 1;
@@ -793,6 +834,9 @@ static int __init opal_init(void)
/* Setup a heatbeat thread if requested by OPAL */
opal_init_heartbeat();
+ /* Detect In-Memory Collection counters and create devices*/
+ opal_imc_init_dev();
+
/* Create leds platform devices */
leds = of_find_node_by_path("/ibm,opal/leds");
if (leds) {
@@ -836,6 +880,15 @@ static int __init opal_init(void)
/* Initialise OPAL kmsg dumper for flushing console on panic */
opal_kmsg_init();
+ /* Initialise OPAL powercap interface */
+ opal_powercap_init();
+
+ /* Initialise OPAL Power-Shifting-Ratio interface */
+ opal_psr_init();
+
+ /* Initialise OPAL sensor groups */
+ opal_sensor_groups_init();
+
return 0;
}
machine_subsys_initcall(powernv, opal_init);
@@ -952,6 +1005,7 @@ int opal_error_code(int rc)
case OPAL_UNSUPPORTED: return -EIO;
case OPAL_HARDWARE: return -EIO;
case OPAL_INTERNAL_ERROR: return -EIO;
+ case OPAL_TIMEOUT: return -ETIMEDOUT;
default:
pr_err("%s: unexpected OPAL error %d\n", __func__, rc);
return -EIO;
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index b900eb1d5e17..57f9e55f4352 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -444,8 +444,8 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
r = of_get_property(dn, "ibm,opal-m64-window", NULL);
if (!r) {
- pr_info(" No <ibm,opal-m64-window> on %s\n",
- dn->full_name);
+ pr_info(" No <ibm,opal-m64-window> on %pOF\n",
+ dn);
return;
}
@@ -1408,7 +1408,6 @@ m64_failed:
static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
int num);
-static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe)
{
@@ -2402,7 +2401,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
return 0;
}
-static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
+void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
{
uint16_t window_id = (pe->pe_number << 1 ) + 1;
int64_t rc;
@@ -3797,8 +3796,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
if (!of_device_is_available(np))
return;
- pr_info("Initializing %s PHB (%s)\n",
- pnv_phb_names[ioda_type], of_node_full_name(np));
+ pr_info("Initializing %s PHB (%pOF)\n", pnv_phb_names[ioda_type], np);
prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
if (!prop64) {
@@ -3813,8 +3811,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
/* Allocate PCI controller */
phb->hose = hose = pcibios_alloc_controller(np);
if (!phb->hose) {
- pr_err(" Can't allocate PCI controller for %s\n",
- np->full_name);
+ pr_err(" Can't allocate PCI controller for %pOF\n",
+ np);
memblock_free(__pa(phb), sizeof(struct pnv_phb));
return;
}
@@ -3825,7 +3823,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
hose->first_busno = be32_to_cpu(prop32[0]);
hose->last_busno = be32_to_cpu(prop32[1]);
} else {
- pr_warn(" Broken <bus-range> on %s\n", np->full_name);
+ pr_warn(" Broken <bus-range> on %pOF\n", np);
hose->first_busno = 0;
hose->last_busno = 0xff;
}
@@ -4046,7 +4044,7 @@ void __init pnv_pci_init_ioda_hub(struct device_node *np)
const __be64 *prop64;
u64 hub_id;
- pr_info("Probing IODA IO-Hub %s\n", np->full_name);
+ pr_info("Probing IODA IO-Hub %pOF\n", np);
prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
if (!prop64) {
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 7905d179d036..5422f4a6317c 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -37,6 +37,8 @@
#include "powernv.h"
#include "pci.h"
+static DEFINE_MUTEX(p2p_mutex);
+
int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id)
{
struct device_node *parent = np;
@@ -1017,6 +1019,79 @@ void pnv_pci_dma_bus_setup(struct pci_bus *bus)
}
}
+int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target, u64 desc)
+{
+ struct pci_controller *hose;
+ struct pnv_phb *phb_init, *phb_target;
+ struct pnv_ioda_pe *pe_init;
+ int rc;
+
+ if (!opal_check_token(OPAL_PCI_SET_P2P))
+ return -ENXIO;
+
+ hose = pci_bus_to_host(initiator->bus);
+ phb_init = hose->private_data;
+
+ hose = pci_bus_to_host(target->bus);
+ phb_target = hose->private_data;
+
+ pe_init = pnv_ioda_get_pe(initiator);
+ if (!pe_init)
+ return -ENODEV;
+
+ /*
+ * Configuring the initiator's PHB requires to adjust its
+ * TVE#1 setting. Since the same device can be an initiator
+ * several times for different target devices, we need to keep
+ * a reference count to know when we can restore the default
+ * bypass setting on its TVE#1 when disabling. Opal is not
+ * tracking PE states, so we add a reference count on the PE
+ * in linux.
+ *
+ * For the target, the configuration is per PHB, so we keep a
+ * target reference count on the PHB.
+ */
+ mutex_lock(&p2p_mutex);
+
+ if (desc & OPAL_PCI_P2P_ENABLE) {
+ /* always go to opal to validate the configuration */
+ rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id,
+ desc, pe_init->pe_number);
+
+ if (rc != OPAL_SUCCESS) {
+ rc = -EIO;
+ goto out;
+ }
+
+ pe_init->p2p_initiator_count++;
+ phb_target->p2p_target_count++;
+ } else {
+ if (!pe_init->p2p_initiator_count ||
+ !phb_target->p2p_target_count) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (--pe_init->p2p_initiator_count == 0)
+ pnv_pci_ioda2_set_bypass(pe_init, true);
+
+ if (--phb_target->p2p_target_count == 0) {
+ rc = opal_pci_set_p2p(phb_init->opal_id,
+ phb_target->opal_id, desc,
+ pe_init->pe_number);
+ if (rc != OPAL_SUCCESS) {
+ rc = -EIO;
+ goto out;
+ }
+ }
+ }
+ rc = 0;
+out:
+ mutex_unlock(&p2p_mutex);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_set_p2p);
+
void pnv_pci_shutdown(void)
{
struct pci_controller *hose;
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index f16bc403ec03..a95273c524f6 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -78,6 +78,9 @@ struct pnv_ioda_pe {
struct pnv_ioda_pe *master;
struct list_head slaves;
+ /* PCI peer-to-peer*/
+ int p2p_initiator_count;
+
/* Link in list of PE#s */
struct list_head list;
};
@@ -189,6 +192,7 @@ struct pnv_phb {
#ifdef CONFIG_CXL_BASE
struct cxl_afu *cxl_afu;
#endif
+ int p2p_target_count;
};
extern struct pci_ops pnv_pci_ops;
@@ -229,6 +233,7 @@ extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev);
extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq);
extern bool pnv_pci_enable_device_hook(struct pci_dev *dev);
+extern void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
const char *fmt, ...);
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index 6dbc0a1da1f6..a159d48573d7 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -7,6 +7,8 @@ extern void pnv_smp_init(void);
static inline void pnv_smp_init(void) { }
#endif
+extern void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg) __noreturn;
+
struct pci_dev;
#ifdef CONFIG_PCI
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 1a9d84371a4d..718f50ed22f1 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -16,11 +16,13 @@
#include <linux/slab.h>
#include <linux/smp.h>
#include <asm/archrandom.h>
+#include <asm/cputable.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/smp.h>
+#define DARN_ERR 0xFFFFFFFFFFFFFFFFul
struct powernv_rng {
void __iomem *regs;
@@ -67,6 +69,41 @@ int powernv_get_random_real_mode(unsigned long *v)
return 1;
}
+int powernv_get_random_darn(unsigned long *v)
+{
+ unsigned long val;
+
+ /* Using DARN with L=1 - 64-bit conditioned random number */
+ asm volatile(PPC_DARN(%0, 1) : "=r"(val));
+
+ if (val == DARN_ERR)
+ return 0;
+
+ *v = val;
+
+ return 1;
+}
+
+static int initialise_darn(void)
+{
+ unsigned long val;
+ int i;
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -ENODEV;
+
+ for (i = 0; i < 10; i++) {
+ if (powernv_get_random_darn(&val)) {
+ ppc_md.get_random_seed = powernv_get_random_darn;
+ return 0;
+ }
+ }
+
+ pr_warn("Unable to use DARN for get_random_seed()\n");
+
+ return -EIO;
+}
+
int powernv_get_random_long(unsigned long *v)
{
struct powernv_rng *rng;
@@ -88,7 +125,7 @@ static __init void rng_init_per_cpu(struct powernv_rng *rng,
chip_id = of_get_ibm_chip_id(dn);
if (chip_id == -1)
- pr_warn("No ibm,chip-id found for %s.\n", dn->full_name);
+ pr_warn("No ibm,chip-id found for %pOF.\n", dn);
for_each_possible_cpu(cpu) {
if (per_cpu(powernv_rng, cpu) == NULL ||
@@ -141,8 +178,8 @@ static __init int rng_init(void)
for_each_compatible_node(dn, NULL, "ibm,power-rng") {
rc = rng_create(dn);
if (rc) {
- pr_err("Failed creating rng for %s (%d).\n",
- dn->full_name, rc);
+ pr_err("Failed creating rng for %pOF (%d).\n",
+ dn, rc);
continue;
}
@@ -150,6 +187,8 @@ static __init int rng_init(void)
of_platform_device_create(dn, NULL, NULL);
}
+ initialise_darn();
+
return 0;
}
machine_subsys_initcall(powernv, rng_init);
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 40dae96f7e20..c17f81e433f7 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -57,7 +57,7 @@ static void pnv_smp_setup_cpu(int cpu)
static int pnv_smp_kick_cpu(int nr)
{
- unsigned int pcpu = get_hard_smp_processor_id(nr);
+ unsigned int pcpu;
unsigned long start_here =
__pa(ppc_function_entry(generic_secondary_smp_init));
long rc;
@@ -66,6 +66,7 @@ static int pnv_smp_kick_cpu(int nr)
if (nr < 0 || nr >= nr_cpu_ids)
return -EINVAL;
+ pcpu = get_hard_smp_processor_id(nr);
/*
* If we already started or OPAL is not supported, we just
* kick the CPU via the PACA
@@ -164,12 +165,6 @@ static void pnv_smp_cpu_kill_self(void)
if (cpu_has_feature(CPU_FTR_ARCH_207S))
wmask = SRR1_WAKEMASK_P8;
- /* We don't want to take decrementer interrupts while we are offline,
- * so clear LPCR:PECE1. We keep PECE2 (and LPCR_PECE_HVEE on P9)
- * enabled as to let IPIs in.
- */
- mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
-
while (!generic_check_cpu_restart(cpu)) {
/*
* Clear IPI flag, since we don't handle IPIs while
@@ -219,8 +214,6 @@ static void pnv_smp_cpu_kill_self(void)
}
- /* Re-enable decrementer interrupts */
- mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_PECE1);
DBG("CPU%d coming online...\n", cpu);
}
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
new file mode 100644
index 000000000000..5aae845b8cd9
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -0,0 +1,1134 @@
+/*
+ * Copyright 2016-17 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/log2.h>
+#include <linux/rcupdate.h>
+#include <linux/cred.h>
+
+#include "vas.h"
+#include "copy-paste.h"
+
+/*
+ * Compute the paste address region for the window @window using the
+ * ->paste_base_addr and ->paste_win_id_shift we got from device tree.
+ */
+static void compute_paste_address(struct vas_window *window, u64 *addr, int *len)
+{
+ int winid;
+ u64 base, shift;
+
+ base = window->vinst->paste_base_addr;
+ shift = window->vinst->paste_win_id_shift;
+ winid = window->winid;
+
+ *addr = base + (winid << shift);
+ if (len)
+ *len = PAGE_SIZE;
+
+ pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr);
+}
+
+static inline void get_hvwc_mmio_bar(struct vas_window *window,
+ u64 *start, int *len)
+{
+ u64 pbaddr;
+
+ pbaddr = window->vinst->hvwc_bar_start;
+ *start = pbaddr + window->winid * VAS_HVWC_SIZE;
+ *len = VAS_HVWC_SIZE;
+}
+
+static inline void get_uwc_mmio_bar(struct vas_window *window,
+ u64 *start, int *len)
+{
+ u64 pbaddr;
+
+ pbaddr = window->vinst->uwc_bar_start;
+ *start = pbaddr + window->winid * VAS_UWC_SIZE;
+ *len = VAS_UWC_SIZE;
+}
+
+/*
+ * Map the paste bus address of the given send window into kernel address
+ * space. Unlike MMIO regions (map_mmio_region() below), paste region must
+ * be mapped cache-able and is only applicable to send windows.
+ */
+static void *map_paste_region(struct vas_window *txwin)
+{
+ int len;
+ void *map;
+ char *name;
+ u64 start;
+
+ name = kasprintf(GFP_KERNEL, "window-v%d-w%d", txwin->vinst->vas_id,
+ txwin->winid);
+ if (!name)
+ goto free_name;
+
+ txwin->paste_addr_name = name;
+ compute_paste_address(txwin, &start, &len);
+
+ if (!request_mem_region(start, len, name)) {
+ pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n",
+ __func__, start, len);
+ goto free_name;
+ }
+
+ map = ioremap_cache(start, len);
+ if (!map) {
+ pr_devel("%s(): ioremap_cache(0x%llx, %d) failed\n", __func__,
+ start, len);
+ goto free_name;
+ }
+
+ pr_devel("Mapped paste addr 0x%llx to kaddr 0x%p\n", start, map);
+ return map;
+
+free_name:
+ kfree(name);
+ return ERR_PTR(-ENOMEM);
+}
+
+static void *map_mmio_region(char *name, u64 start, int len)
+{
+ void *map;
+
+ if (!request_mem_region(start, len, name)) {
+ pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n",
+ __func__, start, len);
+ return NULL;
+ }
+
+ map = ioremap(start, len);
+ if (!map) {
+ pr_devel("%s(): ioremap(0x%llx, %d) failed\n", __func__, start,
+ len);
+ return NULL;
+ }
+
+ return map;
+}
+
+static void unmap_region(void *addr, u64 start, int len)
+{
+ iounmap(addr);
+ release_mem_region((phys_addr_t)start, len);
+}
+
+/*
+ * Unmap the paste address region for a window.
+ */
+static void unmap_paste_region(struct vas_window *window)
+{
+ int len;
+ u64 busaddr_start;
+
+ if (window->paste_kaddr) {
+ compute_paste_address(window, &busaddr_start, &len);
+ unmap_region(window->paste_kaddr, busaddr_start, len);
+ window->paste_kaddr = NULL;
+ kfree(window->paste_addr_name);
+ window->paste_addr_name = NULL;
+ }
+}
+
+/*
+ * Unmap the MMIO regions for a window.
+ */
+static void unmap_winctx_mmio_bars(struct vas_window *window)
+{
+ int len;
+ u64 busaddr_start;
+
+ if (window->hvwc_map) {
+ get_hvwc_mmio_bar(window, &busaddr_start, &len);
+ unmap_region(window->hvwc_map, busaddr_start, len);
+ window->hvwc_map = NULL;
+ }
+
+ if (window->uwc_map) {
+ get_uwc_mmio_bar(window, &busaddr_start, &len);
+ unmap_region(window->uwc_map, busaddr_start, len);
+ window->uwc_map = NULL;
+ }
+}
+
+/*
+ * Find the Hypervisor Window Context (HVWC) MMIO Base Address Region and the
+ * OS/User Window Context (UWC) MMIO Base Address Region for the given window.
+ * Map these bus addresses and save the mapped kernel addresses in @window.
+ */
+int map_winctx_mmio_bars(struct vas_window *window)
+{
+ int len;
+ u64 start;
+
+ get_hvwc_mmio_bar(window, &start, &len);
+ window->hvwc_map = map_mmio_region("HVWCM_Window", start, len);
+
+ get_uwc_mmio_bar(window, &start, &len);
+ window->uwc_map = map_mmio_region("UWCM_Window", start, len);
+
+ if (!window->hvwc_map || !window->uwc_map) {
+ unmap_winctx_mmio_bars(window);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Reset all valid registers in the HV and OS/User Window Contexts for
+ * the window identified by @window.
+ *
+ * NOTE: We cannot really use a for loop to reset window context. Not all
+ * offsets in a window context are valid registers and the valid
+ * registers are not sequential. And, we can only write to offsets
+ * with valid registers.
+ */
+void reset_window_regs(struct vas_window *window)
+{
+ write_hvwc_reg(window, VREG(LPID), 0ULL);
+ write_hvwc_reg(window, VREG(PID), 0ULL);
+ write_hvwc_reg(window, VREG(XLATE_MSR), 0ULL);
+ write_hvwc_reg(window, VREG(XLATE_LPCR), 0ULL);
+ write_hvwc_reg(window, VREG(XLATE_CTL), 0ULL);
+ write_hvwc_reg(window, VREG(AMR), 0ULL);
+ write_hvwc_reg(window, VREG(SEIDR), 0ULL);
+ write_hvwc_reg(window, VREG(FAULT_TX_WIN), 0ULL);
+ write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL);
+ write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), 0ULL);
+ write_hvwc_reg(window, VREG(PSWID), 0ULL);
+ write_hvwc_reg(window, VREG(LFIFO_BAR), 0ULL);
+ write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), 0ULL);
+ write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), 0ULL);
+ write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL);
+ write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL);
+ write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL);
+ write_hvwc_reg(window, VREG(LRX_WCRED), 0ULL);
+ write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
+ write_hvwc_reg(window, VREG(TX_WCRED), 0ULL);
+ write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
+ write_hvwc_reg(window, VREG(LFIFO_SIZE), 0ULL);
+ write_hvwc_reg(window, VREG(WINCTL), 0ULL);
+ write_hvwc_reg(window, VREG(WIN_STATUS), 0ULL);
+ write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), 0ULL);
+ write_hvwc_reg(window, VREG(TX_RSVD_BUF_COUNT), 0ULL);
+ write_hvwc_reg(window, VREG(LRFIFO_WIN_PTR), 0ULL);
+ write_hvwc_reg(window, VREG(LNOTIFY_CTL), 0ULL);
+ write_hvwc_reg(window, VREG(LNOTIFY_PID), 0ULL);
+ write_hvwc_reg(window, VREG(LNOTIFY_LPID), 0ULL);
+ write_hvwc_reg(window, VREG(LNOTIFY_TID), 0ULL);
+ write_hvwc_reg(window, VREG(LNOTIFY_SCOPE), 0ULL);
+ write_hvwc_reg(window, VREG(NX_UTIL_ADDER), 0ULL);
+
+ /* Skip read-only registers: NX_UTIL and NX_UTIL_SE */
+
+ /*
+ * The send and receive window credit adder registers are also
+ * accessible from HVWC and have been initialized above. We don't
+ * need to initialize from the OS/User Window Context, so skip
+ * following calls:
+ *
+ * write_uwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
+ * write_uwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
+ */
+}
+
+/*
+ * Initialize window context registers related to Address Translation.
+ * These registers are common to send/receive windows although they
+ * differ for user/kernel windows. As we resolve the TODOs we may
+ * want to add fields to vas_winctx and move the initialization to
+ * init_vas_winctx_regs().
+ */
+static void init_xlate_regs(struct vas_window *window, bool user_win)
+{
+ u64 lpcr, val;
+
+ /*
+ * MSR_TA, MSR_US are false for both kernel and user.
+ * MSR_DR and MSR_PR are false for kernel.
+ */
+ val = 0ULL;
+ val = SET_FIELD(VAS_XLATE_MSR_HV, val, 1);
+ val = SET_FIELD(VAS_XLATE_MSR_SF, val, 1);
+ if (user_win) {
+ val = SET_FIELD(VAS_XLATE_MSR_DR, val, 1);
+ val = SET_FIELD(VAS_XLATE_MSR_PR, val, 1);
+ }
+ write_hvwc_reg(window, VREG(XLATE_MSR), val);
+
+ lpcr = mfspr(SPRN_LPCR);
+ val = 0ULL;
+ /*
+ * NOTE: From Section 5.7.8.1 Segment Lookaside Buffer of the
+ * Power ISA, v3.0B, Page size encoding is 0 = 4KB, 5 = 64KB.
+ *
+ * NOTE: From Section 1.3.1, Address Translation Context of the
+ * Nest MMU Workbook, LPCR_SC should be 0 for Power9.
+ */
+ val = SET_FIELD(VAS_XLATE_LPCR_PAGE_SIZE, val, 5);
+ val = SET_FIELD(VAS_XLATE_LPCR_ISL, val, lpcr & LPCR_ISL);
+ val = SET_FIELD(VAS_XLATE_LPCR_TC, val, lpcr & LPCR_TC);
+ val = SET_FIELD(VAS_XLATE_LPCR_SC, val, 0);
+ write_hvwc_reg(window, VREG(XLATE_LPCR), val);
+
+ /*
+ * Section 1.3.1 (Address translation Context) of NMMU workbook.
+ * 0b00 Hashed Page Table mode
+ * 0b01 Reserved
+ * 0b10 Radix on HPT
+ * 0b11 Radix on Radix
+ */
+ val = 0ULL;
+ val = SET_FIELD(VAS_XLATE_MODE, val, radix_enabled() ? 3 : 2);
+ write_hvwc_reg(window, VREG(XLATE_CTL), val);
+
+ /*
+ * TODO: Can we mfspr(AMR) even for user windows?
+ */
+ val = 0ULL;
+ val = SET_FIELD(VAS_AMR, val, mfspr(SPRN_AMR));
+ write_hvwc_reg(window, VREG(AMR), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_SEIDR, val, 0);
+ write_hvwc_reg(window, VREG(SEIDR), val);
+}
+
+/*
+ * Initialize Reserved Send Buffer Count for the send window. It involves
+ * writing to the register, reading it back to confirm that the hardware
+ * has enough buffers to reserve. See section 1.3.1.2.1 of VAS workbook.
+ *
+ * Since we can only make a best-effort attempt to fulfill the request,
+ * we don't return any errors if we cannot.
+ *
+ * TODO: Reserved (aka dedicated) send buffers are not supported yet.
+ */
+static void init_rsvd_tx_buf_count(struct vas_window *txwin,
+ struct vas_winctx *winctx)
+{
+ write_hvwc_reg(txwin, VREG(TX_RSVD_BUF_COUNT), 0ULL);
+}
+
+/*
+ * init_winctx_regs()
+ * Initialize window context registers for a receive window.
+ * Except for caching control and marking window open, the registers
+ * are initialized in the order listed in Section 3.1.4 (Window Context
+ * Cache Register Details) of the VAS workbook although they don't need
+ * to be.
+ *
+ * Design note: For NX receive windows, NX allocates the FIFO buffer in OPAL
+ * (so that it can get a large contiguous area) and passes that buffer
+ * to kernel via device tree. We now write that buffer address to the
+ * FIFO BAR. Would it make sense to do this all in OPAL? i.e have OPAL
+ * write the per-chip RX FIFO addresses to the windows during boot-up
+ * as a one-time task? That could work for NX but what about other
+ * receivers? Let the receivers tell us the rx-fifo buffers for now.
+ */
+int init_winctx_regs(struct vas_window *window, struct vas_winctx *winctx)
+{
+ u64 val;
+ int fifo_size;
+
+ reset_window_regs(window);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LPID, val, winctx->lpid);
+ write_hvwc_reg(window, VREG(LPID), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_PID_ID, val, winctx->pidr);
+ write_hvwc_reg(window, VREG(PID), val);
+
+ init_xlate_regs(window, winctx->user_win);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_FAULT_TX_WIN, val, 0);
+ write_hvwc_reg(window, VREG(FAULT_TX_WIN), val);
+
+ /* In PowerNV, interrupts go to HV. */
+ write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_HV_INTR_SRC_RA, val, winctx->irq_port);
+ write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_PSWID_EA_HANDLE, val, winctx->pswid);
+ write_hvwc_reg(window, VREG(PSWID), val);
+
+ write_hvwc_reg(window, VREG(SPARE1), 0ULL);
+ write_hvwc_reg(window, VREG(SPARE2), 0ULL);
+ write_hvwc_reg(window, VREG(SPARE3), 0ULL);
+
+ /*
+ * NOTE: VAS expects the FIFO address to be copied into the LFIFO_BAR
+ * register as is - do NOT shift the address into VAS_LFIFO_BAR
+ * bit fields! Ok to set the page migration select fields -
+ * VAS ignores the lower 10+ bits in the address anyway, because
+ * the minimum FIFO size is 1K?
+ *
+ * See also: Design note in function header.
+ */
+ val = __pa(winctx->rx_fifo);
+ val = SET_FIELD(VAS_PAGE_MIGRATION_SELECT, val, 0);
+ write_hvwc_reg(window, VREG(LFIFO_BAR), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LDATA_STAMP, val, winctx->data_stamp);
+ write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LDMA_TYPE, val, winctx->dma_type);
+ val = SET_FIELD(VAS_LDMA_FIFO_DISABLE, val, winctx->fifo_disable);
+ write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), val);
+
+ write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL);
+ write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL);
+ write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LRX_WCRED, val, winctx->wcreds_max);
+ write_hvwc_reg(window, VREG(LRX_WCRED), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_TX_WCRED, val, winctx->wcreds_max);
+ write_hvwc_reg(window, VREG(TX_WCRED), val);
+
+ write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
+ write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
+
+ fifo_size = winctx->rx_fifo_size / 1024;
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LFIFO_SIZE, val, ilog2(fifo_size));
+ write_hvwc_reg(window, VREG(LFIFO_SIZE), val);
+
+ /* Update window control and caching control registers last so
+ * we mark the window open only after fully initializing it and
+ * pushing context to cache.
+ */
+
+ write_hvwc_reg(window, VREG(WIN_STATUS), 0ULL);
+
+ init_rsvd_tx_buf_count(window, winctx);
+
+ /* for a send window, point to the matching receive window */
+ val = 0ULL;
+ val = SET_FIELD(VAS_LRX_WIN_ID, val, winctx->rx_win_id);
+ write_hvwc_reg(window, VREG(LRFIFO_WIN_PTR), val);
+
+ write_hvwc_reg(window, VREG(SPARE4), 0ULL);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_NOTIFY_DISABLE, val, winctx->notify_disable);
+ val = SET_FIELD(VAS_INTR_DISABLE, val, winctx->intr_disable);
+ val = SET_FIELD(VAS_NOTIFY_EARLY, val, winctx->notify_early);
+ val = SET_FIELD(VAS_NOTIFY_OSU_INTR, val, winctx->notify_os_intr_reg);
+ write_hvwc_reg(window, VREG(LNOTIFY_CTL), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LNOTIFY_PID, val, winctx->lnotify_pid);
+ write_hvwc_reg(window, VREG(LNOTIFY_PID), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LNOTIFY_LPID, val, winctx->lnotify_lpid);
+ write_hvwc_reg(window, VREG(LNOTIFY_LPID), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LNOTIFY_TID, val, winctx->lnotify_tid);
+ write_hvwc_reg(window, VREG(LNOTIFY_TID), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LNOTIFY_MIN_SCOPE, val, winctx->min_scope);
+ val = SET_FIELD(VAS_LNOTIFY_MAX_SCOPE, val, winctx->max_scope);
+ write_hvwc_reg(window, VREG(LNOTIFY_SCOPE), val);
+
+ /* Skip read-only registers NX_UTIL and NX_UTIL_SE */
+
+ write_hvwc_reg(window, VREG(SPARE5), 0ULL);
+ write_hvwc_reg(window, VREG(NX_UTIL_ADDER), 0ULL);
+ write_hvwc_reg(window, VREG(SPARE6), 0ULL);
+
+ /* Finally, push window context to memory and... */
+ val = 0ULL;
+ val = SET_FIELD(VAS_PUSH_TO_MEM, val, 1);
+ write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), val);
+
+ /* ... mark the window open for business */
+ val = 0ULL;
+ val = SET_FIELD(VAS_WINCTL_REJ_NO_CREDIT, val, winctx->rej_no_credit);
+ val = SET_FIELD(VAS_WINCTL_PIN, val, winctx->pin_win);
+ val = SET_FIELD(VAS_WINCTL_TX_WCRED_MODE, val, winctx->tx_wcred_mode);
+ val = SET_FIELD(VAS_WINCTL_RX_WCRED_MODE, val, winctx->rx_wcred_mode);
+ val = SET_FIELD(VAS_WINCTL_TX_WORD_MODE, val, winctx->tx_word_mode);
+ val = SET_FIELD(VAS_WINCTL_RX_WORD_MODE, val, winctx->rx_word_mode);
+ val = SET_FIELD(VAS_WINCTL_FAULT_WIN, val, winctx->fault_win);
+ val = SET_FIELD(VAS_WINCTL_NX_WIN, val, winctx->nx_win);
+ val = SET_FIELD(VAS_WINCTL_OPEN, val, 1);
+ write_hvwc_reg(window, VREG(WINCTL), val);
+
+ return 0;
+}
+
+static DEFINE_SPINLOCK(vas_ida_lock);
+
+static void vas_release_window_id(struct ida *ida, int winid)
+{
+ spin_lock(&vas_ida_lock);
+ ida_remove(ida, winid);
+ spin_unlock(&vas_ida_lock);
+}
+
+static int vas_assign_window_id(struct ida *ida)
+{
+ int rc, winid;
+
+ do {
+ rc = ida_pre_get(ida, GFP_KERNEL);
+ if (!rc)
+ return -EAGAIN;
+
+ spin_lock(&vas_ida_lock);
+ rc = ida_get_new(ida, &winid);
+ spin_unlock(&vas_ida_lock);
+ } while (rc == -EAGAIN);
+
+ if (rc)
+ return rc;
+
+ if (winid > VAS_WINDOWS_PER_CHIP) {
+ pr_err("Too many (%d) open windows\n", winid);
+ vas_release_window_id(ida, winid);
+ return -EAGAIN;
+ }
+
+ return winid;
+}
+
+static void vas_window_free(struct vas_window *window)
+{
+ int winid = window->winid;
+ struct vas_instance *vinst = window->vinst;
+
+ unmap_winctx_mmio_bars(window);
+ kfree(window);
+
+ vas_release_window_id(&vinst->ida, winid);
+}
+
+static struct vas_window *vas_window_alloc(struct vas_instance *vinst)
+{
+ int winid;
+ struct vas_window *window;
+
+ winid = vas_assign_window_id(&vinst->ida);
+ if (winid < 0)
+ return ERR_PTR(winid);
+
+ window = kzalloc(sizeof(*window), GFP_KERNEL);
+ if (!window)
+ goto out_free;
+
+ window->vinst = vinst;
+ window->winid = winid;
+
+ if (map_winctx_mmio_bars(window))
+ goto out_free;
+
+ return window;
+
+out_free:
+ kfree(window);
+ vas_release_window_id(&vinst->ida, winid);
+ return ERR_PTR(-ENOMEM);
+}
+
+static void put_rx_win(struct vas_window *rxwin)
+{
+ /* Better not be a send window! */
+ WARN_ON_ONCE(rxwin->tx_win);
+
+ atomic_dec(&rxwin->num_txwins);
+}
+
+/*
+ * Get the VAS receive window associated with NX engine identified
+ * by @cop and if applicable, @pswid.
+ *
+ * See also function header of set_vinst_win().
+ */
+static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst,
+ enum vas_cop_type cop, u32 pswid)
+{
+ struct vas_window *rxwin;
+
+ mutex_lock(&vinst->mutex);
+
+ if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI)
+ rxwin = vinst->rxwin[cop] ?: ERR_PTR(-EINVAL);
+ else
+ rxwin = ERR_PTR(-EINVAL);
+
+ if (!IS_ERR(rxwin))
+ atomic_inc(&rxwin->num_txwins);
+
+ mutex_unlock(&vinst->mutex);
+
+ return rxwin;
+}
+
+/*
+ * We have two tables of windows in a VAS instance. The first one,
+ * ->windows[], contains all the windows in the instance and allows
+ * looking up a window by its id. It is used to look up send windows
+ * during fault handling and receive windows when pairing user space
+ * send/receive windows.
+ *
+ * The second table, ->rxwin[], contains receive windows that are
+ * associated with NX engines. This table has VAS_COP_TYPE_MAX
+ * entries and is used to look up a receive window by its
+ * coprocessor type.
+ *
+ * Here, we save @window in the ->windows[] table. If it is a receive
+ * window, we also save the window in the ->rxwin[] table.
+ */
+static void set_vinst_win(struct vas_instance *vinst,
+ struct vas_window *window)
+{
+ int id = window->winid;
+
+ mutex_lock(&vinst->mutex);
+
+ /*
+ * There should only be one receive window for a coprocessor type
+ * unless its a user (FTW) window.
+ */
+ if (!window->user_win && !window->tx_win) {
+ WARN_ON_ONCE(vinst->rxwin[window->cop]);
+ vinst->rxwin[window->cop] = window;
+ }
+
+ WARN_ON_ONCE(vinst->windows[id] != NULL);
+ vinst->windows[id] = window;
+
+ mutex_unlock(&vinst->mutex);
+}
+
+/*
+ * Clear this window from the table(s) of windows for this VAS instance.
+ * See also function header of set_vinst_win().
+ */
+static void clear_vinst_win(struct vas_window *window)
+{
+ int id = window->winid;
+ struct vas_instance *vinst = window->vinst;
+
+ mutex_lock(&vinst->mutex);
+
+ if (!window->user_win && !window->tx_win) {
+ WARN_ON_ONCE(!vinst->rxwin[window->cop]);
+ vinst->rxwin[window->cop] = NULL;
+ }
+
+ WARN_ON_ONCE(vinst->windows[id] != window);
+ vinst->windows[id] = NULL;
+
+ mutex_unlock(&vinst->mutex);
+}
+
+static void init_winctx_for_rxwin(struct vas_window *rxwin,
+ struct vas_rx_win_attr *rxattr,
+ struct vas_winctx *winctx)
+{
+ /*
+ * We first zero (memset()) all fields and only set non-zero fields.
+ * Following fields are 0/false but maybe deserve a comment:
+ *
+ * ->notify_os_intr_reg In powerNV, send intrs to HV
+ * ->notify_disable False for NX windows
+ * ->intr_disable False for Fault Windows
+ * ->xtra_write False for NX windows
+ * ->notify_early NA for NX windows
+ * ->rsvd_txbuf_count NA for Rx windows
+ * ->lpid, ->pid, ->tid NA for Rx windows
+ */
+
+ memset(winctx, 0, sizeof(struct vas_winctx));
+
+ winctx->rx_fifo = rxattr->rx_fifo;
+ winctx->rx_fifo_size = rxattr->rx_fifo_size;
+ winctx->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
+ winctx->pin_win = rxattr->pin_win;
+
+ winctx->nx_win = rxattr->nx_win;
+ winctx->fault_win = rxattr->fault_win;
+ winctx->rx_word_mode = rxattr->rx_win_ord_mode;
+ winctx->tx_word_mode = rxattr->tx_win_ord_mode;
+ winctx->rx_wcred_mode = rxattr->rx_wcred_mode;
+ winctx->tx_wcred_mode = rxattr->tx_wcred_mode;
+
+ if (winctx->nx_win) {
+ winctx->data_stamp = true;
+ winctx->intr_disable = true;
+ winctx->pin_win = true;
+
+ WARN_ON_ONCE(winctx->fault_win);
+ WARN_ON_ONCE(!winctx->rx_word_mode);
+ WARN_ON_ONCE(!winctx->tx_word_mode);
+ WARN_ON_ONCE(winctx->notify_after_count);
+ } else if (winctx->fault_win) {
+ winctx->notify_disable = true;
+ } else if (winctx->user_win) {
+ /*
+ * Section 1.8.1 Low Latency Core-Core Wake up of
+ * the VAS workbook:
+ *
+ * - disable credit checks ([tr]x_wcred_mode = false)
+ * - disable FIFO writes
+ * - enable ASB_Notify, disable interrupt
+ */
+ winctx->fifo_disable = true;
+ winctx->intr_disable = true;
+ winctx->rx_fifo = NULL;
+ }
+
+ winctx->lnotify_lpid = rxattr->lnotify_lpid;
+ winctx->lnotify_pid = rxattr->lnotify_pid;
+ winctx->lnotify_tid = rxattr->lnotify_tid;
+ winctx->pswid = rxattr->pswid;
+ winctx->dma_type = VAS_DMA_TYPE_INJECT;
+ winctx->tc_mode = rxattr->tc_mode;
+
+ winctx->min_scope = VAS_SCOPE_LOCAL;
+ winctx->max_scope = VAS_SCOPE_VECTORED_GROUP;
+}
+
+static bool rx_win_args_valid(enum vas_cop_type cop,
+ struct vas_rx_win_attr *attr)
+{
+ dump_rx_win_attr(attr);
+
+ if (cop >= VAS_COP_TYPE_MAX)
+ return false;
+
+ if (cop != VAS_COP_TYPE_FTW &&
+ attr->rx_fifo_size < VAS_RX_FIFO_SIZE_MIN)
+ return false;
+
+ if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX)
+ return false;
+
+ if (attr->nx_win) {
+ /* cannot be fault or user window if it is nx */
+ if (attr->fault_win || attr->user_win)
+ return false;
+ /*
+ * Section 3.1.4.32: NX Windows must not disable notification,
+ * and must not enable interrupts or early notification.
+ */
+ if (attr->notify_disable || !attr->intr_disable ||
+ attr->notify_early)
+ return false;
+ } else if (attr->fault_win) {
+ /* cannot be both fault and user window */
+ if (attr->user_win)
+ return false;
+
+ /*
+ * Section 3.1.4.32: Fault windows must disable notification
+ * but not interrupts.
+ */
+ if (!attr->notify_disable || attr->intr_disable)
+ return false;
+
+ } else if (attr->user_win) {
+ /*
+ * User receive windows are only for fast-thread-wakeup
+ * (FTW). They don't need a FIFO and must disable interrupts
+ */
+ if (attr->rx_fifo || attr->rx_fifo_size || !attr->intr_disable)
+ return false;
+ } else {
+ /* Rx window must be one of NX or Fault or User window. */
+ return false;
+ }
+
+ return true;
+}
+
+void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop)
+{
+ memset(rxattr, 0, sizeof(*rxattr));
+
+ if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) {
+ rxattr->pin_win = true;
+ rxattr->nx_win = true;
+ rxattr->fault_win = false;
+ rxattr->intr_disable = true;
+ rxattr->rx_wcred_mode = true;
+ rxattr->tx_wcred_mode = true;
+ rxattr->rx_win_ord_mode = true;
+ rxattr->tx_win_ord_mode = true;
+ } else if (cop == VAS_COP_TYPE_FAULT) {
+ rxattr->pin_win = true;
+ rxattr->fault_win = true;
+ rxattr->notify_disable = true;
+ rxattr->rx_wcred_mode = true;
+ rxattr->tx_wcred_mode = true;
+ rxattr->rx_win_ord_mode = true;
+ rxattr->tx_win_ord_mode = true;
+ } else if (cop == VAS_COP_TYPE_FTW) {
+ rxattr->user_win = true;
+ rxattr->intr_disable = true;
+
+ /*
+ * As noted in the VAS Workbook we disable credit checks.
+ * If we enable credit checks in the future, we must also
+ * implement a mechanism to return the user credits or new
+ * paste operations will fail.
+ */
+ }
+}
+EXPORT_SYMBOL_GPL(vas_init_rx_win_attr);
+
+struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
+ struct vas_rx_win_attr *rxattr)
+{
+ struct vas_window *rxwin;
+ struct vas_winctx winctx;
+ struct vas_instance *vinst;
+
+ if (!rx_win_args_valid(cop, rxattr))
+ return ERR_PTR(-EINVAL);
+
+ vinst = find_vas_instance(vasid);
+ if (!vinst) {
+ pr_devel("vasid %d not found!\n", vasid);
+ return ERR_PTR(-EINVAL);
+ }
+ pr_devel("Found instance %d\n", vasid);
+
+ rxwin = vas_window_alloc(vinst);
+ if (IS_ERR(rxwin)) {
+ pr_devel("Unable to allocate memory for Rx window\n");
+ return rxwin;
+ }
+
+ rxwin->tx_win = false;
+ rxwin->nx_win = rxattr->nx_win;
+ rxwin->user_win = rxattr->user_win;
+ rxwin->cop = cop;
+ if (rxattr->user_win)
+ rxwin->pid = task_pid_vnr(current);
+
+ init_winctx_for_rxwin(rxwin, rxattr, &winctx);
+ init_winctx_regs(rxwin, &winctx);
+
+ set_vinst_win(vinst, rxwin);
+
+ return rxwin;
+}
+EXPORT_SYMBOL_GPL(vas_rx_win_open);
+
+void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, enum vas_cop_type cop)
+{
+ memset(txattr, 0, sizeof(*txattr));
+
+ if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI) {
+ txattr->rej_no_credit = false;
+ txattr->rx_wcred_mode = true;
+ txattr->tx_wcred_mode = true;
+ txattr->rx_win_ord_mode = true;
+ txattr->tx_win_ord_mode = true;
+ } else if (cop == VAS_COP_TYPE_FTW) {
+ txattr->user_win = true;
+ }
+}
+EXPORT_SYMBOL_GPL(vas_init_tx_win_attr);
+
+static void init_winctx_for_txwin(struct vas_window *txwin,
+ struct vas_tx_win_attr *txattr,
+ struct vas_winctx *winctx)
+{
+ /*
+ * We first zero all fields and only set non-zero ones. Following
+ * are some fields set to 0/false for the stated reason:
+ *
+ * ->notify_os_intr_reg In powernv, send intrs to HV
+ * ->rsvd_txbuf_count Not supported yet.
+ * ->notify_disable False for NX windows
+ * ->xtra_write False for NX windows
+ * ->notify_early NA for NX windows
+ * ->lnotify_lpid NA for Tx windows
+ * ->lnotify_pid NA for Tx windows
+ * ->lnotify_tid NA for Tx windows
+ * ->tx_win_cred_mode Ignore for now for NX windows
+ * ->rx_win_cred_mode Ignore for now for NX windows
+ */
+ memset(winctx, 0, sizeof(struct vas_winctx));
+
+ winctx->wcreds_max = txattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
+
+ winctx->user_win = txattr->user_win;
+ winctx->nx_win = txwin->rxwin->nx_win;
+ winctx->pin_win = txattr->pin_win;
+
+ winctx->rx_wcred_mode = txattr->rx_wcred_mode;
+ winctx->tx_wcred_mode = txattr->tx_wcred_mode;
+ winctx->rx_word_mode = txattr->rx_win_ord_mode;
+ winctx->tx_word_mode = txattr->tx_win_ord_mode;
+
+ if (winctx->nx_win) {
+ winctx->data_stamp = true;
+ winctx->intr_disable = true;
+ }
+
+ winctx->lpid = txattr->lpid;
+ winctx->pidr = txattr->pidr;
+ winctx->rx_win_id = txwin->rxwin->winid;
+
+ winctx->dma_type = VAS_DMA_TYPE_INJECT;
+ winctx->tc_mode = txattr->tc_mode;
+ winctx->min_scope = VAS_SCOPE_LOCAL;
+ winctx->max_scope = VAS_SCOPE_VECTORED_GROUP;
+
+ winctx->pswid = 0;
+}
+
+static bool tx_win_args_valid(enum vas_cop_type cop,
+ struct vas_tx_win_attr *attr)
+{
+ if (attr->tc_mode != VAS_THRESH_DISABLED)
+ return false;
+
+ if (cop > VAS_COP_TYPE_MAX)
+ return false;
+
+ if (attr->user_win &&
+ (cop != VAS_COP_TYPE_FTW || attr->rsvd_txbuf_count))
+ return false;
+
+ return true;
+}
+
+struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
+ struct vas_tx_win_attr *attr)
+{
+ int rc;
+ struct vas_window *txwin;
+ struct vas_window *rxwin;
+ struct vas_winctx winctx;
+ struct vas_instance *vinst;
+
+ if (!tx_win_args_valid(cop, attr))
+ return ERR_PTR(-EINVAL);
+
+ vinst = find_vas_instance(vasid);
+ if (!vinst) {
+ pr_devel("vasid %d not found!\n", vasid);
+ return ERR_PTR(-EINVAL);
+ }
+
+ rxwin = get_vinst_rxwin(vinst, cop, attr->pswid);
+ if (IS_ERR(rxwin)) {
+ pr_devel("No RxWin for vasid %d, cop %d\n", vasid, cop);
+ return rxwin;
+ }
+
+ txwin = vas_window_alloc(vinst);
+ if (IS_ERR(txwin)) {
+ rc = PTR_ERR(txwin);
+ goto put_rxwin;
+ }
+
+ txwin->tx_win = 1;
+ txwin->rxwin = rxwin;
+ txwin->nx_win = txwin->rxwin->nx_win;
+ txwin->pid = attr->pid;
+ txwin->user_win = attr->user_win;
+
+ init_winctx_for_txwin(txwin, attr, &winctx);
+
+ init_winctx_regs(txwin, &winctx);
+
+ /*
+ * If its a kernel send window, map the window address into the
+ * kernel's address space. For user windows, user must issue an
+ * mmap() to map the window into their address space.
+ *
+ * NOTE: If kernel ever resubmits a user CRB after handling a page
+ * fault, we will need to map this into kernel as well.
+ */
+ if (!txwin->user_win) {
+ txwin->paste_kaddr = map_paste_region(txwin);
+ if (IS_ERR(txwin->paste_kaddr)) {
+ rc = PTR_ERR(txwin->paste_kaddr);
+ goto free_window;
+ }
+ }
+
+ set_vinst_win(vinst, txwin);
+
+ return txwin;
+
+free_window:
+ vas_window_free(txwin);
+
+put_rxwin:
+ put_rx_win(rxwin);
+ return ERR_PTR(rc);
+
+}
+EXPORT_SYMBOL_GPL(vas_tx_win_open);
+
+int vas_copy_crb(void *crb, int offset)
+{
+ return vas_copy(crb, offset);
+}
+EXPORT_SYMBOL_GPL(vas_copy_crb);
+
+#define RMA_LSMP_REPORT_ENABLE PPC_BIT(53)
+int vas_paste_crb(struct vas_window *txwin, int offset, bool re)
+{
+ int rc;
+ void *addr;
+ uint64_t val;
+
+ /*
+ * Only NX windows are supported for now and hardware assumes
+ * report-enable flag is set for NX windows. Ensure software
+ * complies too.
+ */
+ WARN_ON_ONCE(txwin->nx_win && !re);
+
+ addr = txwin->paste_kaddr;
+ if (re) {
+ /*
+ * Set the REPORT_ENABLE bit (equivalent to writing
+ * to 1K offset of the paste address)
+ */
+ val = SET_FIELD(RMA_LSMP_REPORT_ENABLE, 0ULL, 1);
+ addr += val;
+ }
+
+ /*
+ * Map the raw CR value from vas_paste() to an error code (there
+ * is just pass or fail for now though).
+ */
+ rc = vas_paste(addr, offset);
+ if (rc == 2)
+ rc = 0;
+ else
+ rc = -EINVAL;
+
+ print_fifo_msg_count(txwin);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(vas_paste_crb);
+
+static void poll_window_busy_state(struct vas_window *window)
+{
+ int busy;
+ u64 val;
+
+retry:
+ /*
+ * Poll Window Busy flag
+ */
+ val = read_hvwc_reg(window, VREG(WIN_STATUS));
+ busy = GET_FIELD(VAS_WIN_BUSY, val);
+ if (busy) {
+ val = 0;
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(HZ);
+ goto retry;
+ }
+}
+
+static void poll_window_castout(struct vas_window *window)
+{
+ int cached;
+ u64 val;
+
+ /* Cast window context out of the cache */
+retry:
+ val = read_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL));
+ cached = GET_FIELD(VAS_WIN_CACHE_STATUS, val);
+ if (cached) {
+ val = 0ULL;
+ val = SET_FIELD(VAS_CASTOUT_REQ, val, 1);
+ val = SET_FIELD(VAS_PUSH_TO_MEM, val, 0);
+ write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), val);
+
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(HZ);
+ goto retry;
+ }
+}
+
+/*
+ * Close a window.
+ *
+ * See Section 1.12.1 of VAS workbook v1.05 for details on closing window:
+ * - Disable new paste operations (unmap paste address)
+ * - Poll for the "Window Busy" bit to be cleared
+ * - Clear the Open/Enable bit for the Window.
+ * - Poll for return of window Credits (implies FIFO empty for Rx win?)
+ * - Unpin and cast window context out of cache
+ *
+ * Besides the hardware, kernel has some bookkeeping of course.
+ */
+int vas_win_close(struct vas_window *window)
+{
+ u64 val;
+
+ if (!window)
+ return 0;
+
+ if (!window->tx_win && atomic_read(&window->num_txwins) != 0) {
+ pr_devel("Attempting to close an active Rx window!\n");
+ WARN_ON_ONCE(1);
+ return -EBUSY;
+ }
+
+ unmap_paste_region(window);
+
+ clear_vinst_win(window);
+
+ poll_window_busy_state(window);
+
+ /* Unpin window from cache and close it */
+ val = read_hvwc_reg(window, VREG(WINCTL));
+ val = SET_FIELD(VAS_WINCTL_PIN, val, 0);
+ val = SET_FIELD(VAS_WINCTL_OPEN, val, 0);
+ write_hvwc_reg(window, VREG(WINCTL), val);
+
+ poll_window_castout(window);
+
+ /* if send window, drop reference to matching receive window */
+ if (window->tx_win)
+ put_rx_win(window->rxwin);
+
+ vas_window_free(window);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vas_win_close);
diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c
new file mode 100644
index 000000000000..565a4878fefa
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright 2016-17 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+#include <linux/of.h>
+
+#include "vas.h"
+
+static DEFINE_MUTEX(vas_mutex);
+static LIST_HEAD(vas_instances);
+
+static int init_vas_instance(struct platform_device *pdev)
+{
+ int rc, vasid;
+ struct resource *res;
+ struct vas_instance *vinst;
+ struct device_node *dn = pdev->dev.of_node;
+
+ rc = of_property_read_u32(dn, "ibm,vas-id", &vasid);
+ if (rc) {
+ pr_err("No ibm,vas-id property for %s?\n", pdev->name);
+ return -ENODEV;
+ }
+
+ if (pdev->num_resources != 4) {
+ pr_err("Unexpected DT configuration for [%s, %d]\n",
+ pdev->name, vasid);
+ return -ENODEV;
+ }
+
+ vinst = kzalloc(sizeof(*vinst), GFP_KERNEL);
+ if (!vinst)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&vinst->node);
+ ida_init(&vinst->ida);
+ mutex_init(&vinst->mutex);
+ vinst->vas_id = vasid;
+ vinst->pdev = pdev;
+
+ res = &pdev->resource[0];
+ vinst->hvwc_bar_start = res->start;
+
+ res = &pdev->resource[1];
+ vinst->uwc_bar_start = res->start;
+
+ res = &pdev->resource[2];
+ vinst->paste_base_addr = res->start;
+
+ res = &pdev->resource[3];
+ if (res->end > 62) {
+ pr_err("Bad 'paste_win_id_shift' in DT, %llx\n", res->end);
+ goto free_vinst;
+ }
+
+ vinst->paste_win_id_shift = 63 - res->end;
+
+ pr_devel("Initialized instance [%s, %d], paste_base 0x%llx, "
+ "paste_win_id_shift 0x%llx\n", pdev->name, vasid,
+ vinst->paste_base_addr, vinst->paste_win_id_shift);
+
+ mutex_lock(&vas_mutex);
+ list_add(&vinst->node, &vas_instances);
+ mutex_unlock(&vas_mutex);
+
+ dev_set_drvdata(&pdev->dev, vinst);
+
+ return 0;
+
+free_vinst:
+ kfree(vinst);
+ return -ENODEV;
+
+}
+
+/*
+ * Although this is read/used multiple times, it is written to only
+ * during initialization.
+ */
+struct vas_instance *find_vas_instance(int vasid)
+{
+ struct list_head *ent;
+ struct vas_instance *vinst;
+
+ mutex_lock(&vas_mutex);
+ list_for_each(ent, &vas_instances) {
+ vinst = list_entry(ent, struct vas_instance, node);
+ if (vinst->vas_id == vasid) {
+ mutex_unlock(&vas_mutex);
+ return vinst;
+ }
+ }
+ mutex_unlock(&vas_mutex);
+
+ pr_devel("Instance %d not found\n", vasid);
+ return NULL;
+}
+
+static int vas_probe(struct platform_device *pdev)
+{
+ return init_vas_instance(pdev);
+}
+
+static const struct of_device_id powernv_vas_match[] = {
+ { .compatible = "ibm,vas",},
+ {},
+};
+
+static struct platform_driver vas_driver = {
+ .driver = {
+ .name = "vas",
+ .of_match_table = powernv_vas_match,
+ },
+ .probe = vas_probe,
+};
+
+static int __init vas_init(void)
+{
+ int found = 0;
+ struct device_node *dn;
+
+ platform_driver_register(&vas_driver);
+
+ for_each_compatible_node(dn, NULL, "ibm,vas") {
+ of_platform_device_create(dn, NULL, NULL);
+ found++;
+ }
+
+ if (!found)
+ return -ENODEV;
+
+ pr_devel("Found %d instances\n", found);
+
+ return 0;
+}
+device_initcall(vas_init);
diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h
new file mode 100644
index 000000000000..38dee5d50f31
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -0,0 +1,467 @@
+/*
+ * Copyright 2016-17 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _VAS_H
+#define _VAS_H
+#include <linux/atomic.h>
+#include <linux/idr.h>
+#include <asm/vas.h>
+#include <linux/io.h>
+
+/*
+ * Overview of Virtual Accelerator Switchboard (VAS).
+ *
+ * VAS is a hardware "switchboard" that allows senders and receivers to
+ * exchange messages with _minimal_ kernel involvment. The receivers are
+ * typically NX coprocessor engines that perform compression or encryption
+ * in hardware, but receivers can also be other software threads.
+ *
+ * Senders are user/kernel threads that submit compression/encryption or
+ * other requests to the receivers. Senders must format their messages as
+ * Coprocessor Request Blocks (CRB)s and submit them using the "copy" and
+ * "paste" instructions which were introduced in Power9.
+ *
+ * A Power node can have (upto?) 8 Power chips. There is one instance of
+ * VAS in each Power9 chip. Each instance of VAS has 64K windows or ports,
+ * Senders and receivers must each connect to a separate window before they
+ * can exchange messages through the switchboard.
+ *
+ * Each window is described by two types of window contexts:
+ *
+ * Hypervisor Window Context (HVWC) of size VAS_HVWC_SIZE bytes
+ *
+ * OS/User Window Context (UWC) of size VAS_UWC_SIZE bytes.
+ *
+ * A window context can be viewed as a set of 64-bit registers. The settings
+ * in these registers configure/control/determine the behavior of the VAS
+ * hardware when messages are sent/received through the window. The registers
+ * in the HVWC are configured by the kernel while the registers in the UWC can
+ * be configured by the kernel or by the user space application that is using
+ * the window.
+ *
+ * The HVWCs for all windows on a specific instance of VAS are in a contiguous
+ * range of hardware addresses or Base address region (BAR) referred to as the
+ * HVWC BAR for the instance. Similarly the UWCs for all windows on an instance
+ * are referred to as the UWC BAR for the instance.
+ *
+ * The two BARs for each instance are defined Power9 MMIO Ranges spreadsheet
+ * and available to the kernel in the VAS node's "reg" property in the device
+ * tree:
+ *
+ * /proc/device-tree/vasm@.../reg
+ *
+ * (see vas_probe() for details on the reg property).
+ *
+ * The kernel maps the HVWC and UWC BAR regions into the kernel address
+ * space (hvwc_map and uwc_map). The kernel can then access the window
+ * contexts of a specific window using:
+ *
+ * hvwc = hvwc_map + winid * VAS_HVWC_SIZE.
+ * uwc = uwc_map + winid * VAS_UWC_SIZE.
+ *
+ * where winid is the window index (0..64K).
+ *
+ * As mentioned, a window context is used to "configure" a window. Besides
+ * this configuration address, each _send_ window also has a unique hardware
+ * "paste" address that is used to submit requests/CRBs (see vas_paste_crb()).
+ *
+ * The hardware paste address for a window is computed using the "paste
+ * base address" and "paste win id shift" reg properties in the VAS device
+ * tree node using:
+ *
+ * paste_addr = paste_base + ((winid << paste_win_id_shift))
+ *
+ * (again, see vas_probe() for ->paste_base_addr and ->paste_win_id_shift).
+ *
+ * The kernel maps this hardware address into the sender's address space
+ * after which they can use the 'paste' instruction (new in Power9) to
+ * send a message (submit a request aka CRB) to the coprocessor.
+ *
+ * NOTE: In the initial version, senders can only in-kernel drivers/threads.
+ * Support for user space threads will be added in follow-on patches.
+ *
+ * TODO: Do we need to map the UWC into user address space so they can return
+ * credits? Its NA for NX but may be needed for other receive windows.
+ *
+ */
+
+#define VAS_WINDOWS_PER_CHIP (64 << 10)
+
+/*
+ * Hypervisor and OS/USer Window Context sizes
+ */
+#define VAS_HVWC_SIZE 512
+#define VAS_UWC_SIZE PAGE_SIZE
+
+/*
+ * Initial per-process credits.
+ * Max send window credits: 4K-1 (12-bits in VAS_TX_WCRED)
+ * Max receive window credits: 64K-1 (16 bits in VAS_LRX_WCRED)
+ *
+ * TODO: Needs tuning for per-process credits
+ */
+#define VAS_WCREDS_MIN 16
+#define VAS_WCREDS_MAX ((64 << 10) - 1)
+#define VAS_WCREDS_DEFAULT (1 << 10)
+
+/*
+ * VAS Window Context Register Offsets and bitmasks.
+ * See Section 3.1.4 of VAS Work book
+ */
+#define VAS_LPID_OFFSET 0x010
+#define VAS_LPID PPC_BITMASK(0, 11)
+
+#define VAS_PID_OFFSET 0x018
+#define VAS_PID_ID PPC_BITMASK(0, 19)
+
+#define VAS_XLATE_MSR_OFFSET 0x020
+#define VAS_XLATE_MSR_DR PPC_BIT(0)
+#define VAS_XLATE_MSR_TA PPC_BIT(1)
+#define VAS_XLATE_MSR_PR PPC_BIT(2)
+#define VAS_XLATE_MSR_US PPC_BIT(3)
+#define VAS_XLATE_MSR_HV PPC_BIT(4)
+#define VAS_XLATE_MSR_SF PPC_BIT(5)
+
+#define VAS_XLATE_LPCR_OFFSET 0x028
+#define VAS_XLATE_LPCR_PAGE_SIZE PPC_BITMASK(0, 2)
+#define VAS_XLATE_LPCR_ISL PPC_BIT(3)
+#define VAS_XLATE_LPCR_TC PPC_BIT(4)
+#define VAS_XLATE_LPCR_SC PPC_BIT(5)
+
+#define VAS_XLATE_CTL_OFFSET 0x030
+#define VAS_XLATE_MODE PPC_BITMASK(0, 1)
+
+#define VAS_AMR_OFFSET 0x040
+#define VAS_AMR PPC_BITMASK(0, 63)
+
+#define VAS_SEIDR_OFFSET 0x048
+#define VAS_SEIDR PPC_BITMASK(0, 63)
+
+#define VAS_FAULT_TX_WIN_OFFSET 0x050
+#define VAS_FAULT_TX_WIN PPC_BITMASK(48, 63)
+
+#define VAS_OSU_INTR_SRC_RA_OFFSET 0x060
+#define VAS_OSU_INTR_SRC_RA PPC_BITMASK(8, 63)
+
+#define VAS_HV_INTR_SRC_RA_OFFSET 0x070
+#define VAS_HV_INTR_SRC_RA PPC_BITMASK(8, 63)
+
+#define VAS_PSWID_OFFSET 0x078
+#define VAS_PSWID_EA_HANDLE PPC_BITMASK(0, 31)
+
+#define VAS_SPARE1_OFFSET 0x080
+#define VAS_SPARE2_OFFSET 0x088
+#define VAS_SPARE3_OFFSET 0x090
+#define VAS_SPARE4_OFFSET 0x130
+#define VAS_SPARE5_OFFSET 0x160
+#define VAS_SPARE6_OFFSET 0x188
+
+#define VAS_LFIFO_BAR_OFFSET 0x0A0
+#define VAS_LFIFO_BAR PPC_BITMASK(8, 53)
+#define VAS_PAGE_MIGRATION_SELECT PPC_BITMASK(54, 56)
+
+#define VAS_LDATA_STAMP_CTL_OFFSET 0x0A8
+#define VAS_LDATA_STAMP PPC_BITMASK(0, 1)
+#define VAS_XTRA_WRITE PPC_BIT(2)
+
+#define VAS_LDMA_CACHE_CTL_OFFSET 0x0B0
+#define VAS_LDMA_TYPE PPC_BITMASK(0, 1)
+#define VAS_LDMA_FIFO_DISABLE PPC_BIT(2)
+
+#define VAS_LRFIFO_PUSH_OFFSET 0x0B8
+#define VAS_LRFIFO_PUSH PPC_BITMASK(0, 15)
+
+#define VAS_CURR_MSG_COUNT_OFFSET 0x0C0
+#define VAS_CURR_MSG_COUNT PPC_BITMASK(0, 7)
+
+#define VAS_LNOTIFY_AFTER_COUNT_OFFSET 0x0C8
+#define VAS_LNOTIFY_AFTER_COUNT PPC_BITMASK(0, 7)
+
+#define VAS_LRX_WCRED_OFFSET 0x0E0
+#define VAS_LRX_WCRED PPC_BITMASK(0, 15)
+
+#define VAS_LRX_WCRED_ADDER_OFFSET 0x190
+#define VAS_LRX_WCRED_ADDER PPC_BITMASK(0, 15)
+
+#define VAS_TX_WCRED_OFFSET 0x0F0
+#define VAS_TX_WCRED PPC_BITMASK(4, 15)
+
+#define VAS_TX_WCRED_ADDER_OFFSET 0x1A0
+#define VAS_TX_WCRED_ADDER PPC_BITMASK(4, 15)
+
+#define VAS_LFIFO_SIZE_OFFSET 0x100
+#define VAS_LFIFO_SIZE PPC_BITMASK(0, 3)
+
+#define VAS_WINCTL_OFFSET 0x108
+#define VAS_WINCTL_OPEN PPC_BIT(0)
+#define VAS_WINCTL_REJ_NO_CREDIT PPC_BIT(1)
+#define VAS_WINCTL_PIN PPC_BIT(2)
+#define VAS_WINCTL_TX_WCRED_MODE PPC_BIT(3)
+#define VAS_WINCTL_RX_WCRED_MODE PPC_BIT(4)
+#define VAS_WINCTL_TX_WORD_MODE PPC_BIT(5)
+#define VAS_WINCTL_RX_WORD_MODE PPC_BIT(6)
+#define VAS_WINCTL_RSVD_TXBUF PPC_BIT(7)
+#define VAS_WINCTL_THRESH_CTL PPC_BITMASK(8, 9)
+#define VAS_WINCTL_FAULT_WIN PPC_BIT(10)
+#define VAS_WINCTL_NX_WIN PPC_BIT(11)
+
+#define VAS_WIN_STATUS_OFFSET 0x110
+#define VAS_WIN_BUSY PPC_BIT(1)
+
+#define VAS_WIN_CTX_CACHING_CTL_OFFSET 0x118
+#define VAS_CASTOUT_REQ PPC_BIT(0)
+#define VAS_PUSH_TO_MEM PPC_BIT(1)
+#define VAS_WIN_CACHE_STATUS PPC_BIT(4)
+
+#define VAS_TX_RSVD_BUF_COUNT_OFFSET 0x120
+#define VAS_RXVD_BUF_COUNT PPC_BITMASK(58, 63)
+
+#define VAS_LRFIFO_WIN_PTR_OFFSET 0x128
+#define VAS_LRX_WIN_ID PPC_BITMASK(0, 15)
+
+/*
+ * Local Notification Control Register controls what happens in _response_
+ * to a paste command and hence applies only to receive windows.
+ */
+#define VAS_LNOTIFY_CTL_OFFSET 0x138
+#define VAS_NOTIFY_DISABLE PPC_BIT(0)
+#define VAS_INTR_DISABLE PPC_BIT(1)
+#define VAS_NOTIFY_EARLY PPC_BIT(2)
+#define VAS_NOTIFY_OSU_INTR PPC_BIT(3)
+
+#define VAS_LNOTIFY_PID_OFFSET 0x140
+#define VAS_LNOTIFY_PID PPC_BITMASK(0, 19)
+
+#define VAS_LNOTIFY_LPID_OFFSET 0x148
+#define VAS_LNOTIFY_LPID PPC_BITMASK(0, 11)
+
+#define VAS_LNOTIFY_TID_OFFSET 0x150
+#define VAS_LNOTIFY_TID PPC_BITMASK(0, 15)
+
+#define VAS_LNOTIFY_SCOPE_OFFSET 0x158
+#define VAS_LNOTIFY_MIN_SCOPE PPC_BITMASK(0, 1)
+#define VAS_LNOTIFY_MAX_SCOPE PPC_BITMASK(2, 3)
+
+#define VAS_NX_UTIL_OFFSET 0x1B0
+#define VAS_NX_UTIL PPC_BITMASK(0, 63)
+
+/* SE: Side effects */
+#define VAS_NX_UTIL_SE_OFFSET 0x1B8
+#define VAS_NX_UTIL_SE PPC_BITMASK(0, 63)
+
+#define VAS_NX_UTIL_ADDER_OFFSET 0x180
+#define VAS_NX_UTIL_ADDER PPC_BITMASK(32, 63)
+
+/*
+ * Local Notify Scope Control Register. (Receive windows only).
+ */
+enum vas_notify_scope {
+ VAS_SCOPE_LOCAL,
+ VAS_SCOPE_GROUP,
+ VAS_SCOPE_VECTORED_GROUP,
+ VAS_SCOPE_UNUSED,
+};
+
+/*
+ * Local DMA Cache Control Register (Receive windows only).
+ */
+enum vas_dma_type {
+ VAS_DMA_TYPE_INJECT,
+ VAS_DMA_TYPE_WRITE,
+};
+
+/*
+ * Local Notify Scope Control Register. (Receive windows only).
+ * Not applicable to NX receive windows.
+ */
+enum vas_notify_after_count {
+ VAS_NOTIFY_AFTER_256 = 0,
+ VAS_NOTIFY_NONE,
+ VAS_NOTIFY_AFTER_2
+};
+
+/*
+ * One per instance of VAS. Each instance will have a separate set of
+ * receive windows, one per coprocessor type.
+ *
+ * See also function header of set_vinst_win() for details on ->windows[]
+ * and ->rxwin[] tables.
+ */
+struct vas_instance {
+ int vas_id;
+ struct ida ida;
+ struct list_head node;
+ struct platform_device *pdev;
+
+ u64 hvwc_bar_start;
+ u64 uwc_bar_start;
+ u64 paste_base_addr;
+ u64 paste_win_id_shift;
+
+ struct mutex mutex;
+ struct vas_window *rxwin[VAS_COP_TYPE_MAX];
+ struct vas_window *windows[VAS_WINDOWS_PER_CHIP];
+};
+
+/*
+ * In-kernel state a VAS window. One per window.
+ */
+struct vas_window {
+ /* Fields common to send and receive windows */
+ struct vas_instance *vinst;
+ int winid;
+ bool tx_win; /* True if send window */
+ bool nx_win; /* True if NX window */
+ bool user_win; /* True if user space window */
+ void *hvwc_map; /* HV window context */
+ void *uwc_map; /* OS/User window context */
+ pid_t pid; /* Linux process id of owner */
+
+ /* Fields applicable only to send windows */
+ void *paste_kaddr;
+ char *paste_addr_name;
+ struct vas_window *rxwin;
+
+ /* Feilds applicable only to receive windows */
+ enum vas_cop_type cop;
+ atomic_t num_txwins;
+};
+
+/*
+ * Container for the hardware state of a window. One per-window.
+ *
+ * A VAS Window context is a 512-byte area in the hardware that contains
+ * a set of 64-bit registers. Individual bit-fields in these registers
+ * determine the configuration/operation of the hardware. struct vas_winctx
+ * is a container for the register fields in the window context.
+ */
+struct vas_winctx {
+ void *rx_fifo;
+ int rx_fifo_size;
+ int wcreds_max;
+ int rsvd_txbuf_count;
+
+ bool user_win;
+ bool nx_win;
+ bool fault_win;
+ bool rsvd_txbuf_enable;
+ bool pin_win;
+ bool rej_no_credit;
+ bool tx_wcred_mode;
+ bool rx_wcred_mode;
+ bool tx_word_mode;
+ bool rx_word_mode;
+ bool data_stamp;
+ bool xtra_write;
+ bool notify_disable;
+ bool intr_disable;
+ bool fifo_disable;
+ bool notify_early;
+ bool notify_os_intr_reg;
+
+ int lpid;
+ int pidr; /* value from SPRN_PID, not linux pid */
+ int lnotify_lpid;
+ int lnotify_pid;
+ int lnotify_tid;
+ u32 pswid;
+ int rx_win_id;
+ int fault_win_id;
+ int tc_mode;
+
+ u64 irq_port;
+
+ enum vas_dma_type dma_type;
+ enum vas_notify_scope min_scope;
+ enum vas_notify_scope max_scope;
+ enum vas_notify_after_count notify_after_count;
+};
+
+extern struct vas_instance *find_vas_instance(int vasid);
+
+/*
+ * VREG(x):
+ * Expand a register's short name (eg: LPID) into two parameters:
+ * - the register's short name in string form ("LPID"), and
+ * - the name of the macro (eg: VAS_LPID_OFFSET), defining the
+ * register's offset in the window context
+ */
+#define VREG_SFX(n, s) __stringify(n), VAS_##n##s
+#define VREG(r) VREG_SFX(r, _OFFSET)
+
+#ifdef vas_debug
+static inline void dump_rx_win_attr(struct vas_rx_win_attr *attr)
+{
+ pr_err("fault %d, notify %d, intr %d early %d\n",
+ attr->fault_win, attr->notify_disable,
+ attr->intr_disable, attr->notify_early);
+
+ pr_err("rx_fifo_size %d, max value %d\n",
+ attr->rx_fifo_size, VAS_RX_FIFO_SIZE_MAX);
+}
+
+static inline void vas_log_write(struct vas_window *win, char *name,
+ void *regptr, u64 val)
+{
+ if (val)
+ pr_err("%swin #%d: %s reg %p, val 0x%016llx\n",
+ win->tx_win ? "Tx" : "Rx", win->winid, name,
+ regptr, val);
+}
+
+#else /* vas_debug */
+
+#define vas_log_write(win, name, reg, val)
+#define dump_rx_win_attr(attr)
+
+#endif /* vas_debug */
+
+static inline void write_uwc_reg(struct vas_window *win, char *name,
+ s32 reg, u64 val)
+{
+ void *regptr;
+
+ regptr = win->uwc_map + reg;
+ vas_log_write(win, name, regptr, val);
+
+ out_be64(regptr, val);
+}
+
+static inline void write_hvwc_reg(struct vas_window *win, char *name,
+ s32 reg, u64 val)
+{
+ void *regptr;
+
+ regptr = win->hvwc_map + reg;
+ vas_log_write(win, name, regptr, val);
+
+ out_be64(regptr, val);
+}
+
+static inline u64 read_hvwc_reg(struct vas_window *win,
+ char *name __maybe_unused, s32 reg)
+{
+ return in_be64(win->hvwc_map+reg);
+}
+
+#ifdef vas_debug
+
+static void print_fifo_msg_count(struct vas_window *txwin)
+{
+ uint64_t read_hvwc_reg(struct vas_window *w, char *n, uint64_t o);
+ pr_devel("Winid %d, Msg count %llu\n", txwin->winid,
+ (uint64_t)read_hvwc_reg(txwin, VREG(LRFIFO_PUSH)));
+}
+#else /* vas_debug */
+
+#define print_fifo_msg_count(window)
+
+#endif /* vas_debug */
+
+#endif /* _VAS_H */