Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--  arch/powerpc/kernel/entry_64.S     |  25
-rw-r--r--  arch/powerpc/kernel/ftrace.c       |  69
-rw-r--r--  arch/powerpc/kernel/iommu.c        | 197
-rw-r--r--  arch/powerpc/kernel/pci-common.c   |   1
-rw-r--r--  arch/powerpc/kernel/pci_of_scan.c  |   1
-rw-r--r--  arch/powerpc/kernel/smp.c          |   2
-rw-r--r--  arch/powerpc/kernel/vio.c          |   6
7 files changed, 210 insertions, 91 deletions
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 5971c85df13..cf38a17ab28 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -197,7 +197,16 @@ syscall_exit:
wrteei 0
#else
ld r10,PACAKMSR(r13)
- mtmsrd r10,1
+ /*
+ * For performance reasons we clear RI at the same time that we
+ * clear EE. We only need to clear RI just before we restore r13
+ * below, but batching it with EE saves us one expensive mtmsrd call.
+ * We have to be careful to restore RI if we branch anywhere from
+ * here (e.g. syscall_exit_work).
+ */
+ li r9,MSR_RI
+ andc r11,r10,r9
+ mtmsrd r11,1
#endif /* CONFIG_PPC_BOOK3E */
ld r9,TI_FLAGS(r12)
@@ -214,17 +223,6 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
andi. r6,r8,MSR_PR
ld r4,_LINK(r1)
- /*
- * Clear RI before restoring r13. If we are returning to
- * userspace and we take an exception after restoring r13,
- * we end up corrupting the userspace r13 value.
- */
-#ifdef CONFIG_PPC_BOOK3S
- /* No MSR:RI on BookE */
- li r12,MSR_RI
- andc r11,r10,r12
- mtmsrd r11,1 /* clear MSR.RI */
-#endif /* CONFIG_PPC_BOOK3S */
beq- 1f
ACCOUNT_CPU_USER_EXIT(r11, r12)
@@ -271,6 +269,9 @@ syscall_enosys:
b syscall_exit
syscall_exit_work:
+#ifdef CONFIG_PPC_BOOK3S
+ mtmsrd r10,1 /* Restore RI */
+#endif
/* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
If TIF_NOERROR is set, just save r3 as it is. */
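
The entry_64.S hunk above folds the clearing of MSR_RI into the mtmsrd that already clears EE on the syscall exit fast path, with syscall_exit_work restoring RI before doing anything else. As a rough illustration of the mask step (the andc), here is a minimal userspace C sketch; the MSR_RI bit position and the sample MSR value are assumptions for illustration, not the real ppc64 definitions.

/* Minimal model of the andc in the hunk above: clear MSR_RI in the value
 * that the single mtmsrd r11,1 will load. The bit position and sample MSR
 * value are illustrative assumptions, not the real ppc64 definitions. */
#include <stdint.h>
#include <stdio.h>

#define MSR_RI_BIT (1ULL << 1)   /* assumed position of the RI bit */

int main(void)
{
	uint64_t kernel_msr = 0x9000000000001032ULL;    /* example PACAKMSR-like value */
	uint64_t msr_no_ri  = kernel_msr & ~MSR_RI_BIT; /* models: andc r11,r10,r9 */

	/* One mtmsrd r11,1 now clears EE and RI together instead of paying
	 * for a second mtmsrd just before r13 is restored. */
	printf("%#llx -> %#llx\n",
	       (unsigned long long)kernel_msr, (unsigned long long)msr_no_ri);
	return 0;
}
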
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index bf99cfa6bbf..6f33296a057 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -63,11 +63,9 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)
return -EINVAL;
/* replace the text with the new text */
- if (probe_kernel_write((void *)ip, &new, MCOUNT_INSN_SIZE))
+ if (patch_instruction((unsigned int *)ip, new))
return -EPERM;
- flush_icache_range(ip, ip + 8);
-
return 0;
}
@@ -212,12 +210,9 @@ __ftrace_make_nop(struct module *mod,
*/
op = 0x48000008; /* b +8 */
- if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
+ if (patch_instruction((unsigned int *)ip, op))
return -EPERM;
-
- flush_icache_range(ip, ip + 8);
-
return 0;
}
@@ -286,11 +281,9 @@ __ftrace_make_nop(struct module *mod,
op = PPC_INST_NOP;
- if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
+ if (patch_instruction((unsigned int *)ip, op))
return -EPERM;
- flush_icache_range(ip, ip + 8);
-
return 0;
}
#endif /* PPC64 */
@@ -426,11 +419,9 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
pr_devel("write to %lx\n", rec->ip);
- if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
+ if (patch_instruction((unsigned int *)ip, op))
return -EPERM;
- flush_icache_range(ip, ip + 8);
-
return 0;
}
#endif /* CONFIG_PPC64 */
@@ -484,6 +475,58 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
return ret;
}
+static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+{
+ unsigned long ftrace_addr = (unsigned long)FTRACE_ADDR;
+ int ret;
+
+ ret = ftrace_update_record(rec, enable);
+
+ switch (ret) {
+ case FTRACE_UPDATE_IGNORE:
+ return 0;
+ case FTRACE_UPDATE_MAKE_CALL:
+ return ftrace_make_call(rec, ftrace_addr);
+ case FTRACE_UPDATE_MAKE_NOP:
+ return ftrace_make_nop(NULL, rec, ftrace_addr);
+ }
+
+ return 0;
+}
+
+void ftrace_replace_code(int enable)
+{
+ struct ftrace_rec_iter *iter;
+ struct dyn_ftrace *rec;
+ int ret;
+
+ for (iter = ftrace_rec_iter_start(); iter;
+ iter = ftrace_rec_iter_next(iter)) {
+ rec = ftrace_rec_iter_record(iter);
+ ret = __ftrace_replace_code(rec, enable);
+ if (ret) {
+ ftrace_bug(ret, rec->ip);
+ return;
+ }
+ }
+}
+
+void arch_ftrace_update_code(int command)
+{
+ if (command & FTRACE_UPDATE_CALLS)
+ ftrace_replace_code(1);
+ else if (command & FTRACE_DISABLE_CALLS)
+ ftrace_replace_code(0);
+
+ if (command & FTRACE_UPDATE_TRACE_FUNC)
+ ftrace_update_ftrace_func(ftrace_trace_function);
+
+ if (command & FTRACE_START_FUNC_RET)
+ ftrace_enable_ftrace_graph_caller();
+ else if (command & FTRACE_STOP_FUNC_RET)
+ ftrace_disable_ftrace_graph_caller();
+}
+
int __init ftrace_dyn_arch_init(void *data)
{
/* caller expects data to be zero */
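
The ftrace.c hunks above replace each probe_kernel_write() plus flush_icache_range() pair with a single patch_instruction() call, so the store and the instruction-cache maintenance happen in one helper and cover only the one patched word. Below is a rough userspace model of that pattern, not the kernel's actual helper; it uses the compiler's cache-clearing builtin in place of the kernel's icache flush, and the nop encoding is the standard powerpc one.

/* Userspace model of the patch_instruction() pattern: store the new
 * instruction word, then push it out toward instruction fetch. Purely
 * illustrative; the kernel helper also copes with faulting writes. */
#include <stdint.h>
#include <stdio.h>

static int patch_instruction_model(uint32_t *addr, uint32_t instr)
{
	*addr = instr;                                        /* write the opcode */
	__builtin___clear_cache((char *)addr,                 /* icache maintenance */
	                        (char *)addr + sizeof(*addr)); /* for one word only */
	return 0;
}

int main(void)
{
	uint32_t text[1] = { 0 };
	uint32_t ppc_nop = 0x60000000;   /* "ori 0,0,0", the powerpc nop encoding */

	patch_instruction_model(&text[0], ppc_nop);
	printf("patched word: %#x\n", text[0]);
	return 0;
}
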
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 359f078571c..7bc94da1a83 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -33,6 +33,7 @@
#include <linux/bitmap.h>
#include <linux/iommu-helper.h>
#include <linux/crash_dump.h>
+#include <linux/hash.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/iommu.h>
@@ -58,6 +59,26 @@ static int __init setup_iommu(char *str)
__setup("iommu=", setup_iommu);
+static DEFINE_PER_CPU(unsigned int, iommu_pool_hash);
+
+/*
+ * We precalculate the hash to avoid doing it on every allocation.
+ *
+ * The hash is important to spread CPUs across all the pools. For example,
+ * on a POWER7 with 4-way SMT interrupts land on the primary threads, and
+ * with 4 pools all primary threads would otherwise map to the same pool.
+ */
+static int __init setup_iommu_pool_hash(void)
+{
+ unsigned int i;
+
+ for_each_possible_cpu(i)
+ per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);
+
+ return 0;
+}
+subsys_initcall(setup_iommu_pool_hash);
+
static unsigned long iommu_range_alloc(struct device *dev,
struct iommu_table *tbl,
unsigned long npages,
@@ -71,6 +92,9 @@ static unsigned long iommu_range_alloc(struct device *dev,
int pass = 0;
unsigned long align_mask;
unsigned long boundary_size;
+ unsigned long flags;
+ unsigned int pool_nr;
+ struct iommu_pool *pool;
align_mask = 0xffffffffffffffffl >> (64 - align_order);
@@ -83,36 +107,46 @@ static unsigned long iommu_range_alloc(struct device *dev,
return DMA_ERROR_CODE;
}
- if (handle && *handle)
- start = *handle;
+ /*
+ * We don't need to disable preemption here because any CPU can
+ * safely use any IOMMU pool.
+ */
+ pool_nr = __raw_get_cpu_var(iommu_pool_hash) & (tbl->nr_pools - 1);
+
+ if (largealloc)
+ pool = &(tbl->large_pool);
else
- start = largealloc ? tbl->it_largehint : tbl->it_hint;
+ pool = &(tbl->pools[pool_nr]);
+
+ spin_lock_irqsave(&(pool->lock), flags);
- /* Use only half of the table for small allocs (15 pages or less) */
- limit = largealloc ? tbl->it_size : tbl->it_halfpoint;
+again:
+ if ((pass == 0) && handle && *handle)
+ start = *handle;
+ else
+ start = pool->hint;
- if (largealloc && start < tbl->it_halfpoint)
- start = tbl->it_halfpoint;
+ limit = pool->end;
/* The case below can happen if we have a small segment appended
* to a large, or when the previous alloc was at the very end of
* the available space. If so, go back to the initial start.
*/
if (start >= limit)
- start = largealloc ? tbl->it_largehint : tbl->it_hint;
-
- again:
+ start = pool->start;
if (limit + tbl->it_offset > mask) {
limit = mask - tbl->it_offset + 1;
/* If we're constrained on address range, first try
* at the masked hint to avoid O(n) search complexity,
- * but on second pass, start at 0.
+ * but on second pass, start at 0 in pool 0.
*/
- if ((start & mask) >= limit || pass > 0)
- start = 0;
- else
+ if ((start & mask) >= limit || pass > 0) {
+ pool = &(tbl->pools[0]);
+ start = pool->start;
+ } else {
start &= mask;
+ }
}
if (dev)
@@ -126,16 +160,25 @@ static unsigned long iommu_range_alloc(struct device *dev,
tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT,
align_mask);
if (n == -1) {
- if (likely(pass < 2)) {
- /* First failure, just rescan the half of the table.
- * Second failure, rescan the other half of the table.
- */
- start = (largealloc ^ pass) ? tbl->it_halfpoint : 0;
- limit = pass ? tbl->it_size : limit;
+ if (likely(pass == 0)) {
+ /* First try the pool from the start */
+ pool->hint = pool->start;
pass++;
goto again;
+
+ } else if (pass <= tbl->nr_pools) {
+ /* Now try scanning all the other pools */
+ spin_unlock(&(pool->lock));
+ pool_nr = (pool_nr + 1) & (tbl->nr_pools - 1);
+ pool = &tbl->pools[pool_nr];
+ spin_lock(&(pool->lock));
+ pool->hint = pool->start;
+ pass++;
+ goto again;
+
} else {
- /* Third failure, give up */
+ /* Give up */
+ spin_unlock_irqrestore(&(pool->lock), flags);
return DMA_ERROR_CODE;
}
}
@@ -145,10 +188,10 @@ static unsigned long iommu_range_alloc(struct device *dev,
/* Bump the hint to a new block for small allocs. */
if (largealloc) {
/* Don't bump to new block to avoid fragmentation */
- tbl->it_largehint = end;
+ pool->hint = end;
} else {
/* Overflow will be taken care of at the next allocation */
- tbl->it_hint = (end + tbl->it_blocksize - 1) &
+ pool->hint = (end + tbl->it_blocksize - 1) &
~(tbl->it_blocksize - 1);
}
@@ -156,6 +199,8 @@ static unsigned long iommu_range_alloc(struct device *dev,
if (handle)
*handle = end;
+ spin_unlock_irqrestore(&(pool->lock), flags);
+
return n;
}
@@ -165,18 +210,14 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
unsigned long mask, unsigned int align_order,
struct dma_attrs *attrs)
{
- unsigned long entry, flags;
+ unsigned long entry;
dma_addr_t ret = DMA_ERROR_CODE;
int build_fail;
- spin_lock_irqsave(&(tbl->it_lock), flags);
-
entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order);
- if (unlikely(entry == DMA_ERROR_CODE)) {
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
+ if (unlikely(entry == DMA_ERROR_CODE))
return DMA_ERROR_CODE;
- }
entry += tbl->it_offset; /* Offset into real TCE table */
ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */
@@ -193,8 +234,6 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
*/
if (unlikely(build_fail)) {
__iommu_free(tbl, ret, npages);
-
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
return DMA_ERROR_CODE;
}
@@ -202,16 +241,14 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
if (ppc_md.tce_flush)
ppc_md.tce_flush(tbl);
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
-
/* Make sure updates are seen by hardware */
mb();
return ret;
}
-static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
- unsigned int npages)
+static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr,
+ unsigned int npages)
{
unsigned long entry, free_entry;
@@ -231,20 +268,57 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
printk(KERN_INFO "\tindex = 0x%llx\n", (u64)tbl->it_index);
WARN_ON(1);
}
- return;
+
+ return false;
+ }
+
+ return true;
+}
+
+static struct iommu_pool *get_pool(struct iommu_table *tbl,
+ unsigned long entry)
+{
+ struct iommu_pool *p;
+ unsigned long largepool_start = tbl->large_pool.start;
+
+ /* The large pool is the last pool at the top of the table */
+ if (entry >= largepool_start) {
+ p = &tbl->large_pool;
+ } else {
+ unsigned int pool_nr = entry / tbl->poolsize;
+
+ BUG_ON(pool_nr > tbl->nr_pools);
+ p = &tbl->pools[pool_nr];
}
+ return p;
+}
+
+static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+ unsigned int npages)
+{
+ unsigned long entry, free_entry;
+ unsigned long flags;
+ struct iommu_pool *pool;
+
+ entry = dma_addr >> IOMMU_PAGE_SHIFT;
+ free_entry = entry - tbl->it_offset;
+
+ pool = get_pool(tbl, free_entry);
+
+ if (!iommu_free_check(tbl, dma_addr, npages))
+ return;
+
ppc_md.tce_free(tbl, entry, npages);
+
+ spin_lock_irqsave(&(pool->lock), flags);
bitmap_clear(tbl->it_map, free_entry, npages);
+ spin_unlock_irqrestore(&(pool->lock), flags);
}
static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
unsigned int npages)
{
- unsigned long flags;
-
- spin_lock_irqsave(&(tbl->it_lock), flags);
-
__iommu_free(tbl, dma_addr, npages);
/* Make sure TLB cache is flushed if the HW needs it. We do
@@ -253,8 +327,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
*/
if (ppc_md.tce_flush)
ppc_md.tce_flush(tbl);
-
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
}
int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
@@ -263,7 +335,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
struct dma_attrs *attrs)
{
dma_addr_t dma_next = 0, dma_addr;
- unsigned long flags;
struct scatterlist *s, *outs, *segstart;
int outcount, incount, i, build_fail = 0;
unsigned int align;
@@ -285,8 +356,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
DBG("sg mapping %d elements:\n", nelems);
- spin_lock_irqsave(&(tbl->it_lock), flags);
-
max_seg_size = dma_get_max_seg_size(dev);
for_each_sg(sglist, s, nelems, i) {
unsigned long vaddr, npages, entry, slen;
@@ -369,8 +438,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
if (ppc_md.tce_flush)
ppc_md.tce_flush(tbl);
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
-
DBG("mapped %d elements:\n", outcount);
/* For the sake of iommu_unmap_sg, we clear out the length in the
@@ -402,7 +469,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
if (s == outs)
break;
}
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
return 0;
}
@@ -412,15 +478,12 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
struct dma_attrs *attrs)
{
struct scatterlist *sg;
- unsigned long flags;
BUG_ON(direction == DMA_NONE);
if (!tbl)
return;
- spin_lock_irqsave(&(tbl->it_lock), flags);
-
sg = sglist;
while (nelems--) {
unsigned int npages;
@@ -440,8 +503,6 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
*/
if (ppc_md.tce_flush)
ppc_md.tce_flush(tbl);
-
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
}
static void iommu_table_clear(struct iommu_table *tbl)
@@ -494,9 +555,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
unsigned long sz;
static int welcomed = 0;
struct page *page;
-
- /* Set aside 1/4 of the table for large allocations. */
- tbl->it_halfpoint = tbl->it_size * 3 / 4;
+ unsigned int i;
+ struct iommu_pool *p;
/* number of bytes needed for the bitmap */
sz = (tbl->it_size + 7) >> 3;
@@ -515,9 +575,28 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
if (tbl->it_offset == 0)
set_bit(0, tbl->it_map);
- tbl->it_hint = 0;
- tbl->it_largehint = tbl->it_halfpoint;
- spin_lock_init(&tbl->it_lock);
+ /* We only split the IOMMU table if we have 1GB or more of space */
+ if ((tbl->it_size << IOMMU_PAGE_SHIFT) >= (1UL * 1024 * 1024 * 1024))
+ tbl->nr_pools = IOMMU_NR_POOLS;
+ else
+ tbl->nr_pools = 1;
+
+ /* We reserve the top 1/4 of the table for large allocations */
+ tbl->poolsize = (tbl->it_size * 3 / 4) / IOMMU_NR_POOLS;
+
+ for (i = 0; i < IOMMU_NR_POOLS; i++) {
+ p = &tbl->pools[i];
+ spin_lock_init(&(p->lock));
+ p->start = tbl->poolsize * i;
+ p->hint = p->start;
+ p->end = p->start + tbl->poolsize;
+ }
+
+ p = &tbl->large_pool;
+ spin_lock_init(&(p->lock));
+ p->start = tbl->poolsize * i;
+ p->hint = p->start;
+ p->end = tbl->it_size;
iommu_table_clear(tbl);
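
The iommu.c changes above split the TCE table into several small pools plus one large pool at the top, each with its own lock and hint, and pick a pool from a precomputed per-CPU hash so that SMT primary threads do not all contend on pool 0. The sketch below models the pool layout and the selection arithmetic; the pool count, hash multiplier, and table size are assumptions for illustration, not the kernel's exact constants.

/* Model of the pool layout and per-CPU pool selection used above.
 * NR_POOLS, the hash multiplier, and it_size are illustrative assumptions. */
#include <stdint.h>
#include <stdio.h>

#define NR_POOLS 4   /* stand-in for IOMMU_NR_POOLS */

/* Golden-ratio style 32-bit hash, similar in spirit to the kernel's hash_32() */
static unsigned int hash32(unsigned int val, unsigned int bits)
{
	return (val * 0x9e370001u) >> (32 - bits);
}

int main(void)
{
	unsigned long it_size  = 1UL << 18;                  /* example TCE entry count */
	unsigned long poolsize = (it_size * 3 / 4) / NR_POOLS;

	/* Small pools cover the bottom 3/4 of the table; the large pool
	 * gets the top 1/4, mirroring iommu_init_table() above. */
	printf("pools 0..%d: %lu entries each, large pool starts at %lu\n",
	       NR_POOLS - 1, poolsize, poolsize * NR_POOLS);

	/* Without hashing, primary SMT threads 0,4,8,... would all land in
	 * pool 0 (cpu %% NR_POOLS == 0); hashing spreads them out. The mask
	 * mirrors the kernel's "& (tbl->nr_pools - 1)". */
	for (unsigned int cpu = 0; cpu <= 12; cpu += 4)
		printf("cpu %2u -> pool %u\n", cpu,
		       hash32(cpu, 2) & (NR_POOLS - 1));
	return 0;
}
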
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 8e78e93c818..0f75bd50040 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1646,7 +1646,6 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose)
pci_free_resource_list(&resources);
return;
}
- bus->secondary = hose->first_busno;
hose->bus = bus;
/* Get probe mode and perform scan */
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 89dde171a6f..d7dd42bd145 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -198,7 +198,6 @@ EXPORT_SYMBOL(of_create_pci_dev);
/**
* of_scan_pci_bridge - Set up a PCI bridge and scan for child nodes
- * @node: device tree node of bridge
* @dev: pci_dev structure for the bridge
*
* of_scan_bus() calls this routine for each PCI bridge that it finds, and
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index e4cb34322de..e1417c42155 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -571,7 +571,6 @@ void __devinit start_secondary(void *unused)
if (system_state == SYSTEM_RUNNING)
vdso_data->processorCount++;
#endif
- ipi_call_lock();
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
/* Update sibling maps */
@@ -601,7 +600,6 @@ void __devinit start_secondary(void *unused)
of_node_put(np);
}
of_node_put(l2_cache);
- ipi_call_unlock();
local_irq_enable();
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index cb87301ccd5..06cbc309b81 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -625,7 +625,7 @@ struct dma_map_ops vio_dma_mapping_ops = {
* vio_cmo_set_dev_desired - Set desired entitlement for a device
*
* @viodev: struct vio_dev for device to alter
- * @new_desired: new desired entitlement level in bytes
+ * @desired: new desired entitlement level in bytes
*
* For use by devices to request a change to their entitlement at runtime or
* through sysfs. The desired entitlement level is changed and a balancing
@@ -1262,7 +1262,7 @@ static int vio_bus_remove(struct device *dev)
/**
* vio_register_driver: - Register a new vio driver
- * @drv: The vio_driver structure to be registered.
+ * @viodrv: The vio_driver structure to be registered.
*/
int __vio_register_driver(struct vio_driver *viodrv, struct module *owner,
const char *mod_name)
@@ -1282,7 +1282,7 @@ EXPORT_SYMBOL(__vio_register_driver);
/**
* vio_unregister_driver - Remove registration of vio driver.
- * @driver: The vio_driver struct to be removed form registration
+ * @viodrv: The vio_driver struct to be removed from registration
*/
void vio_unregister_driver(struct vio_driver *viodrv)
{