aboutsummaryrefslogtreecommitdiff
path: root/arch/ppc/lib
diff options
context:
space:
mode:
Diffstat (limited to 'arch/ppc/lib')
-rw-r--r--arch/ppc/lib/Makefile9
-rw-r--r--arch/ppc/lib/checksum.S225
-rw-r--r--arch/ppc/lib/dec_and_lock.c46
-rw-r--r--arch/ppc/lib/div64.S58
-rw-r--r--arch/ppc/lib/locks.c190
-rw-r--r--arch/ppc/lib/rheap.c693
-rw-r--r--arch/ppc/lib/strcase.c23
-rw-r--r--arch/ppc/lib/string.S716
8 files changed, 1960 insertions, 0 deletions
diff --git a/arch/ppc/lib/Makefile b/arch/ppc/lib/Makefile
new file mode 100644
index 000000000000..1c380e67d435
--- /dev/null
+++ b/arch/ppc/lib/Makefile
@@ -0,0 +1,9 @@
+#
+# Makefile for ppc-specific library files..
+#
+
+obj-y := checksum.o string.o strcase.o dec_and_lock.o div64.o
+
+obj-$(CONFIG_SMP) += locks.o
+obj-$(CONFIG_8xx) += rheap.o
+obj-$(CONFIG_CPM2) += rheap.o
diff --git a/arch/ppc/lib/checksum.S b/arch/ppc/lib/checksum.S
new file mode 100644
index 000000000000..7874e8a80455
--- /dev/null
+++ b/arch/ppc/lib/checksum.S
@@ -0,0 +1,225 @@
+/*
+ * This file contains assembly-language implementations
+ * of IP-style 1's complement checksum routines.
+ *
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
+ */
+
+#include <linux/sys.h>
+#include <asm/processor.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+
+ .text
+
+/*
+ * ip_fast_csum(buf, len) -- Optimized for IP header
+ * len is in words and is always >= 5.
+ */
+_GLOBAL(ip_fast_csum)
+ lwz r0,0(r3)
+ lwzu r5,4(r3)
+ addic. r4,r4,-2
+ addc r0,r0,r5
+ mtctr r4
+ blelr-
+1: lwzu r4,4(r3)
+ adde r0,r0,r4
+ bdnz 1b
+ addze r0,r0 /* add in final carry */
+ rlwinm r3,r0,16,0,31 /* fold two halves together */
+ add r3,r0,r3
+ not r3,r3
+ srwi r3,r3,16
+ blr
+
+/*
+ * Compute checksum of TCP or UDP pseudo-header:
+ * csum_tcpudp_magic(saddr, daddr, len, proto, sum)
+ */
+_GLOBAL(csum_tcpudp_magic)
+ rlwimi r5,r6,16,0,15 /* put proto in upper half of len */
+ addc r0,r3,r4 /* add 4 32-bit words together */
+ adde r0,r0,r5
+ adde r0,r0,r7
+ addze r0,r0 /* add in final carry */
+ rlwinm r3,r0,16,0,31 /* fold two halves together */
+ add r3,r0,r3
+ not r3,r3
+ srwi r3,r3,16
+ blr
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * csum_partial(buff, len, sum)
+ */
+_GLOBAL(csum_partial)
+ addic r0,r5,0
+ subi r3,r3,4
+ srwi. r6,r4,2
+ beq 3f /* if we're doing < 4 bytes */
+ andi. r5,r3,2 /* Align buffer to longword boundary */
+ beq+ 1f
+ lhz r5,4(r3) /* do 2 bytes to get aligned */
+ addi r3,r3,2
+ subi r4,r4,2
+ addc r0,r0,r5
+ srwi. r6,r4,2 /* # words to do */
+ beq 3f
+1: mtctr r6
+2: lwzu r5,4(r3) /* the bdnz has zero overhead, so it should */
+ adde r0,r0,r5 /* be unnecessary to unroll this loop */
+ bdnz 2b
+ andi. r4,r4,3
+3: cmpwi 0,r4,2
+ blt+ 4f
+ lhz r5,4(r3)
+ addi r3,r3,2
+ subi r4,r4,2
+ adde r0,r0,r5
+4: cmpwi 0,r4,1
+ bne+ 5f
+ lbz r5,4(r3)
+ slwi r5,r5,8 /* Upper byte of word */
+ adde r0,r0,r5
+5: addze r3,r0 /* add in final carry */
+ blr
+
+/*
+ * Computes the checksum of a memory block at src, length len,
+ * and adds in "sum" (32-bit), while copying the block to dst.
+ * If an access exception occurs on src or dst, it stores -EFAULT
+ * to *src_err or *dst_err respectively, and (for an error on
+ * src) zeroes the rest of dst.
+ *
+ * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err)
+ */
+_GLOBAL(csum_partial_copy_generic)
+ addic r0,r6,0
+ subi r3,r3,4
+ subi r4,r4,4
+ srwi. r6,r5,2
+ beq 3f /* if we're doing < 4 bytes */
+ andi. r9,r4,2 /* Align dst to longword boundary */
+ beq+ 1f
+81: lhz r6,4(r3) /* do 2 bytes to get aligned */
+ addi r3,r3,2
+ subi r5,r5,2
+91: sth r6,4(r4)
+ addi r4,r4,2
+ addc r0,r0,r6
+ srwi. r6,r5,2 /* # words to do */
+ beq 3f
+1: srwi. r6,r5,4 /* # groups of 4 words to do */
+ beq 10f
+ mtctr r6
+71: lwz r6,4(r3)
+72: lwz r9,8(r3)
+73: lwz r10,12(r3)
+74: lwzu r11,16(r3)
+ adde r0,r0,r6
+75: stw r6,4(r4)
+ adde r0,r0,r9
+76: stw r9,8(r4)
+ adde r0,r0,r10
+77: stw r10,12(r4)
+ adde r0,r0,r11
+78: stwu r11,16(r4)
+ bdnz 71b
+10: rlwinm. r6,r5,30,30,31 /* # words left to do */
+ beq 13f
+ mtctr r6
+82: lwzu r9,4(r3)
+92: stwu r9,4(r4)
+ adde r0,r0,r9
+ bdnz 82b
+13: andi. r5,r5,3
+3: cmpwi 0,r5,2
+ blt+ 4f
+83: lhz r6,4(r3)
+ addi r3,r3,2
+ subi r5,r5,2
+93: sth r6,4(r4)
+ addi r4,r4,2
+ adde r0,r0,r6
+4: cmpwi 0,r5,1
+ bne+ 5f
+84: lbz r6,4(r3)
+94: stb r6,4(r4)
+ slwi r6,r6,8 /* Upper byte of word */
+ adde r0,r0,r6
+5: addze r3,r0 /* add in final carry */
+ blr
+
+/* These shouldn't go in the fixup section, since that would
+ cause the ex_table addresses to get out of order. */
+
+src_error_4:
+ mfctr r6 /* update # bytes remaining from ctr */
+ rlwimi r5,r6,4,0,27
+ b 79f
+src_error_1:
+ li r6,0
+ subi r5,r5,2
+95: sth r6,4(r4)
+ addi r4,r4,2
+79: srwi. r6,r5,2
+ beq 3f
+ mtctr r6
+src_error_2:
+ li r6,0
+96: stwu r6,4(r4)
+ bdnz 96b
+3: andi. r5,r5,3
+ beq src_error
+src_error_3:
+ li r6,0
+ mtctr r5
+ addi r4,r4,3
+97: stbu r6,1(r4)
+ bdnz 97b
+src_error:
+ cmpwi 0,r7,0
+ beq 1f
+ li r6,-EFAULT
+ stw r6,0(r7)
+1: addze r3,r0
+ blr
+
+dst_error:
+ cmpwi 0,r8,0
+ beq 1f
+ li r6,-EFAULT
+ stw r6,0(r8)
+1: addze r3,r0
+ blr
+
+.section __ex_table,"a"
+ .long 81b,src_error_1
+ .long 91b,dst_error
+ .long 71b,src_error_4
+ .long 72b,src_error_4
+ .long 73b,src_error_4
+ .long 74b,src_error_4
+ .long 75b,dst_error
+ .long 76b,dst_error
+ .long 77b,dst_error
+ .long 78b,dst_error
+ .long 82b,src_error_2
+ .long 92b,dst_error
+ .long 83b,src_error_3
+ .long 93b,dst_error
+ .long 84b,src_error_3
+ .long 94b,dst_error
+ .long 95b,dst_error
+ .long 96b,dst_error
+ .long 97b,dst_error
diff --git a/arch/ppc/lib/dec_and_lock.c b/arch/ppc/lib/dec_and_lock.c
new file mode 100644
index 000000000000..4ee888070d91
--- /dev/null
+++ b/arch/ppc/lib/dec_and_lock.c
@@ -0,0 +1,46 @@
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+#include <asm/system.h>
+
+/*
+ * This is an implementation of the notion of "decrement a
+ * reference count, and return locked if it decremented to zero".
+ *
+ * This implementation can be used on any architecture that
+ * has a cmpxchg, and where atomic->value is an int holding
+ * the value of the atomic (i.e. the high bits aren't used
+ * for a lock or anything like that).
+ *
+ * N.B. ATOMIC_DEC_AND_LOCK gets defined in include/linux/spinlock.h
+ * if spinlocks are empty and thus atomic_dec_and_lock is defined
+ * to be atomic_dec_and_test - in that case we don't need it
+ * defined here as well.
+ */
+
+#ifndef ATOMIC_DEC_AND_LOCK
+int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
+{
+ int counter;
+ int newcount;
+
+ for (;;) {
+ counter = atomic_read(atomic);
+ newcount = counter - 1;
+ if (!newcount)
+ break; /* do it the slow way */
+
+ newcount = cmpxchg(&atomic->counter, counter, newcount);
+ if (newcount == counter)
+ return 0;
+ }
+
+ spin_lock(lock);
+ if (atomic_dec_and_test(atomic))
+ return 1;
+ spin_unlock(lock);
+ return 0;
+}
+
+EXPORT_SYMBOL(_atomic_dec_and_lock);
+#endif /* ATOMIC_DEC_AND_LOCK */
diff --git a/arch/ppc/lib/div64.S b/arch/ppc/lib/div64.S
new file mode 100644
index 000000000000..3527569e9926
--- /dev/null
+++ b/arch/ppc/lib/div64.S
@@ -0,0 +1,58 @@
+/*
+ * Divide a 64-bit unsigned number by a 32-bit unsigned number.
+ * This routine assumes that the top 32 bits of the dividend are
+ * non-zero to start with.
+ * On entry, r3 points to the dividend, which get overwritten with
+ * the 64-bit quotient, and r4 contains the divisor.
+ * On exit, r3 contains the remainder.
+ *
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/ppc_asm.h>
+#include <asm/processor.h>
+
+_GLOBAL(__div64_32)
+ lwz r5,0(r3) # get the dividend into r5/r6
+ lwz r6,4(r3)
+ cmplw r5,r4
+ li r7,0
+ li r8,0
+ blt 1f
+ divwu r7,r5,r4 # if dividend.hi >= divisor,
+ mullw r0,r7,r4 # quotient.hi = dividend.hi / divisor
+ subf. r5,r0,r5 # dividend.hi %= divisor
+ beq 3f
+1: mr r11,r5 # here dividend.hi != 0
+ andis. r0,r5,0xc000
+ bne 2f
+ cntlzw r0,r5 # we are shifting the dividend right
+ li r10,-1 # to make it < 2^32, and shifting
+ srw r10,r10,r0 # the divisor right the same amount,
+ add r9,r4,r10 # rounding up (so the estimate cannot
+ andc r11,r6,r10 # ever be too large, only too small)
+ andc r9,r9,r10
+ or r11,r5,r11
+ rotlw r9,r9,r0
+ rotlw r11,r11,r0
+ divwu r11,r11,r9 # then we divide the shifted quantities
+2: mullw r10,r11,r4 # to get an estimate of the quotient,
+ mulhwu r9,r11,r4 # multiply the estimate by the divisor,
+ subfc r6,r10,r6 # take the product from the divisor,
+ add r8,r8,r11 # and add the estimate to the accumulated
+ subfe. r5,r9,r5 # quotient
+ bne 1b
+3: cmplw r6,r4
+ blt 4f
+ divwu r0,r6,r4 # perform the remaining 32-bit division
+ mullw r10,r0,r4 # and get the remainder
+ add r8,r8,r0
+ subf r6,r10,r6
+4: stw r7,0(r3) # return the quotient in *r3
+ stw r8,4(r3)
+ mr r3,r6 # return the remainder in r3
+ blr
diff --git a/arch/ppc/lib/locks.c b/arch/ppc/lib/locks.c
new file mode 100644
index 000000000000..694163d696d8
--- /dev/null
+++ b/arch/ppc/lib/locks.c
@@ -0,0 +1,190 @@
+/*
+ * Locks for smp ppc
+ *
+ * Written by Cort Dougan (cort@cs.nmt.edu)
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <asm/ppc_asm.h>
+#include <asm/smp.h>
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+
+#undef INIT_STUCK
+#define INIT_STUCK 200000000 /*0xffffffff*/
+
+/*
+ * Try to acquire a spinlock.
+ * Only does the stwcx. if the load returned 0 - the Programming
+ * Environments Manual suggests not doing unnecessary stcwx.'s
+ * since they may inhibit forward progress by other CPUs in getting
+ * a lock.
+ */
+static inline unsigned long __spin_trylock(volatile unsigned long *lock)
+{
+ unsigned long ret;
+
+ __asm__ __volatile__ ("\n\
+1: lwarx %0,0,%1\n\
+ cmpwi 0,%0,0\n\
+ bne 2f\n"
+ PPC405_ERR77(0,%1)
+" stwcx. %2,0,%1\n\
+ bne- 1b\n\
+ isync\n\
+2:"
+ : "=&r"(ret)
+ : "r"(lock), "r"(1)
+ : "cr0", "memory");
+
+ return ret;
+}
+
+void _raw_spin_lock(spinlock_t *lock)
+{
+ int cpu = smp_processor_id();
+ unsigned int stuck = INIT_STUCK;
+ while (__spin_trylock(&lock->lock)) {
+ while ((unsigned volatile long)lock->lock != 0) {
+ if (!--stuck) {
+ printk("_spin_lock(%p) CPU#%d NIP %p"
+ " holder: cpu %ld pc %08lX\n",
+ lock, cpu, __builtin_return_address(0),
+ lock->owner_cpu,lock->owner_pc);
+ stuck = INIT_STUCK;
+ /* steal the lock */
+ /*xchg_u32((void *)&lock->lock,0);*/
+ }
+ }
+ }
+ lock->owner_pc = (unsigned long)__builtin_return_address(0);
+ lock->owner_cpu = cpu;
+}
+EXPORT_SYMBOL(_raw_spin_lock);
+
+int _raw_spin_trylock(spinlock_t *lock)
+{
+ if (__spin_trylock(&lock->lock))
+ return 0;
+ lock->owner_cpu = smp_processor_id();
+ lock->owner_pc = (unsigned long)__builtin_return_address(0);
+ return 1;
+}
+EXPORT_SYMBOL(_raw_spin_trylock);
+
+void _raw_spin_unlock(spinlock_t *lp)
+{
+ if ( !lp->lock )
+ printk("_spin_unlock(%p): no lock cpu %d curr PC %p %s/%d\n",
+ lp, smp_processor_id(), __builtin_return_address(0),
+ current->comm, current->pid);
+ if ( lp->owner_cpu != smp_processor_id() )
+ printk("_spin_unlock(%p): cpu %d trying clear of cpu %d pc %lx val %lx\n",
+ lp, smp_processor_id(), (int)lp->owner_cpu,
+ lp->owner_pc,lp->lock);
+ lp->owner_pc = lp->owner_cpu = 0;
+ wmb();
+ lp->lock = 0;
+}
+EXPORT_SYMBOL(_raw_spin_unlock);
+
+/*
+ * For rwlocks, zero is unlocked, -1 is write-locked,
+ * positive is read-locked.
+ */
+static __inline__ int __read_trylock(rwlock_t *rw)
+{
+ signed int tmp;
+
+ __asm__ __volatile__(
+"2: lwarx %0,0,%1 # __read_trylock\n\
+ addic. %0,%0,1\n\
+ ble- 1f\n"
+ PPC405_ERR77(0,%1)
+" stwcx. %0,0,%1\n\
+ bne- 2b\n\
+ isync\n\
+1:"
+ : "=&r"(tmp)
+ : "r"(&rw->lock)
+ : "cr0", "memory");
+
+ return tmp;
+}
+
+int _raw_read_trylock(rwlock_t *rw)
+{
+ return __read_trylock(rw) > 0;
+}
+EXPORT_SYMBOL(_raw_read_trylock);
+
+void _raw_read_lock(rwlock_t *rw)
+{
+ unsigned int stuck;
+
+ while (__read_trylock(rw) <= 0) {
+ stuck = INIT_STUCK;
+ while (!read_can_lock(rw)) {
+ if (--stuck == 0) {
+ printk("_read_lock(%p) CPU#%d lock %d\n",
+ rw, _smp_processor_id(), rw->lock);
+ stuck = INIT_STUCK;
+ }
+ }
+ }
+}
+EXPORT_SYMBOL(_raw_read_lock);
+
+void _raw_read_unlock(rwlock_t *rw)
+{
+ if ( rw->lock == 0 )
+ printk("_read_unlock(): %s/%d (nip %08lX) lock %d\n",
+ current->comm,current->pid,current->thread.regs->nip,
+ rw->lock);
+ wmb();
+ atomic_dec((atomic_t *) &(rw)->lock);
+}
+EXPORT_SYMBOL(_raw_read_unlock);
+
+void _raw_write_lock(rwlock_t *rw)
+{
+ unsigned int stuck;
+
+ while (cmpxchg(&rw->lock, 0, -1) != 0) {
+ stuck = INIT_STUCK;
+ while (!write_can_lock(rw)) {
+ if (--stuck == 0) {
+ printk("write_lock(%p) CPU#%d lock %d)\n",
+ rw, _smp_processor_id(), rw->lock);
+ stuck = INIT_STUCK;
+ }
+ }
+ }
+ wmb();
+}
+EXPORT_SYMBOL(_raw_write_lock);
+
+int _raw_write_trylock(rwlock_t *rw)
+{
+ if (cmpxchg(&rw->lock, 0, -1) != 0)
+ return 0;
+ wmb();
+ return 1;
+}
+EXPORT_SYMBOL(_raw_write_trylock);
+
+void _raw_write_unlock(rwlock_t *rw)
+{
+ if (rw->lock >= 0)
+ printk("_write_lock(): %s/%d (nip %08lX) lock %d\n",
+ current->comm,current->pid,current->thread.regs->nip,
+ rw->lock);
+ wmb();
+ rw->lock = 0;
+}
+EXPORT_SYMBOL(_raw_write_unlock);
+
+#endif
diff --git a/arch/ppc/lib/rheap.c b/arch/ppc/lib/rheap.c
new file mode 100644
index 000000000000..42c5de2c898f
--- /dev/null
+++ b/arch/ppc/lib/rheap.c
@@ -0,0 +1,693 @@
+/*
+ * arch/ppc/syslib/rheap.c
+ *
+ * A Remote Heap. Remote means that we don't touch the memory that the
+ * heap points to. Normal heap implementations use the memory they manage
+ * to place their list. We cannot do that because the memory we manage may
+ * have special properties, for example it is uncachable or of different
+ * endianess.
+ *
+ * Author: Pantelis Antoniou <panto@intracom.gr>
+ *
+ * 2004 (c) INTRACOM S.A. Greece. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include <asm/rheap.h>
+
+/*
+ * Fixup a list_head, needed when copying lists. If the pointers fall
+ * between s and e, apply the delta. This assumes that
+ * sizeof(struct list_head *) == sizeof(unsigned long *).
+ */
+static inline void fixup(unsigned long s, unsigned long e, int d,
+ struct list_head *l)
+{
+ unsigned long *pp;
+
+ pp = (unsigned long *)&l->next;
+ if (*pp >= s && *pp < e)
+ *pp += d;
+
+ pp = (unsigned long *)&l->prev;
+ if (*pp >= s && *pp < e)
+ *pp += d;
+}
+
+/* Grow the allocated blocks */
+static int grow(rh_info_t * info, int max_blocks)
+{
+ rh_block_t *block, *blk;
+ int i, new_blocks;
+ int delta;
+ unsigned long blks, blke;
+
+ if (max_blocks <= info->max_blocks)
+ return -EINVAL;
+
+ new_blocks = max_blocks - info->max_blocks;
+
+ block = kmalloc(sizeof(rh_block_t) * max_blocks, GFP_KERNEL);
+ if (block == NULL)
+ return -ENOMEM;
+
+ if (info->max_blocks > 0) {
+
+ /* copy old block area */
+ memcpy(block, info->block,
+ sizeof(rh_block_t) * info->max_blocks);
+
+ delta = (char *)block - (char *)info->block;
+
+ /* and fixup list pointers */
+ blks = (unsigned long)info->block;
+ blke = (unsigned long)(info->block + info->max_blocks);
+
+ for (i = 0, blk = block; i < info->max_blocks; i++, blk++)
+ fixup(blks, blke, delta, &blk->list);
+
+ fixup(blks, blke, delta, &info->empty_list);
+ fixup(blks, blke, delta, &info->free_list);
+ fixup(blks, blke, delta, &info->taken_list);
+
+ /* free the old allocated memory */
+ if ((info->flags & RHIF_STATIC_BLOCK) == 0)
+ kfree(info->block);
+ }
+
+ info->block = block;
+ info->empty_slots += new_blocks;
+ info->max_blocks = max_blocks;
+ info->flags &= ~RHIF_STATIC_BLOCK;
+
+ /* add all new blocks to the free list */
+ for (i = 0, blk = block + info->max_blocks; i < new_blocks; i++, blk++)
+ list_add(&blk->list, &info->empty_list);
+
+ return 0;
+}
+
+/*
+ * Assure at least the required amount of empty slots. If this function
+ * causes a grow in the block area then all pointers kept to the block
+ * area are invalid!
+ */
+static int assure_empty(rh_info_t * info, int slots)
+{
+ int max_blocks;
+
+ /* This function is not meant to be used to grow uncontrollably */
+ if (slots >= 4)
+ return -EINVAL;
+
+ /* Enough space */
+ if (info->empty_slots >= slots)
+ return 0;
+
+ /* Next 16 sized block */
+ max_blocks = ((info->max_blocks + slots) + 15) & ~15;
+
+ return grow(info, max_blocks);
+}
+
+static rh_block_t *get_slot(rh_info_t * info)
+{
+ rh_block_t *blk;
+
+ /* If no more free slots, and failure to extend. */
+ /* XXX: You should have called assure_empty before */
+ if (info->empty_slots == 0) {
+ printk(KERN_ERR "rh: out of slots; crash is imminent.\n");
+ return NULL;
+ }
+
+ /* Get empty slot to use */
+ blk = list_entry(info->empty_list.next, rh_block_t, list);
+ list_del_init(&blk->list);
+ info->empty_slots--;
+
+ /* Initialize */
+ blk->start = NULL;
+ blk->size = 0;
+ blk->owner = NULL;
+
+ return blk;
+}
+
+static inline void release_slot(rh_info_t * info, rh_block_t * blk)
+{
+ list_add(&blk->list, &info->empty_list);
+ info->empty_slots++;
+}
+
+static void attach_free_block(rh_info_t * info, rh_block_t * blkn)
+{
+ rh_block_t *blk;
+ rh_block_t *before;
+ rh_block_t *after;
+ rh_block_t *next;
+ int size;
+ unsigned long s, e, bs, be;
+ struct list_head *l;
+
+ /* We assume that they are aligned properly */
+ size = blkn->size;
+ s = (unsigned long)blkn->start;
+ e = s + size;
+
+ /* Find the blocks immediately before and after the given one
+ * (if any) */
+ before = NULL;
+ after = NULL;
+ next = NULL;
+
+ list_for_each(l, &info->free_list) {
+ blk = list_entry(l, rh_block_t, list);
+
+ bs = (unsigned long)blk->start;
+ be = bs + blk->size;
+
+ if (next == NULL && s >= bs)
+ next = blk;
+
+ if (be == s)
+ before = blk;
+
+ if (e == bs)
+ after = blk;
+
+ /* If both are not null, break now */
+ if (before != NULL && after != NULL)
+ break;
+ }
+
+ /* Now check if they are really adjacent */
+ if (before != NULL && s != (unsigned long)before->start + before->size)
+ before = NULL;
+
+ if (after != NULL && e != (unsigned long)after->start)
+ after = NULL;
+
+ /* No coalescing; list insert and return */
+ if (before == NULL && after == NULL) {
+
+ if (next != NULL)
+ list_add(&blkn->list, &next->list);
+ else
+ list_add(&blkn->list, &info->free_list);
+
+ return;
+ }
+
+ /* We don't need it anymore */
+ release_slot(info, blkn);
+
+ /* Grow the before block */
+ if (before != NULL && after == NULL) {
+ before->size += size;
+ return;
+ }
+
+ /* Grow the after block backwards */
+ if (before == NULL && after != NULL) {
+ after->start = (int8_t *)after->start - size;
+ after->size += size;
+ return;
+ }
+
+ /* Grow the before block, and release the after block */
+ before->size += size + after->size;
+ list_del(&after->list);
+ release_slot(info, after);
+}
+
+static void attach_taken_block(rh_info_t * info, rh_block_t * blkn)
+{
+ rh_block_t *blk;
+ struct list_head *l;
+
+ /* Find the block immediately before the given one (if any) */
+ list_for_each(l, &info->taken_list) {
+ blk = list_entry(l, rh_block_t, list);
+ if (blk->start > blkn->start) {
+ list_add_tail(&blkn->list, &blk->list);
+ return;
+ }
+ }
+
+ list_add_tail(&blkn->list, &info->taken_list);
+}
+
+/*
+ * Create a remote heap dynamically. Note that no memory for the blocks
+ * are allocated. It will upon the first allocation
+ */
+rh_info_t *rh_create(unsigned int alignment)
+{
+ rh_info_t *info;
+
+ /* Alignment must be a power of two */
+ if ((alignment & (alignment - 1)) != 0)
+ return ERR_PTR(-EINVAL);
+
+ info = kmalloc(sizeof(*info), GFP_KERNEL);
+ if (info == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ info->alignment = alignment;
+
+ /* Initially everything as empty */
+ info->block = NULL;
+ info->max_blocks = 0;
+ info->empty_slots = 0;
+ info->flags = 0;
+
+ INIT_LIST_HEAD(&info->empty_list);
+ INIT_LIST_HEAD(&info->free_list);
+ INIT_LIST_HEAD(&info->taken_list);
+
+ return info;
+}
+
+/*
+ * Destroy a dynamically created remote heap. Deallocate only if the areas
+ * are not static
+ */
+void rh_destroy(rh_info_t * info)
+{
+ if ((info->flags & RHIF_STATIC_BLOCK) == 0 && info->block != NULL)
+ kfree(info->block);
+
+ if ((info->flags & RHIF_STATIC_INFO) == 0)
+ kfree(info);
+}
+
+/*
+ * Initialize in place a remote heap info block. This is needed to support
+ * operation very early in the startup of the kernel, when it is not yet safe
+ * to call kmalloc.
+ */
+void rh_init(rh_info_t * info, unsigned int alignment, int max_blocks,
+ rh_block_t * block)
+{
+ int i;
+ rh_block_t *blk;
+
+ /* Alignment must be a power of two */
+ if ((alignment & (alignment - 1)) != 0)
+ return;
+
+ info->alignment = alignment;
+
+ /* Initially everything as empty */
+ info->block = block;
+ info->max_blocks = max_blocks;
+ info->empty_slots = max_blocks;
+ info->flags = RHIF_STATIC_INFO | RHIF_STATIC_BLOCK;
+
+ INIT_LIST_HEAD(&info->empty_list);
+ INIT_LIST_HEAD(&info->free_list);
+ INIT_LIST_HEAD(&info->taken_list);
+
+ /* Add all new blocks to the free list */
+ for (i = 0, blk = block; i < max_blocks; i++, blk++)
+ list_add(&blk->list, &info->empty_list);
+}
+
+/* Attach a free memory region, coalesces regions if adjuscent */
+int rh_attach_region(rh_info_t * info, void *start, int size)
+{
+ rh_block_t *blk;
+ unsigned long s, e, m;
+ int r;
+
+ /* The region must be aligned */
+ s = (unsigned long)start;
+ e = s + size;
+ m = info->alignment - 1;
+
+ /* Round start up */
+ s = (s + m) & ~m;
+
+ /* Round end down */
+ e = e & ~m;
+
+ /* Take final values */
+ start = (void *)s;
+ size = (int)(e - s);
+
+ /* Grow the blocks, if needed */
+ r = assure_empty(info, 1);
+ if (r < 0)
+ return r;
+
+ blk = get_slot(info);
+ blk->start = start;
+ blk->size = size;
+ blk->owner = NULL;
+
+ attach_free_block(info, blk);
+
+ return 0;
+}
+
+/* Detatch given address range, splits free block if needed. */
+void *rh_detach_region(rh_info_t * info, void *start, int size)
+{
+ struct list_head *l;
+ rh_block_t *blk, *newblk;
+ unsigned long s, e, m, bs, be;
+
+ /* Validate size */
+ if (size <= 0)
+ return ERR_PTR(-EINVAL);
+
+ /* The region must be aligned */
+ s = (unsigned long)start;
+ e = s + size;
+ m = info->alignment - 1;
+
+ /* Round start up */
+ s = (s + m) & ~m;
+
+ /* Round end down */
+ e = e & ~m;
+
+ if (assure_empty(info, 1) < 0)
+ return ERR_PTR(-ENOMEM);
+
+ blk = NULL;
+ list_for_each(l, &info->free_list) {
+ blk = list_entry(l, rh_block_t, list);
+ /* The range must lie entirely inside one free block */
+ bs = (unsigned long)blk->start;
+ be = (unsigned long)blk->start + blk->size;
+ if (s >= bs && e <= be)
+ break;
+ blk = NULL;
+ }
+
+ if (blk == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ /* Perfect fit */
+ if (bs == s && be == e) {
+ /* Delete from free list, release slot */
+ list_del(&blk->list);
+ release_slot(info, blk);
+ return (void *)s;
+ }
+
+ /* blk still in free list, with updated start and/or size */
+ if (bs == s || be == e) {
+ if (bs == s)
+ blk->start = (int8_t *)blk->start + size;
+ blk->size -= size;
+
+ } else {
+ /* The front free fragment */
+ blk->size = s - bs;
+
+ /* the back free fragment */
+ newblk = get_slot(info);
+ newblk->start = (void *)e;
+ newblk->size = be - e;
+
+ list_add(&newblk->list, &blk->list);
+ }
+
+ return (void *)s;
+}
+
+void *rh_alloc(rh_info_t * info, int size, const char *owner)
+{
+ struct list_head *l;
+ rh_block_t *blk;
+ rh_block_t *newblk;
+ void *start;
+
+ /* Validate size */
+ if (size <= 0)
+ return ERR_PTR(-EINVAL);
+
+ /* Align to configured alignment */
+ size = (size + (info->alignment - 1)) & ~(info->alignment - 1);
+
+ if (assure_empty(info, 1) < 0)
+ return ERR_PTR(-ENOMEM);
+
+ blk = NULL;
+ list_for_each(l, &info->free_list) {
+ blk = list_entry(l, rh_block_t, list);
+ if (size <= blk->size)
+ break;
+ blk = NULL;
+ }
+
+ if (blk == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ /* Just fits */
+ if (blk->size == size) {
+ /* Move from free list to taken list */
+ list_del(&blk->list);
+ blk->owner = owner;
+ start = blk->start;
+
+ attach_taken_block(info, blk);
+
+ return start;
+ }
+
+ newblk = get_slot(info);
+ newblk->start = blk->start;
+ newblk->size = size;
+ newblk->owner = owner;
+
+ /* blk still in free list, with updated start, size */
+ blk->start = (int8_t *)blk->start + size;
+ blk->size -= size;
+
+ start = newblk->start;
+
+ attach_taken_block(info, newblk);
+
+ return start;
+}
+
+/* allocate at precisely the given address */
+void *rh_alloc_fixed(rh_info_t * info, void *start, int size, const char *owner)
+{
+ struct list_head *l;
+ rh_block_t *blk, *newblk1, *newblk2;
+ unsigned long s, e, m, bs, be;
+
+ /* Validate size */
+ if (size <= 0)
+ return ERR_PTR(-EINVAL);
+
+ /* The region must be aligned */
+ s = (unsigned long)start;
+ e = s + size;
+ m = info->alignment - 1;
+
+ /* Round start up */
+ s = (s + m) & ~m;
+
+ /* Round end down */
+ e = e & ~m;
+
+ if (assure_empty(info, 2) < 0)
+ return ERR_PTR(-ENOMEM);
+
+ blk = NULL;
+ list_for_each(l, &info->free_list) {
+ blk = list_entry(l, rh_block_t, list);
+ /* The range must lie entirely inside one free block */
+ bs = (unsigned long)blk->start;
+ be = (unsigned long)blk->start + blk->size;
+ if (s >= bs && e <= be)
+ break;
+ }
+
+ if (blk == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ /* Perfect fit */
+ if (bs == s && be == e) {
+ /* Move from free list to taken list */
+ list_del(&blk->list);
+ blk->owner = owner;
+
+ start = blk->start;
+ attach_taken_block(info, blk);
+
+ return start;
+
+ }
+
+ /* blk still in free list, with updated start and/or size */
+ if (bs == s || be == e) {
+ if (bs == s)
+ blk->start = (int8_t *)blk->start + size;
+ blk->size -= size;
+
+ } else {
+ /* The front free fragment */
+ blk->size = s - bs;
+
+ /* The back free fragment */
+ newblk2 = get_slot(info);
+ newblk2->start = (void *)e;
+ newblk2->size = be - e;
+
+ list_add(&newblk2->list, &blk->list);
+ }
+
+ newblk1 = get_slot(info);
+ newblk1->start = (void *)s;
+ newblk1->size = e - s;
+ newblk1->owner = owner;
+
+ start = newblk1->start;
+ attach_taken_block(info, newblk1);
+
+ return start;
+}
+
+int rh_free(rh_info_t * info, void *start)
+{
+ rh_block_t *blk, *blk2;
+ struct list_head *l;
+ int size;
+
+ /* Linear search for block */
+ blk = NULL;
+ list_for_each(l, &info->taken_list) {
+ blk2 = list_entry(l, rh_block_t, list);
+ if (start < blk2->start)
+ break;
+ blk = blk2;
+ }
+
+ if (blk == NULL || start > (blk->start + blk->size))
+ return -EINVAL;
+
+ /* Remove from taken list */
+ list_del(&blk->list);
+
+ /* Get size of freed block */
+ size = blk->size;
+ attach_free_block(info, blk);
+
+ return size;
+}
+
+int rh_get_stats(rh_info_t * info, int what, int max_stats, rh_stats_t * stats)
+{
+ rh_block_t *blk;
+ struct list_head *l;
+ struct list_head *h;
+ int nr;
+
+ switch (what) {
+
+ case RHGS_FREE:
+ h = &info->free_list;
+ break;
+
+ case RHGS_TAKEN:
+ h = &info->taken_list;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ /* Linear search for block */
+ nr = 0;
+ list_for_each(l, h) {
+ blk = list_entry(l, rh_block_t, list);
+ if (stats != NULL && nr < max_stats) {
+ stats->start = blk->start;
+ stats->size = blk->size;
+ stats->owner = blk->owner;
+ stats++;
+ }
+ nr++;
+ }
+
+ return nr;
+}
+
+int rh_set_owner(rh_info_t * info, void *start, const char *owner)
+{
+ rh_block_t *blk, *blk2;
+ struct list_head *l;
+ int size;
+
+ /* Linear search for block */
+ blk = NULL;
+ list_for_each(l, &info->taken_list) {
+ blk2 = list_entry(l, rh_block_t, list);
+ if (start < blk2->start)
+ break;
+ blk = blk2;
+ }
+
+ if (blk == NULL || start > (blk->start + blk->size))
+ return -EINVAL;
+
+ blk->owner = owner;
+ size = blk->size;
+
+ return size;
+}
+
+void rh_dump(rh_info_t * info)
+{
+ static rh_stats_t st[32]; /* XXX maximum 32 blocks */
+ int maxnr;
+ int i, nr;
+
+ maxnr = sizeof(st) / sizeof(st[0]);
+
+ printk(KERN_INFO
+ "info @0x%p (%d slots empty / %d max)\n",
+ info, info->empty_slots, info->max_blocks);
+
+ printk(KERN_INFO " Free:\n");
+ nr = rh_get_stats(info, RHGS_FREE, maxnr, st);
+ if (nr > maxnr)
+ nr = maxnr;
+ for (i = 0; i < nr; i++)
+ printk(KERN_INFO
+ " 0x%p-0x%p (%u)\n",
+ st[i].start, (int8_t *) st[i].start + st[i].size,
+ st[i].size);
+ printk(KERN_INFO "\n");
+
+ printk(KERN_INFO " Taken:\n");
+ nr = rh_get_stats(info, RHGS_TAKEN, maxnr, st);
+ if (nr > maxnr)
+ nr = maxnr;
+ for (i = 0; i < nr; i++)
+ printk(KERN_INFO
+ " 0x%p-0x%p (%u) %s\n",
+ st[i].start, (int8_t *) st[i].start + st[i].size,
+ st[i].size, st[i].owner != NULL ? st[i].owner : "");
+ printk(KERN_INFO "\n");
+}
+
+void rh_dump_blk(rh_info_t * info, rh_block_t * blk)
+{
+ printk(KERN_INFO
+ "blk @0x%p: 0x%p-0x%p (%u)\n",
+ blk, blk->start, (int8_t *) blk->start + blk->size, blk->size);
+}
diff --git a/arch/ppc/lib/strcase.c b/arch/ppc/lib/strcase.c
new file mode 100644
index 000000000000..36b521091bbc
--- /dev/null
+++ b/arch/ppc/lib/strcase.c
@@ -0,0 +1,23 @@
+#include <linux/ctype.h>
+
+int strcasecmp(const char *s1, const char *s2)
+{
+ int c1, c2;
+
+ do {
+ c1 = tolower(*s1++);
+ c2 = tolower(*s2++);
+ } while (c1 == c2 && c1 != 0);
+ return c1 - c2;
+}
+
+int strncasecmp(const char *s1, const char *s2, int n)
+{
+ int c1, c2;
+
+ do {
+ c1 = tolower(*s1++);
+ c2 = tolower(*s2++);
+ } while ((--n > 0) && c1 == c2 && c1 != 0);
+ return c1 - c2;
+}
diff --git a/arch/ppc/lib/string.S b/arch/ppc/lib/string.S
new file mode 100644
index 000000000000..8d08a2eb225e
--- /dev/null
+++ b/arch/ppc/lib/string.S
@@ -0,0 +1,716 @@
+/*
+ * String handling functions for PowerPC.
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+
+#define COPY_16_BYTES \
+ lwz r7,4(r4); \
+ lwz r8,8(r4); \
+ lwz r9,12(r4); \
+ lwzu r10,16(r4); \
+ stw r7,4(r6); \
+ stw r8,8(r6); \
+ stw r9,12(r6); \
+ stwu r10,16(r6)
+
+#define COPY_16_BYTES_WITHEX(n) \
+8 ## n ## 0: \
+ lwz r7,4(r4); \
+8 ## n ## 1: \
+ lwz r8,8(r4); \
+8 ## n ## 2: \
+ lwz r9,12(r4); \
+8 ## n ## 3: \
+ lwzu r10,16(r4); \
+8 ## n ## 4: \
+ stw r7,4(r6); \
+8 ## n ## 5: \
+ stw r8,8(r6); \
+8 ## n ## 6: \
+ stw r9,12(r6); \
+8 ## n ## 7: \
+ stwu r10,16(r6)
+
+#define COPY_16_BYTES_EXCODE(n) \
+9 ## n ## 0: \
+ addi r5,r5,-(16 * n); \
+ b 104f; \
+9 ## n ## 1: \
+ addi r5,r5,-(16 * n); \
+ b 105f; \
+.section __ex_table,"a"; \
+ .align 2; \
+ .long 8 ## n ## 0b,9 ## n ## 0b; \
+ .long 8 ## n ## 1b,9 ## n ## 0b; \
+ .long 8 ## n ## 2b,9 ## n ## 0b; \
+ .long 8 ## n ## 3b,9 ## n ## 0b; \
+ .long 8 ## n ## 4b,9 ## n ## 1b; \
+ .long 8 ## n ## 5b,9 ## n ## 1b; \
+ .long 8 ## n ## 6b,9 ## n ## 1b; \
+ .long 8 ## n ## 7b,9 ## n ## 1b; \
+ .text
+
+ .text
+ .stabs "arch/ppc/lib/",N_SO,0,0,0f
+ .stabs "string.S",N_SO,0,0,0f
+
+CACHELINE_BYTES = L1_CACHE_LINE_SIZE
+LG_CACHELINE_BYTES = LG_L1_CACHE_LINE_SIZE
+CACHELINE_MASK = (L1_CACHE_LINE_SIZE-1)
+
+_GLOBAL(strcpy)
+ addi r5,r3,-1
+ addi r4,r4,-1
+1: lbzu r0,1(r4)
+ cmpwi 0,r0,0
+ stbu r0,1(r5)
+ bne 1b
+ blr
+
+/* This clears out any unused part of the destination buffer,
+ just as the libc version does. -- paulus */
+_GLOBAL(strncpy)
+ cmpwi 0,r5,0
+ beqlr
+ mtctr r5
+ addi r6,r3,-1
+ addi r4,r4,-1
+1: lbzu r0,1(r4)
+ cmpwi 0,r0,0
+ stbu r0,1(r6)
+ bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */
+ bnelr /* if we didn't hit a null char, we're done */
+ mfctr r5
+ cmpwi 0,r5,0 /* any space left in destination buffer? */
+ beqlr /* we know r0 == 0 here */
+2: stbu r0,1(r6) /* clear it out if so */
+ bdnz 2b
+ blr
+
+_GLOBAL(strcat)
+ addi r5,r3,-1
+ addi r4,r4,-1
+1: lbzu r0,1(r5)
+ cmpwi 0,r0,0
+ bne 1b
+ addi r5,r5,-1
+1: lbzu r0,1(r4)
+ cmpwi 0,r0,0
+ stbu r0,1(r5)
+ bne 1b
+ blr
+
+_GLOBAL(strcmp)
+ addi r5,r3,-1
+ addi r4,r4,-1
+1: lbzu r3,1(r5)
+ cmpwi 1,r3,0
+ lbzu r0,1(r4)
+ subf. r3,r0,r3
+ beqlr 1
+ beq 1b
+ blr
+
+_GLOBAL(strlen)
+ addi r4,r3,-1
+1: lbzu r0,1(r4)
+ cmpwi 0,r0,0
+ bne 1b
+ subf r3,r3,r4
+ blr
+
+/*
+ * Use dcbz on the complete cache lines in the destination
+ * to set them to zero. This requires that the destination
+ * area is cacheable. -- paulus
+ */
+_GLOBAL(cacheable_memzero)
+ mr r5,r4
+ li r4,0
+ addi r6,r3,-4
+ cmplwi 0,r5,4
+ blt 7f
+ stwu r4,4(r6)
+ beqlr
+ andi. r0,r6,3
+ add r5,r0,r5
+ subf r6,r0,r6
+ clrlwi r7,r6,32-LG_CACHELINE_BYTES
+ add r8,r7,r5
+ srwi r9,r8,LG_CACHELINE_BYTES
+ addic. r9,r9,-1 /* total number of complete cachelines */
+ ble 2f
+ xori r0,r7,CACHELINE_MASK & ~3
+ srwi. r0,r0,2
+ beq 3f
+ mtctr r0
+4: stwu r4,4(r6)
+ bdnz 4b
+3: mtctr r9
+ li r7,4
+#if !defined(CONFIG_8xx)
+10: dcbz r7,r6
+#else
+10: stw r4, 4(r6)
+ stw r4, 8(r6)
+ stw r4, 12(r6)
+ stw r4, 16(r6)
+#if CACHE_LINE_SIZE >= 32
+ stw r4, 20(r6)
+ stw r4, 24(r6)
+ stw r4, 28(r6)
+ stw r4, 32(r6)
+#endif /* CACHE_LINE_SIZE */
+#endif
+ addi r6,r6,CACHELINE_BYTES
+ bdnz 10b
+ clrlwi r5,r8,32-LG_CACHELINE_BYTES
+ addi r5,r5,4
+2: srwi r0,r5,2
+ mtctr r0
+ bdz 6f
+1: stwu r4,4(r6)
+ bdnz 1b
+6: andi. r5,r5,3
+7: cmpwi 0,r5,0
+ beqlr
+ mtctr r5
+ addi r6,r6,3
+8: stbu r4,1(r6)
+ bdnz 8b
+ blr
+
+_GLOBAL(memset)
+ rlwimi r4,r4,8,16,23
+ rlwimi r4,r4,16,0,15
+ addi r6,r3,-4
+ cmplwi 0,r5,4
+ blt 7f
+ stwu r4,4(r6)
+ beqlr
+ andi. r0,r6,3
+ add r5,r0,r5
+ subf r6,r0,r6
+ srwi r0,r5,2
+ mtctr r0
+ bdz 6f
+1: stwu r4,4(r6)
+ bdnz 1b
+6: andi. r5,r5,3
+7: cmpwi 0,r5,0
+ beqlr
+ mtctr r5
+ addi r6,r6,3
+8: stbu r4,1(r6)
+ bdnz 8b
+ blr
+
+/*
+ * This version uses dcbz on the complete cache lines in the
+ * destination area to reduce memory traffic. This requires that
+ * the destination area is cacheable.
+ * We only use this version if the source and dest don't overlap.
+ * -- paulus.
+ */
+_GLOBAL(cacheable_memcpy)
+ add r7,r3,r5 /* test if the src & dst overlap */
+ add r8,r4,r5
+ cmplw 0,r4,r7
+ cmplw 1,r3,r8
+ crand 0,0,4 /* cr0.lt &= cr1.lt */
+ blt memcpy /* if regions overlap */
+
+ addi r4,r4,-4
+ addi r6,r3,-4
+ neg r0,r3
+ andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
+ beq 58f
+
+ cmplw 0,r5,r0 /* is this more than total to do? */
+ blt 63f /* if not much to do */
+ andi. r8,r0,3 /* get it word-aligned first */
+ subf r5,r0,r5
+ mtctr r8
+ beq+ 61f
+70: lbz r9,4(r4) /* do some bytes */
+ stb r9,4(r6)
+ addi r4,r4,1
+ addi r6,r6,1
+ bdnz 70b
+61: srwi. r0,r0,2
+ mtctr r0
+ beq 58f
+72: lwzu r9,4(r4) /* do some words */
+ stwu r9,4(r6)
+ bdnz 72b
+
+58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
+ clrlwi r5,r5,32-LG_CACHELINE_BYTES
+ li r11,4
+ mtctr r0
+ beq 63f
+53:
+#if !defined(CONFIG_8xx)
+ dcbz r11,r6
+#endif
+ COPY_16_BYTES
+#if L1_CACHE_LINE_SIZE >= 32
+ COPY_16_BYTES
+#if L1_CACHE_LINE_SIZE >= 64
+ COPY_16_BYTES
+ COPY_16_BYTES
+#if L1_CACHE_LINE_SIZE >= 128
+ COPY_16_BYTES
+ COPY_16_BYTES
+ COPY_16_BYTES
+ COPY_16_BYTES
+#endif
+#endif
+#endif
+ bdnz 53b
+
+63: srwi. r0,r5,2
+ mtctr r0
+ beq 64f
+30: lwzu r0,4(r4)
+ stwu r0,4(r6)
+ bdnz 30b
+
+64: andi. r0,r5,3
+ mtctr r0
+ beq+ 65f
+40: lbz r0,4(r4)
+ stb r0,4(r6)
+ addi r4,r4,1
+ addi r6,r6,1
+ bdnz 40b
+65: blr
+
+_GLOBAL(memmove)
+ cmplw 0,r3,r4
+ bgt backwards_memcpy
+ /* fall through */
+
+_GLOBAL(memcpy)
+ srwi. r7,r5,3
+ addi r6,r3,-4
+ addi r4,r4,-4
+ beq 2f /* if less than 8 bytes to do */
+ andi. r0,r6,3 /* get dest word aligned */
+ mtctr r7
+ bne 5f
+1: lwz r7,4(r4)
+ lwzu r8,8(r4)
+ stw r7,4(r6)
+ stwu r8,8(r6)
+ bdnz 1b
+ andi. r5,r5,7
+2: cmplwi 0,r5,4
+ blt 3f
+ lwzu r0,4(r4)
+ addi r5,r5,-4
+ stwu r0,4(r6)
+3: cmpwi 0,r5,0
+ beqlr
+ mtctr r5
+ addi r4,r4,3
+ addi r6,r6,3
+4: lbzu r0,1(r4)
+ stbu r0,1(r6)
+ bdnz 4b
+ blr
+5: subfic r0,r0,4
+ mtctr r0
+6: lbz r7,4(r4)
+ addi r4,r4,1
+ stb r7,4(r6)
+ addi r6,r6,1
+ bdnz 6b
+ subf r5,r0,r5
+ rlwinm. r7,r5,32-3,3,31
+ beq 2b
+ mtctr r7
+ b 1b
+
+_GLOBAL(backwards_memcpy)
+ rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */
+ add r6,r3,r5
+ add r4,r4,r5
+ beq 2f
+ andi. r0,r6,3
+ mtctr r7
+ bne 5f
+1: lwz r7,-4(r4)
+ lwzu r8,-8(r4)
+ stw r7,-4(r6)
+ stwu r8,-8(r6)
+ bdnz 1b
+ andi. r5,r5,7
+2: cmplwi 0,r5,4
+ blt 3f
+ lwzu r0,-4(r4)
+ subi r5,r5,4
+ stwu r0,-4(r6)
+3: cmpwi 0,r5,0
+ beqlr
+ mtctr r5
+4: lbzu r0,-1(r4)
+ stbu r0,-1(r6)
+ bdnz 4b
+ blr
+5: mtctr r0
+6: lbzu r7,-1(r4)
+ stbu r7,-1(r6)
+ bdnz 6b
+ subf r5,r0,r5
+ rlwinm. r7,r5,32-3,3,31
+ beq 2b
+ mtctr r7
+ b 1b
+
+_GLOBAL(memcmp)
+ cmpwi 0,r5,0
+ ble- 2f
+ mtctr r5
+ addi r6,r3,-1
+ addi r4,r4,-1
+1: lbzu r3,1(r6)
+ lbzu r0,1(r4)
+ subf. r3,r0,r3
+ bdnzt 2,1b
+ blr
+2: li r3,0
+ blr
+
+_GLOBAL(memchr)
+ cmpwi 0,r5,0
+ ble- 2f
+ mtctr r5
+ addi r3,r3,-1
+1: lbzu r0,1(r3)
+ cmpw 0,r0,r4
+ bdnzf 2,1b
+ beqlr
+2: li r3,0
+ blr
+
+_GLOBAL(__copy_tofrom_user)
+ addi r4,r4,-4
+ addi r6,r3,-4
+ neg r0,r3
+ andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
+ beq 58f
+
+ cmplw 0,r5,r0 /* is this more than total to do? */
+ blt 63f /* if not much to do */
+ andi. r8,r0,3 /* get it word-aligned first */
+ mtctr r8
+ beq+ 61f
+70: lbz r9,4(r4) /* do some bytes */
+71: stb r9,4(r6)
+ addi r4,r4,1
+ addi r6,r6,1
+ bdnz 70b
+61: subf r5,r0,r5
+ srwi. r0,r0,2
+ mtctr r0
+ beq 58f
+72: lwzu r9,4(r4) /* do some words */
+73: stwu r9,4(r6)
+ bdnz 72b
+
+ .section __ex_table,"a"
+ .align 2
+ .long 70b,100f
+ .long 71b,101f
+ .long 72b,102f
+ .long 73b,103f
+ .text
+
+58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
+ clrlwi r5,r5,32-LG_CACHELINE_BYTES
+ li r11,4
+ beq 63f
+
+#ifdef CONFIG_8xx
+ /* Don't use prefetch on 8xx */
+ mtctr r0
+53: COPY_16_BYTES_WITHEX(0)
+ bdnz 53b
+
+#else /* not CONFIG_8xx */
+ /* Here we decide how far ahead to prefetch the source */
+ li r3,4
+ cmpwi r0,1
+ li r7,0
+ ble 114f
+ li r7,1
+#if MAX_COPY_PREFETCH > 1
+ /* Heuristically, for large transfers we prefetch
+ MAX_COPY_PREFETCH cachelines ahead. For small transfers
+ we prefetch 1 cacheline ahead. */
+ cmpwi r0,MAX_COPY_PREFETCH
+ ble 112f
+ li r7,MAX_COPY_PREFETCH
+112: mtctr r7
+111: dcbt r3,r4
+ addi r3,r3,CACHELINE_BYTES
+ bdnz 111b
+#else
+ dcbt r3,r4
+ addi r3,r3,CACHELINE_BYTES
+#endif /* MAX_COPY_PREFETCH > 1 */
+
+114: subf r8,r7,r0
+ mr r0,r7
+ mtctr r8
+
+53: dcbt r3,r4
+54: dcbz r11,r6
+ .section __ex_table,"a"
+ .align 2
+ .long 54b,105f
+ .text
+/* the main body of the cacheline loop */
+ COPY_16_BYTES_WITHEX(0)
+#if L1_CACHE_LINE_SIZE >= 32
+ COPY_16_BYTES_WITHEX(1)
+#if L1_CACHE_LINE_SIZE >= 64
+ COPY_16_BYTES_WITHEX(2)
+ COPY_16_BYTES_WITHEX(3)
+#if L1_CACHE_LINE_SIZE >= 128
+ COPY_16_BYTES_WITHEX(4)
+ COPY_16_BYTES_WITHEX(5)
+ COPY_16_BYTES_WITHEX(6)
+ COPY_16_BYTES_WITHEX(7)
+#endif
+#endif
+#endif
+ bdnz 53b
+ cmpwi r0,0
+ li r3,4
+ li r7,0
+ bne 114b
+#endif /* CONFIG_8xx */
+
+63: srwi. r0,r5,2
+ mtctr r0
+ beq 64f
+30: lwzu r0,4(r4)
+31: stwu r0,4(r6)
+ bdnz 30b
+
+64: andi. r0,r5,3
+ mtctr r0
+ beq+ 65f
+40: lbz r0,4(r4)
+41: stb r0,4(r6)
+ addi r4,r4,1
+ addi r6,r6,1
+ bdnz 40b
+65: li r3,0
+ blr
+
+/* read fault, initial single-byte copy */
+100: li r9,0
+ b 90f
+/* write fault, initial single-byte copy */
+101: li r9,1
+90: subf r5,r8,r5
+ li r3,0
+ b 99f
+/* read fault, initial word copy */
+102: li r9,0
+ b 91f
+/* write fault, initial word copy */
+103: li r9,1
+91: li r3,2
+ b 99f
+
+/*
+ * this stuff handles faults in the cacheline loop and branches to either
+ * 104f (if in read part) or 105f (if in write part), after updating r5
+ */
+ COPY_16_BYTES_EXCODE(0)
+#if L1_CACHE_LINE_SIZE >= 32
+ COPY_16_BYTES_EXCODE(1)
+#if L1_CACHE_LINE_SIZE >= 64
+ COPY_16_BYTES_EXCODE(2)
+ COPY_16_BYTES_EXCODE(3)
+#if L1_CACHE_LINE_SIZE >= 128
+ COPY_16_BYTES_EXCODE(4)
+ COPY_16_BYTES_EXCODE(5)
+ COPY_16_BYTES_EXCODE(6)
+ COPY_16_BYTES_EXCODE(7)
+#endif
+#endif
+#endif
+
+/* read fault in cacheline loop */
+104: li r9,0
+ b 92f
+/* fault on dcbz (effectively a write fault) */
+/* or write fault in cacheline loop */
+105: li r9,1
+92: li r3,LG_CACHELINE_BYTES
+ b 99f
+/* read fault in final word loop */
+108: li r9,0
+ b 93f
+/* write fault in final word loop */
+109: li r9,1
+93: andi. r5,r5,3
+ li r3,2
+ b 99f
+/* read fault in final byte loop */
+110: li r9,0
+ b 94f
+/* write fault in final byte loop */
+111: li r9,1
+94: li r5,0
+ li r3,0
+/*
+ * At this stage the number of bytes not copied is
+ * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
+ */
+99: mfctr r0
+ slw r3,r0,r3
+ add. r3,r3,r5
+ beq 120f /* shouldn't happen */
+ cmpwi 0,r9,0
+ bne 120f
+/* for a read fault, first try to continue the copy one byte at a time */
+ mtctr r3
+130: lbz r0,4(r4)
+131: stb r0,4(r6)
+ addi r4,r4,1
+ addi r6,r6,1
+ bdnz 130b
+/* then clear out the destination: r3 bytes starting at 4(r6) */
+132: mfctr r3
+ srwi. r0,r3,2
+ li r9,0
+ mtctr r0
+ beq 113f
+112: stwu r9,4(r6)
+ bdnz 112b
+113: andi. r0,r3,3
+ mtctr r0
+ beq 120f
+114: stb r9,4(r6)
+ addi r6,r6,1
+ bdnz 114b
+120: blr
+
+ .section __ex_table,"a"
+ .align 2
+ .long 30b,108b
+ .long 31b,109b
+ .long 40b,110b
+ .long 41b,111b
+ .long 130b,132b
+ .long 131b,120b
+ .long 112b,120b
+ .long 114b,120b
+ .text
+
+_GLOBAL(__clear_user)
+ addi r6,r3,-4
+ li r3,0
+ li r5,0
+ cmplwi 0,r4,4
+ blt 7f
+ /* clear a single word */
+11: stwu r5,4(r6)
+ beqlr
+ /* clear word sized chunks */
+ andi. r0,r6,3
+ add r4,r0,r4
+ subf r6,r0,r6
+ srwi r0,r4,2
+ andi. r4,r4,3
+ mtctr r0
+ bdz 7f
+1: stwu r5,4(r6)
+ bdnz 1b
+ /* clear byte sized chunks */
+7: cmpwi 0,r4,0
+ beqlr
+ mtctr r4
+ addi r6,r6,3
+8: stbu r5,1(r6)
+ bdnz 8b
+ blr
+90: mr r3,r4
+ blr
+91: mfctr r3
+ slwi r3,r3,2
+ add r3,r3,r4
+ blr
+92: mfctr r3
+ blr
+
+ .section __ex_table,"a"
+ .align 2
+ .long 11b,90b
+ .long 1b,91b
+ .long 8b,92b
+ .text
+
+_GLOBAL(__strncpy_from_user)
+ addi r6,r3,-1
+ addi r4,r4,-1
+ cmpwi 0,r5,0
+ beq 2f
+ mtctr r5
+1: lbzu r0,1(r4)
+ cmpwi 0,r0,0
+ stbu r0,1(r6)
+ bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */
+ beq 3f
+2: addi r6,r6,1
+3: subf r3,r3,r6
+ blr
+99: li r3,-EFAULT
+ blr
+
+ .section __ex_table,"a"
+ .align 2
+ .long 1b,99b
+ .text
+
+/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */
+_GLOBAL(__strnlen_user)
+ addi r7,r3,-1
+ subf r6,r7,r5 /* top+1 - str */
+ cmplw 0,r4,r6
+ bge 0f
+ mr r6,r4
+0: mtctr r6 /* ctr = min(len, top - str) */
+1: lbzu r0,1(r7) /* get next byte */
+ cmpwi 0,r0,0
+ bdnzf 2,1b /* loop if --ctr != 0 && byte != 0 */
+ addi r7,r7,1
+ subf r3,r3,r7 /* number of bytes we have looked at */
+ beqlr /* return if we found a 0 byte */
+ cmpw 0,r3,r4 /* did we look at all len bytes? */
+ blt 99f /* if not, must have hit top */
+ addi r3,r4,1 /* return len + 1 to indicate no null found */
+ blr
+99: li r3,0 /* bad address, return 0 */
+ blr
+
+ .section __ex_table,"a"
+ .align 2
+ .long 1b,99b