aboutsummaryrefslogtreecommitdiff
path: root/arch/alpha
diff options
context:
space:
mode:
Diffstat (limited to 'arch/alpha')
-rw-r--r--arch/alpha/Kconfig2
-rw-r--r--arch/alpha/include/asm/atomic.h4
-rw-r--r--arch/alpha/include/asm/fpu.h2
-rw-r--r--arch/alpha/include/asm/ptrace.h5
-rw-r--r--arch/alpha/include/asm/socket.h2
-rw-r--r--arch/alpha/include/asm/uaccess.h34
-rw-r--r--arch/alpha/include/asm/unistd.h4
-rw-r--r--arch/alpha/include/asm/word-at-a-time.h55
-rw-r--r--arch/alpha/include/uapi/asm/Kbuild3
-rw-r--r--arch/alpha/kernel/alpha_ksyms.c3
-rw-r--r--arch/alpha/kernel/entry.S161
-rw-r--r--arch/alpha/kernel/osf_sys.c66
-rw-r--r--arch/alpha/kernel/pci.c6
-rw-r--r--arch/alpha/kernel/process.c25
-rw-r--r--arch/alpha/kernel/smp.c1
-rw-r--r--arch/alpha/kernel/srmcons.c1
-rw-r--r--arch/alpha/kernel/systbls.S4
-rw-r--r--arch/alpha/lib/Makefile2
-rw-r--r--arch/alpha/lib/ev6-strncpy_from_user.S424
-rw-r--r--arch/alpha/lib/ev67-strlen_user.S107
-rw-r--r--arch/alpha/lib/strlen_user.S91
-rw-r--r--arch/alpha/lib/strncpy_from_user.S339
-rw-r--r--arch/alpha/mm/fault.c36
-rw-r--r--arch/alpha/oprofile/common.c1
24 files changed, 194 insertions, 1184 deletions
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index d5b9b5e645cc..9944dedee5b1 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -18,6 +18,8 @@ config ALPHA
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_SMP_IDLE_THREAD
select GENERIC_CMOS_UPDATE
+ select GENERIC_STRNCPY_FROM_USER
+ select GENERIC_STRNLEN_USER
help
The Alpha is a 64-bit general-purpose processor designed and
marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 3bb7ffeae3bc..c2cbe4fc391c 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -14,8 +14,8 @@
*/
-#define ATOMIC_INIT(i) ( (atomic_t) { (i) } )
-#define ATOMIC64_INIT(i) ( (atomic64_t) { (i) } )
+#define ATOMIC_INIT(i) { (i) }
+#define ATOMIC64_INIT(i) { (i) }
#define atomic_read(v) (*(volatile int *)&(v)->counter)
#define atomic64_read(v) (*(volatile long *)&(v)->counter)
diff --git a/arch/alpha/include/asm/fpu.h b/arch/alpha/include/asm/fpu.h
index db00f7885faa..e477bcd5b94a 100644
--- a/arch/alpha/include/asm/fpu.h
+++ b/arch/alpha/include/asm/fpu.h
@@ -1,7 +1,9 @@
#ifndef __ASM_ALPHA_FPU_H
#define __ASM_ALPHA_FPU_H
+#ifdef __KERNEL__
#include <asm/special_insns.h>
+#endif
/*
* Alpha floating-point control register defines:
diff --git a/arch/alpha/include/asm/ptrace.h b/arch/alpha/include/asm/ptrace.h
index fd698a174f26..b87755a19554 100644
--- a/arch/alpha/include/asm/ptrace.h
+++ b/arch/alpha/include/asm/ptrace.h
@@ -76,7 +76,10 @@ struct switch_stack {
#define task_pt_regs(task) \
((struct pt_regs *) (task_stack_page(task) + 2*PAGE_SIZE) - 1)
-#define force_successful_syscall_return() (task_pt_regs(current)->r0 = 0)
+#define current_pt_regs() \
+ ((struct pt_regs *) ((char *)current_thread_info() + 2*PAGE_SIZE) - 1)
+
+#define force_successful_syscall_return() (current_pt_regs()->r0 = 0)
#endif
diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h
index dcb221a4b5be..7d2f75be932e 100644
--- a/arch/alpha/include/asm/socket.h
+++ b/arch/alpha/include/asm/socket.h
@@ -76,9 +76,11 @@
/* Instruct lower device to use last 4-bytes of skb data as FCS */
#define SO_NOFCS 43
+#ifdef __KERNEL__
/* O_NONBLOCK clashes with the bits used for socket types. Therefore we
* have to define SOCK_NONBLOCK to a different value here.
*/
#define SOCK_NONBLOCK 0x40000000
+#endif /* __KERNEL__ */
#endif /* _ASM_SOCKET_H */
diff --git a/arch/alpha/include/asm/uaccess.h b/arch/alpha/include/asm/uaccess.h
index b49ec2f8d6e3..766fdfde2b7a 100644
--- a/arch/alpha/include/asm/uaccess.h
+++ b/arch/alpha/include/asm/uaccess.h
@@ -433,36 +433,12 @@ clear_user(void __user *to, long len)
#undef __module_address
#undef __module_call
-/* Returns: -EFAULT if exception before terminator, N if the entire
- buffer filled, else strlen. */
+#define user_addr_max() \
+ (segment_eq(get_fs(), USER_DS) ? TASK_SIZE : ~0UL)
-extern long __strncpy_from_user(char *__to, const char __user *__from, long __to_len);
-
-extern inline long
-strncpy_from_user(char *to, const char __user *from, long n)
-{
- long ret = -EFAULT;
- if (__access_ok((unsigned long)from, 0, get_fs()))
- ret = __strncpy_from_user(to, from, n);
- return ret;
-}
-
-/* Returns: 0 if bad, string length+1 (memory size) of string if ok */
-extern long __strlen_user(const char __user *);
-
-extern inline long strlen_user(const char __user *str)
-{
- return access_ok(VERIFY_READ,str,0) ? __strlen_user(str) : 0;
-}
-
-/* Returns: 0 if exception before NUL or reaching the supplied limit (N),
- * a value greater than N if the limit would be exceeded, else strlen. */
-extern long __strnlen_user(const char __user *, long);
-
-extern inline long strnlen_user(const char __user *str, long n)
-{
- return access_ok(VERIFY_READ,str,0) ? __strnlen_user(str, n) : 0;
-}
+extern long strncpy_from_user(char *dest, const char __user *src, long count);
+extern __must_check long strlen_user(const char __user *str);
+extern __must_check long strnlen_user(const char __user *str, long n);
/*
* About the exception table:
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index 633b23b0664a..a31a78eac9b9 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -465,10 +465,12 @@
#define __NR_setns 501
#define __NR_accept4 502
#define __NR_sendmmsg 503
+#define __NR_process_vm_readv 504
+#define __NR_process_vm_writev 505
#ifdef __KERNEL__
-#define NR_SYSCALLS 504
+#define NR_SYSCALLS 506
#define __ARCH_WANT_OLD_READDIR
#define __ARCH_WANT_STAT64
diff --git a/arch/alpha/include/asm/word-at-a-time.h b/arch/alpha/include/asm/word-at-a-time.h
new file mode 100644
index 000000000000..6b340d0f1521
--- /dev/null
+++ b/arch/alpha/include/asm/word-at-a-time.h
@@ -0,0 +1,55 @@
+#ifndef _ASM_WORD_AT_A_TIME_H
+#define _ASM_WORD_AT_A_TIME_H
+
+#include <asm/compiler.h>
+
+/*
+ * word-at-a-time interface for Alpha.
+ */
+
+/*
+ * We do not use the word_at_a_time struct on Alpha, but it needs to be
+ * implemented to humour the generic code.
+ */
+struct word_at_a_time {
+ const unsigned long unused;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { 0 }
+
+/* Return nonzero if val has a zero */
+static inline unsigned long has_zero(unsigned long val, unsigned long *bits, const struct word_at_a_time *c)
+{
+ unsigned long zero_locations = __kernel_cmpbge(0, val);
+ *bits = zero_locations;
+ return zero_locations;
+}
+
+static inline unsigned long prep_zero_mask(unsigned long val, unsigned long bits, const struct word_at_a_time *c)
+{
+ return bits;
+}
+
+#define create_zero_mask(bits) (bits)
+
+static inline unsigned long find_zero(unsigned long bits)
+{
+#if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67)
+ /* Simple if have CIX instructions */
+ return __kernel_cttz(bits);
+#else
+ unsigned long t1, t2, t3;
+ /* Retain lowest set bit only */
+ bits &= -bits;
+ /* Binary search for lowest set bit */
+ t1 = bits & 0xf0;
+ t2 = bits & 0xcc;
+ t3 = bits & 0xaa;
+ if (t1) t1 = 4;
+ if (t2) t2 = 2;
+ if (t3) t3 = 1;
+ return t1 + t2 + t3;
+#endif
+}
+
+#endif /* _ASM_WORD_AT_A_TIME_H */
diff --git a/arch/alpha/include/uapi/asm/Kbuild b/arch/alpha/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/alpha/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/alpha/kernel/alpha_ksyms.c b/arch/alpha/kernel/alpha_ksyms.c
index d96e742d4dc2..15fa821d09cd 100644
--- a/arch/alpha/kernel/alpha_ksyms.c
+++ b/arch/alpha/kernel/alpha_ksyms.c
@@ -52,7 +52,6 @@ EXPORT_SYMBOL(alpha_write_fp_reg_s);
/* entry.S */
EXPORT_SYMBOL(kernel_thread);
-EXPORT_SYMBOL(kernel_execve);
/* Networking helper routines. */
EXPORT_SYMBOL(csum_tcpudp_magic);
@@ -74,8 +73,6 @@ EXPORT_SYMBOL(alpha_fp_emul);
*/
EXPORT_SYMBOL(__copy_user);
EXPORT_SYMBOL(__do_clear_user);
-EXPORT_SYMBOL(__strncpy_from_user);
-EXPORT_SYMBOL(__strnlen_user);
/*
* SMP-specific symbols.
diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S
index 6d159cee5f2f..ec0da0567ab5 100644
--- a/arch/alpha/kernel/entry.S
+++ b/arch/alpha/kernel/entry.S
@@ -663,58 +663,6 @@ kernel_thread:
br ret_to_kernel
.end kernel_thread
-/*
- * kernel_execve(path, argv, envp)
- */
- .align 4
- .globl kernel_execve
- .ent kernel_execve
-kernel_execve:
- /* We can be called from a module. */
- ldgp $gp, 0($27)
- lda $sp, -(32+SIZEOF_PT_REGS+8)($sp)
- .frame $sp, 32+SIZEOF_PT_REGS+8, $26, 0
- stq $26, 0($sp)
- stq $16, 8($sp)
- stq $17, 16($sp)
- stq $18, 24($sp)
- .prologue 1
-
- lda $16, 32($sp)
- lda $17, 0
- lda $18, SIZEOF_PT_REGS
- bsr $26, memset !samegp
-
- /* Avoid the HAE being gratuitously wrong, which would cause us
- to do the whole turn off interrupts thing and restore it. */
- ldq $2, alpha_mv+HAE_CACHE
- stq $2, 152+32($sp)
-
- ldq $16, 8($sp)
- ldq $17, 16($sp)
- ldq $18, 24($sp)
- lda $19, 32($sp)
- bsr $26, do_execve !samegp
-
- ldq $26, 0($sp)
- bne $0, 1f /* error! */
-
- /* Move the temporary pt_regs struct from its current location
- to the top of the kernel stack frame. See copy_thread for
- details for a normal process. */
- lda $16, 0x4000 - SIZEOF_PT_REGS($8)
- lda $17, 32($sp)
- lda $18, SIZEOF_PT_REGS
- bsr $26, memmove !samegp
-
- /* Take that over as our new stack frame and visit userland! */
- lda $sp, 0x4000 - SIZEOF_PT_REGS($8)
- br $31, ret_from_sys_call
-
-1: lda $sp, 32+SIZEOF_PT_REGS+8($sp)
- ret
-.end kernel_execve
-
/*
* Special system calls. Most of these are special in that they either
@@ -797,115 +745,6 @@ sys_rt_sigreturn:
.end sys_rt_sigreturn
.align 4
- .globl sys_sethae
- .ent sys_sethae
-sys_sethae:
- .prologue 0
- stq $16, 152($sp)
- ret
-.end sys_sethae
-
- .align 4
- .globl osf_getpriority
- .ent osf_getpriority
-osf_getpriority:
- lda $sp, -16($sp)
- stq $26, 0($sp)
- .prologue 0
-
- jsr $26, sys_getpriority
-
- ldq $26, 0($sp)
- blt $0, 1f
-
- /* Return value is the unbiased priority, i.e. 20 - prio.
- This does result in negative return values, so signal
- no error by writing into the R0 slot. */
- lda $1, 20
- stq $31, 16($sp)
- subl $1, $0, $0
- unop
-
-1: lda $sp, 16($sp)
- ret
-.end osf_getpriority
-
- .align 4
- .globl sys_getxuid
- .ent sys_getxuid
-sys_getxuid:
- .prologue 0
- ldq $2, TI_TASK($8)
- ldq $3, TASK_CRED($2)
- ldl $0, CRED_UID($3)
- ldl $1, CRED_EUID($3)
- stq $1, 80($sp)
- ret
-.end sys_getxuid
-
- .align 4
- .globl sys_getxgid
- .ent sys_getxgid
-sys_getxgid:
- .prologue 0
- ldq $2, TI_TASK($8)
- ldq $3, TASK_CRED($2)
- ldl $0, CRED_GID($3)
- ldl $1, CRED_EGID($3)
- stq $1, 80($sp)
- ret
-.end sys_getxgid
-
- .align 4
- .globl sys_getxpid
- .ent sys_getxpid
-sys_getxpid:
- .prologue 0
- ldq $2, TI_TASK($8)
-
- /* See linux/kernel/timer.c sys_getppid for discussion
- about this loop. */
- ldq $3, TASK_GROUP_LEADER($2)
- ldq $4, TASK_REAL_PARENT($3)
- ldl $0, TASK_TGID($2)
-1: ldl $1, TASK_TGID($4)
-#ifdef CONFIG_SMP
- mov $4, $5
- mb
- ldq $3, TASK_GROUP_LEADER($2)
- ldq $4, TASK_REAL_PARENT($3)
- cmpeq $4, $5, $5
- beq $5, 1b
-#endif
- stq $1, 80($sp)
- ret
-.end sys_getxpid
-
- .align 4
- .globl sys_alpha_pipe
- .ent sys_alpha_pipe
-sys_alpha_pipe:
- lda $sp, -16($sp)
- stq $26, 0($sp)
- .prologue 0
-
- mov $31, $17
- lda $16, 8($sp)
- jsr $26, do_pipe_flags
-
- ldq $26, 0($sp)
- bne $0, 1f
-
- /* The return values are in $0 and $20. */
- ldl $1, 12($sp)
- ldl $0, 8($sp)
-
- stq $1, 80+16($sp)
-1: lda $sp, 16($sp)
- ret
-.end sys_alpha_pipe
-
- .align 4
.globl sys_execve
.ent sys_execve
sys_execve:
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 98a103621af6..63e77e3944ce 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -145,27 +145,24 @@ SYSCALL_DEFINE4(osf_getdirentries, unsigned int, fd,
long __user *, basep)
{
int error;
- struct file *file;
+ struct fd arg = fdget(fd);
struct osf_dirent_callback buf;
- error = -EBADF;
- file = fget(fd);
- if (!file)
- goto out;
+ if (!arg.file)
+ return -EBADF;
buf.dirent = dirent;
buf.basep = basep;
buf.count = count;
buf.error = 0;
- error = vfs_readdir(file, osf_filldir, &buf);
+ error = vfs_readdir(arg.file, osf_filldir, &buf);
if (error >= 0)
error = buf.error;
if (count != buf.count)
error = count - buf.count;
- fput(file);
- out:
+ fdput(arg);
return error;
}
@@ -278,8 +275,8 @@ linux_to_osf_stat(struct kstat *lstat, struct osf_stat __user *osf_stat)
tmp.st_dev = lstat->dev;
tmp.st_mode = lstat->mode;
tmp.st_nlink = lstat->nlink;
- tmp.st_uid = lstat->uid;
- tmp.st_gid = lstat->gid;
+ tmp.st_uid = from_kuid_munged(current_user_ns(), lstat->uid);
+ tmp.st_gid = from_kgid_munged(current_user_ns(), lstat->gid);
tmp.st_rdev = lstat->rdev;
tmp.st_ldev = lstat->rdev;
tmp.st_size = lstat->size;
@@ -1404,3 +1401,52 @@ SYSCALL_DEFINE3(osf_writev, unsigned long, fd,
}
#endif
+
+SYSCALL_DEFINE2(osf_getpriority, int, which, int, who)
+{
+ int prio = sys_getpriority(which, who);
+ if (prio >= 0) {
+ /* Return value is the unbiased priority, i.e. 20 - prio.
+ This does result in negative return values, so signal
+ no error */
+ force_successful_syscall_return();
+ prio = 20 - prio;
+ }
+ return prio;
+}
+
+SYSCALL_DEFINE0(getxuid)
+{
+ current_pt_regs()->r20 = sys_geteuid();
+ return sys_getuid();
+}
+
+SYSCALL_DEFINE0(getxgid)
+{
+ current_pt_regs()->r20 = sys_getegid();
+ return sys_getgid();
+}
+
+SYSCALL_DEFINE0(getxpid)
+{
+ current_pt_regs()->r20 = sys_getppid();
+ return sys_getpid();
+}
+
+SYSCALL_DEFINE0(alpha_pipe)
+{
+ int fd[2];
+ int res = do_pipe_flags(fd, 0);
+ if (!res) {
+ /* The return values are in $0 and $20. */
+ current_pt_regs()->r20 = fd[1];
+ res = fd[0];
+ }
+ return res;
+}
+
+SYSCALL_DEFINE1(sethae, unsigned long, val)
+{
+ current_pt_regs()->hae = val;
+ return 0;
+}
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 9816d5a4d176..ef757147cbf9 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -256,12 +256,6 @@ pcibios_fixup_bus(struct pci_bus *bus)
}
}
-void __init
-pcibios_update_irq(struct pci_dev *dev, int irq)
-{
- pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
int
pcibios_enable_device(struct pci_dev *dev, int mask)
{
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 153d3fce3e8e..83638aa096d5 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -28,6 +28,7 @@
#include <linux/tty.h>
#include <linux/console.h>
#include <linux/slab.h>
+#include <linux/rcupdate.h>
#include <asm/reg.h>
#include <asm/uaccess.h>
@@ -54,9 +55,12 @@ cpu_idle(void)
/* FIXME -- EV6 and LCA45 know how to power down
the CPU. */
+ rcu_idle_enter();
while (!need_resched())
cpu_relax();
- schedule();
+
+ rcu_idle_exit();
+ schedule_preempt_disabled();
}
}
@@ -455,3 +459,22 @@ get_wchan(struct task_struct *p)
}
return pc;
}
+
+int kernel_execve(const char *path, const char *const argv[], const char *const envp[])
+{
+ /* Avoid the HAE being gratuitously wrong, which would cause us
+ to do the whole turn off interrupts thing and restore it. */
+ struct pt_regs regs = {.hae = alpha_mv.hae_cache};
+ int err = do_execve(path, argv, envp, &regs);
+ if (!err) {
+ struct pt_regs *p = current_pt_regs();
+ /* copy regs to normal position and off to userland we go... */
+ *p = regs;
+ __asm__ __volatile__ (
+ "mov %0, $sp;"
+ "br $31, ret_from_sys_call"
+ : : "r"(p));
+ }
+ return err;
+}
+EXPORT_SYMBOL(kernel_execve);
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 35ddc02bfa4a..a41ad90a97a6 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -166,6 +166,7 @@ smp_callin(void)
DBGS(("smp_callin: commencing CPU %d current %p active_mm %p\n",
cpuid, current, current->active_mm));
+ preempt_disable();
/* Do nothing. */
cpu_idle();
}
diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c
index 3ea809430eda..5d5865204a1d 100644
--- a/arch/alpha/kernel/srmcons.c
+++ b/arch/alpha/kernel/srmcons.c
@@ -223,6 +223,7 @@ srmcons_init(void)
driver->subtype = SYSTEM_TYPE_SYSCONS;
driver->init_termios = tty_std_termios;
tty_set_operations(driver, &srmcons_ops);
+ tty_port_link_device(&srmcons_singleton.port, driver, 0);
err = tty_register_driver(driver);
if (err) {
put_tty_driver(driver);
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index 87835235f114..2ac6b45c3e00 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -111,7 +111,7 @@ sys_call_table:
.quad sys_socket
.quad sys_connect
.quad sys_accept
- .quad osf_getpriority /* 100 */
+ .quad sys_osf_getpriority /* 100 */
.quad sys_send
.quad sys_recv
.quad sys_sigreturn
@@ -522,6 +522,8 @@ sys_call_table:
.quad sys_setns
.quad sys_accept4
.quad sys_sendmmsg
+ .quad sys_process_vm_readv
+ .quad sys_process_vm_writev /* 505 */
.size sys_call_table, . - sys_call_table
.type sys_call_table, @object
diff --git a/arch/alpha/lib/Makefile b/arch/alpha/lib/Makefile
index c0a83ab62b78..59660743237c 100644
--- a/arch/alpha/lib/Makefile
+++ b/arch/alpha/lib/Makefile
@@ -31,8 +31,6 @@ lib-y = __divqu.o __remqu.o __divlu.o __remlu.o \
$(ev6-y)memchr.o \
$(ev6-y)copy_user.o \
$(ev6-y)clear_user.o \
- $(ev6-y)strncpy_from_user.o \
- $(ev67-y)strlen_user.o \
$(ev6-y)csum_ipv6_magic.o \
$(ev6-y)clear_page.o \
$(ev6-y)copy_page.o \
diff --git a/arch/alpha/lib/ev6-strncpy_from_user.S b/arch/alpha/lib/ev6-strncpy_from_user.S
deleted file mode 100644
index d2e28178cacc..000000000000
--- a/arch/alpha/lib/ev6-strncpy_from_user.S
+++ /dev/null
@@ -1,424 +0,0 @@
-/*
- * arch/alpha/lib/ev6-strncpy_from_user.S
- * 21264 version contributed by Rick Gorton <rick.gorton@alpha-processor.com>
- *
- * Just like strncpy except in the return value:
- *
- * -EFAULT if an exception occurs before the terminator is copied.
- * N if the buffer filled.
- *
- * Otherwise the length of the string is returned.
- *
- * Much of the information about 21264 scheduling/coding comes from:
- * Compiler Writer's Guide for the Alpha 21264
- * abbreviated as 'CWG' in other comments here
- * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
- * Scheduling notation:
- * E - either cluster
- * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1
- * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
- * A bunch of instructions got moved and temp registers were changed
- * to aid in scheduling. Control flow was also re-arranged to eliminate
- * branches, and to provide longer code sequences to enable better scheduling.
- * A total rewrite (using byte load/stores for start & tail sequences)
- * is desirable, but very difficult to do without a from-scratch rewrite.
- * Save that for the future.
- */
-
-
-#include <asm/errno.h>
-#include <asm/regdef.h>
-
-
-/* Allow an exception for an insn; exit if we get one. */
-#define EX(x,y...) \
- 99: x,##y; \
- .section __ex_table,"a"; \
- .long 99b - .; \
- lda $31, $exception-99b($0); \
- .previous
-
-
- .set noat
- .set noreorder
- .text
-
- .globl __strncpy_from_user
- .ent __strncpy_from_user
- .frame $30, 0, $26
- .prologue 0
-
- .align 4
-__strncpy_from_user:
- and a0, 7, t3 # E : find dest misalignment
- beq a2, $zerolength # U :
-
- /* Are source and destination co-aligned? */
- mov a0, v0 # E : save the string start
- xor a0, a1, t4 # E :
- EX( ldq_u t1, 0(a1) ) # L : Latency=3 load first quadword
- ldq_u t0, 0(a0) # L : load first (partial) aligned dest quadword
-
- addq a2, t3, a2 # E : bias count by dest misalignment
- subq a2, 1, a3 # E :
- addq zero, 1, t10 # E :
- and t4, 7, t4 # E : misalignment between the two
-
- and a3, 7, t6 # E : number of tail bytes
- sll t10, t6, t10 # E : t10 = bitmask of last count byte
- bne t4, $unaligned # U :
- lda t2, -1 # E : build a mask against false zero
-
- /*
- * We are co-aligned; take care of a partial first word.
- * On entry to this basic block:
- * t0 == the first destination word for masking back in
- * t1 == the first source word.
- */
-
- srl a3, 3, a2 # E : a2 = loop counter = (count - 1)/8
- addq a1, 8, a1 # E :
- mskqh t2, a1, t2 # U : detection in the src word
- nop
-
- /* Create the 1st output word and detect 0's in the 1st input word. */
- mskqh t1, a1, t3 # U :
- mskql t0, a1, t0 # U : assemble the first output word
- ornot t1, t2, t2 # E :
- nop
-
- cmpbge zero, t2, t8 # E : bits set iff null found
- or t0, t3, t0 # E :
- beq a2, $a_eoc # U :
- bne t8, $a_eos # U : 2nd branch in a quad. Bad.
-
- /* On entry to this basic block:
- * t0 == a source quad not containing a null.
- * a0 - current aligned destination address
- * a1 - current aligned source address
- * a2 - count of quadwords to move.
- * NOTE: Loop improvement - unrolling this is going to be
- * a huge win, since we're going to stall otherwise.
- * Fix this later. For _really_ large copies, look
- * at using wh64 on a look-ahead basis. See the code
- * in clear_user.S and copy_user.S.
- * Presumably, since (a0) and (a1) do not overlap (by C definition)
- * Lots of nops here:
- * - Separate loads from stores
- * - Keep it to 1 branch/quadpack so the branch predictor
- * can train.
- */
-$a_loop:
- stq_u t0, 0(a0) # L :
- addq a0, 8, a0 # E :
- nop
- subq a2, 1, a2 # E :
-
- EX( ldq_u t0, 0(a1) ) # L :
- addq a1, 8, a1 # E :
- cmpbge zero, t0, t8 # E : Stall 2 cycles on t0
- beq a2, $a_eoc # U :
-
- beq t8, $a_loop # U :
- nop
- nop
- nop
-
- /* Take care of the final (partial) word store. At this point
- * the end-of-count bit is set in t8 iff it applies.
- *
- * On entry to this basic block we have:
- * t0 == the source word containing the null
- * t8 == the cmpbge mask that found it.
- */
-$a_eos:
- negq t8, t12 # E : find low bit set
- and t8, t12, t12 # E :
-
- /* We're doing a partial word store and so need to combine
- our source and original destination words. */
- ldq_u t1, 0(a0) # L :
- subq t12, 1, t6 # E :
-
- or t12, t6, t8 # E :
- zapnot t0, t8, t0 # U : clear src bytes > null
- zap t1, t8, t1 # U : clear dst bytes <= null
- or t0, t1, t0 # E :
-
- stq_u t0, 0(a0) # L :
- br $finish_up # L0 :
- nop
- nop
-
- /* Add the end-of-count bit to the eos detection bitmask. */
- .align 4
-$a_eoc:
- or t10, t8, t8
- br $a_eos
- nop
- nop
-
-
-/* The source and destination are not co-aligned. Align the destination
- and cope. We have to be very careful about not reading too much and
- causing a SEGV. */
-
- .align 4
-$u_head:
- /* We know just enough now to be able to assemble the first
- full source word. We can still find a zero at the end of it
- that prevents us from outputting the whole thing.
-
- On entry to this basic block:
- t0 == the first dest word, unmasked
- t1 == the shifted low bits of the first source word
- t6 == bytemask that is -1 in dest word bytes */
-
- EX( ldq_u t2, 8(a1) ) # L : load second src word
- addq a1, 8, a1 # E :
- mskql t0, a0, t0 # U : mask trailing garbage in dst
- extqh t2, a1, t4 # U :
-
- or t1, t4, t1 # E : first aligned src word complete
- mskqh t1, a0, t1 # U : mask leading garbage in src
- or t0, t1, t0 # E : first output word complete
- or t0, t6, t6 # E : mask original data for zero test
-
- cmpbge zero, t6, t8 # E :
- beq a2, $u_eocfin # U :
- bne t8, $u_final # U : bad news - 2nd branch in a quad
- lda t6, -1 # E : mask out the bits we have
-
- mskql t6, a1, t6 # U : already seen
- stq_u t0, 0(a0) # L : store first output word
- or t6, t2, t2 # E :
- cmpbge zero, t2, t8 # E : find nulls in second partial
-
- addq a0, 8, a0 # E :
- subq a2, 1, a2 # E :
- bne t8, $u_late_head_exit # U :
- nop
-
- /* Finally, we've got all the stupid leading edge cases taken care
- of and we can set up to enter the main loop. */
-
- extql t2, a1, t1 # U : position hi-bits of lo word
- EX( ldq_u t2, 8(a1) ) # L : read next high-order source word
- addq a1, 8, a1 # E :
- cmpbge zero, t2, t8 # E :
-
- beq a2, $u_eoc # U :
- bne t8, $u_eos # U :
- nop
- nop
-
- /* Unaligned copy main loop. In order to avoid reading too much,
- the loop is structured to detect zeros in aligned source words.
- This has, unfortunately, effectively pulled half of a loop
- iteration out into the head and half into the tail, but it does
- prevent nastiness from accumulating in the very thing we want
- to run as fast as possible.
-
- On entry to this basic block:
- t1 == the shifted high-order bits from the previous source word
- t2 == the unshifted current source word
-
- We further know that t2 does not contain a null terminator. */
-
- /*
- * Extra nops here:
- * separate load quads from store quads
- * only one branch/quad to permit predictor training
- */
-
- .align 4
-$u_loop:
- extqh t2, a1, t0 # U : extract high bits for current word
- addq a1, 8, a1 # E :
- extql t2, a1, t3 # U : extract low bits for next time
- addq a0, 8, a0 # E :
-
- or t0, t1, t0 # E : current dst word now complete
- EX( ldq_u t2, 0(a1) ) # L : load high word for next time
- subq a2, 1, a2 # E :
- nop
-
- stq_u t0, -8(a0) # L : save the current word
- mov t3, t1 # E :
- cmpbge zero, t2, t8 # E : test new word for eos
- beq a2, $u_eoc # U :
-
- beq t8, $u_loop # U :
- nop
- nop
- nop
-
- /* We've found a zero somewhere in the source word we just read.
- If it resides in the lower half, we have one (probably partial)
- word to write out, and if it resides in the upper half, we
- have one full and one partial word left to write out.
-
- On entry to this basic block:
- t1 == the shifted high-order bits from the previous source word
- t2 == the unshifted current source word. */
- .align 4
-$u_eos:
- extqh t2, a1, t0 # U :
- or t0, t1, t0 # E : first (partial) source word complete
- cmpbge zero, t0, t8 # E : is the null in this first bit?
- nop
-
- bne t8, $u_final # U :
- stq_u t0, 0(a0) # L : the null was in the high-order bits
- addq a0, 8, a0 # E :
- subq a2, 1, a2 # E :
-
- .align 4
-$u_late_head_exit:
- extql t2, a1, t0 # U :
- cmpbge zero, t0, t8 # E :
- or t8, t10, t6 # E :
- cmoveq a2, t6, t8 # E :
-
- /* Take care of a final (probably partial) result word.
- On entry to this basic block:
- t0 == assembled source word
- t8 == cmpbge mask that found the null. */
- .align 4
-$u_final:
- negq t8, t6 # E : isolate low bit set
- and t6, t8, t12 # E :
- ldq_u t1, 0(a0) # L :
- subq t12, 1, t6 # E :
-
- or t6, t12, t8 # E :
- zapnot t0, t8, t0 # U : kill source bytes > null
- zap t1, t8, t1 # U : kill dest bytes <= null
- or t0, t1, t0 # E :
-
- stq_u t0, 0(a0) # E :
- br $finish_up # U :
- nop
- nop
-
- .align 4
-$u_eoc: # end-of-count
- extqh t2, a1, t0 # U :
- or t0, t1, t0 # E :
- cmpbge zero, t0, t8 # E :
- nop
-
- .align 4
-$u_eocfin: # end-of-count, final word
- or t10, t8, t8 # E :
- br $u_final # U :
- nop
- nop
-
- /* Unaligned copy entry point. */
- .align 4
-$unaligned:
-
- srl a3, 3, a2 # U : a2 = loop counter = (count - 1)/8
- and a0, 7, t4 # E : find dest misalignment
- and a1, 7, t5 # E : find src misalignment
- mov zero, t0 # E :
-
- /* Conditionally load the first destination word and a bytemask
- with 0xff indicating that the destination byte is sacrosanct. */
-
- mov zero, t6 # E :
- beq t4, 1f # U :
- ldq_u t0, 0(a0) # L :
- lda t6, -1 # E :
-
- mskql t6, a0, t6 # E :
- nop
- nop
- nop
-
- .align 4
-1:
- subq a1, t4, a1 # E : sub dest misalignment from src addr
- /* If source misalignment is larger than dest misalignment, we need
- extra startup checks to avoid SEGV. */
- cmplt t4, t5, t12 # E :
- extql t1, a1, t1 # U : shift src into place
- lda t2, -1 # E : for creating masks later
-
- beq t12, $u_head # U :
- mskqh t2, t5, t2 # U : begin src byte validity mask
- cmpbge zero, t1, t8 # E : is there a zero?
- nop
-
- extql t2, a1, t2 # U :
- or t8, t10, t5 # E : test for end-of-count too
- cmpbge zero, t2, t3 # E :
- cmoveq a2, t5, t8 # E : Latency=2, extra map slot
-
- nop # E : goes with cmov
- andnot t8, t3, t8 # E :
- beq t8, $u_head # U :
- nop
-
- /* At this point we've found a zero in the first partial word of
- the source. We need to isolate the valid source data and mask
- it into the original destination data. (Incidentally, we know
- that we'll need at least one byte of that original dest word.) */
-
- ldq_u t0, 0(a0) # L :
- negq t8, t6 # E : build bitmask of bytes <= zero
- mskqh t1, t4, t1 # U :
- and t6, t8, t12 # E :
-
- subq t12, 1, t6 # E :
- or t6, t12, t8 # E :
- zapnot t2, t8, t2 # U : prepare source word; mirror changes
- zapnot t1, t8, t1 # U : to source validity mask
-
- andnot t0, t2, t0 # E : zero place for source to reside
- or t0, t1, t0 # E : and put it there
- stq_u t0, 0(a0) # L :
- nop
-
- .align 4
-$finish_up:
- zapnot t0, t12, t4 # U : was last byte written null?
- and t12, 0xf0, t3 # E : binary search for the address of the
- cmovne t4, 1, t4 # E : Latency=2, extra map slot
- nop # E : with cmovne
-
- and t12, 0xcc, t2 # E : last byte written
- and t12, 0xaa, t1 # E :
- cmovne t3, 4, t3 # E : Latency=2, extra map slot
- nop # E : with cmovne
-
- bic a0, 7, t0
- cmovne t2, 2, t2 # E : Latency=2, extra map slot
- nop # E : with cmovne
- nop
-
- cmovne t1, 1, t1 # E : Latency=2, extra map slot
- nop # E : with cmovne
- addq t0, t3, t0 # E :
- addq t1, t2, t1 # E :
-
- addq t0, t1, t0 # E :
- addq t0, t4, t0 # add one if we filled the buffer
- subq t0, v0, v0 # find string length
- ret # L0 :
-
- .align 4
-$zerolength:
- nop
- nop
- nop
- clr v0
-
-$exception:
- nop
- nop
- nop
- ret
-
- .end __strncpy_from_user
diff --git a/arch/alpha/lib/ev67-strlen_user.S b/arch/alpha/lib/ev67-strlen_user.S
deleted file mode 100644
index 57e0d77b81a6..000000000000
--- a/arch/alpha/lib/ev67-strlen_user.S
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * arch/alpha/lib/ev67-strlen_user.S
- * 21264 version contributed by Rick Gorton <rick.gorton@api-networks.com>
- *
- * Return the length of the string including the NULL terminator
- * (strlen+1) or zero if an error occurred.
- *
- * In places where it is critical to limit the processing time,
- * and the data is not trusted, strnlen_user() should be used.
- * It will return a value greater than its second argument if
- * that limit would be exceeded. This implementation is allowed
- * to access memory beyond the limit, but will not cross a page
- * boundary when doing so.
- *
- * Much of the information about 21264 scheduling/coding comes from:
- * Compiler Writer's Guide for the Alpha 21264
- * abbreviated as 'CWG' in other comments here
- * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
- * Scheduling notation:
- * E - either cluster
- * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1
- * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
- * Try not to change the actual algorithm if possible for consistency.
- */
-
-#include <asm/regdef.h>
-
-
-/* Allow an exception for an insn; exit if we get one. */
-#define EX(x,y...) \
- 99: x,##y; \
- .section __ex_table,"a"; \
- .long 99b - .; \
- lda v0, $exception-99b(zero); \
- .previous
-
-
- .set noreorder
- .set noat
- .text
-
- .globl __strlen_user
- .ent __strlen_user
- .frame sp, 0, ra
-
- .align 4
-__strlen_user:
- ldah a1, 32767(zero) # do not use plain strlen_user() for strings
- # that might be almost 2 GB long; you should
- # be using strnlen_user() instead
- nop
- nop
- nop
-
- .globl __strnlen_user
-
- .align 4
-__strnlen_user:
- .prologue 0
- EX( ldq_u t0, 0(a0) ) # L : load first quadword (a0 may be misaligned)
- lda t1, -1(zero) # E :
-
- insqh t1, a0, t1 # U :
- andnot a0, 7, v0 # E :
- or t1, t0, t0 # E :
- subq a0, 1, a0 # E : get our +1 for the return
-
- cmpbge zero, t0, t1 # E : t1 <- bitmask: bit i == 1 <==> i-th byte == 0
- subq a1, 7, t2 # E :
- subq a0, v0, t0 # E :
- bne t1, $found # U :
-
- addq t2, t0, t2 # E :
- addq a1, 1, a1 # E :
- nop # E :
- nop # E :
-
- .align 4
-$loop: ble t2, $limit # U :
- EX( ldq t0, 8(v0) ) # L :
- nop # E :
- nop # E :
-
- cmpbge zero, t0, t1 # E :
- subq t2, 8, t2 # E :
- addq v0, 8, v0 # E : addr += 8
- beq t1, $loop # U :
-
-$found: cttz t1, t2 # U0 :
- addq v0, t2, v0 # E :
- subq v0, a0, v0 # E :
- ret # L0 :
-
-$exception:
- nop
- nop
- nop
- ret
-
- .align 4 # currently redundant
-$limit:
- nop
- nop
- subq a1, t2, v0
- ret
-
- .end __strlen_user
diff --git a/arch/alpha/lib/strlen_user.S b/arch/alpha/lib/strlen_user.S
deleted file mode 100644
index 508a18e96479..000000000000
--- a/arch/alpha/lib/strlen_user.S
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * arch/alpha/lib/strlen_user.S
- *
- * Return the length of the string including the NUL terminator
- * (strlen+1) or zero if an error occurred.
- *
- * In places where it is critical to limit the processing time,
- * and the data is not trusted, strnlen_user() should be used.
- * It will return a value greater than its second argument if
- * that limit would be exceeded. This implementation is allowed
- * to access memory beyond the limit, but will not cross a page
- * boundary when doing so.
- */
-
-#include <asm/regdef.h>
-
-
-/* Allow an exception for an insn; exit if we get one. */
-#define EX(x,y...) \
- 99: x,##y; \
- .section __ex_table,"a"; \
- .long 99b - .; \
- lda v0, $exception-99b(zero); \
- .previous
-
-
- .set noreorder
- .set noat
- .text
-
- .globl __strlen_user
- .ent __strlen_user
- .frame sp, 0, ra
-
- .align 3
-__strlen_user:
- ldah a1, 32767(zero) # do not use plain strlen_user() for strings
- # that might be almost 2 GB long; you should
- # be using strnlen_user() instead
-
- .globl __strnlen_user
-
- .align 3
-__strnlen_user:
- .prologue 0
-
- EX( ldq_u t0, 0(a0) ) # load first quadword (a0 may be misaligned)
- lda t1, -1(zero)
- insqh t1, a0, t1
- andnot a0, 7, v0
- or t1, t0, t0
- subq a0, 1, a0 # get our +1 for the return
- cmpbge zero, t0, t1 # t1 <- bitmask: bit i == 1 <==> i-th byte == 0
- subq a1, 7, t2
- subq a0, v0, t0
- bne t1, $found
-
- addq t2, t0, t2
- addq a1, 1, a1
-
- .align 3
-$loop: ble t2, $limit
- EX( ldq t0, 8(v0) )
- subq t2, 8, t2
- addq v0, 8, v0 # addr += 8
- cmpbge zero, t0, t1
- beq t1, $loop
-
-$found: negq t1, t2 # clear all but least set bit
- and t1, t2, t1
-
- and t1, 0xf0, t2 # binary search for that set bit
- and t1, 0xcc, t3
- and t1, 0xaa, t4
- cmovne t2, 4, t2
- cmovne t3, 2, t3
- cmovne t4, 1, t4
- addq t2, t3, t2
- addq v0, t4, v0
- addq v0, t2, v0
- nop # dual issue next two on ev4 and ev5
- subq v0, a0, v0
-$exception:
- ret
-
- .align 3 # currently redundant
-$limit:
- subq a1, t2, v0
- ret
-
- .end __strlen_user
diff --git a/arch/alpha/lib/strncpy_from_user.S b/arch/alpha/lib/strncpy_from_user.S
deleted file mode 100644
index 73ee21160ff7..000000000000
--- a/arch/alpha/lib/strncpy_from_user.S
+++ /dev/null
@@ -1,339 +0,0 @@
-/*
- * arch/alpha/lib/strncpy_from_user.S
- * Contributed by Richard Henderson (rth@tamu.edu)
- *
- * Just like strncpy except in the return value:
- *
- * -EFAULT if an exception occurs before the terminator is copied.
- * N if the buffer filled.
- *
- * Otherwise the length of the string is returned.
- */
-
-
-#include <asm/errno.h>
-#include <asm/regdef.h>
-
-
-/* Allow an exception for an insn; exit if we get one. */
-#define EX(x,y...) \
- 99: x,##y; \
- .section __ex_table,"a"; \
- .long 99b - .; \
- lda $31, $exception-99b($0); \
- .previous
-
-
- .set noat
- .set noreorder
- .text
-
- .globl __strncpy_from_user
- .ent __strncpy_from_user
- .frame $30, 0, $26
- .prologue 0
-
- .align 3
-$aligned:
- /* On entry to this basic block:
- t0 == the first destination word for masking back in
- t1 == the first source word. */
-
- /* Create the 1st output word and detect 0's in the 1st input word. */
- lda t2, -1 # e1 : build a mask against false zero
- mskqh t2, a1, t2 # e0 : detection in the src word
- mskqh t1, a1, t3 # e0 :
- ornot t1, t2, t2 # .. e1 :
- mskql t0, a1, t0 # e0 : assemble the first output word
- cmpbge zero, t2, t8 # .. e1 : bits set iff null found
- or t0, t3, t0 # e0 :
- beq a2, $a_eoc # .. e1 :
- bne t8, $a_eos # .. e1 :
-
- /* On entry to this basic block:
- t0 == a source word not containing a null. */
-
-$a_loop:
- stq_u t0, 0(a0) # e0 :
- addq a0, 8, a0 # .. e1 :
- EX( ldq_u t0, 0(a1) ) # e0 :
- addq a1, 8, a1 # .. e1 :
- subq a2, 1, a2 # e0 :
- cmpbge zero, t0, t8 # .. e1 (stall)
- beq a2, $a_eoc # e1 :
- beq t8, $a_loop # e1 :
-
- /* Take care of the final (partial) word store. At this point
- the end-of-count bit is set in t8 iff it applies.
-
- On entry to this basic block we have:
- t0 == the source word containing the null
- t8 == the cmpbge mask that found it. */
-
-$a_eos:
- negq t8, t12 # e0 : find low bit set
- and t8, t12, t12 # e1 (stall)
-
- /* For the sake of the cache, don't read a destination word
- if we're not going to need it. */
- and t12, 0x80, t6 # e0 :
- bne t6, 1f # .. e1 (zdb)
-
- /* We're doing a partial word store and so need to combine
- our source and original destination words. */
- ldq_u t1, 0(a0) # e0 :
- subq t12, 1, t6 # .. e1 :
- or t12, t6, t8 # e0 :
- unop #
- zapnot t0, t8, t0 # e0 : clear src bytes > null
- zap t1, t8, t1 # .. e1 : clear dst bytes <= null
- or t0, t1, t0 # e1 :
-
-1: stq_u t0, 0(a0)
- br $finish_up
-
- /* Add the end-of-count bit to the eos detection bitmask. */
-$a_eoc:
- or t10, t8, t8
- br $a_eos
-
- /*** The Function Entry Point ***/
- .align 3
-__strncpy_from_user:
- mov a0, v0 # save the string start
- beq a2, $zerolength
-
- /* Are source and destination co-aligned? */
- xor a0, a1, t1 # e0 :
- and a0, 7, t0 # .. e1 : find dest misalignment
- and t1, 7, t1 # e0 :
- addq a2, t0, a2 # .. e1 : bias count by dest misalignment
- subq a2, 1, a2 # e0 :
- and a2, 7, t2 # e1 :
- srl a2, 3, a2 # e0 : a2 = loop counter = (count - 1)/8
- addq zero, 1, t10 # .. e1 :
- sll t10, t2, t10 # e0 : t10 = bitmask of last count byte
- bne t1, $unaligned # .. e1 :
-
- /* We are co-aligned; take care of a partial first word. */
-
- EX( ldq_u t1, 0(a1) ) # e0 : load first src word
- addq a1, 8, a1 # .. e1 :
-
- beq t0, $aligned # avoid loading dest word if not needed
- ldq_u t0, 0(a0) # e0 :
- br $aligned # .. e1 :
-
-
-/* The source and destination are not co-aligned. Align the destination
- and cope. We have to be very careful about not reading too much and
- causing a SEGV. */
-
- .align 3
-$u_head:
- /* We know just enough now to be able to assemble the first
- full source word. We can still find a zero at the end of it
- that prevents us from outputting the whole thing.
-
- On entry to this basic block:
- t0 == the first dest word, unmasked
- t1 == the shifted low bits of the first source word
- t6 == bytemask that is -1 in dest word bytes */
-
- EX( ldq_u t2, 8(a1) ) # e0 : load second src word
- addq a1, 8, a1 # .. e1 :
- mskql t0, a0, t0 # e0 : mask trailing garbage in dst
- extqh t2, a1, t4 # e0 :
- or t1, t4, t1 # e1 : first aligned src word complete
- mskqh t1, a0, t1 # e0 : mask leading garbage in src
- or t0, t1, t0 # e0 : first output word complete
- or t0, t6, t6 # e1 : mask original data for zero test
- cmpbge zero, t6, t8 # e0 :
- beq a2, $u_eocfin # .. e1 :
- bne t8, $u_final # e1 :
-
- lda t6, -1 # e1 : mask out the bits we have
- mskql t6, a1, t6 # e0 : already seen
- stq_u t0, 0(a0) # e0 : store first output word
- or t6, t2, t2 # .. e1 :
- cmpbge zero, t2, t8 # e0 : find nulls in second partial
- addq a0, 8, a0 # .. e1 :
- subq a2, 1, a2 # e0 :
- bne t8, $u_late_head_exit # .. e1 :
-
- /* Finally, we've got all the stupid leading edge cases taken care
- of and we can set up to enter the main loop. */
-
- extql t2, a1, t1 # e0 : position hi-bits of lo word
- EX( ldq_u t2, 8(a1) ) # .. e1 : read next high-order source word
- addq a1, 8, a1 # e0 :
- cmpbge zero, t2, t8 # e1 (stall)
- beq a2, $u_eoc # e1 :
- bne t8, $u_eos # e1 :
-
- /* Unaligned copy main loop. In order to avoid reading too much,
- the loop is structured to detect zeros in aligned source words.
- This has, unfortunately, effectively pulled half of a loop
- iteration out into the head and half into the tail, but it does
- prevent nastiness from accumulating in the very thing we want
- to run as fast as possible.
-
- On entry to this basic block:
- t1 == the shifted high-order bits from the previous source word
- t2 == the unshifted current source word
-
- We further know that t2 does not contain a null terminator. */
-
- .align 3
-$u_loop:
- extqh t2, a1, t0 # e0 : extract high bits for current word
- addq a1, 8, a1 # .. e1 :
- extql t2, a1, t3 # e0 : extract low bits for next time
- addq a0, 8, a0 # .. e1 :
- or t0, t1, t0 # e0 : current dst word now complete
- EX( ldq_u t2, 0(a1) ) # .. e1 : load high word for next time
- stq_u t0, -8(a0) # e0 : save the current word
- mov t3, t1 # .. e1 :
- subq a2, 1, a2 # e0 :
- cmpbge zero, t2, t8 # .. e1 : test new word for eos
- beq a2, $u_eoc # e1 :
- beq t8, $u_loop # e1 :
-
- /* We've found a zero somewhere in the source word we just read.
- If it resides in the lower half, we have one (probably partial)
- word to write out, and if it resides in the upper half, we
- have one full and one partial word left to write out.
-
- On entry to this basic block:
- t1 == the shifted high-order bits from the previous source word
- t2 == the unshifted current source word. */
-$u_eos:
- extqh t2, a1, t0 # e0 :
- or t0, t1, t0 # e1 : first (partial) source word complete
-
- cmpbge zero, t0, t8 # e0 : is the null in this first bit?
- bne t8, $u_final # .. e1 (zdb)
-
- stq_u t0, 0(a0) # e0 : the null was in the high-order bits
- addq a0, 8, a0 # .. e1 :
- subq a2, 1, a2 # e1 :
-
-$u_late_head_exit:
- extql t2, a1, t0 # .. e0 :
- cmpbge zero, t0, t8 # e0 :
- or t8, t10, t6 # e1 :
- cmoveq a2, t6, t8 # e0 :
- nop # .. e1 :
-
- /* Take care of a final (probably partial) result word.
- On entry to this basic block:
- t0 == assembled source word
- t8 == cmpbge mask that found the null. */
-$u_final:
- negq t8, t6 # e0 : isolate low bit set
- and t6, t8, t12 # e1 :
-
- and t12, 0x80, t6 # e0 : avoid dest word load if we can
- bne t6, 1f # .. e1 (zdb)
-
- ldq_u t1, 0(a0) # e0 :
- subq t12, 1, t6 # .. e1 :
- or t6, t12, t8 # e0 :
- zapnot t0, t8, t0 # .. e1 : kill source bytes > null
- zap t1, t8, t1 # e0 : kill dest bytes <= null
- or t0, t1, t0 # e1 :
-
-1: stq_u t0, 0(a0) # e0 :
- br $finish_up
-
-$u_eoc: # end-of-count
- extqh t2, a1, t0
- or t0, t1, t0
- cmpbge zero, t0, t8
-
-$u_eocfin: # end-of-count, final word
- or t10, t8, t8
- br $u_final
-
- /* Unaligned copy entry point. */
- .align 3
-$unaligned:
-
- EX( ldq_u t1, 0(a1) ) # e0 : load first source word
-
- and a0, 7, t4 # .. e1 : find dest misalignment
- and a1, 7, t5 # e0 : find src misalignment
-
- /* Conditionally load the first destination word and a bytemask
- with 0xff indicating that the destination byte is sacrosanct. */
-
- mov zero, t0 # .. e1 :
- mov zero, t6 # e0 :
- beq t4, 1f # .. e1 :
- ldq_u t0, 0(a0) # e0 :
- lda t6, -1 # .. e1 :
- mskql t6, a0, t6 # e0 :
-1:
- subq a1, t4, a1 # .. e1 : sub dest misalignment from src addr
-
- /* If source misalignment is larger than dest misalignment, we need
- extra startup checks to avoid SEGV. */
-
- cmplt t4, t5, t12 # e1 :
- extql t1, a1, t1 # .. e0 : shift src into place
- lda t2, -1 # e0 : for creating masks later
- beq t12, $u_head # e1 :
-
- mskqh t2, t5, t2 # e0 : begin src byte validity mask
- cmpbge zero, t1, t8 # .. e1 : is there a zero?
- extql t2, a1, t2 # e0 :
- or t8, t10, t5 # .. e1 : test for end-of-count too
- cmpbge zero, t2, t3 # e0 :
- cmoveq a2, t5, t8 # .. e1 :
- andnot t8, t3, t8 # e0 :
- beq t8, $u_head # .. e1 (zdb)
-
- /* At this point we've found a zero in the first partial word of
- the source. We need to isolate the valid source data and mask
- it into the original destination data. (Incidentally, we know
- that we'll need at least one byte of that original dest word.) */
-
- ldq_u t0, 0(a0) # e0 :
- negq t8, t6 # .. e1 : build bitmask of bytes <= zero
- mskqh t1, t4, t1 # e0 :
- and t6, t8, t12 # .. e1 :
- subq t12, 1, t6 # e0 :
- or t6, t12, t8 # e1 :
-
- zapnot t2, t8, t2 # e0 : prepare source word; mirror changes
- zapnot t1, t8, t1 # .. e1 : to source validity mask
-
- andnot t0, t2, t0 # e0 : zero place for source to reside
- or t0, t1, t0 # e1 : and put it there
- stq_u t0, 0(a0) # e0 :
-
-$finish_up:
- zapnot t0, t12, t4 # was last byte written null?
- cmovne t4, 1, t4
-
- and t12, 0xf0, t3 # binary search for the address of the
- and t12, 0xcc, t2 # last byte written
- and t12, 0xaa, t1
- bic a0, 7, t0
- cmovne t3, 4, t3
- cmovne t2, 2, t2
- cmovne t1, 1, t1
- addq t0, t3, t0
- addq t1, t2, t1
- addq t0, t1, t0
- addq t0, t4, t0 # add one if we filled the buffer
-
- subq t0, v0, v0 # find string length
- ret
-
-$zerolength:
- clr v0
-$exception:
- ret
-
- .end __strncpy_from_user
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index 5eecab1a84ef..0c4132dd3507 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -89,6 +89,8 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
const struct exception_table_entry *fixup;
int fault, si_code = SEGV_MAPERR;
siginfo_t info;
+ unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
+ (cause > 0 ? FAULT_FLAG_WRITE : 0));
/* As of EV6, a load into $31/$f31 is a prefetch, and never faults
(or is suppressed by the PALcode). Support that for older CPUs
@@ -114,6 +116,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
goto vmalloc_fault;
#endif
+retry:
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
if (!vma)
@@ -144,8 +147,11 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
/* If for any reason at all we couldn't handle the fault,
make sure we exit gracefully rather than endlessly redo
the fault. */
- fault = handle_mm_fault(mm, vma, address, cause > 0 ? FAULT_FLAG_WRITE : 0);
- up_read(&mm->mmap_sem);
+ fault = handle_mm_fault(mm, vma, address, flags);
+
+ if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+ return;
+
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
goto out_of_memory;
@@ -153,10 +159,26 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
goto do_sigbus;
BUG();
}
- if (fault & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
+
+ if (flags & FAULT_FLAG_ALLOW_RETRY) {
+ if (fault & VM_FAULT_MAJOR)
+ current->maj_flt++;
+ else
+ current->min_flt++;
+ if (fault & VM_FAULT_RETRY) {
+ flags &= ~FAULT_FLAG_ALLOW_RETRY;
+
+ /* No need to up_read(&mm->mmap_sem) as we would
+ * have already released it in __lock_page_or_retry
+ * in mm/filemap.c.
+ */
+
+ goto retry;
+ }
+ }
+
+ up_read(&mm->mmap_sem);
+
return;
/* Something tried to access memory that isn't in our memory map.
@@ -186,12 +208,14 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
/* We ran out of memory, or some other thing happened to us that
made us unable to handle the page fault gracefully. */
out_of_memory:
+ up_read(&mm->mmap_sem);
if (!user_mode(regs))
goto no_context;
pagefault_out_of_memory();
return;
do_sigbus:
+ up_read(&mm->mmap_sem);
/* Send a sigbus, regardless of whether we were in kernel
or user mode. */
info.si_signo = SIGBUS;
diff --git a/arch/alpha/oprofile/common.c b/arch/alpha/oprofile/common.c
index a0a5d27aa215..b8ce18f485d3 100644
--- a/arch/alpha/oprofile/common.c
+++ b/arch/alpha/oprofile/common.c
@@ -12,6 +12,7 @@
#include <linux/smp.h>
#include <linux/errno.h>
#include <asm/ptrace.h>
+#include <asm/special_insns.h>
#include "op_impl.h"