119 files changed, 25018 insertions, 0 deletions
diff --git a/arch/arm26/ACKNOWLEDGEMENTS b/arch/arm26/ACKNOWLEDGEMENTS
new file mode 100644
index 00000000000..0a17a45110e
--- /dev/null
+++ b/arch/arm26/ACKNOWLEDGEMENTS
@@ -0,0 +1,29 @@
+The work in this architecture (ARM26) is that of a great many people.
+
+This is what has happened:
+
+I [Ian Molton] have been trying to repair the ARM26 architecture support, but it has become an impossible task whilst it is still merged with the ARM32 (arch/arm) code. The ARM26 code is too different to be sensible to keep with the ARM32 code now, and Russell King really doesnt have the time to maintain the ARM26 code. Add to that that most ARM32 developers dont know about or care about ARM26 when writing patches, and you have a reall mess.
+
+As a result, I've split it off into a new architecture of its own. I've named it arm26 since these CPUs have only a 26 bit address space, unlike the other ARMs.
+
+The upheaval in moving around so many source files and chopping out vasty ammounts of cruft was enormous, and the copyright of many files is sometimes unclear. Because of this, I am writing this, in order that no-one is left out / misaccredited / blamed for any of the code.
+
+People I KNOW have made major contributions to the code:
+
+David Alan Gilbert (former maintainer of ARM26 bits)
+Philip Blundell
+Russell King
+Keith Owens
+
+also thanks to Nicholas Pitre for hints, and for the basis or our XIP support.
+
+Currently maintaing the code are
+
+Ian Molton (Maintainer / Archimedes)
+John Appleby (kernel / A5K)
+
+If anyone has a problem with attributions in header files / source files, please do contact me to straighten things out.
+
+Ian Molton (aka spyro)  -  ARM26 maintainer
+spyro@f2s.com
+
diff --git a/arch/arm26/Kconfig b/arch/arm26/Kconfig
new file mode 100644
index 00000000000..3955de5af4c
--- /dev/null
+++ b/arch/arm26/Kconfig
@@ -0,0 +1,227 @@
+#
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/kconfig-language.txt.
+#
+
+mainmenu "Linux Kernel Configuration"
+
+config ARM
+	bool
+	default y
+
+config ARM26
+	bool
+	default y
+
+config MMU
+	bool
+	default y
+
+config ARCH_ACORN
+        bool
+        default y
+
+config CPU_26
+        bool
+        default y
+
+config FIQ
+        bool
+        default y
+
+# 9 = 512 pages 8 = 256 pages 7 = 128 pages
+config FORCE_MAX_ZONEORDER
+        int
+        default 9
+
+config UID16
+	bool
+	default y
+
+config RWSEM_GENERIC_SPINLOCK
+	bool
+	default y
+
+config RWSEM_XCHGADD_ALGORITHM
+	bool
+
+config GENERIC_CALIBRATE_DELAY
+	bool
+	default y
+
+config GENERIC_BUST_SPINLOCK
+	bool
+
+config GENERIC_ISA_DMA
+	bool
+
+source "init/Kconfig"
+
+
+menu "System Type"
+
+comment "Archimedes/A5000 Implementations (select only ONE)"
+
+config ARCH_ARC
+        bool "Archimedes"
+        help
+          Say Y to support the Acorn Archimedes.
+
+	  The Acorn Archimedes was an personal computer based on an 8MHz ARM2
+          processor, released in 1987.  It supported up to 16MB of RAM in
+	  later models and floppy, harddisc, ethernet etc.
+
+config ARCH_A5K
+        bool "A5000"
+        help
+          Say Y here to to support the Acorn A5000.
+
+	  Linux can support the
+          internal IDE disk and CD-ROM interface, serial and parallel port,
+          and the floppy drive.  Note that on some A5000s the floppy is
+          plugged into the wrong socket on the motherboard.
+
+config PAGESIZE_16
+        bool "2MB physical memory (broken)"
+        help
+          Say Y here if your Archimedes or A5000 system has only 2MB of
+          memory, otherwise say N.  The resulting kernel will not run on a
+          machine with 4MB of memory.
+endmenu
+
+menu "General setup"
+
+# Compressed boot loader in ROM.  Yes, we really want to ask about
+# TEXT and BSS so we preserve their values in the config files.
+config ZBOOT_ROM
+	bool "Compressed boot loader in ROM/flash"
+	help
+	  Say Y here if you intend to execute your compressed kernel image (zImage)
+	  directly from ROM or flash.  If unsure, say N.
+
+config ZBOOT_ROM_TEXT
+	depends on ZBOOT_ROM
+	hex "Compressed ROM boot loader base address"
+	default "0"
+	help
+	  The base address for zImage.  Unless you have special requirements, you
+	  should not change this value.
+
+config ZBOOT_ROM_BSS
+	depends on ZBOOT_ROM
+	hex "Compressed ROM boot loader BSS address"
+	default "0"
+	help
+	  The base address of 64KiB of read/write memory, which must be available
+	  while the decompressor is running.  Unless you have special requirements,
+	  you should not change this value.
+
+config XIP_KERNEL
+	bool "Execute In Place (XIP) kernel image"
+	help
+	  Select this option to create a kernel that can be programed into
+	  the OS ROMs.
+
+comment "At least one math emulation must be selected"
+
+config FPE_NWFPE
+	tristate "NWFPE math emulation"
+	---help---
+	  Say Y to include the NWFPE floating point emulator in the kernel.
+	  This is necessary to run most binaries. Linux does not currently
+	  support floating point hardware so you need to say Y here even if
+	  your machine has an FPA or floating point co-processor podule.
+
+	  It is also possible to say M to build the emulator as a module
+	  (nwfpe) or indeed to leave it out altogether. However, unless you
+	  know what you are doing this can easily render your machine
+	  unbootable. Saying Y is the safe option.
+
+	  You may say N here if you are going to load the Acorn FPEmulator
+	  early in the bootup.
+
+source "fs/Kconfig.binfmt"
+
+config PREEMPT
+	bool "Preemptible Kernel (EXPERIMENTAL)"
+	depends on CPU_32 && EXPERIMENTAL
+	help
+	  This option reduces the latency of the kernel when reacting to
+	  real-time or interactive events by allowing a low priority process to
+	  be preempted even if it is in kernel mode executing a system call.
+	  This allows applications to run more reliably even when the system is
+	  under load.
+
+	  Say Y here if you are building a kernel for a desktop, embedded
+	  or real-time system.  Say N if you are unsure.
+
+config ARTHUR
+	tristate "RISC OS personality"
+	depends on CPU_32
+	help
+	  Say Y here to include the kernel code necessary if you want to run
+	  Acorn RISC OS/Arthur binaries under Linux. This code is still very
+	  experimental; if this sounds frightening, say N and sleep in peace.
+	  You can also say M here to compile this support as a module (which
+	  will be called arthur).
+
+config CMDLINE
+	string "Default kernel command string"
+	default ""
+	help
+	  On some architectures (EBSA110 and CATS), there is currently no way
+	  for the boot loader to pass arguments to the kernel. For these
+	  architectures, you should supply some command-line options at build
+	  time by entering them here. As a minimum, you should specify the
+	  memory size and the root device (e.g., mem=64M root=/dev/nfs).
+
+endmenu
+
+source "drivers/base/Kconfig"
+
+source "drivers/parport/Kconfig"
+
+source "drivers/pnp/Kconfig"
+
+source "drivers/block/Kconfig"
+
+source "drivers/md/Kconfig"
+
+source "net/Kconfig"
+
+source "drivers/ide/Kconfig"
+
+source "drivers/scsi/Kconfig"
+
+source "drivers/isdn/Kconfig"
+
+#
+# input before char - char/joystick depends on it. As does USB.
+#
+source "drivers/input/Kconfig"
+
+source "drivers/char/Kconfig"
+
+source "drivers/media/Kconfig"
+
+source "fs/Kconfig"
+
+source "drivers/video/Kconfig"
+
+if ARCH_ACORN
+
+source "sound/Kconfig"
+
+endif
+
+source "drivers/misc/Kconfig"
+
+source "drivers/usb/Kconfig"
+
+source "arch/arm26/Kconfig.debug"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
+
+source "lib/Kconfig"
diff --git a/arch/arm26/Kconfig.debug b/arch/arm26/Kconfig.debug
new file mode 100644
index 00000000000..611fc86503f
--- /dev/null
+++ b/arch/arm26/Kconfig.debug
@@ -0,0 +1,50 @@
+menu "Kernel hacking"
+
+source "lib/Kconfig.debug"
+
+# RMK wants arm kernels compiled with frame pointers so hardwire this to y.
+# If you know what you are doing and are willing to live without stack
+# traces, you can get a slightly smaller kernel by setting this option to
+# n, but then RMK will have to kill you ;).
+config FRAME_POINTER
+	bool
+	default y
+	help
+	  If you say N here, the resulting kernel will be slightly smaller and
+	  faster. However, when a problem occurs with the kernel, the
+	  information that is reported is severely limited. Most people
+	  should say Y here.
+
+config DEBUG_USER
+	bool "Verbose user fault messages"
+	help
+	  When a user program crashes due to an exception, the kernel can
+	  print a brief message explaining what the problem was. This is
+	  sometimes helpful for debugging but serves no purpose on a
+	  production system. Most people should say N here.
+
+config DEBUG_WAITQ
+	bool "Wait queue debugging"
+	depends on DEBUG_KERNEL
+
+config DEBUG_ERRORS
+	bool "Verbose kernel error messages"
+	depends on DEBUG_KERNEL
+	help
+	  This option controls verbose debugging information which can be
+	  printed when the kernel detects an internal error. This debugging
+	  information is useful to kernel hackers when tracking down problems,
+	  but mostly meaningless to other people. It's safe to say Y unless
+	  you are concerned with the code size or don't want to see these
+	  messages.
+
+# These options are only for real kernel hackers who want to get their hands dirty.
+config DEBUG_LL
+	bool "Kernel low-level debugging functions"
+	depends on DEBUG_KERNEL
+	help
+	  Say Y here to include definitions of printascii, printchar, printhex
+	  in the kernel.  This is helpful if you are debugging code that
+	  executes before the console is initialized.
+
+endmenu
diff --git a/arch/arm26/Makefile b/arch/arm26/Makefile
new file mode 100644
index 00000000000..ada8985530a
--- /dev/null
+++ b/arch/arm26/Makefile
@@ -0,0 +1,118 @@
+#
+# arch/arm26/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995-2001 by Russell King
+# Copyright (c) 2004 Ian Molton
+
+LDFLAGS_vmlinux	:=-p -X
+CPPFLAGS_vmlinux.lds = -DTEXTADDR=$(TEXTADDR) -DDATAADDR=$(DATAADDR)
+OBJCOPYFLAGS	:=-O binary -R .note -R .comment -S
+GZFLAGS		:=-9
+
+ifeq ($(CONFIG_FRAME_POINTER),y)
+CFLAGS		+=-fno-omit-frame-pointer -mno-sched-prolog
+endif
+
+ifeq ($(CONFIG_DEBUG_INFO),y)
+CFLAGS		+=-g
+endif
+
+CFLAGS_BOOT	:=-mapcs-26 -mcpu=arm3 -msoft-float -Uarm
+CFLAGS		+=-mapcs-26 -mcpu=arm3 -msoft-float -Uarm
+AFLAGS		+=-mapcs-26 -mcpu=arm3 -msoft-float
+
+ifeq ($(CONFIG_XIP_KERNEL),y)
+  TEXTADDR	 := 0x03880000
+  DATAADDR	 := 0x02080000
+else
+  TEXTADDR	 := 0x02080000
+  DATAADDR       := .
+endif
+
+head-y          := arch/arm26/kernel/head.o arch/arm26/kernel/init_task.o
+
+ifeq ($(incdir-y),)
+incdir-y :=
+endif
+INCDIR   :=
+  
+export	MACHINE TEXTADDR GZFLAGS CFLAGS_BOOT
+
+# If we have a machine-specific directory, then include it in the build.
+core-y				+= arch/arm26/kernel/ arch/arm26/mm/ arch/arm26/machine/
+core-$(CONFIG_FPE_NWFPE)	+= arch/arm26/nwfpe/
+
+libs-y				+= arch/arm26/lib/
+
+# Default target when executing plain make
+all: zImage
+
+boot := arch/arm26/boot
+
+prepare: include/asm-$(ARCH)/asm_offsets.h
+CLEAN_FILES += include/asm-$(ARCH)/asm_offsets.h
+
+
+.PHONY: maketools FORCE
+maketools: FORCE
+	
+
+# Convert bzImage to zImage
+bzImage: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $(boot)/zImage
+
+zImage Image bootpImage xipImage: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+zinstall install: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $@
+
+# We use MRPROPER_FILES and CLEAN_FILES now
+archclean:
+	$(Q)$(MAKE) $(clean)=$(boot)
+
+# My testing targets (that short circuit a few dependencies)
+zImg:;	$(Q)$(MAKE) $(build)=$(boot) $(boot)/zImage
+Img:;	$(Q)$(MAKE) $(build)=$(boot) $(boot)/Image
+bp:;	$(Q)$(MAKE) $(build)=$(boot) $(boot)/bootpImage
+i:;	$(Q)$(MAKE) $(build)=$(boot) install
+zi:;	$(Q)$(MAKE) $(build)=$(boot) zinstall
+
+#
+# Configuration targets.  Use these to select a
+# configuration for your architecture
+%_config:
+	@( \
+	CFG=$(@:_config=); \
+	if [ -f arch/arm26/def-configs/$$CFG ]; then \
+	  [ -f .config ] && mv -f .config .config.old; \
+	  cp arch/arm26/def-configs/$$CFG .config; \
+	  echo "*** Default configuration for $$CFG installed"; \
+	  echo "*** Next, you may run 'make oldconfig'"; \
+	else \
+	  echo "$$CFG does not exist"; \
+	fi; \
+	)
+
+arch/$(ARCH)/kernel/asm-offsets.s: include/asm include/linux/version.h \
+				   include/config/MARKER
+
+include/asm-$(ARCH)/asm_offsets.h: arch/$(ARCH)/kernel/asm-offsets.s
+	$(call filechk,gen-asm-offsets)
+
+define archhelp
+  echo  '* zImage        - Compressed kernel image (arch/$(ARCH)/boot/zImage)'
+  echo  '  Image         - Uncompressed kernel image (arch/$(ARCH)/boot/Image)'
+  echo  '  bootpImage    - Combined zImage and initial RAM disk' 
+  echo  '  xipImage      - eXecute In Place capable image for ROM use (arch/$(ARCH)/boot/xipImage)'
+  echo  '  initrd        - Create an initial image'
+  echo  '  install       - Install uncompressed kernel'
+  echo  '  zinstall      - Install compressed kernel'
+  echo  '                  Install using (your) ~/bin/installkernel or'
+  echo  '                  (distribution) /sbin/installkernel or'
+  echo  '                  install to $$(INSTALL_PATH) and run lilo'
+endef
diff --git a/arch/arm26/boot/Makefile b/arch/arm26/boot/Makefile
new file mode 100644
index 00000000000..b5c2277654d
--- /dev/null
+++ b/arch/arm26/boot/Makefile
@@ -0,0 +1,80 @@
+#
+# arch/arm26/boot/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995-2002 Russell King
+#
+
+# Note: the following conditions must always be true:
+#   ZRELADDR == virt_to_phys(TEXTADDR)
+#   PARAMS_PHYS must be with 4MB of ZRELADDR
+#   INITRD_PHYS must be in RAM
+
+   zreladdr-y		:= 0x02080000 
+params_phys-y		:= 0x0207c000
+initrd_phys-y		:= 0x02180000
+
+ZRELADDR    := 0x02080000
+ZTEXTADDR   := 0x0207c000
+PARAMS_PHYS := $(params_phys-y)
+INITRD_PHYS := 0x02180000
+
+# We now have a PIC decompressor implementation.  Decompressors running
+# from RAM should not define ZTEXTADDR.  Decompressors running directly
+# from ROM or Flash must define ZTEXTADDR (preferably via the config)
+# FIXME: Previous assignment to ztextaddr-y is lost here. See SHARK
+ifeq ($(CONFIG_ZBOOT_ROM),y)
+ZTEXTADDR	:= $(CONFIG_ZBOOT_ROM_TEXT)
+ZBSSADDR	:= $(CONFIG_ZBOOT_ROM_BSS)
+else
+ZTEXTADDR	:= 0
+ZBSSADDR	:= ALIGN(4)
+endif
+
+export	ZTEXTADDR ZBSSADDR ZRELADDR INITRD_PHYS PARAMS_PHYS
+
+targets := Image zImage bootpImage xipImage
+
+$(obj)/Image: vmlinux FORCE
+	$(call if_changed,objcopy)
+	@echo '  Kernel: $@ is ready'
+
+$(obj)/zImage:	$(obj)/compressed/vmlinux FORCE
+	$(call if_changed,objcopy)
+	@echo '  Kernel: $@ is ready'
+
+$(obj)/compressed/vmlinux: vmlinux FORCE
+	$(Q)$(MAKE) $(build)=$(obj)/compressed $@
+
+ifeq ($(CONFIG_XIP_KERNEL),y)
+$(obj)/xipImage: vmlinux FORCE
+#	$(OBJCOPY) -S -O binary -R .data -R .comment vmlinux vmlinux-text.bin
+# FIXME - where has .pci_fixup crept in from?
+	$(OBJCOPY) -S -O binary -R .data -R .pci_fixup -R .comment vmlinux vmlinux-text.bin
+	$(OBJCOPY) -S -O binary -R .init -R .text -R __ex_table -R .pci_fixup -R __ksymtab -R __ksymtab_gpl -R __kcrctab -R __kcrctab_gpl -R __param -R .comment vmlinux vmlinux-data.bin
+	cat vmlinux-text.bin vmlinux-data.bin > $@
+	$(RM) -f vmlinux-text.bin vmlinux-data.bin
+	@echo '  Kernel: $@ is ready'
+endif
+
+.PHONY: initrd
+initrd:
+	@test "$(INITRD_PHYS)" != "" || \
+	(echo This machine does not support INITRD; exit -1)
+	@test "$(INITRD)" != "" || \
+	(echo You must specify INITRD; exit -1)
+
+install: $(obj)/Image
+	$(CONFIG_SHELL) $(obj)/install.sh \
+	$(KERNELRELEASE) \
+	$(obj)/Image System.map "$(INSTALL_PATH)"
+
+zinstall: $(obj)/zImage
+	$(CONFIG_SHELL) $(obj)/install.sh \
+	$(KERNELRELEASE) \
+	$(obj)/zImage System.map "$(INSTALL_PATH)"
+
+subdir-	    := compressed
diff --git a/arch/arm26/boot/compressed/Makefile b/arch/arm26/boot/compressed/Makefile
new file mode 100644
index 00000000000..b1d9ddebbe7
--- /dev/null
+++ b/arch/arm26/boot/compressed/Makefile
@@ -0,0 +1,50 @@
+#
+# linux/arch/arm26/boot/compressed/Makefile
+#
+# create a compressed vmlinuz image from the original vmlinux
+#
+# Note! ZTEXTADDR, ZBSSADDR and ZRELADDR are now exported
+# from arch/arm26/boot/Makefile
+#
+
+HEAD	= head.o
+OBJS	= misc.o
+FONTC	= drivers/video/console/font_acorn_8x8.c
+
+OBJS		+= ll_char_wr.o font.o
+CFLAGS_misc.o	:= -DPARAMS_PHYS=$(PARAMS_PHYS)
+
+targets       := vmlinux vmlinux.lds piggy piggy.gz piggy.o font.o head.o $(OBJS)
+
+SEDFLAGS	= s/TEXT_START/$(ZTEXTADDR)/;s/LOAD_ADDR/$(ZRELADDR)/;s/BSS_START/$(ZBSSADDR)/
+
+EXTRA_CFLAGS  := $(CFLAGS_BOOT) -fpic
+EXTRA_AFLAGS  := -traditional
+
+LDFLAGS_vmlinux := -p -X \
+	$(shell $(CC) $(CFLAGS)) -T
+
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.o \
+	 	$(addprefix $(obj)/, $(OBJS)) FORCE
+	$(call if_changed,ld)
+	@:
+
+
+$(obj)/piggy: vmlinux FORCE
+	$(call if_changed,objcopy)
+
+$(obj)/piggy.gz: $(obj)/piggy FORCE
+	$(call if_changed,gzip)
+
+LDFLAGS_piggy.o := -r -b binary
+$(obj)/piggy.o:  $(obj)/piggy.gz FORCE
+	$(call if_changed,ld)
+
+$(obj)/font.o: $(FONTC)
+	$(CC) $(CFLAGS) -Dstatic= -c $(FONTC) -o $(obj)/font.o
+
+$(obj)/vmlinux.lds: $(obj)/vmlinux.lds.in Makefile arch/arm26/boot/Makefile .config
+	@sed "$(SEDFLAGS)" < $< > $@
+
+$(obj)/misc.o: $(obj)/misc.c $(obj)/uncompress.h lib/inflate.c
+
diff --git a/arch/arm26/boot/compressed/head.S b/arch/arm26/boot/compressed/head.S
new file mode 100644
index 00000000000..0307804a607
--- /dev/null
+++ b/arch/arm26/boot/compressed/head.S
@@ -0,0 +1,517 @@
+/*
+ *  linux/arch/arm26/boot/compressed/head.S
+ *
+ *  Copyright (C) 1996-2002 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/config.h>
+#include <linux/linkage.h>
+
+/*
+ * Debugging stuff
+ *
+ * Note that these macros must not contain any code which is not
+ * 100% relocatable.  Any attempt to do so will result in a crash.
+ * Please select one of the following when turning on debugging.
+ */
+
+		.macro	kputc,val
+		mov	r0, \val
+		bl	putc
+		.endm
+
+		.macro	kphex,val,len
+		mov	r0, \val
+		mov	r1, #\len
+		bl	phex
+		.endm
+
+		.macro	debug_reloc_start
+		.endm
+
+		.macro	debug_reloc_end
+		.endm
+
+		.section ".start", #alloc, #execinstr
+/*
+ * sort out different calling conventions
+ */
+		.align
+start:
+		.type	start,#function
+		.rept	8
+		mov	r0, r0
+		.endr
+
+		b	1f
+		.word	0x016f2818		@ Magic numbers to help the loader
+		.word	start			@ absolute load/run zImage address
+		.word	_edata			@ zImage end address
+1:		mov	r7, r1			@ save architecture ID
+		mov	r8, #0			@ save r0
+		teqp	pc, #0x0c000003		@ turn off interrupts
+
+		.text
+		adr	r0, LC0
+		ldmia	r0, {r1, r2, r3, r4, r5, r6, ip, sp}
+		subs	r0, r0, r1		@ calculate the delta offset
+
+		teq	r0, #0			@ if delta is zero, we're
+		beq	not_relocated		@ running at the address we
+						@ were linked at.
+
+		add	r2, r2, r0		@ different address, so we
+		add	r3, r3, r0		@ need to fix up various
+		add	r5, r5, r0		@ pointers.
+		add	r6, r6, r0
+		add	ip, ip, r0
+		add	sp, sp, r0
+
+1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
+		add	r1, r1, r0		@ table.  This fixes up the
+		str	r1, [r6], #4		@ C references.
+		cmp	r6, ip
+		blo	1b
+
+not_relocated:	mov	r0, #0
+1:		str	r0, [r2], #4		@ clear bss
+		str	r0, [r2], #4
+		str	r0, [r2], #4
+		str	r0, [r2], #4
+		cmp	r2, r3
+		blo	1b
+
+		bl	cache_on
+
+		mov	r1, sp			@ malloc space above stack
+		add	r2, sp, #0x10000	@ 64k max
+
+/*
+ * Check to see if we will overwrite ourselves.
+ *   r4 = final kernel address
+ *   r5 = start of this image
+ *   r2 = end of malloc space (and therefore this image)
+ * We basically want:
+ *   r4 >= r2 -> OK
+ *   r4 + image length <= r5 -> OK
+ */
+		cmp	r4, r2
+		bhs	wont_overwrite
+		add	r0, r4, #4096*1024	@ 4MB largest kernel size
+		cmp	r0, r5
+		bls	wont_overwrite
+
+		mov	r5, r2			@ decompress after malloc space
+		mov	r0, r5
+		mov	r3, r7
+		bl	decompress_kernel
+
+		add	r0, r0, #127
+		bic	r0, r0, #127		@ align the kernel length
+/*
+ * r0     = decompressed kernel length
+ * r1-r3  = unused
+ * r4     = kernel execution address
+ * r5     = decompressed kernel start
+ * r6     = processor ID
+ * r7     = architecture ID
+ * r8-r14 = unused
+ */
+		add	r1, r5, r0		@ end of decompressed kernel
+		adr	r2, reloc_start
+		ldr	r3, LC1
+		add	r3, r2, r3
+1:		ldmia	r2!, {r8 - r13}		@ copy relocation code
+		stmia	r1!, {r8 - r13}
+		ldmia	r2!, {r8 - r13}
+		stmia	r1!, {r8 - r13}
+		cmp	r2, r3
+		blo	1b
+
+		bl	cache_clean_flush
+		add	pc, r5, r0		@ call relocation code
+
+/*
+ * We're not in danger of overwriting ourselves.  Do this the simple way.
+ *
+ * r4     = kernel execution address
+ * r7     = architecture ID
+ */
+wont_overwrite:	mov	r0, r4
+		mov	r3, r7
+		bl	decompress_kernel
+		b	call_kernel
+
+		.type	LC0, #object
+LC0:		.word	LC0			@ r1
+		.word	__bss_start		@ r2
+		.word	_end			@ r3
+		.word	_load_addr		@ r4
+		.word	_start			@ r5
+		.word	_got_start		@ r6
+		.word	_got_end		@ ip
+		.word	user_stack+4096		@ sp
+LC1:		.word	reloc_end - reloc_start
+		.size	LC0, . - LC0
+
+/*
+ * Turn on the cache.  We need to setup some page tables so that we
+ * can have both the I and D caches on.
+ *
+ * We place the page tables 16k down from the kernel execution address,
+ * and we hope that nothing else is using it.  If we're using it, we
+ * will go pop!
+ *
+ * On entry,
+ *  r4 = kernel execution address
+ *  r6 = processor ID
+ *  r7 = architecture number
+ *  r8 = run-time address of "start"
+ * On exit,
+ *  r1, r2, r3, r8, r9, r12 corrupted
+ * This routine must preserve:
+ *  r4, r5, r6, r7
+ */
+		.align	5
+cache_on:	mov	r3, #8			@ cache_on function
+		b	call_cache_fn
+
+__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
+		bic	r3, r3, #0xff		@ Align the pointer
+		bic	r3, r3, #0x3f00
+/*
+ * Initialise the page tables, turning on the cacheable and bufferable
+ * bits for the RAM area only.
+ */
+		mov	r0, r3
+		mov	r8, r0, lsr #18
+		mov	r8, r8, lsl #18		@ start of RAM
+		add	r9, r8, #0x10000000	@ a reasonable RAM size
+		mov	r1, #0x12
+		orr	r1, r1, #3 << 10
+		add	r2, r3, #16384
+1:		cmp	r1, r8			@ if virt > start of RAM
+		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
+		cmp	r1, r9			@ if virt > end of RAM
+		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
+		str	r1, [r0], #4		@ 1:1 mapping
+		add	r1, r1, #1048576
+		teq	r0, r2
+		bne	1b
+/*
+ * If ever we are running from Flash, then we surely want the cache
+ * to be enabled also for our execution instance...  We map 2MB of it
+ * so there is no map overlap problem for up to 1 MB compressed kernel.
+ * If the execution is in RAM then we would only be duplicating the above.
+ */
+		mov	r1, #0x1e
+		orr	r1, r1, #3 << 10
+		mov	r2, pc, lsr #20
+		orr	r1, r1, r2, lsl #20
+		add	r0, r3, r2, lsl #2
+		str	r1, [r0], #4
+		add	r1, r1, #1048576
+		str	r1, [r0]
+		mov	pc, lr
+
+__armv4_cache_on:
+		mov	r12, lr
+		bl	__setup_mmu
+		mov	r0, #0
+		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
+		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
+		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
+		orr	r0, r0, #0x1000		@ I-cache enable
+		orr	r0, r0, #0x0030
+		b	__common_cache_on
+
+__arm6_cache_on:
+		mov	r12, lr
+		bl	__setup_mmu
+		mov	r0, #0
+		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
+		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
+		mov	r0, #0x30
+__common_cache_on:
+#ifndef DEBUG
+		orr	r0, r0, #0x000d		@ Write buffer, mmu
+#endif
+		mov	r1, #-1
+		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
+		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
+		mcr	p15, 0, r0, c1, c0, 0	@ load control register
+		mov	pc, r12
+
+/*
+ * All code following this line is relocatable.  It is relocated by
+ * the above code to the end of the decompressed kernel image and
+ * executed there.  During this time, we have no stacks.
+ *
+ * r0     = decompressed kernel length
+ * r1-r3  = unused
+ * r4     = kernel execution address
+ * r5     = decompressed kernel start
+ * r6     = processor ID
+ * r7     = architecture ID
+ * r8-r14 = unused
+ */
+		.align	5
+reloc_start:	add	r8, r5, r0
+		debug_reloc_start
+		mov	r1, r4
+1:
+		.rept	4
+		ldmia	r5!, {r0, r2, r3, r9 - r13}	@ relocate kernel
+		stmia	r1!, {r0, r2, r3, r9 - r13}
+		.endr
+
+		cmp	r5, r8
+		blo	1b
+		debug_reloc_end
+
+call_kernel:	bl	cache_clean_flush
+		bl	cache_off
+		mov	r0, #0
+		mov	r1, r7			@ restore architecture number
+		mov	pc, r4			@ call kernel
+
+/*
+ * Here follow the relocatable cache support functions for the
+ * various processors.  This is a generic hook for locating an
+ * entry and jumping to an instruction at the specified offset
+ * from the start of the block.  Please note this is all position
+ * independent code.
+ *
+ *  r1  = corrupted
+ *  r2  = corrupted
+ *  r3  = block offset
+ *  r6  = corrupted
+ *  r12 = corrupted
+ */
+
+call_cache_fn:	adr	r12, proc_types
+		mrc	p15, 0, r6, c0, c0	@ get processor ID
+1:		ldr	r1, [r12, #0]		@ get value
+		ldr	r2, [r12, #4]		@ get mask
+		eor	r1, r1, r6		@ (real ^ match)
+		tst	r1, r2			@       & mask
+		addeq	pc, r12, r3		@ call cache function
+		add	r12, r12, #4*5
+		b	1b
+
+/*
+ * Table for cache operations.  This is basically:
+ *   - CPU ID match
+ *   - CPU ID mask
+ *   - 'cache on' method instruction
+ *   - 'cache off' method instruction
+ *   - 'cache flush' method instruction
+ *
+ * We match an entry using: ((real_id ^ match) & mask) == 0
+ *
+ * Writethrough caches generally only need 'on' and 'off'
+ * methods.  Writeback caches _must_ have the flush method
+ * defined.
+ */
+		.type	proc_types,#object
+proc_types:
+		.word	0x41560600		@ ARM6/610
+		.word	0xffffffe0
+		b	__arm6_cache_off	@ works, but slow
+		b	__arm6_cache_off
+		mov	pc, lr
+@		b	__arm6_cache_on		@ untested
+@		b	__arm6_cache_off
+@		b	__armv3_cache_flush
+
+		.word	0x41007000		@ ARM7/710
+		.word	0xfff8fe00
+		b	__arm7_cache_off
+		b	__arm7_cache_off
+		mov	pc, lr
+
+		.word	0x41807200		@ ARM720T (writethrough)
+		.word	0xffffff00
+		b	__armv4_cache_on
+		b	__armv4_cache_off
+		mov	pc, lr
+
+		.word	0x41129200		@ ARM920T
+		.word	0xff00fff0
+		b	__armv4_cache_on
+		b	__armv4_cache_off
+		b	__armv4_cache_flush
+
+		.word	0x4401a100		@ sa110 / sa1100
+		.word	0xffffffe0
+		b	__armv4_cache_on
+		b	__armv4_cache_off
+		b	__armv4_cache_flush
+
+		.word	0x6901b110		@ sa1110
+		.word	0xfffffff0
+		b	__armv4_cache_on
+		b	__armv4_cache_off
+		b	__armv4_cache_flush
+
+		.word	0x69050000		@ xscale
+		.word	0xffff0000
+		b	__armv4_cache_on
+		b	__armv4_cache_off
+		b	__armv4_cache_flush
+
+		.word	0			@ unrecognised type
+		.word	0
+		mov	pc, lr
+		mov	pc, lr
+		mov	pc, lr
+
+		.size	proc_types, . - proc_types
+
+/*
+ * Turn off the Cache and MMU.  ARMv3 does not support
+ * reading the control register, but ARMv4 does.
+ *
+ * On entry,  r6 = processor ID
+ * On exit,   r0, r1, r2, r3, r12 corrupted
+ * This routine must preserve: r4, r6, r7
+ */
+		.align	5
+cache_off:	mov	r3, #12			@ cache_off function
+		b	call_cache_fn
+
+__armv4_cache_off:
+		mrc	p15, 0, r0, c1, c0
+		bic	r0, r0, #0x000d
+		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
+		mov	r0, #0
+		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
+		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
+		mov	pc, lr
+
+__arm6_cache_off:
+		mov	r0, #0x00000030		@ ARM6 control reg.
+		b	__armv3_cache_off
+
+__arm7_cache_off:
+		mov	r0, #0x00000070		@ ARM7 control reg.
+		b	__armv3_cache_off
+
+__armv3_cache_off:
+		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
+		mov	r0, #0
+		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
+		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
+		mov	pc, lr
+
+/*
+ * Clean and flush the cache to maintain consistency.
+ *
+ * On entry,
+ *  r6 = processor ID
+ * On exit,
+ *  r1, r2, r3, r12 corrupted
+ * This routine must preserve:
+ *  r0, r4, r5, r6, r7
+ */
+		.align	5
+cache_clean_flush:
+		mov	r3, #16
+		b	call_cache_fn
+
+__armv4_cache_flush:
+		bic	r1, pc, #31
+		add	r2, r1, #65536		@ 2x the largest dcache size
+1:		ldr	r12, [r1], #32		@ s/w flush D cache
+		teq	r1, r2
+		bne	1b
+
+		mcr	p15, 0, r1, c7, c7, 0	@ flush I cache
+		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
+		mov	pc, lr
+
+__armv3_cache_flush:
+		mov	r1, #0
+		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
+		mov	pc, lr
+
+/*
+ * Various debugging routines for printing hex characters and
+ * memory, which again must be relocatable.
+ */
+#ifdef DEBUG
+		.type	phexbuf,#object
+phexbuf:	.space	12
+		.size	phexbuf, . - phexbuf
+
+phex:		adr	r3, phexbuf
+		mov	r2, #0
+		strb	r2, [r3, r1]
+1:		subs	r1, r1, #1
+		movmi	r0, r3
+		bmi	puts
+		and	r2, r0, #15
+		mov	r0, r0, lsr #4
+		cmp	r2, #10
+		addge	r2, r2, #7
+		add	r2, r2, #'0'
+		strb	r2, [r3, r1]
+		b	1b
+
+puts:		loadsp	r3
+1:		ldrb	r2, [r0], #1
+		teq	r2, #0
+		moveq	pc, lr
+2:		writeb	r2
+		mov	r1, #0x00020000
+3:		subs	r1, r1, #1
+		bne	3b
+		teq	r2, #'\n'
+		moveq	r2, #'\r'
+		beq	2b
+		teq	r0, #0
+		bne	1b
+		mov	pc, lr
+putc:
+		mov	r2, r0
+		mov	r0, #0
+		loadsp	r3
+		b	2b
+
+memdump:	mov	r12, r0
+		mov	r10, lr
+		mov	r11, #0
+2:		mov	r0, r11, lsl #2
+		add	r0, r0, r12
+		mov	r1, #8
+		bl	phex
+		mov	r0, #':'
+		bl	putc
+1:		mov	r0, #' '
+		bl	putc
+		ldr	r0, [r12, r11, lsl #2]
+		mov	r1, #8
+		bl	phex
+		and	r0, r11, #7
+		teq	r0, #3
+		moveq	r0, #' '
+		bleq	putc
+		and	r0, r11, #7
+		add	r11, r11, #1
+		teq	r0, #7
+		bne	1b
+		mov	r0, #'\n'
+		bl	putc
+		cmp	r11, #64
+		blt	2b
+		mov	pc, r10
+#endif
+
+reloc_end:
+
+		.align
+		.section ".stack", "aw"
+user_stack:	.space	4096
diff --git a/arch/arm26/boot/compressed/hw-bse.c b/arch/arm26/boot/compressed/hw-bse.c
new file mode 100644
index 00000000000..3e8f07f8e08
--- /dev/null
+++ b/arch/arm26/boot/compressed/hw-bse.c
@@ -0,0 +1,74 @@
+/*
+ * Bright Star Engineering Inc.
+ *
+ * code for readng parameters from the
+ * parameter blocks of the boot block
+ * flash memory
+ *
+ */
+
+static int strcmp(const char *s1, const char *s2)
+{
+  while (*s1 != '\0' && *s1 == *s2)
+    {
+      s1++;
+      s2++;
+    }
+
+  return (*(unsigned char *) s1) - (*(unsigned char *) s2);
+}
+
+struct pblk_t {
+  char type;
+  unsigned short size;
+};
+
+static char *bse_getflashparam(char *name) {
+  unsigned int esize;
+  char *q,*r;
+  unsigned char *p,*e;
+  struct pblk_t *thepb = (struct pblk_t *) 0x00004000;
+  struct pblk_t *altpb = (struct pblk_t *) 0x00006000;  
+  if (thepb->type&1) {
+    if (altpb->type&1) {
+      /* no valid param block */ 
+      return (char*)0;
+    } else {
+      /* altpb is valid */
+      struct pblk_t *tmp;
+      tmp = thepb;
+      thepb = altpb;
+      altpb = tmp;
+    }
+  }
+  p = (char*)thepb + sizeof(struct pblk_t);
+  e = p + thepb->size; 
+  while (p < e) {
+    q = p;
+    esize = *p;
+    if (esize == 0xFF) break;
+    if (esize == 0) break;
+    if (esize > 127) {
+      esize = (esize&0x7F)<<8 | p[1];
+      q++;
+    }
+    q++;
+    r=q;
+    if (*r && ((name == 0) || (!strcmp(name,r)))) {
+      while (*q++) ;
+      return q;
+    }
+    p+=esize;
+  }
+  return (char*)0;
+}
+
+void bse_setup(void) {
+  /* extract the linux cmdline from flash */
+  char *name=bse_getflashparam("linuxboot");
+  char *x = (char *)0xc0000100;
+  if (name) { 
+    while (*name) *x++=*name++;
+  }
+  *x=0;
+}
diff --git a/arch/arm26/boot/compressed/ll_char_wr.S b/arch/arm26/boot/compressed/ll_char_wr.S
new file mode 100644
index 00000000000..f024c3ebdfa
--- /dev/null
+++ b/arch/arm26/boot/compressed/ll_char_wr.S
@@ -0,0 +1,162 @@
+/*
+ *  linux/arch/arm26/lib/ll_char_wr.S
+ *
+ *  Copyright (C) 1995, 1996 Russell King.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Speedups & 1bpp code (C) 1996 Philip Blundell & Russell King.
+ *
+ *  10-04-96	RMK	Various cleanups & reduced register usage.
+ *  08-04-98	RMK	Shifts re-ordered
+ */
+
+@ Regs: [] = corruptible
+@       {} = used
+@       () = do not use
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+		.text
+
+#define BOLD            0x01
+#define ITALIC          0x02
+#define UNDERLINE       0x04
+#define FLASH           0x08
+#define INVERSE         0x10
+
+LC0:		.word	bytes_per_char_h
+		.word	video_size_row
+		.word	acorndata_8x8
+		.word	con_charconvtable
+
+ENTRY(ll_write_char)
+		stmfd	sp!, {r4 - r7, lr}
+@
+@ Smashable regs: {r0 - r3}, [r4 - r7], (r8 - fp), [ip], (sp), [lr], (pc)
+@
+		eor	ip, r1, #UNDERLINE << 9
+/*
+ * calculate colours
+ */
+		tst	r1, #INVERSE << 9
+		moveq	r2, r1, lsr #16
+		moveq	r3, r1, lsr #24
+		movne	r2, r1, lsr #24
+		movne	r3, r1, lsr #16
+		and	r3, r3, #255
+		and	r2, r2, #255
+/*
+ * calculate offset into character table
+ */
+		mov	r1, r1, lsl #23
+		mov	r1, r1, lsr #20
+/*
+ * calculate offset required for each row [maybe I should make this an argument to this fn.
+ * Have to see what the register usage is like in the calling routines.
+ */
+		adr	r4, LC0
+		ldmia	r4, {r4, r5, r6, lr}
+		ldr	r4, [r4]
+		ldr	r5, [r5]
+/*
+ * Go to resolution-dependent routine...
+ */
+		cmp	r4, #4
+		blt	Lrow1bpp
+		eor	r2, r3, r2			@ Create eor mask to change colour from bg
+		orr	r3, r3, r3, lsl #8		@ to fg.
+		orr	r3, r3, r3, lsl #16
+		add	r0, r0, r5, lsl #3		@ Move to bottom of character
+		add	r1, r1, #7
+		ldrb	r7, [r6, r1]
+		tst	ip, #UNDERLINE << 9
+		eoreq	r7, r7, #255
+		teq	r4, #8
+		beq	Lrow8bpplp
+@
+@ Smashable regs: {r0 - r3}, [r4], {r5 - r7}, (r8 - fp), [ip], (sp), {lr}, (pc)
+@
+		orr	r3, r3, r3, lsl #4
+Lrow4bpplp:	ldr	r7, [lr, r7, lsl #2]
+		mul	r7, r2, r7
+		tst	r1, #7				@ avoid using r7 directly after
+		eor	ip, r3, r7
+		str	ip, [r0, -r5]!
+		LOADREGS(eqfd, sp!, {r4 - r7, pc})
+		sub	r1, r1, #1
+		ldrb	r7, [r6, r1]
+		ldr	r7, [lr, r7, lsl #2]
+		mul	r7, r2, r7
+		tst	r1, #7				@ avoid using r7 directly after
+		eor	ip, r3, r7
+		str	ip, [r0, -r5]!
+		subne	r1, r1, #1
+		ldrneb	r7, [r6, r1]
+		bne	Lrow4bpplp
+		LOADREGS(fd, sp!, {r4 - r7, pc})
+
+@
+@ Smashable regs: {r0 - r3}, [r4], {r5 - r7}, (r8 - fp), [ip], (sp), {lr}, (pc)
+@
+Lrow8bpplp:	mov	ip, r7, lsr #4
+		ldr	ip, [lr, ip, lsl #2]
+		mul	r4, r2, ip
+		and	ip, r7, #15			@ avoid r4
+		ldr	ip, [lr, ip, lsl #2]		@ avoid r4
+		mul	ip, r2, ip			@ avoid r4
+		eor	r4, r3, r4			@ avoid ip
+		tst	r1, #7				@ avoid ip
+		sub	r0, r0, r5			@ avoid ip
+		eor	ip, r3, ip
+		stmia	r0, {r4, ip}
+		LOADREGS(eqfd, sp!, {r4 - r7, pc})
+		sub	r1, r1, #1
+		ldrb	r7, [r6, r1]
+		mov	ip, r7, lsr #4
+		ldr	ip, [lr, ip, lsl #2]
+		mul	r4, r2, ip
+		and	ip, r7, #15			@ avoid r4
+		ldr	ip, [lr, ip, lsl #2]		@ avoid r4
+		mul	ip, r2, ip			@ avoid r4
+		eor	r4, r3, r4			@ avoid ip
+		tst	r1, #7				@ avoid ip
+		sub	r0, r0, r5			@ avoid ip
+		eor	ip, r3, ip
+		stmia	r0, {r4, ip}
+		subne	r1, r1, #1
+		ldrneb	r7, [r6, r1]
+		bne	Lrow8bpplp
+		LOADREGS(fd, sp!, {r4 - r7, pc})
+
+@
+@ Smashable regs: {r0 - r3}, [r4], {r5, r6}, [r7], (r8 - fp), [ip], (sp), [lr], (pc)
+@
+Lrow1bpp:	add	r6, r6, r1
+		ldmia	r6, {r4, r7}
+		tst	ip, #INVERSE << 9
+		mvnne	r4, r4
+		mvnne	r7, r7
+		strb	r4, [r0], r5
+		mov	r4, r4, lsr #8
+		strb	r4, [r0], r5
+		mov	r4, r4, lsr #8
+		strb	r4, [r0], r5
+		mov	r4, r4, lsr #8
+		strb	r4, [r0], r5
+		strb	r7, [r0], r5
+		mov	r7, r7, lsr #8
+		strb	r7, [r0], r5
+		mov	r7, r7, lsr #8
+		strb	r7, [r0], r5
+		mov	r7, r7, lsr #8
+		tst	ip, #UNDERLINE << 9
+		mvneq	r7, r7
+		strb	r7, [r0], r5
+		LOADREGS(fd, sp!, {r4 - r7, pc})
+
+		.bss
+ENTRY(con_charconvtable)
+		.space	1024
diff --git a/arch/arm26/boot/compressed/misc.c b/arch/arm26/boot/compressed/misc.c
new file mode 100644
index 00000000000..f17f50e5516
--- /dev/null
+++ b/arch/arm26/boot/compressed/misc.c
@@ -0,0 +1,316 @@
+/*
+ * misc.c
+ * 
+ * This is a collection of several routines from gzip-1.0.3 
+ * adapted for Linux.
+ *
+ * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
+ *
+ * Modified for ARM Linux by Russell King
+ *
+ * Nicolas Pitre <nico@visuaide.com>  1999/04/14 :
+ *  For this code to run directly from Flash, all constant variables must
+ *  be marked with 'const' and all other variables initialized at run-time 
+ *  only.  This way all non constant variables will end up in the bss segment,
+ *  which should point to addresses in RAM and cleared to 0 on start.
+ *  This allows for a much quicker boot time.
+ */
+
+unsigned int __machine_arch_type;
+
+#include <linux/kernel.h>
+
+#include <asm/uaccess.h>
+#include "uncompress.h"
+
+#ifdef STANDALONE_DEBUG
+#define puts printf
+#endif
+
+#define __ptr_t void *
+
+/*
+ * Optimised C version of memzero for the ARM.
+ */
+void __memzero (__ptr_t s, size_t n)
+{
+	union { void *vp; unsigned long *ulp; unsigned char *ucp; } u;
+	int i;
+
+	u.vp = s;
+
+	for (i = n >> 5; i > 0; i--) {
+		*u.ulp++ = 0;
+		*u.ulp++ = 0;
+		*u.ulp++ = 0;
+		*u.ulp++ = 0;
+		*u.ulp++ = 0;
+		*u.ulp++ = 0;
+		*u.ulp++ = 0;
+		*u.ulp++ = 0;
+	}
+
+	if (n & 1 << 4) {
+		*u.ulp++ = 0;
+		*u.ulp++ = 0;
+		*u.ulp++ = 0;
+		*u.ulp++ = 0;
+	}
+
+	if (n & 1 << 3) {
+		*u.ulp++ = 0;
+		*u.ulp++ = 0;
+	}
+
+	if (n & 1 << 2)
+		*u.ulp++ = 0;
+
+	if (n & 1 << 1) {
+		*u.ucp++ = 0;
+		*u.ucp++ = 0;
+	}
+
+	if (n & 1)
+		*u.ucp++ = 0;
+}
+
+static inline __ptr_t memcpy(__ptr_t __dest, __const __ptr_t __src,
+			    size_t __n)
+{
+	int i = 0;
+	unsigned char *d = (unsigned char *)__dest, *s = (unsigned char *)__src;
+
+	for (i = __n >> 3; i > 0; i--) {
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+	}
+
+	if (__n & 1 << 2) {
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+		*d++ = *s++;
+	}
+
+	if (__n & 1 << 1) {
+		*d++ = *s++;
+		*d++ = *s++;
+	}
+
+	if (__n & 1)
+		*d++ = *s++;
+
+	return __dest;
+}
+
+/*
+ * gzip delarations
+ */
+#define OF(args)  args
+#define STATIC static
+
+typedef unsigned char  uch;
+typedef unsigned short ush;
+typedef unsigned long  ulg;
+
+#define WSIZE 0x8000		/* Window size must be at least 32k, */
+				/* and a power of two */
+
+static uch *inbuf;		/* input buffer */
+static uch window[WSIZE];	/* Sliding window buffer */
+
+static unsigned insize;		/* valid bytes in inbuf */
+static unsigned inptr;		/* index of next byte to be processed in inbuf */
+static unsigned outcnt;		/* bytes in output buffer */
+
+/* gzip flag byte */
+#define ASCII_FLAG   0x01 /* bit 0 set: file probably ascii text */
+#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */
+#define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
+#define ORIG_NAME    0x08 /* bit 3 set: original file name present */
+#define COMMENT      0x10 /* bit 4 set: file comment present */
+#define ENCRYPTED    0x20 /* bit 5 set: file is encrypted */
+#define RESERVED     0xC0 /* bit 6,7:   reserved */
+
+#define get_byte()  (inptr < insize ? inbuf[inptr++] : fill_inbuf())
+
+/* Diagnostic functions */
+#ifdef DEBUG
+#  define Assert(cond,msg) {if(!(cond)) error(msg);}
+#  define Trace(x) fprintf x
+#  define Tracev(x) {if (verbose) fprintf x ;}
+#  define Tracevv(x) {if (verbose>1) fprintf x ;}
+#  define Tracec(c,x) {if (verbose && (c)) fprintf x ;}
+#  define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;}
+#else
+#  define Assert(cond,msg)
+#  define Trace(x)
+#  define Tracev(x)
+#  define Tracevv(x)
+#  define Tracec(c,x)
+#  define Tracecv(c,x)
+#endif
+
+static int  fill_inbuf(void);
+static void flush_window(void);
+static void error(char *m);
+static void gzip_mark(void **);
+static void gzip_release(void **);
+
+extern char input_data[];
+extern char input_data_end[];
+
+static uch *output_data;
+static ulg output_ptr;
+static ulg bytes_out;
+
+static void *malloc(int size);
+static void free(void *where);
+static void error(char *m);
+static void gzip_mark(void **);
+static void gzip_release(void **);
+
+static void puts(const char *);
+
+extern int end;
+static ulg free_mem_ptr;
+static ulg free_mem_ptr_end;
+
+#define HEAP_SIZE 0x2000
+
+#include "../../../../lib/inflate.c"
+
+#ifndef STANDALONE_DEBUG
+static void *malloc(int size)
+{
+	void *p;
+
+	if (size <0) error("Malloc error");
+	if (free_mem_ptr <= 0) error("Memory error");
+
+	free_mem_ptr = (free_mem_ptr + 3) & ~3;	/* Align */
+
+	p = (void *)free_mem_ptr;
+	free_mem_ptr += size;
+
+	if (free_mem_ptr >= free_mem_ptr_end)
+		error("Out of memory");
+	return p;
+}
+
+static void free(void *where)
+{ /* gzip_mark & gzip_release do the free */
+}
+
+static void gzip_mark(void **ptr)
+{
+	arch_decomp_wdog();
+	*ptr = (void *) free_mem_ptr;
+}
+
+static void gzip_release(void **ptr)
+{
+	arch_decomp_wdog();
+	free_mem_ptr = (long) *ptr;
+}
+#else
+static void gzip_mark(void **ptr)
+{
+}
+
+static void gzip_release(void **ptr)
+{
+}
+#endif
+
+/* ===========================================================================
+ * Fill the input buffer. This is called only when the buffer is empty
+ * and at least one byte is really needed.
+ */
+int fill_inbuf(void)
+{
+	if (insize != 0)
+		error("ran out of input data");
+
+	inbuf = input_data;
+	insize = &input_data_end[0] - &input_data[0];
+
+	inptr = 1;
+	return inbuf[0];
+}
+
+/* ===========================================================================
+ * Write the output window window[0..outcnt-1] and update crc and bytes_out.
+ * (Used for the decompressed data only.)
+ */
+void flush_window(void)
+{
+	ulg c = crc;
+	unsigned n;
+	uch *in, *out, ch;
+
+	in = window;
+	out = &output_data[output_ptr];
+	for (n = 0; n < outcnt; n++) {
+		ch = *out++ = *in++;
+		c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
+	}
+	crc = c;
+	bytes_out += (ulg)outcnt;
+	output_ptr += (ulg)outcnt;
+	outcnt = 0;
+	puts(".");
+}
+
+static void error(char *x)
+{
+	int ptr;
+
+	puts("\n\n");
+	puts(x);
+	puts("\n\n -- System halted");
+
+	while(1);	/* Halt */
+}
+
+#ifndef STANDALONE_DEBUG
+
+ulg
+decompress_kernel(ulg output_start, ulg free_mem_ptr_p, ulg free_mem_ptr_end_p,
+		  int arch_id)
+{
+	output_data		= (uch *)output_start;	/* Points to kernel start */
+	free_mem_ptr		= free_mem_ptr_p;
+	free_mem_ptr_end	= free_mem_ptr_end_p;
+	__machine_arch_type	= arch_id;
+
+	arch_decomp_setup();
+
+	makecrc();
+	puts("Uncompressing Linux...");
+	gunzip();
+	puts(" done, booting the kernel.\n");
+	return output_ptr;
+}
+#else
+
+char output_buffer[1500*1024];
+
+int main()
+{
+	output_data = output_buffer;
+
+	makecrc();
+	puts("Uncompressing Linux...");
+	gunzip();
+	puts("done.\n");
+	return 0;
+}
+#endif
+	
diff --git a/arch/arm26/boot/compressed/uncompress.h b/arch/arm26/boot/compressed/uncompress.h
new file mode 100644
index 00000000000..66d9b938a7a
--- /dev/null
+++ b/arch/arm26/boot/compressed/uncompress.h
@@ -0,0 +1,110 @@
+/*
+ *
+ *  Copyright (C) 1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define VIDMEM ((char *)0x02000000)
+ 
+int video_num_columns, video_num_lines, video_size_row;
+int white, bytes_per_char_h;
+extern unsigned long con_charconvtable[256];
+
+struct param_struct {
+	unsigned long page_size;
+	unsigned long nr_pages;
+	unsigned long ramdisk_size;
+	unsigned long mountrootrdonly;
+	unsigned long rootdev;
+	unsigned long video_num_cols;
+	unsigned long video_num_rows;
+	unsigned long video_x;
+	unsigned long video_y;
+	unsigned long memc_control_reg;
+	unsigned char sounddefault;
+	unsigned char adfsdrives;
+	unsigned char bytes_per_char_h;
+	unsigned char bytes_per_char_v;
+	unsigned long unused[256/4-11];
+};
+
+static struct param_struct *params = (struct param_struct *)0x0207c000;
+ 
+/*
+ * This does not append a newline
+ */
+static void puts(const char *s)
+{
+	extern void ll_write_char(char *, unsigned long);
+	int x,y;
+	unsigned char c;
+	char *ptr;
+
+	x = params->video_x;
+	y = params->video_y;
+
+	while ( ( c = *(unsigned char *)s++ ) != '\0' ) {
+		if ( c == '\n' ) {
+			x = 0;
+			if ( ++y >= video_num_lines ) {
+				y--;
+			}
+		} else {
+			ptr = VIDMEM + ((y*video_num_columns*params->bytes_per_char_v+x)*bytes_per_char_h);
+			ll_write_char(ptr, c|(white<<16));
+			if ( ++x >= video_num_columns ) {
+				x = 0;
+				if ( ++y >= video_num_lines ) {
+					y--;
+				}
+			}
+		}
+	}
+
+	params->video_x = x;
+	params->video_y = y;
+}
+
+static void error(char *x);
+
+/*
+ * Setup for decompression
+ */
+static void arch_decomp_setup(void)
+{
+	int i;
+	
+	video_num_lines = params->video_num_rows;
+	video_num_columns = params->video_num_cols;
+	bytes_per_char_h = params->bytes_per_char_h;
+	video_size_row = video_num_columns * bytes_per_char_h;
+	if (bytes_per_char_h == 4)
+		for (i = 0; i < 256; i++)
+			con_charconvtable[i] =
+				(i & 128 ? 1 << 0  : 0) |
+				(i & 64  ? 1 << 4  : 0) |
+				(i & 32  ? 1 << 8  : 0) |
+				(i & 16  ? 1 << 12 : 0) |
+				(i & 8   ? 1 << 16 : 0) |
+				(i & 4   ? 1 << 20 : 0) |
+				(i & 2   ? 1 << 24 : 0) |
+				(i & 1   ? 1 << 28 : 0);
+	else
+		for (i = 0; i < 16; i++)
+			con_charconvtable[i] =
+				(i & 8   ? 1 << 0  : 0) |
+				(i & 4   ? 1 << 8  : 0) |
+				(i & 2   ? 1 << 16 : 0) |
+				(i & 1   ? 1 << 24 : 0);
+
+	white = bytes_per_char_h == 8 ? 0xfc : 7;
+
+	if (params->nr_pages * params->page_size < 4096*1024) error("<4M of mem\n");
+}
+
+/*
+ * nothing to do
+ */
+#define arch_decomp_wdog()
diff --git a/arch/arm26/boot/compressed/vmlinux.lds.in b/arch/arm26/boot/compressed/vmlinux.lds.in
new file mode 100644
index 00000000000..86d821d5ab7
--- /dev/null
+++ b/arch/arm26/boot/compressed/vmlinux.lds.in
@@ -0,0 +1,60 @@
+/*
+ *  linux/arch/arm26/boot/compressed/vmlinux.lds.in
+ *
+ *  Copyright (C) 2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+OUTPUT_ARCH(arm)
+ENTRY(_start)
+SECTIONS
+{
+  . = LOAD_ADDR;
+  _load_addr = .;
+
+  . = TEXT_START;
+  _text = .;
+
+  .text : {
+    _start = .;
+    *(.start)
+    *(.text)
+    *(.fixup)
+    *(.gnu.warning)
+    *(.rodata)
+    *(.rodata.*)
+    *(.glue_7)
+    *(.glue_7t)
+    input_data = .;
+    arch/arm26/boot/compressed/piggy.o
+    input_data_end = .;
+    . = ALIGN(4);
+  }
+
+  _etext = .;
+
+  _got_start = .;
+  .got			: { *(.got) }
+  _got_end = .;
+  .got.plt		: { *(.got.plt) }
+  .data			: { *(.data) }
+  _edata = .;
+
+  . = BSS_START;
+  __bss_start = .;
+  .bss			: { *(.bss) }
+  _end = .;
+
+  .stack (NOLOAD)	: { *(.stack) }
+
+  .stab 0		: { *(.stab) }
+  .stabstr 0		: { *(.stabstr) }
+  .stab.excl 0		: { *(.stab.excl) }
+  .stab.exclstr 0	: { *(.stab.exclstr) }
+  .stab.index 0		: { *(.stab.index) }
+  .stab.indexstr 0	: { *(.stab.indexstr) }
+  .comment 0		: { *(.comment) }
+}
+
diff --git a/arch/arm26/boot/install.sh b/arch/arm26/boot/install.sh
new file mode 100644
index 00000000000..c628328dd9e
--- /dev/null
+++ b/arch/arm26/boot/install.sh
@@ -0,0 +1,62 @@
+#!/bin/sh
+#
+# arch/arm26/boot/install.sh
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995 by Linus Torvalds
+#
+# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
+# Adapted from code in arch/i386/boot/install.sh by Russell King
+# Stolen from arm32 by Ian Molton
+#
+# "make install" script for arm architecture
+#
+# Arguments:
+#   $1 - kernel version
+#   $2 - kernel image file
+#   $3 - kernel map file
+#   $4 - default install path (blank if root directory)
+#
+
+# User may have a custom install script
+
+if [ -x /sbin/installkernel ]; then
+  exec /sbin/installkernel "$@"
+fi
+
+if [ "$2" = "zImage" ]; then
+# Compressed install
+  echo "Installing compressed kernel"
+  if [ -f $4/vmlinuz-$1 ]; then
+    mv $4/vmlinuz-$1 $4/vmlinuz.old
+  fi
+
+  if [ -f $4/System.map-$1 ]; then
+    mv $4/System.map-$1 $4/System.old
+  fi
+
+  cat $2 > $4/vmlinuz-$1
+  cp $3 $4/System.map-$1
+else
+# Normal install
+  echo "Installing normal kernel"
+  if [ -f $4/vmlinux-$1 ]; then
+    mv $4/vmlinux-$1 $4/vmlinux.old
+  fi
+
+  if [ -f $4/System.map ]; then
+    mv $4/System.map $4/System.old
+  fi
+
+  cat $2 > $4/vmlinux-$1
+  cp $3 $4/System.map
+fi
+
+if [ -x /sbin/loadmap ]; then
+  /sbin/loadmap --rdev /dev/ima
+else
+  echo "You have to install it yourself"
+fi
diff --git a/arch/arm26/defconfig b/arch/arm26/defconfig
new file mode 100644
index 00000000000..c4a89703c3d
--- /dev/null
+++ b/arch/arm26/defconfig
@@ -0,0 +1,362 @@
+#
+# Automatically generated by make menuconfig: don't edit
+#
+CONFIG_ARM=y
+# CONFIG_EISA is not set
+# CONFIG_SBUS is not set
+# CONFIG_MCA is not set
+CONFIG_UID16=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
+# CONFIG_GENERIC_BUST_SPINLOCK is not set
+# CONFIG_GENERIC_ISA_DMA is not set
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+
+#
+# General setup
+#
+# CONFIG_NET is not set
+# CONFIG_SYSVIPC is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+# CONFIG_SYSCTL is not set
+
+#
+# Loadable module support
+#
+# CONFIG_MODULES is not set
+
+#
+# System Type
+#
+CONFIG_ARCH_ARC=y
+# CONFIG_ARCH_A5K is not set
+CONFIG_ARCH_ACORN=y
+# CONFIG_CPU_32 is not set
+CONFIG_CPU_26=y
+# CONFIG_PAGESIZE_16 is not set
+
+#
+# General setup
+#
+CONFIG_FIQ=y
+# CONFIG_ZBOOT_ROM is not set
+CONFIG_ZBOOT_ROM_TEXT=0
+CONFIG_ZBOOT_ROM_BSS=0
+CONFIG_FPE_NWFPE=y
+CONFIG_KCORE_ELF=y
+# CONFIG_KCORE_AOUT is not set
+# CONFIG_BINFMT_AOUT is not set
+# CONFIG_BINFMT_ELF is not set
+# CONFIG_BINFMT_MISC is not set
+CONFIG_CMDLINE=""
+# CONFIG_ALIGNMENT_TRAP is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play configuration
+#
+# CONFIG_PNP is not set
+# CONFIG_ISAPNP is not set
+# CONFIG_PNPBIOS is not set
+
+#
+# Block devices
+#
+# CONFIG_BLK_DEV_FD is not set
+# CONFIG_BLK_DEV_XD is not set
+# CONFIG_PARIDE is not set
+# CONFIG_BLK_CPQ_DA is not set
+# CONFIG_BLK_CPQ_CISS_DA is not set
+# CONFIG_CISS_SCSI_TAPE is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_UMEM is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+# CONFIG_BLK_DEV_RAM is not set
+# CONFIG_BLK_DEV_INITRD is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+# CONFIG_MD is not set
+# CONFIG_BLK_DEV_MD is not set
+# CONFIG_MD_LINEAR is not set
+# CONFIG_MD_RAID0 is not set
+# CONFIG_MD_RAID1 is not set
+# CONFIG_MD_RAID5 is not set
+# CONFIG_MD_MULTIPATH is not set
+# CONFIG_BLK_DEV_LVM is not set
+
+#
+# Acorn-specific block devices
+#
+# CONFIG_BLK_DEV_FD1772 is not set
+# CONFIG_BLK_DEV_MFM is not set
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+# CONFIG_IDE is not set
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# SCSI support
+#
+# CONFIG_SCSI is not set
+
+#
+# ISDN subsystem
+#
+
+#
+# Input device support
+#
+# CONFIG_INPUT is not set
+# CONFIG_INPUT_KEYBDEV is not set
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_TSDEV is not set
+# CONFIG_INPUT_TSLIBDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+# CONFIG_INPUT_UINPUT is not set
+# CONFIG_GAMEPORT is not set
+CONFIG_SOUND_GAMEPORT=y
+# CONFIG_GAMEPORT_NS558 is not set
+# CONFIG_GAMEPORT_L4 is not set
+# CONFIG_GAMEPORT_EMU10K1 is not set
+# CONFIG_GAMEPORT_VORTEX is not set
+# CONFIG_GAMEPORT_FM801 is not set
+# CONFIG_GAMEPORT_CS461x is not set
+# CONFIG_SERIO is not set
+# CONFIG_SERIO_I8042 is not set
+# CONFIG_SERIO_SERPORT is not set
+# CONFIG_SERIO_CT82C710 is not set
+# CONFIG_SERIO_PARKBD is not set
+# CONFIG_SERIO_ACORN is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+# CONFIG_SERIAL_8250_CONSOLE is not set
+# CONFIG_SERIAL_8250_CS is not set
+# CONFIG_SERIAL_8250_EXTENDED is not set
+# CONFIG_SERIAL_8250_MANY_PORTS is not set
+# CONFIG_SERIAL_8250_SHARE_IRQ is not set
+# CONFIG_SERIAL_8250_DETECT_IRQ is not set
+# CONFIG_SERIAL_8250_MULTIPORT is not set
+# CONFIG_SERIAL_8250_RSA is not set
+# CONFIG_ATOMWIDE_SERIAL is not set
+# CONFIG_DUALSP_SERIAL is not set
+# CONFIG_SERIAL_AMBA is not set
+# CONFIG_SERIAL_AMBA_CONSOLE is not set
+# CONFIG_SERIAL_CLPS711X is not set
+# CONFIG_SERIAL_CLPS711X_CONSOLE is not set
+# CONFIG_SERIAL_CLPS711X_OLD_NAME is not set
+# CONFIG_SERIAL_21285 is not set
+# CONFIG_SERIAL_21285_OLD is not set
+# CONFIG_SERIAL_21285_CONSOLE is not set
+# CONFIG_SERIAL_UART00 is not set
+# CONFIG_SERIAL_UART00_CONSOLE is not set
+# CONFIG_SERIAL_SA1100 is not set
+# CONFIG_SERIAL_SA1100_CONSOLE is not set
+# CONFIG_UNIX98_PTYS is not set
+
+#
+# I2C support
+#
+CONFIG_I2C=y
+CONFIG_I2C_ALGOBIT=y
+CONFIG_I2C_ALGOPCF=y
+# CONFIG_I2C_ELEKTOR is not set
+CONFIG_I2C_CHARDEV=y
+# CONFIG_I2C_PROC is not set
+
+#
+# L3 serial bus support
+#
+# CONFIG_L3 is not set
+# CONFIG_L3_ALGOBIT is not set
+# CONFIG_L3_BIT_SA1100_GPIO is not set
+# CONFIG_L3_SA1111 is not set
+# CONFIG_BIT_SA1100_GPIO is not set
+
+#
+# Mice
+#
+# CONFIG_BUSMOUSE is not set
+# CONFIG_PSMOUSE is not set
+# CONFIG_QIC02_TAPE is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_NVRAM is not set
+# CONFIG_RTC is not set
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_FTAPE is not set
+# CONFIG_AGP is not set
+# CONFIG_DRM is not set
+# CONFIG_RAW_DRIVER is not set
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# File systems
+#
+# CONFIG_QUOTA is not set
+# CONFIG_QFMT_V1 is not set
+# CONFIG_QFMT_V2 is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_REISERFS_CHECK is not set
+# CONFIG_REISERFS_PROC_INFO is not set
+# CONFIG_ADFS_FS is not set
+# CONFIG_ADFS_FS_RW is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_JBD is not set
+# CONFIG_JBD_DEBUG is not set
+# CONFIG_FAT_FS is not set
+# CONFIG_MSDOS_FS is not set
+# CONFIG_UMSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_JFFS_FS is not set
+# CONFIG_JFFS2_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_TMPFS is not set
+CONFIG_RAMFS=y
+# CONFIG_ISO9660_FS is not set
+# CONFIG_JOLIET is not set
+# CONFIG_ZISOFS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_JFS_DEBUG is not set
+# CONFIG_JFS_STATISTICS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_NTFS_FS is not set
+# CONFIG_NTFS_DEBUG is not set
+# CONFIG_HPFS_FS is not set
+CONFIG_PROC_FS=y
+# CONFIG_DEVFS_FS is not set
+# CONFIG_DEVFS_MOUNT is not set
+# CONFIG_DEVFS_DEBUG is not set
+# CONFIG_DEVPTS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_QNX4FS_RW is not set
+# CONFIG_ROMFS_FS is not set
+CONFIG_EXT2_FS=y
+# CONFIG_SYSV_FS is not set
+# CONFIG_UDF_FS is not set
+# CONFIG_UDF_RW is not set
+# CONFIG_UFS_FS is not set
+# CONFIG_UFS_FS_WRITE is not set
+# CONFIG_NCPFS_NLS is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_ZISOFS_FS is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_ACORN_PARTITION=y
+# CONFIG_ACORN_PARTITION_EESOX is not set
+# CONFIG_ACORN_PARTITION_ICS is not set
+CONFIG_ACORN_PARTITION_ADFS=y
+# CONFIG_ACORN_PARTITION_POWERTEC is not set
+CONFIG_ACORN_PARTITION_RISCIX=y
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_LDM_PARTITION is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+# CONFIG_EFI_PARTITION is not set
+# CONFIG_SMB_NLS is not set
+# CONFIG_NLS is not set
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# Multimedia Capabilities Port drivers
+#
+# CONFIG_MCP is not set
+# CONFIG_MCP_SA1100 is not set
+# CONFIG_MCP_UCB1200 is not set
+# CONFIG_MCP_UCB1200_AUDIO is not set
+# CONFIG_MCP_UCB1200_TS is not set
+
+#
+# Console Switches
+#
+# CONFIG_SWITCHES is not set
+# CONFIG_SWITCHES_SA1100 is not set
+# CONFIG_SWITCHES_UCB1X00 is not set
+
+#
+# USB support
+#
+# CONFIG_USB is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_NO_FRAME_POINTER is not set
+CONFIG_DEBUG_USER=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_SLAB=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_SPINLOCK=y
+CONFIG_DEBUG_WAITQ=y
+CONFIG_DEBUG_BUGVERBOSE=y
+CONFIG_DEBUG_ERRORS=y
+CONFIG_DEBUG_LL=y
+
+#
+# Security options
+#
+CONFIG_SECURITY_CAPABILITIES=y
+
+#
+# Library routines
+#
+CONFIG_CRC32=y
+# CONFIG_ZLIB_INFLATE is not set
+# CONFIG_ZLIB_DEFLATE is not set
diff --git a/arch/arm26/kernel/Makefile b/arch/arm26/kernel/Makefile
new file mode 100644
index 00000000000..ee9fb49fdb7
--- /dev/null
+++ b/arch/arm26/kernel/Makefile
@@ -0,0 +1,17 @@
+#
+# Makefile for the linux kernel.
+#
+
+# Object file lists.
+
+AFLAGS_head.o           := -DTEXTADDR=$(TEXTADDR)
+
+obj-y		:= compat.o dma.o entry.o irq.o process.o ptrace.o       \
+		   semaphore.o setup.o signal.o sys_arm.o time.o traps.o \
+		   ecard.o dma.o ecard.o fiq.o time.o
+
+extra-y		:= head.o init_task.o vmlinux.lds
+
+obj-$(CONFIG_FIQ)		+= fiq.o
+obj-$(CONFIG_MODULES)		+= armksyms.o
+
diff --git a/arch/arm26/kernel/armksyms.c b/arch/arm26/kernel/armksyms.c
new file mode 100644
index 00000000000..35514b398e2
--- /dev/null
+++ b/arch/arm26/kernel/armksyms.c
@@ -0,0 +1,220 @@
+/*
+ *  linux/arch/arm26/kernel/armksyms.c
+ *
+ *  Copyright (C) 2003 Ian Molton
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/user.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/delay.h>
+#include <linux/in6.h>
+#include <linux/interrupt.h>
+#include <linux/pm.h>
+#include <linux/tty.h>
+#include <linux/vt_kern.h>
+#include <linux/smp_lock.h>
+#include <linux/syscalls.h>
+
+#include <asm/byteorder.h>
+#include <asm/elf.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/processor.h>
+#include <asm/semaphore.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/checksum.h>
+#include <asm/mach-types.h>
+
+extern void dump_thread(struct pt_regs *, struct user *);
+extern int dump_fpu(struct pt_regs *, struct user_fp_struct *);
+extern void inswb(unsigned int port, void *to, int len);
+extern void outswb(unsigned int port, const void *to, int len);
+
+extern void __bad_xchg(volatile void *ptr, int size);
+
+/*
+ * libgcc functions - functions that are used internally by the
+ * compiler...  (prototypes are not correct though, but that
+ * doesn't really matter since they're not versioned).
+ */
+extern void __ashldi3(void);
+extern void __ashrdi3(void);
+extern void __divsi3(void);
+extern void __lshrdi3(void);
+extern void __modsi3(void);
+extern void __muldi3(void);
+extern void __ucmpdi2(void);
+extern void __udivdi3(void);
+extern void __umoddi3(void);
+extern void __udivmoddi4(void);
+extern void __udivsi3(void);
+extern void __umodsi3(void);
+extern void abort(void);
+
+extern void ret_from_exception(void);
+extern void fpundefinstr(void);
+extern void fp_enter(void);
+
+/*
+ * This has a special calling convention; it doesn't
+ * modify any of the usual registers, except for LR.
+ * FIXME - we used to use our own local version - looks to be in kernel/softirq now
+ */
+//extern void __do_softirq(void);
+
+#define EXPORT_SYMBOL_ALIAS(sym,orig)		\
+ const char __kstrtab_##sym[]			\
+  __attribute__((section(".kstrtab"))) =	\
+    __MODULE_STRING(sym);			\
+ const struct module_symbol __ksymtab_##sym	\
+  __attribute__((section("__ksymtab"))) =	\
+    { (unsigned long)&orig, __kstrtab_##sym };
+
+/*
+ * floating point math emulator support.
+ * These symbols will never change their calling convention...
+ */
+EXPORT_SYMBOL_ALIAS(kern_fp_enter,fp_enter);
+EXPORT_SYMBOL_ALIAS(fp_printk,printk);
+EXPORT_SYMBOL_ALIAS(fp_send_sig,send_sig);
+
+EXPORT_SYMBOL(fpundefinstr);
+EXPORT_SYMBOL(ret_from_exception);
+
+#ifdef CONFIG_VT
+EXPORT_SYMBOL(kd_mksound);
+#endif
+
+//EXPORT_SYMBOL(__do_softirq);
+
+	/* platform dependent support */
+EXPORT_SYMBOL(dump_thread);
+EXPORT_SYMBOL(dump_fpu);
+EXPORT_SYMBOL(udelay);
+EXPORT_SYMBOL(kernel_thread);
+EXPORT_SYMBOL(system_rev);
+EXPORT_SYMBOL(system_serial_low);
+EXPORT_SYMBOL(system_serial_high);
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+EXPORT_SYMBOL(__bug);
+#endif
+EXPORT_SYMBOL(__bad_xchg);
+EXPORT_SYMBOL(__readwrite_bug);
+EXPORT_SYMBOL(enable_irq);
+EXPORT_SYMBOL(disable_irq);
+EXPORT_SYMBOL(set_irq_type);
+EXPORT_SYMBOL(pm_idle);
+EXPORT_SYMBOL(pm_power_off);
+
+	/* processor dependencies */
+EXPORT_SYMBOL(__machine_arch_type);
+
+	/* networking */
+EXPORT_SYMBOL(csum_partial_copy_nocheck);
+EXPORT_SYMBOL(__csum_ipv6_magic);
+
+	/* io */
+#ifndef __raw_readsb
+EXPORT_SYMBOL(__raw_readsb);
+#endif
+#ifndef __raw_readsw
+EXPORT_SYMBOL(__raw_readsw);
+#endif
+#ifndef __raw_readsl
+EXPORT_SYMBOL(__raw_readsl);
+#endif
+#ifndef __raw_writesb
+EXPORT_SYMBOL(__raw_writesb);
+#endif
+#ifndef __raw_writesw
+EXPORT_SYMBOL(__raw_writesw);
+#endif
+#ifndef __raw_writesl
+EXPORT_SYMBOL(__raw_writesl);
+#endif
+
+	/* string / mem functions */
+EXPORT_SYMBOL(strcpy);
+EXPORT_SYMBOL(strncpy);
+EXPORT_SYMBOL(strcat);
+EXPORT_SYMBOL(strncat);
+EXPORT_SYMBOL(strcmp);
+EXPORT_SYMBOL(strncmp);
+EXPORT_SYMBOL(strchr);
+EXPORT_SYMBOL(strlen);
+EXPORT_SYMBOL(strnlen);
+EXPORT_SYMBOL(strpbrk);
+EXPORT_SYMBOL(strrchr);
+EXPORT_SYMBOL(strstr);
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memmove);
+EXPORT_SYMBOL(memcmp);
+EXPORT_SYMBOL(memscan);
+EXPORT_SYMBOL(__memzero);
+
+	/* user mem (segment) */
+EXPORT_SYMBOL(uaccess_kernel);
+EXPORT_SYMBOL(uaccess_user);
+
+EXPORT_SYMBOL(__get_user_1);
+EXPORT_SYMBOL(__get_user_2);
+EXPORT_SYMBOL(__get_user_4);
+EXPORT_SYMBOL(__get_user_8);
+
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+
+	/* gcc lib functions */
+EXPORT_SYMBOL(__ashldi3);
+EXPORT_SYMBOL(__ashrdi3);
+EXPORT_SYMBOL(__divsi3);
+EXPORT_SYMBOL(__lshrdi3);
+EXPORT_SYMBOL(__modsi3);
+EXPORT_SYMBOL(__muldi3);
+EXPORT_SYMBOL(__ucmpdi2);
+EXPORT_SYMBOL(__udivdi3);
+EXPORT_SYMBOL(__umoddi3);
+EXPORT_SYMBOL(__udivmoddi4);
+EXPORT_SYMBOL(__udivsi3);
+EXPORT_SYMBOL(__umodsi3);
+
+	/* bitops */
+EXPORT_SYMBOL(_set_bit_le);
+EXPORT_SYMBOL(_test_and_set_bit_le);
+EXPORT_SYMBOL(_clear_bit_le);
+EXPORT_SYMBOL(_test_and_clear_bit_le);
+EXPORT_SYMBOL(_change_bit_le);
+EXPORT_SYMBOL(_test_and_change_bit_le);
+EXPORT_SYMBOL(_find_first_zero_bit_le);
+EXPORT_SYMBOL(_find_next_zero_bit_le);
+
+	/* elf */
+EXPORT_SYMBOL(elf_platform);
+EXPORT_SYMBOL(elf_hwcap);
+
+	/* syscalls */
+EXPORT_SYMBOL(sys_write);
+EXPORT_SYMBOL(sys_read);
+EXPORT_SYMBOL(sys_lseek);
+EXPORT_SYMBOL(sys_open);
+EXPORT_SYMBOL(sys_exit);
+EXPORT_SYMBOL(sys_wait4);
+
+EXPORT_SYMBOL(get_wchan);
+
+#ifdef CONFIG_PREEMPT
+EXPORT_SYMBOL(kernel_flag);
+#endif
diff --git a/arch/arm26/kernel/asm-offsets.c b/arch/arm26/kernel/asm-offsets.c
new file mode 100644
index 00000000000..4ccacaef94d
--- /dev/null
+++ b/arch/arm26/kernel/asm-offsets.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 1995-2001 Russell King
+ *               2001-2002 Keith Owens
+ *               2003      Ian Molton
+ *     
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+
+/*
+ * Make sure that the compiler and target are compatible.
+ */
+#if defined(__APCS_32__) && defined(CONFIG_CPU_26)
+#error Sorry, your compiler targets APCS-32 but this kernel requires APCS-26
+#endif
+#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 95)
+#error Sorry, your compiler is known to miscompile kernels.  Only use gcc 2.95.3 and later.
+#endif
+#if __GNUC__ == 2 && __GNUC_MINOR__ == 95
+/* shame we can't detect the .1 or .2 releases */
+#warning GCC 2.95.2 and earlier miscompiles kernels.
+#endif
+
+/* Use marker if you need to separate the values later */
+
+#define DEFINE(sym, val) \
+        asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+
+#define BLANK() asm volatile("\n->" : : )
+
+int main(void)
+{
+  DEFINE(TSK_ACTIVE_MM,		offsetof(struct task_struct, active_mm));
+  BLANK();
+  DEFINE(VMA_VM_MM,		offsetof(struct vm_area_struct, vm_mm));
+  DEFINE(VMA_VM_FLAGS,		offsetof(struct vm_area_struct, vm_flags));
+  BLANK();
+  DEFINE(VM_EXEC,	       	VM_EXEC);
+  BLANK();
+  BLANK();
+  DEFINE(PAGE_PRESENT,		_PAGE_PRESENT);
+  DEFINE(PAGE_READONLY,		_PAGE_READONLY);
+  DEFINE(PAGE_NOT_USER,		_PAGE_NOT_USER);
+  DEFINE(PAGE_OLD,		_PAGE_OLD);
+  DEFINE(PAGE_CLEAN,		_PAGE_CLEAN);
+  BLANK();
+  DEFINE(PAGE_SZ,	       	PAGE_SIZE);
+  BLANK();
+  DEFINE(SYS_ERROR0,		0x9f0000);
+  return 0; 
+}
diff --git a/arch/arm26/kernel/calls.S b/arch/arm26/kernel/calls.S
new file mode 100644
index 00000000000..e3d276827c8
--- /dev/null
+++ b/arch/arm26/kernel/calls.S
@@ -0,0 +1,265 @@
+/*
+ *  linux/arch/arm26/kernel/calls.S
+ *
+ *  Copyright (C) 2003 Ian Molton
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  FIXME
+ *  This file is included twice in entry.S which may not be necessary
+ */
+
+//FIXME - clearly NR_syscalls is never defined here
+
+#ifndef NR_syscalls
+#define NR_syscalls 256
+#else
+
+__syscall_start:
+/* 0 */		.long	sys_ni_syscall
+		.long	sys_exit
+		.long	sys_fork_wrapper
+		.long	sys_read
+		.long	sys_write
+/* 5 */		.long	sys_open
+		.long	sys_close
+		.long	sys_ni_syscall		/* was sys_waitpid */
+		.long	sys_creat
+		.long	sys_link
+/* 10 */	.long	sys_unlink
+		.long	sys_execve_wrapper
+		.long	sys_chdir
+		.long	sys_time		/* used by libc4 */
+		.long	sys_mknod
+/* 15 */	.long	sys_chmod
+		.long	sys_lchown16
+		.long	sys_ni_syscall		/* was sys_break */
+		.long	sys_ni_syscall		/* was sys_stat */
+		.long	sys_lseek
+/* 20 */	.long	sys_getpid
+		.long	sys_mount
+		.long	sys_oldumount		/* used by libc4 */
+		.long	sys_setuid16
+		.long	sys_getuid16
+/* 25 */	.long	sys_stime
+		.long	sys_ptrace
+		.long	sys_alarm		/* used by libc4 */
+		.long	sys_ni_syscall		/* was sys_fstat */
+		.long	sys_pause
+/* 30 */	.long	sys_utime		/* used by libc4 */
+		.long	sys_ni_syscall		/* was sys_stty */
+		.long	sys_ni_syscall		/* was sys_getty */
+		.long	sys_access
+		.long	sys_nice
+/* 35 */	.long	sys_ni_syscall		/* was sys_ftime */
+		.long	sys_sync
+		.long	sys_kill
+		.long	sys_rename
+		.long	sys_mkdir
+/* 40 */	.long	sys_rmdir
+		.long	sys_dup
+		.long	sys_pipe
+		.long	sys_times
+		.long	sys_ni_syscall		/* was sys_prof */
+/* 45 */	.long	sys_brk
+		.long	sys_setgid16
+		.long	sys_getgid16
+		.long	sys_ni_syscall		/* was sys_signal */
+		.long	sys_geteuid16
+/* 50 */	.long	sys_getegid16
+		.long	sys_acct
+		.long	sys_umount
+		.long	sys_ni_syscall		/* was sys_lock */
+		.long	sys_ioctl
+/* 55 */	.long	sys_fcntl
+		.long	sys_ni_syscall		/* was sys_mpx */
+		.long	sys_setpgid
+		.long	sys_ni_syscall		/* was sys_ulimit */
+		.long	sys_ni_syscall		/* was sys_olduname */
+/* 60 */	.long	sys_umask
+		.long	sys_chroot
+		.long	sys_ustat
+		.long	sys_dup2
+		.long	sys_getppid
+/* 65 */	.long	sys_getpgrp
+		.long	sys_setsid
+		.long	sys_sigaction
+		.long	sys_ni_syscall		/* was sys_sgetmask */
+		.long	sys_ni_syscall		/* was sys_ssetmask */
+/* 70 */	.long	sys_setreuid16
+		.long	sys_setregid16
+		.long	sys_sigsuspend_wrapper
+		.long	sys_sigpending
+		.long	sys_sethostname
+/* 75 */	.long	sys_setrlimit
+		.long	sys_old_getrlimit	/* used by libc4 */
+		.long	sys_getrusage
+		.long	sys_gettimeofday
+		.long	sys_settimeofday
+/* 80 */	.long	sys_getgroups16
+		.long	sys_setgroups16
+		.long	old_select		/* used by libc4 */
+		.long	sys_symlink
+		.long	sys_ni_syscall		/* was sys_lstat */
+/* 85 */	.long	sys_readlink
+		.long	sys_uselib
+		.long	sys_swapon
+		.long	sys_reboot
+		.long	old_readdir		/* used by libc4 */
+/* 90 */	.long	old_mmap		/* used by libc4 */
+		.long	sys_munmap
+		.long	sys_truncate
+		.long	sys_ftruncate
+		.long	sys_fchmod
+/* 95 */	.long	sys_fchown16
+		.long	sys_getpriority
+		.long	sys_setpriority
+		.long	sys_ni_syscall		/* was sys_profil */
+		.long	sys_statfs
+/* 100 */	.long	sys_fstatfs
+		.long	sys_ni_syscall
+		.long	sys_socketcall
+		.long	sys_syslog
+		.long	sys_setitimer
+/* 105 */	.long	sys_getitimer
+		.long	sys_newstat
+		.long	sys_newlstat
+		.long	sys_newfstat
+		.long	sys_ni_syscall		/* was sys_uname */
+/* 110 */	.long	sys_ni_syscall		/* was sys_iopl */
+		.long	sys_vhangup
+		.long	sys_ni_syscall
+		.long	sys_syscall		/* call a syscall */
+		.long	sys_wait4
+/* 115 */	.long	sys_swapoff
+		.long	sys_sysinfo
+		.long	sys_ipc
+		.long	sys_fsync
+		.long	sys_sigreturn_wrapper
+/* 120 */	.long	sys_clone_wapper
+		.long	sys_setdomainname
+		.long	sys_newuname
+		.long	sys_ni_syscall
+		.long	sys_adjtimex
+/* 125 */	.long	sys_mprotect
+		.long	sys_sigprocmask
+		.long	sys_ni_syscall  /* WAS: sys_create_module */
+		.long	sys_init_module
+		.long	sys_delete_module
+/* 130 */	.long	sys_ni_syscall  /* WAS: sys_get_kernel_syms */
+		.long	sys_quotactl
+		.long	sys_getpgid
+		.long	sys_fchdir
+		.long	sys_bdflush
+/* 135 */	.long	sys_sysfs
+		.long	sys_personality
+		.long	sys_ni_syscall		/* .long	_sys_afs_syscall */
+		.long	sys_setfsuid16
+		.long	sys_setfsgid16
+/* 140 */	.long	sys_llseek
+		.long	sys_getdents
+		.long	sys_select
+		.long	sys_flock
+		.long	sys_msync
+/* 145 */	.long	sys_readv
+		.long	sys_writev
+		.long	sys_getsid
+		.long	sys_fdatasync
+		.long	sys_sysctl
+/* 150 */	.long	sys_mlock
+		.long	sys_munlock
+		.long	sys_mlockall
+		.long	sys_munlockall
+		.long	sys_sched_setparam
+/* 155 */	.long	sys_sched_getparam
+		.long	sys_sched_setscheduler
+		.long	sys_sched_getscheduler
+		.long	sys_sched_yield
+		.long	sys_sched_get_priority_max
+/* 160 */	.long	sys_sched_get_priority_min
+		.long	sys_sched_rr_get_interval
+		.long	sys_nanosleep
+		.long	sys_arm_mremap
+		.long	sys_setresuid16
+/* 165 */	.long	sys_getresuid16
+		.long	sys_ni_syscall
+		.long	sys_ni_syscall /* WAS: sys_query_module */
+		.long	sys_poll
+		.long	sys_nfsservctl
+/* 170 */	.long	sys_setresgid16
+		.long	sys_getresgid16
+		.long	sys_prctl
+		.long	sys_rt_sigreturn_wrapper
+		.long	sys_rt_sigaction
+/* 175 */	.long	sys_rt_sigprocmask
+		.long	sys_rt_sigpending
+		.long	sys_rt_sigtimedwait
+		.long	sys_rt_sigqueueinfo
+		.long	sys_rt_sigsuspend_wrapper
+/* 180 */	.long	sys_pread64
+		.long	sys_pwrite64
+		.long	sys_chown16
+		.long	sys_getcwd
+		.long	sys_capget
+/* 185 */	.long	sys_capset
+		.long	sys_sigaltstack_wrapper
+		.long	sys_sendfile
+		.long	sys_ni_syscall
+		.long	sys_ni_syscall
+/* 190 */	.long	sys_vfork_wrapper
+		.long	sys_getrlimit
+		.long	sys_mmap2
+		.long	sys_truncate64
+		.long	sys_ftruncate64
+/* 195 */	.long	sys_stat64
+		.long	sys_lstat64
+		.long	sys_fstat64
+		.long	sys_lchown
+		.long	sys_getuid
+/* 200 */	.long	sys_getgid
+		.long	sys_geteuid
+		.long	sys_getegid
+		.long	sys_setreuid
+		.long	sys_setregid
+/* 205 */	.long	sys_getgroups
+		.long	sys_setgroups
+		.long	sys_fchown
+		.long	sys_setresuid
+		.long	sys_getresuid
+/* 210 */	.long	sys_setresgid
+		.long	sys_getresgid
+		.long	sys_chown
+		.long	sys_setuid
+		.long	sys_setgid
+/* 215 */	.long	sys_setfsuid
+		.long	sys_setfsgid
+		.long	sys_getdents64
+		.long	sys_pivot_root
+		.long	sys_mincore
+/* 220 */	.long	sys_madvise
+		.long	sys_fcntl64
+		.long	sys_ni_syscall /* TUX */
+		.long	sys_ni_syscall /* WAS: sys_security */
+		.long	sys_gettid
+/* 225 */	.long	sys_readahead
+		.long	sys_setxattr
+		.long	sys_lsetxattr
+		.long	sys_fsetxattr
+		.long	sys_getxattr
+/* 230 */	.long	sys_lgetxattr
+		.long	sys_fgetxattr
+		.long	sys_listxattr
+		.long	sys_llistxattr
+		.long	sys_flistxattr
+/* 235 */	.long	sys_removexattr
+		.long	sys_lremovexattr
+		.long	sys_fremovexattr
+		.long	sys_tkill
+__syscall_end:
+
+		.rept	NR_syscalls - (__syscall_end - __syscall_start) / 4
+			.long	sys_ni_syscall
+		.endr
+#endif
diff --git a/arch/arm26/kernel/compat.c b/arch/arm26/kernel/compat.c
new file mode 100644
index 00000000000..db0310db899
--- /dev/null
+++ b/arch/arm26/kernel/compat.c
@@ -0,0 +1,174 @@
+/*
+ *  linux/arch/arm26/kernel/compat.c
+ *
+ *  Copyright (C) 2001 Russell King
+ *                2003 Ian Molton
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * We keep the old params compatibility cruft in one place (here)
+ * so we don't end up with lots of mess around other places.
+ *
+ * NOTE:
+ *  The old struct param_struct is deprecated, but it will be kept in
+ *  the kernel for 5 years from now (2001). This will allow boot loaders
+ *  to convert to the new struct tag way.
+ */
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/init.h>
+
+#include <asm/setup.h>
+#include <asm/mach-types.h>
+#include <asm/page.h>
+
+//#include <asm/arch.h>
+//#include <asm/mach/irq.h>
+
+/*
+ * Usage:
+ *  - do not go blindly adding fields, add them at the end
+ *  - when adding fields, don't rely on the address until
+ *    a patch from me has been released
+ *  - unused fields should be zero (for future expansion)
+ *  - this structure is relatively short-lived - only
+ *    guaranteed to contain useful data in setup_arch()
+ *
+ * This is the old deprecated way to pass parameters to the kernel
+ */
+struct param_struct {
+    union {
+	struct {
+	    unsigned long page_size;		/*  0 */
+	    unsigned long nr_pages;		/*  4 */
+	    unsigned long ramdisk_size;		/*  8 */
+	    unsigned long flags;		/* 12 */
+#define FLAG_READONLY	1
+#define FLAG_RDLOAD	4
+#define FLAG_RDPROMPT	8
+	    unsigned long rootdev;		/* 16 */
+	    unsigned long video_num_cols;	/* 20 */
+	    unsigned long video_num_rows;	/* 24 */
+	    unsigned long video_x;		/* 28 */
+	    unsigned long video_y;		/* 32 */
+	    unsigned long memc_control_reg;	/* 36 */
+	    unsigned char sounddefault;		/* 40 */
+	    unsigned char adfsdrives;		/* 41 */
+	    unsigned char bytes_per_char_h;	/* 42 */
+	    unsigned char bytes_per_char_v;	/* 43 */
+	    unsigned long pages_in_bank[4];	/* 44 */
+	    unsigned long pages_in_vram;	/* 60 */
+	    unsigned long initrd_start;		/* 64 */
+	    unsigned long initrd_size;		/* 68 */
+	    unsigned long rd_start;		/* 72 */
+	    unsigned long system_rev;		/* 76 */
+	    unsigned long system_serial_low;	/* 80 */
+	    unsigned long system_serial_high;	/* 84 */
+	    unsigned long mem_fclk_21285;       /* 88 */
+	} s;
+	char unused[256];
+    } u1;
+    union {
+	char paths[8][128];
+	struct {
+	    unsigned long magic;
+	    char n[1024 - sizeof(unsigned long)];
+	} s;
+    } u2;
+    char commandline[COMMAND_LINE_SIZE];
+};
+
+static struct tag * __init memtag(struct tag *tag, unsigned long start, unsigned long size)
+{
+	tag = tag_next(tag);
+	tag->hdr.tag = ATAG_MEM;
+	tag->hdr.size = tag_size(tag_mem32);
+	tag->u.mem.size = size;
+	tag->u.mem.start = start;
+
+	return tag;
+}
+
+static void __init build_tag_list(struct param_struct *params, void *taglist)
+{
+	struct tag *tag = taglist;
+
+	if (params->u1.s.page_size != PAGE_SIZE) {
+		printk(KERN_WARNING "Warning: bad configuration page, "
+		       "trying to continue\n");
+		return;
+	}
+
+	printk(KERN_DEBUG "Converting old-style param struct to taglist\n");
+
+	tag->hdr.tag  = ATAG_CORE;
+	tag->hdr.size = tag_size(tag_core);
+	tag->u.core.flags = params->u1.s.flags & FLAG_READONLY;
+	tag->u.core.pagesize = params->u1.s.page_size;
+	tag->u.core.rootdev = params->u1.s.rootdev;
+
+	tag = tag_next(tag);
+	tag->hdr.tag = ATAG_RAMDISK;
+	tag->hdr.size = tag_size(tag_ramdisk);
+	tag->u.ramdisk.flags = (params->u1.s.flags & FLAG_RDLOAD ? 1 : 0) |
+			       (params->u1.s.flags & FLAG_RDPROMPT ? 2 : 0);
+	tag->u.ramdisk.size  = params->u1.s.ramdisk_size;
+	tag->u.ramdisk.start = params->u1.s.rd_start;
+
+	tag = tag_next(tag);
+	tag->hdr.tag = ATAG_INITRD;
+	tag->hdr.size = tag_size(tag_initrd);
+	tag->u.initrd.start = params->u1.s.initrd_start;
+	tag->u.initrd.size  = params->u1.s.initrd_size;
+
+	tag = tag_next(tag);
+	tag->hdr.tag = ATAG_SERIAL;
+	tag->hdr.size = tag_size(tag_serialnr);
+	tag->u.serialnr.low = params->u1.s.system_serial_low;
+	tag->u.serialnr.high = params->u1.s.system_serial_high;
+
+	tag = tag_next(tag);
+	tag->hdr.tag = ATAG_REVISION;
+	tag->hdr.size = tag_size(tag_revision);
+	tag->u.revision.rev = params->u1.s.system_rev;
+
+	tag = memtag(tag, PHYS_OFFSET, params->u1.s.nr_pages * PAGE_SIZE);
+
+	tag = tag_next(tag);
+	tag->hdr.tag = ATAG_ACORN;
+	tag->hdr.size = tag_size(tag_acorn);
+	tag->u.acorn.memc_control_reg = params->u1.s.memc_control_reg;
+	tag->u.acorn.vram_pages       = params->u1.s.pages_in_vram;
+	tag->u.acorn.sounddefault     = params->u1.s.sounddefault;
+	tag->u.acorn.adfsdrives       = params->u1.s.adfsdrives;
+
+	tag = tag_next(tag);
+	tag->hdr.tag = ATAG_CMDLINE;
+	tag->hdr.size = (strlen(params->commandline) + 3 +
+			 sizeof(struct tag_header)) >> 2;
+	strcpy(tag->u.cmdline.cmdline, params->commandline);
+
+	tag = tag_next(tag);
+	tag->hdr.tag = ATAG_NONE;
+	tag->hdr.size = 0;
+
+	memmove(params, taglist, ((int)tag) - ((int)taglist) +
+				 sizeof(struct tag_header));
+}
+
+void __init convert_to_tag_list(struct tag *tags)
+{
+	struct param_struct *params = (struct param_struct *)tags;
+	build_tag_list(params, &params->u2);
+}
+
+void __init squash_mem_tags(struct tag *tag)
+{
+	for (; tag->hdr.size; tag = tag_next(tag))
+		if (tag->hdr.tag == ATAG_MEM)
+			tag->hdr.tag = ATAG_NONE;
+}
diff --git a/arch/arm26/kernel/dma.c b/arch/arm26/kernel/dma.c
new file mode 100644
index 00000000000..80b5a774d90
--- /dev/null
+++ b/arch/arm26/kernel/dma.c
@@ -0,0 +1,273 @@
+/*
+ *  linux/arch/arm26/kernel/dma.c
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *                2003      Ian Molton
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Front-end to the DMA handling.  This handles the allocation/freeing
+ *  of DMA channels, and provides a unified interface to the machines
+ *  DMA facilities.
+ */
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/mman.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+
+#include <asm/dma.h>
+
+DEFINE_SPINLOCK(dma_spin_lock);
+
+static dma_t dma_chan[MAX_DMA_CHANNELS];
+
+/*
+ * Get dma list for /proc/dma
+ */
+int get_dma_list(char *buf)
+{
+	dma_t *dma;
+	char *p = buf;
+	int i;
+
+	for (i = 0, dma = dma_chan; i < MAX_DMA_CHANNELS; i++, dma++)
+		if (dma->lock)
+			p += sprintf(p, "%2d: %14s %s\n", i,
+				     dma->d_ops->type, dma->device_id);
+
+	return p - buf;
+}
+
+/*
+ * Request DMA channel
+ *
+ * On certain platforms, we have to allocate an interrupt as well...
+ */
+int request_dma(dmach_t channel, const char *device_id)
+{
+	dma_t *dma = dma_chan + channel;
+	int ret;
+
+	if (channel >= MAX_DMA_CHANNELS || !dma->d_ops)
+		goto bad_dma;
+
+	if (xchg(&dma->lock, 1) != 0)
+		goto busy;
+
+	dma->device_id = device_id;
+	dma->active    = 0;
+	dma->invalid   = 1;
+
+	ret = 0;
+	if (dma->d_ops->request)
+		ret = dma->d_ops->request(channel, dma);
+
+	if (ret)
+		xchg(&dma->lock, 0);
+
+	return ret;
+
+bad_dma:
+	printk(KERN_ERR "dma: trying to allocate DMA%d\n", channel);
+	return -EINVAL;
+
+busy:
+	return -EBUSY;
+}
+
+/*
+ * Free DMA channel
+ *
+ * On certain platforms, we have to free interrupt as well...
+ */
+void free_dma(dmach_t channel)
+{
+	dma_t *dma = dma_chan + channel;
+
+	if (channel >= MAX_DMA_CHANNELS || !dma->d_ops)
+		goto bad_dma;
+
+	if (dma->active) {
+		printk(KERN_ERR "dma%d: freeing active DMA\n", channel);
+		dma->d_ops->disable(channel, dma);
+		dma->active = 0;
+	}
+
+	if (xchg(&dma->lock, 0) != 0) {
+		if (dma->d_ops->free)
+			dma->d_ops->free(channel, dma);
+		return;
+	}
+
+	printk(KERN_ERR "dma%d: trying to free free DMA\n", channel);
+	return;
+
+bad_dma:
+	printk(KERN_ERR "dma: trying to free DMA%d\n", channel);
+}
+
+/* Set DMA Scatter-Gather list
+ */
+void set_dma_sg (dmach_t channel, struct scatterlist *sg, int nr_sg)
+{
+	dma_t *dma = dma_chan + channel;
+
+	if (dma->active)
+		printk(KERN_ERR "dma%d: altering DMA SG while "
+		       "DMA active\n", channel);
+
+	dma->sg = sg;
+	dma->sgcount = nr_sg;
+	dma->using_sg = 1;
+	dma->invalid = 1;
+}
+
+/* Set DMA address
+ *
+ * Copy address to the structure, and set the invalid bit
+ */
+void set_dma_addr (dmach_t channel, unsigned long physaddr)
+{
+	dma_t *dma = dma_chan + channel;
+
+	if (dma->active)
+		printk(KERN_ERR "dma%d: altering DMA address while "
+		       "DMA active\n", channel);
+
+	dma->sg = &dma->buf;
+	dma->sgcount = 1;
+	dma->buf.__address = (char *)physaddr;//FIXME - not pretty
+	dma->using_sg = 0;
+	dma->invalid = 1;
+}
+
+/* Set DMA byte count
+ *
+ * Copy address to the structure, and set the invalid bit
+ */
+void set_dma_count (dmach_t channel, unsigned long count)
+{
+	dma_t *dma = dma_chan + channel;
+
+	if (dma->active)
+		printk(KERN_ERR "dma%d: altering DMA count while "
+		       "DMA active\n", channel);
+
+	dma->sg = &dma->buf;
+	dma->sgcount = 1;
+	dma->buf.length = count;
+	dma->using_sg = 0;
+	dma->invalid = 1;
+}
+
+/* Set DMA direction mode
+ */
+void set_dma_mode (dmach_t channel, dmamode_t mode)
+{
+	dma_t *dma = dma_chan + channel;
+
+	if (dma->active)
+		printk(KERN_ERR "dma%d: altering DMA mode while "
+		       "DMA active\n", channel);
+
+	dma->dma_mode = mode;
+	dma->invalid = 1;
+}
+
+/* Enable DMA channel
+ */
+void enable_dma (dmach_t channel)
+{
+	dma_t *dma = dma_chan + channel;
+
+	if (!dma->lock)
+		goto free_dma;
+
+	if (dma->active == 0) {
+		dma->active = 1;
+		dma->d_ops->enable(channel, dma);
+	}
+	return;
+
+free_dma:
+	printk(KERN_ERR "dma%d: trying to enable free DMA\n", channel);
+	BUG();
+}
+
+/* Disable DMA channel
+ */
+void disable_dma (dmach_t channel)
+{
+	dma_t *dma = dma_chan + channel;
+
+	if (!dma->lock)
+		goto free_dma;
+
+	if (dma->active == 1) {
+		dma->active = 0;
+		dma->d_ops->disable(channel, dma);
+	}
+	return;
+
+free_dma:
+	printk(KERN_ERR "dma%d: trying to disable free DMA\n", channel);
+	BUG();
+}
+
+/*
+ * Is the specified DMA channel active?
+ */
+int dma_channel_active(dmach_t channel)
+{
+	return dma_chan[channel].active;
+}
+
+void set_dma_page(dmach_t channel, char pagenr)
+{
+	printk(KERN_ERR "dma%d: trying to set_dma_page\n", channel);
+}
+
+void set_dma_speed(dmach_t channel, int cycle_ns)
+{
+	dma_t *dma = dma_chan + channel;
+	int ret = 0;
+
+	if (dma->d_ops->setspeed)
+		ret = dma->d_ops->setspeed(channel, dma, cycle_ns);
+	dma->speed = ret;
+}
+
+int get_dma_residue(dmach_t channel)
+{
+	dma_t *dma = dma_chan + channel;
+	int ret = 0;
+
+	if (dma->d_ops->residue)
+		ret = dma->d_ops->residue(channel, dma);
+
+	return ret;
+}
+
+void __init init_dma(void)
+{
+	arch_dma_init(dma_chan);
+}
+
+EXPORT_SYMBOL(request_dma);
+EXPORT_SYMBOL(free_dma);
+EXPORT_SYMBOL(enable_dma);
+EXPORT_SYMBOL(disable_dma);
+EXPORT_SYMBOL(set_dma_addr);
+EXPORT_SYMBOL(set_dma_count);
+EXPORT_SYMBOL(set_dma_mode);
+EXPORT_SYMBOL(set_dma_page);
+EXPORT_SYMBOL(get_dma_residue);
+EXPORT_SYMBOL(set_dma_sg);
+EXPORT_SYMBOL(set_dma_speed);
+
+EXPORT_SYMBOL(dma_spin_lock);
diff --git a/arch/arm26/kernel/ecard.c b/arch/arm26/kernel/ecard.c
new file mode 100644
index 00000000000..824c6b571ad
--- /dev/null
+++ b/arch/arm26/kernel/ecard.c
@@ -0,0 +1,850 @@
+/*
+ *  linux/arch/arm26/kernel/ecard.c
+ *
+ *  Copyright 1995-2001 Russell King
+ *  Copyright 2003 Ian Molton
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Find all installed expansion cards, and handle interrupts from them.
+ *
+ *  Created from information from Acorns RiscOS3 PRMs
+ *  15-Jun-2003 IM      Modified from ARM32 (RiscPC capable) version
+ *  10-Jan-1999	RMK	Run loaders in a simulated RISC OS environment.
+ *  06-May-1997	RMK	Added blacklist for cards whose loader doesn't work.
+ *  12-Sep-1997	RMK	Created new handling of interrupt enables/disables
+ *			- cards can now register their own routine to control
+ *			interrupts (recommended).
+ *  29-Sep-1997	RMK	Expansion card interrupt hardware not being re-enabled
+ *			on reset from Linux. (Caused cards not to respond
+ *			under RiscOS without hard reset).
+ *
+ */
+#define ECARD_C
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/reboot.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/device.h>
+#include <linux/init.h>
+
+#include <asm/dma.h>
+#include <asm/ecard.h>
+#include <asm/hardware.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/mmu_context.h>
+#include <asm/irqchip.h>
+#include <asm/tlbflush.h>
+
+enum req {
+	req_readbytes,
+	req_reset
+};
+
+struct ecard_request {
+	enum req	req;
+	ecard_t		*ec;
+	unsigned int	address;
+	unsigned int	length;
+	unsigned int	use_loader;
+	void		*buffer;
+};
+
+struct expcard_blacklist {
+	unsigned short	 manufacturer;
+	unsigned short	 product;
+	const char	*type;
+};
+
+static ecard_t *cards;
+static ecard_t *slot_to_expcard[MAX_ECARDS];
+static unsigned int ectcr;
+
+/* List of descriptions of cards which don't have an extended
+ * identification, or chunk directories containing a description.
+ */
+static struct expcard_blacklist __initdata blacklist[] = {
+	{ MANU_ACORN, PROD_ACORN_ETHER1, "Acorn Ether1" }
+};
+
+asmlinkage extern int
+ecard_loader_reset(volatile unsigned char *pa, loader_t loader);
+asmlinkage extern int
+ecard_loader_read(int off, volatile unsigned char *pa, loader_t loader);
+
+static const struct ecard_id *
+ecard_match_device(const struct ecard_id *ids, struct expansion_card *ec);
+
+static inline unsigned short
+ecard_getu16(unsigned char *v)
+{
+	return v[0] | v[1] << 8;
+}
+
+static inline signed long
+ecard_gets24(unsigned char *v)
+{
+	return v[0] | v[1] << 8 | v[2] << 16 | ((v[2] & 0x80) ? 0xff000000 : 0);
+}
+
+static inline ecard_t *
+slot_to_ecard(unsigned int slot)
+{
+	return slot < MAX_ECARDS ? slot_to_expcard[slot] : NULL;
+}
+
+/* ===================== Expansion card daemon ======================== */
+/*
+ * Since the loader programs on the expansion cards need to be run
+ * in a specific environment, create a separate task with this
+ * environment up, and pass requests to this task as and when we
+ * need to.
+ *
+ * This should allow 99% of loaders to be called from Linux.
+ *
+ * From a security standpoint, we trust the card vendors.  This
+ * may be a misplaced trust.
+ */
+#define BUS_ADDR(x) ((((unsigned long)(x)) << 2) + IO_BASE)
+#define POD_INT_ADDR(x)	((volatile unsigned char *)\
+			 ((BUS_ADDR((x)) - IO_BASE) + IO_START))
+
+static inline void ecard_task_reset(struct ecard_request *req)
+{
+	struct expansion_card *ec = req->ec;
+	if (ec->loader)
+		ecard_loader_reset(POD_INT_ADDR(ec->podaddr), ec->loader);
+}
+
+static void
+ecard_task_readbytes(struct ecard_request *req)
+{
+	unsigned char *buf = (unsigned char *)req->buffer;
+	volatile unsigned char *base_addr =
+		(volatile unsigned char *)POD_INT_ADDR(req->ec->podaddr);
+	unsigned int len = req->length;
+	unsigned int off = req->address;
+
+	if (!req->use_loader || !req->ec->loader) {
+		off *= 4;
+		while (len--) {
+			*buf++ = base_addr[off];
+			off += 4;
+		}
+	} else {
+		while(len--) {
+			/*
+			 * The following is required by some
+			 * expansion card loader programs.
+			 */
+			*(unsigned long *)0x108 = 0;
+			*buf++ = ecard_loader_read(off++, base_addr,
+						   req->ec->loader);
+		}
+	}
+}
+
+static void ecard_do_request(struct ecard_request *req)
+{
+	switch (req->req) {
+	case req_readbytes:
+		ecard_task_readbytes(req);
+		break;
+
+	case req_reset:
+		ecard_task_reset(req);
+		break;
+	}
+}
+
+/*
+ * On 26-bit processors, we don't need the kcardd thread to access the
+ * expansion card loaders.  We do it directly.
+ */
+#define ecard_call(req)	ecard_do_request(req)
+
+/* ======================= Mid-level card control ===================== */
+
+static void
+ecard_readbytes(void *addr, ecard_t *ec, int off, int len, int useld)
+{
+	struct ecard_request req;
+
+	req.req		= req_readbytes;
+	req.ec		= ec;
+	req.address	= off;
+	req.length	= len;
+	req.use_loader	= useld;
+	req.buffer	= addr;
+
+	ecard_call(&req);
+}
+
+int ecard_readchunk(struct in_chunk_dir *cd, ecard_t *ec, int id, int num)
+{
+	struct ex_chunk_dir excd;
+	int index = 16;
+	int useld = 0;
+
+	if (!ec->cid.cd)
+		return 0;
+
+	while(1) {
+		ecard_readbytes(&excd, ec, index, 8, useld);
+		index += 8;
+		if (c_id(&excd) == 0) {
+			if (!useld && ec->loader) {
+				useld = 1;
+				index = 0;
+				continue;
+			}
+			return 0;
+		}
+		if (c_id(&excd) == 0xf0) { /* link */
+			index = c_start(&excd);
+			continue;
+		}
+		if (c_id(&excd) == 0x80) { /* loader */
+			if (!ec->loader) {
+				ec->loader = (loader_t)kmalloc(c_len(&excd),
+							       GFP_KERNEL);
+				if (ec->loader)
+					ecard_readbytes(ec->loader, ec,
+							(int)c_start(&excd),
+							c_len(&excd), useld);
+				else
+					return 0;
+			}
+			continue;
+		}
+		if (c_id(&excd) == id && num-- == 0)
+			break;
+	}
+
+	if (c_id(&excd) & 0x80) {
+		switch (c_id(&excd) & 0x70) {
+		case 0x70:
+			ecard_readbytes((unsigned char *)excd.d.string, ec,
+					(int)c_start(&excd), c_len(&excd),
+					useld);
+			break;
+		case 0x00:
+			break;
+		}
+	}
+	cd->start_offset = c_start(&excd);
+	memcpy(cd->d.string, excd.d.string, 256);
+	return 1;
+}
+
+/* ======================= Interrupt control ============================ */
+
+static void ecard_def_irq_enable(ecard_t *ec, int irqnr)
+{
+}
+
+static void ecard_def_irq_disable(ecard_t *ec, int irqnr)
+{
+}
+
+static int ecard_def_irq_pending(ecard_t *ec)
+{
+	return !ec->irqmask || ec->irqaddr[0] & ec->irqmask;
+}
+
+static void ecard_def_fiq_enable(ecard_t *ec, int fiqnr)
+{
+	panic("ecard_def_fiq_enable called - impossible");
+}
+
+static void ecard_def_fiq_disable(ecard_t *ec, int fiqnr)
+{
+	panic("ecard_def_fiq_disable called - impossible");
+}
+
+static int ecard_def_fiq_pending(ecard_t *ec)
+{
+	return !ec->fiqmask || ec->fiqaddr[0] & ec->fiqmask;
+}
+
+static expansioncard_ops_t ecard_default_ops = {
+	ecard_def_irq_enable,
+	ecard_def_irq_disable,
+	ecard_def_irq_pending,
+	ecard_def_fiq_enable,
+	ecard_def_fiq_disable,
+	ecard_def_fiq_pending
+};
+
+/*
+ * Enable and disable interrupts from expansion cards.
+ * (interrupts are disabled for these functions).
+ *
+ * They are not meant to be called directly, but via enable/disable_irq.
+ */
+static void ecard_irq_unmask(unsigned int irqnr)
+{
+	ecard_t *ec = slot_to_ecard(irqnr - 32);
+
+	if (ec) {
+		if (!ec->ops)
+			ec->ops = &ecard_default_ops;
+
+		if (ec->claimed && ec->ops->irqenable)
+			ec->ops->irqenable(ec, irqnr);
+		else
+			printk(KERN_ERR "ecard: rejecting request to "
+				"enable IRQs for %d\n", irqnr);
+	}
+}
+
+static void ecard_irq_mask(unsigned int irqnr)
+{
+	ecard_t *ec = slot_to_ecard(irqnr - 32);
+
+	if (ec) {
+		if (!ec->ops)
+			ec->ops = &ecard_default_ops;
+
+		if (ec->ops && ec->ops->irqdisable)
+			ec->ops->irqdisable(ec, irqnr);
+	}
+}
+
+static struct irqchip ecard_chip = {
+	.ack	= ecard_irq_mask,
+	.mask	= ecard_irq_mask,
+	.unmask = ecard_irq_unmask,
+};
+
+void ecard_enablefiq(unsigned int fiqnr)
+{
+	ecard_t *ec = slot_to_ecard(fiqnr);
+
+	if (ec) {
+		if (!ec->ops)
+			ec->ops = &ecard_default_ops;
+
+		if (ec->claimed && ec->ops->fiqenable)
+			ec->ops->fiqenable(ec, fiqnr);
+		else
+			printk(KERN_ERR "ecard: rejecting request to "
+				"enable FIQs for %d\n", fiqnr);
+	}
+}
+
+void ecard_disablefiq(unsigned int fiqnr)
+{
+	ecard_t *ec = slot_to_ecard(fiqnr);
+
+	if (ec) {
+		if (!ec->ops)
+			ec->ops = &ecard_default_ops;
+
+		if (ec->ops->fiqdisable)
+			ec->ops->fiqdisable(ec, fiqnr);
+	}
+}
+
+static void
+ecard_dump_irq_state(ecard_t *ec)
+{
+	printk("  %d: %sclaimed, ",
+	       ec->slot_no,
+	       ec->claimed ? "" : "not ");
+
+	if (ec->ops && ec->ops->irqpending &&
+	    ec->ops != &ecard_default_ops)
+		printk("irq %spending\n",
+		       ec->ops->irqpending(ec) ? "" : "not ");
+	else
+		printk("irqaddr %p, mask = %02X, status = %02X\n",
+		       ec->irqaddr, ec->irqmask, *ec->irqaddr);
+}
+
+static void ecard_check_lockup(struct irqdesc *desc)
+{
+	static int last, lockup;
+	ecard_t *ec;
+
+	/*
+	 * If the timer interrupt has not run since the last million
+	 * unrecognised expansion card interrupts, then there is
+	 * something seriously wrong.  Disable the expansion card
+	 * interrupts so at least we can continue.
+	 *
+	 * Maybe we ought to start a timer to re-enable them some time
+	 * later?
+	 */
+	if (last == jiffies) {
+		lockup += 1;
+		if (lockup > 1000000) {
+			printk(KERN_ERR "\nInterrupt lockup detected - "
+			       "disabling all expansion card interrupts\n");
+
+			desc->chip->mask(IRQ_EXPANSIONCARD);
+
+			printk("Expansion card IRQ state:\n");
+
+			for (ec = cards; ec; ec = ec->next)
+				ecard_dump_irq_state(ec);
+		}
+	} else
+		lockup = 0;
+
+	/*
+	 * If we did not recognise the source of this interrupt,
+	 * warn the user, but don't flood the user with these messages.
+	 */
+	if (!last || time_after(jiffies, (unsigned long)(last + 5*HZ))) {
+		last = jiffies;
+		printk(KERN_WARNING "Unrecognised interrupt from backplane\n");
+	}
+}
+
+static void
+ecard_irq_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
+{
+	ecard_t *ec;
+	int called = 0;
+
+	desc->chip->mask(irq);
+	for (ec = cards; ec; ec = ec->next) {
+		int pending;
+
+		if (!ec->claimed || ec->irq == NO_IRQ)
+			continue;
+
+		if (ec->ops && ec->ops->irqpending)
+			pending = ec->ops->irqpending(ec);
+		else
+			pending = ecard_default_ops.irqpending(ec);
+
+		if (pending) {
+			struct irqdesc *d = irq_desc + ec->irq;
+			d->handle(ec->irq, d, regs);
+			called ++;
+		}
+	}
+	desc->chip->unmask(irq);
+
+	if (called == 0)
+		ecard_check_lockup(desc);
+}
+
+#define ecard_irqexp_handler NULL
+#define ecard_probeirqhw() (0)
+
+unsigned int ecard_address(ecard_t *ec, card_type_t type, card_speed_t speed)
+{
+	unsigned long address = 0;
+	int slot = ec->slot_no;
+
+	ectcr &= ~(1 << slot);
+
+	switch (type) {
+	case ECARD_MEMC:
+		address = IO_EC_MEMC_BASE + (slot << 12);
+		break;
+
+	case ECARD_IOC:
+		address = IO_EC_IOC_BASE + (slot << 12) + (speed << 17);
+		break;
+
+	default:
+		break;
+	}
+
+	return address;
+}
+
+static int ecard_prints(char *buffer, ecard_t *ec)
+{
+	char *start = buffer;
+
+	buffer += sprintf(buffer, "  %d: ", ec->slot_no);
+
+	if (ec->cid.id == 0) {
+		struct in_chunk_dir incd;
+
+		buffer += sprintf(buffer, "[%04X:%04X] ",
+			ec->cid.manufacturer, ec->cid.product);
+
+		if (!ec->card_desc && ec->cid.cd &&
+		    ecard_readchunk(&incd, ec, 0xf5, 0)) {
+			ec->card_desc = kmalloc(strlen(incd.d.string)+1, GFP_KERNEL);
+
+			if (ec->card_desc)
+				strcpy((char *)ec->card_desc, incd.d.string);
+		}
+
+		buffer += sprintf(buffer, "%s\n", ec->card_desc ? ec->card_desc : "*unknown*");
+	} else
+		buffer += sprintf(buffer, "Simple card %d\n", ec->cid.id);
+
+	return buffer - start;
+}
+
+static int get_ecard_dev_info(char *buf, char **start, off_t pos, int count)
+{
+	ecard_t *ec = cards;
+	off_t at = 0;
+	int len, cnt;
+
+	cnt = 0;
+	while (ec && count > cnt) {
+		len = ecard_prints(buf, ec);
+		at += len;
+		if (at >= pos) {
+			if (!*start) {
+				*start = buf + (pos - (at - len));
+				cnt = at - pos;
+			} else
+				cnt += len;
+			buf += len;
+		}
+		ec = ec->next;
+	}
+	return (count > cnt) ? cnt : count;
+}
+
+static struct proc_dir_entry *proc_bus_ecard_dir = NULL;
+
+static void ecard_proc_init(void)
+{
+	proc_bus_ecard_dir = proc_mkdir("ecard", proc_bus);
+	create_proc_info_entry("devices", 0, proc_bus_ecard_dir,
+		get_ecard_dev_info);
+}
+
+#define ec_set_resource(ec,nr,st,sz,flg)			\
+	do {							\
+		(ec)->resource[nr].name = ec->dev.bus_id;	\
+		(ec)->resource[nr].start = st;			\
+		(ec)->resource[nr].end = (st) + (sz) - 1;	\
+		(ec)->resource[nr].flags = flg;			\
+	} while (0)
+
+static void __init ecard_init_resources(struct expansion_card *ec)
+{
+	unsigned long base = PODSLOT_IOC0_BASE;
+	unsigned int slot = ec->slot_no;
+	int i;
+
+	ec_set_resource(ec, ECARD_RES_MEMC,
+			PODSLOT_MEMC_BASE + (slot << 14),
+			PODSLOT_MEMC_SIZE, IORESOURCE_MEM);
+
+	for (i = 0; i < ECARD_RES_IOCSYNC - ECARD_RES_IOCSLOW; i++) {
+		ec_set_resource(ec, i + ECARD_RES_IOCSLOW,
+				base + (slot << 14) + (i << 19),
+				PODSLOT_IOC_SIZE, IORESOURCE_MEM);
+	}
+
+	for (i = 0; i < ECARD_NUM_RESOURCES; i++) {
+		if (ec->resource[i].start &&
+		    request_resource(&iomem_resource, &ec->resource[i])) {
+			printk(KERN_ERR "%s: resource(s) not available\n",
+				ec->dev.bus_id);
+			ec->resource[i].end -= ec->resource[i].start;
+			ec->resource[i].start = 0;
+		}
+	}
+}
+
+static ssize_t ecard_show_irq(struct device *dev, char *buf)
+{
+	struct expansion_card *ec = ECARD_DEV(dev);
+	return sprintf(buf, "%u\n", ec->irq);
+}
+
+static ssize_t ecard_show_vendor(struct device *dev, char *buf)
+{
+	struct expansion_card *ec = ECARD_DEV(dev);
+	return sprintf(buf, "%u\n", ec->cid.manufacturer);
+}
+
+static ssize_t ecard_show_device(struct device *dev, char *buf)
+{
+	struct expansion_card *ec = ECARD_DEV(dev);
+	return sprintf(buf, "%u\n", ec->cid.product);
+}
+
+static ssize_t ecard_show_dma(struct device *dev, char *buf)
+{
+	struct expansion_card *ec = ECARD_DEV(dev);
+	return sprintf(buf, "%u\n", ec->dma);
+}
+
+static ssize_t ecard_show_resources(struct device *dev, char *buf)
+{
+	struct expansion_card *ec = ECARD_DEV(dev);
+	char *str = buf;
+	int i;
+
+	for (i = 0; i < ECARD_NUM_RESOURCES; i++)
+		str += sprintf(str, "%08lx %08lx %08lx\n",
+				ec->resource[i].start,
+				ec->resource[i].end,
+				ec->resource[i].flags);
+
+	return str - buf;
+}
+
+static DEVICE_ATTR(irq, S_IRUGO, ecard_show_irq, NULL);
+static DEVICE_ATTR(vendor, S_IRUGO, ecard_show_vendor, NULL);
+static DEVICE_ATTR(device, S_IRUGO, ecard_show_device, NULL);
+static DEVICE_ATTR(dma, S_IRUGO, ecard_show_dma, NULL);
+static DEVICE_ATTR(resource, S_IRUGO, ecard_show_resources, NULL);
+
+/*
+ * Probe for an expansion card.
+ *
+ * If bit 1 of the first byte of the card is set, then the
+ * card does not exist.
+ */
+static int __init
+ecard_probe(int slot, card_type_t type)
+{
+	ecard_t **ecp;
+	ecard_t *ec;
+	struct ex_ecid cid;
+	int i, rc = -ENOMEM;
+
+	ec = kmalloc(sizeof(ecard_t), GFP_KERNEL);
+	if (!ec)
+		goto nomem;
+
+	memset(ec, 0, sizeof(ecard_t));
+
+	ec->slot_no	= slot;
+	ec->type        = type;
+	ec->irq		= NO_IRQ;
+	ec->fiq		= NO_IRQ;
+	ec->dma		= NO_DMA;
+	ec->card_desc	= NULL;
+	ec->ops		= &ecard_default_ops;
+
+	rc = -ENODEV;
+	if ((ec->podaddr = ecard_address(ec, type, ECARD_SYNC)) == 0)
+		goto nodev;
+
+	cid.r_zero = 1;
+	ecard_readbytes(&cid, ec, 0, 16, 0);
+	if (cid.r_zero)
+		goto nodev;
+
+	ec->cid.id	= cid.r_id;
+	ec->cid.cd	= cid.r_cd;
+	ec->cid.is	= cid.r_is;
+	ec->cid.w	= cid.r_w;
+	ec->cid.manufacturer = ecard_getu16(cid.r_manu);
+	ec->cid.product = ecard_getu16(cid.r_prod);
+	ec->cid.country = cid.r_country;
+	ec->cid.irqmask = cid.r_irqmask;
+	ec->cid.irqoff  = ecard_gets24(cid.r_irqoff);
+	ec->cid.fiqmask = cid.r_fiqmask;
+	ec->cid.fiqoff  = ecard_gets24(cid.r_fiqoff);
+	ec->fiqaddr	=
+	ec->irqaddr	= (unsigned char *)ioaddr(ec->podaddr);
+
+	if (ec->cid.is) {
+		ec->irqmask = ec->cid.irqmask;
+		ec->irqaddr += ec->cid.irqoff;
+		ec->fiqmask = ec->cid.fiqmask;
+		ec->fiqaddr += ec->cid.fiqoff;
+	} else {
+		ec->irqmask = 1;
+		ec->fiqmask = 4;
+	}
+
+	for (i = 0; i < sizeof(blacklist) / sizeof(*blacklist); i++)
+		if (blacklist[i].manufacturer == ec->cid.manufacturer &&
+		    blacklist[i].product == ec->cid.product) {
+			ec->card_desc = blacklist[i].type;
+			break;
+		}
+
+	snprintf(ec->dev.bus_id, sizeof(ec->dev.bus_id), "ecard%d", slot);
+	ec->dev.parent = NULL;
+	ec->dev.bus    = &ecard_bus_type;
+	ec->dev.dma_mask = &ec->dma_mask;
+	ec->dma_mask = (u64)0xffffffff;
+
+	ecard_init_resources(ec);
+
+	/*
+	 * hook the interrupt handlers
+	 */
+	ec->irq = 32 + slot;
+	set_irq_chip(ec->irq, &ecard_chip);
+	set_irq_handler(ec->irq, do_level_IRQ);
+	set_irq_flags(ec->irq, IRQF_VALID);
+
+	for (ecp = &cards; *ecp; ecp = &(*ecp)->next);
+
+	*ecp = ec;
+	slot_to_expcard[slot] = ec;
+
+	device_register(&ec->dev);
+	device_create_file(&ec->dev, &dev_attr_dma);
+	device_create_file(&ec->dev, &dev_attr_irq);
+	device_create_file(&ec->dev, &dev_attr_resource);
+	device_create_file(&ec->dev, &dev_attr_vendor);
+	device_create_file(&ec->dev, &dev_attr_device); 
+
+	return 0;
+
+nodev:
+	kfree(ec);
+nomem:
+	return rc;
+}
+
+/*
+ * Initialise the expansion card system.
+ * Locate all hardware - interrupt management and
+ * actual cards.
+ */
+static int __init ecard_init(void)
+{
+	int slot, irqhw;
+
+	printk("Probing expansion cards\n");
+
+	for (slot = 0; slot < MAX_ECARDS; slot ++) {
+		ecard_probe(slot, ECARD_IOC);
+	}
+
+	irqhw = ecard_probeirqhw();
+
+	set_irq_chained_handler(IRQ_EXPANSIONCARD,
+				irqhw ? ecard_irqexp_handler : ecard_irq_handler);
+
+	ecard_proc_init();
+
+	return 0;
+}
+
+subsys_initcall(ecard_init);
+
+/*
+ *	ECARD "bus"
+ */
+static const struct ecard_id *
+ecard_match_device(const struct ecard_id *ids, struct expansion_card *ec)
+{
+	int i;
+
+	for (i = 0; ids[i].manufacturer != 65535; i++)
+		if (ec->cid.manufacturer == ids[i].manufacturer &&
+		    ec->cid.product == ids[i].product)
+			return ids + i;
+
+	return NULL;
+}
+
+static int ecard_drv_probe(struct device *dev)
+{
+	struct expansion_card *ec = ECARD_DEV(dev);
+	struct ecard_driver *drv = ECARD_DRV(dev->driver);
+	const struct ecard_id *id;
+	int ret;
+
+	id = ecard_match_device(drv->id_table, ec);
+
+	ecard_claim(ec);
+	ret = drv->probe(ec, id);
+	if (ret)
+		ecard_release(ec);
+	return ret;
+}
+
+static int ecard_drv_remove(struct device *dev)
+{
+	struct expansion_card *ec = ECARD_DEV(dev);
+	struct ecard_driver *drv = ECARD_DRV(dev->driver);
+
+	drv->remove(ec);
+	ecard_release(ec);
+
+	return 0;
+}
+
+/*
+ * Before rebooting, we must make sure that the expansion card is in a
+ * sensible state, so it can be re-detected.  This means that the first
+ * page of the ROM must be visible.  We call the expansion cards reset
+ * handler, if any.
+ */
+static void ecard_drv_shutdown(struct device *dev)
+{
+	struct expansion_card *ec = ECARD_DEV(dev);
+	struct ecard_driver *drv = ECARD_DRV(dev->driver);
+	struct ecard_request req;
+
+	if (drv->shutdown)
+		drv->shutdown(ec);
+	ecard_release(ec);
+	req.req = req_reset;
+	req.ec = ec;
+	ecard_call(&req);
+}
+
+int ecard_register_driver(struct ecard_driver *drv)
+{
+	drv->drv.bus = &ecard_bus_type;
+	drv->drv.probe = ecard_drv_probe;
+	drv->drv.remove = ecard_drv_remove;
+	drv->drv.shutdown = ecard_drv_shutdown;
+
+	return driver_register(&drv->drv);
+}
+
+void ecard_remove_driver(struct ecard_driver *drv)
+{
+	driver_unregister(&drv->drv);
+}
+
+static int ecard_match(struct device *_dev, struct device_driver *_drv)
+{
+	struct expansion_card *ec = ECARD_DEV(_dev);
+	struct ecard_driver *drv = ECARD_DRV(_drv);
+	int ret;
+
+	if (drv->id_table) {
+		ret = ecard_match_device(drv->id_table, ec) != NULL;
+	} else {
+		ret = ec->cid.id == drv->id;
+	}
+
+	return ret;
+}
+
+struct bus_type ecard_bus_type = {
+	.name	= "ecard",
+	.match	= ecard_match,
+};
+
+static int ecard_bus_init(void)
+{
+	return bus_register(&ecard_bus_type);
+}
+
+postcore_initcall(ecard_bus_init);
+
+EXPORT_SYMBOL(ecard_readchunk);
+EXPORT_SYMBOL(ecard_address);
+EXPORT_SYMBOL(ecard_register_driver);
+EXPORT_SYMBOL(ecard_remove_driver);
+EXPORT_SYMBOL(ecard_bus_type);
diff --git a/arch/arm26/kernel/entry.S b/arch/arm26/kernel/entry.S
new file mode 100644
index 00000000000..a231dd88d0e
--- /dev/null
+++ b/arch/arm26/kernel/entry.S
@@ -0,0 +1,961 @@
+/* arch/arm26/kernel/entry.S
+ * 
+ * Assembled from chunks of code in arch/arm
+ *
+ * Copyright (C) 2003 Ian Molton
+ * Based on the work of RMK.
+ *
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/assembler.h>
+#include <asm/asm_offsets.h>
+#include <asm/errno.h>
+#include <asm/hardware.h>
+#include <asm/sysirq.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+
+	.macro	zero_fp
+#ifndef CONFIG_NO_FRAME_POINTER
+	mov	fp, #0
+#endif
+	.endm
+
+	.text
+
+@ Bad Abort numbers
+@ -----------------
+@
+#define BAD_PREFETCH	0
+#define BAD_DATA	1
+#define BAD_ADDREXCPTN	2
+#define BAD_IRQ		3
+#define BAD_UNDEFINSTR	4
+
+@ OS version number used in SWIs
+@  RISC OS is 0
+@  RISC iX is 8
+@
+#define OS_NUMBER	9
+#define ARMSWI_OFFSET	0x000f0000
+
+@
+@ Stack format (ensured by USER_* and SVC_*)
+@ PSR and PC are comined on arm26
+@
+
+#define S_OFF		8
+
+#define S_OLD_R0	64
+#define S_PC		60
+#define S_LR		56
+#define S_SP		52
+#define S_IP		48
+#define S_FP		44
+#define S_R10		40
+#define S_R9		36
+#define S_R8		32
+#define S_R7		28
+#define S_R6		24
+#define S_R5		20
+#define S_R4		16
+#define S_R3		12
+#define S_R2		8
+#define S_R1		4
+#define S_R0		0
+
+	.macro	save_user_regs
+	str	r0, [sp, #-4]!   @ Store SVC r0
+	str	lr, [sp, #-4]!   @ Store user mode PC
+	sub	sp, sp, #15*4
+	stmia	sp, {r0 - lr}^   @ Store the other user-mode regs
+	mov	r0, r0
+	.endm
+
+	.macro	slow_restore_user_regs
+	ldmia	sp, {r0 - lr}^   @ restore the user regs not including PC
+	mov	r0, r0
+	ldr	lr, [sp, #15*4]  @ get user PC
+	add	sp, sp, #15*4+8  @ free stack
+	movs	pc, lr           @ return
+	.endm
+
+	.macro	fast_restore_user_regs
+	add	sp, sp, #S_OFF
+	ldmib	sp, {r1 - lr}^
+	mov	r0, r0
+	ldr	lr, [sp, #15*4]
+	add	sp, sp, #15*4+8
+	movs	pc, lr
+	.endm
+
+	.macro	save_svc_regs
+	str     sp, [sp, #-16]!
+	str     lr, [sp, #8]
+	str     lr, [sp, #4]
+	stmfd   sp!, {r0 - r12}
+	mov     r0, #-1
+	str     r0, [sp, #S_OLD_R0]
+	zero_fp
+	.endm
+
+	.macro	save_svc_regs_irq
+	str     sp, [sp, #-16]!
+	str     lr, [sp, #4]
+	ldr     lr, .LCirq
+	ldr     lr, [lr]
+	str     lr, [sp, #8]
+	stmfd   sp!, {r0 - r12}
+	mov     r0, #-1
+	str     r0, [sp, #S_OLD_R0]
+	zero_fp
+	.endm
+
+	.macro	restore_svc_regs
+                ldmfd   sp, {r0 - pc}^
+	.endm
+
+	.macro	mask_pc, rd, rm
+	bic	\rd, \rm, #PCMASK
+	.endm
+
+	.macro  disable_irqs, temp
+	mov     \temp, pc
+	orr     \temp, \temp, #PSR_I_BIT
+	teqp    \temp, #0
+	.endm
+
+	.macro	enable_irqs, temp
+	mov     \temp, pc
+	and     \temp, \temp, #~PSR_I_BIT
+	teqp	\temp, #0
+	.endm
+
+	.macro	initialise_traps_extra
+	.endm
+
+	.macro	get_thread_info, rd
+	mov	\rd, sp, lsr #13
+	mov	\rd, \rd, lsl #13
+	.endm
+
+/*
+ * These are the registers used in the syscall handler, and allow us to
+ * have in theory up to 7 arguments to a function - r0 to r6.
+ *
+ * Note that tbl == why is intentional.
+ *
+ * We must set at least "tsk" and "why" when calling ret_with_reschedule.
+ */
+scno	.req	r7		@ syscall number
+tbl	.req	r8		@ syscall table pointer
+why	.req	r8		@ Linux syscall (!= 0)
+tsk	.req	r9		@ current thread_info
+
+/*
+ * Get the system call number.
+ */
+	.macro	get_scno
+	mask_pc	lr, lr
+	ldr	scno, [lr, #-4]		@ get SWI instruction
+	.endm
+/*
+ *  -----------------------------------------------------------------------
+ */
+
+/* 
+ * We rely on the fact that R0 is at the bottom of the stack (due to
+ * slow/fast restore user regs).
+ */
+#if S_R0 != 0
+#error "Please fix"
+#endif
+
+/*
+ * This is the fast syscall return path.  We do as little as
+ * possible here, and this includes saving r0 back into the SVC
+ * stack.
+ */
+ret_fast_syscall:
+	disable_irqs r1				@ disable interrupts
+	ldr	r1, [tsk, #TI_FLAGS]
+	tst	r1, #_TIF_WORK_MASK
+	bne	fast_work_pending
+	fast_restore_user_regs
+
+/*
+ * Ok, we need to do extra processing, enter the slow path.
+ */
+fast_work_pending:
+	str	r0, [sp, #S_R0+S_OFF]!		@ returned r0
+work_pending:
+	tst	r1, #_TIF_NEED_RESCHED
+	bne	work_resched
+	tst	r1, #_TIF_NOTIFY_RESUME | _TIF_SIGPENDING
+	beq	no_work_pending
+	mov	r0, sp				@ 'regs'
+	mov	r2, why				@ 'syscall'
+	bl	do_notify_resume
+	disable_irqs r1				@ disable interrupts
+	b	no_work_pending
+
+work_resched:
+	bl	schedule
+/*
+ * "slow" syscall return path.  "why" tells us if this was a real syscall.
+ */
+ENTRY(ret_to_user)
+ret_slow_syscall:
+	disable_irqs r1				@ disable interrupts
+	ldr	r1, [tsk, #TI_FLAGS]
+	tst	r1, #_TIF_WORK_MASK
+	bne	work_pending
+no_work_pending:
+	slow_restore_user_regs
+
+/*
+ * This is how we return from a fork.
+ */
+ENTRY(ret_from_fork)
+	bl	schedule_tail
+	get_thread_info tsk
+	ldr	r1, [tsk, #TI_FLAGS]		@ check for syscall tracing
+	mov	why, #1
+	tst	r1, #_TIF_SYSCALL_TRACE		@ are we tracing syscalls?
+	beq	ret_slow_syscall
+	mov	r1, sp
+	mov	r0, #1				@ trace exit [IP = 1]
+	bl	syscall_trace
+	b	ret_slow_syscall
+	
+// FIXME - is this strictly necessary?
+#include "calls.S"
+
+/*=============================================================================
+ * SWI handler
+ *-----------------------------------------------------------------------------
+ */
+
+	.align	5
+ENTRY(vector_swi)
+	save_user_regs
+	zero_fp
+	get_scno
+
+#ifdef CONFIG_ALIGNMENT_TRAP
+	ldr	ip, __cr_alignment
+	ldr	ip, [ip]
+	mcr	p15, 0, ip, c1, c0		@ update control register
+#endif
+	enable_irqs ip
+
+	str	r4, [sp, #-S_OFF]!		@ push fifth arg
+
+	get_thread_info tsk
+	ldr	ip, [tsk, #TI_FLAGS]		@ check for syscall tracing
+	bic	scno, scno, #0xff000000		@ mask off SWI op-code
+	eor	scno, scno, #OS_NUMBER << 20	@ check OS number
+	adr	tbl, sys_call_table		@ load syscall table pointer
+	tst	ip, #_TIF_SYSCALL_TRACE		@ are we tracing syscalls?
+	bne	__sys_trace
+
+	adral	lr, ret_fast_syscall            @ set return address
+        orral	lr, lr, #PSR_I_BIT | MODE_SVC26 @ Force SVC mode on return
+	cmp	scno, #NR_syscalls		@ check upper syscall limit
+	ldrcc	pc, [tbl, scno, lsl #2]		@ call sys_* routine
+
+	add	r1, sp, #S_OFF
+2:	mov	why, #0				@ no longer a real syscall
+	cmp	scno, #ARMSWI_OFFSET
+	eor	r0, scno, #OS_NUMBER << 20	@ put OS number back
+	bcs	arm_syscall	
+	b	sys_ni_syscall			@ not private func
+
+	/*
+	 * This is the really slow path.  We're going to be doing
+	 * context switches, and waiting for our parent to respond.
+	 */
+__sys_trace:
+	add	r1, sp, #S_OFF
+	mov	r0, #0				@ trace entry [IP = 0]
+	bl	syscall_trace
+
+	adral   lr, __sys_trace_return          @ set return address
+        orral   lr, lr, #PSR_I_BIT | MODE_SVC26 @ Force SVC mode on return
+	add	r1, sp, #S_R0 + S_OFF		@ pointer to regs
+	cmp	scno, #NR_syscalls		@ check upper syscall limit
+	ldmccia	r1, {r0 - r3}			@ have to reload r0 - r3
+	ldrcc	pc, [tbl, scno, lsl #2]		@ call sys_* routine
+	b	2b
+
+__sys_trace_return:
+	str	r0, [sp, #S_R0 + S_OFF]!	@ save returned r0
+	mov	r1, sp
+	mov	r0, #1				@ trace exit [IP = 1]
+	bl	syscall_trace
+	b	ret_slow_syscall
+
+	.align	5
+#ifdef CONFIG_ALIGNMENT_TRAP
+	.type	__cr_alignment, #object
+__cr_alignment:
+	.word	cr_alignment
+#endif
+
+	.type	sys_call_table, #object
+ENTRY(sys_call_table)
+#include "calls.S"
+
+/*============================================================================
+ * Special system call wrappers
+ */
+@ r0 = syscall number
+@ r5 = syscall table
+		.type	sys_syscall, #function
+sys_syscall:
+		eor	scno, r0, #OS_NUMBER << 20
+		cmp	scno, #NR_syscalls	@ check range
+		stmleia	sp, {r5, r6}		@ shuffle args
+		movle	r0, r1
+		movle	r1, r2
+		movle	r2, r3
+		movle	r3, r4
+		ldrle	pc, [tbl, scno, lsl #2]
+		b	sys_ni_syscall
+
+sys_fork_wrapper:
+		add	r0, sp, #S_OFF
+		b	sys_fork
+
+sys_vfork_wrapper:
+		add	r0, sp, #S_OFF
+		b	sys_vfork
+
+sys_execve_wrapper:
+		add	r3, sp, #S_OFF
+		b	sys_execve
+
+sys_clone_wapper:
+		add	r2, sp, #S_OFF
+		b	sys_clone
+
+sys_sigsuspend_wrapper:
+		add	r3, sp, #S_OFF
+		b	sys_sigsuspend
+
+sys_rt_sigsuspend_wrapper:
+		add	r2, sp, #S_OFF
+		b	sys_rt_sigsuspend
+
+sys_sigreturn_wrapper:
+		add	r0, sp, #S_OFF
+		b	sys_sigreturn
+
+sys_rt_sigreturn_wrapper:
+		add	r0, sp, #S_OFF
+		b	sys_rt_sigreturn
+
+sys_sigaltstack_wrapper:
+		ldr	r2, [sp, #S_OFF + S_SP]
+		b	do_sigaltstack
+
+/*
+ * Note: off_4k (r5) is always units of 4K.  If we can't do the requested
+ * offset, we return EINVAL.  FIXME - this lost some stuff from arm32 to
+ * ifdefs. check it out.
+ */
+sys_mmap2:
+		tst	r5, #((1 << (PAGE_SHIFT - 12)) - 1)
+		moveq	r5, r5, lsr #PAGE_SHIFT - 12
+		streq	r5, [sp, #4]
+		beq	do_mmap2
+		mov	r0, #-EINVAL
+		RETINSTR(mov,pc, lr)
+
+/*
+ *  Design issues:
+ *   - We have several modes that each vector can be called from,
+ *     each with its own set of registers.  On entry to any vector,
+ *     we *must* save the registers used in *that* mode.
+ *
+ *   - This code must be as fast as possible.
+ *
+ *  There are a few restrictions on the vectors:
+ *   - the SWI vector cannot be called from *any* non-user mode
+ *
+ *   - the FP emulator is *never* called from *any* non-user mode undefined
+ *     instruction.
+ *
+ */
+
+		.text
+
+		.macro handle_irq
+1:		mov     r4, #IOC_BASE
+		ldrb    r6, [r4, #0x24]            @ get high priority first
+		adr     r5, irq_prio_h
+		teq     r6, #0
+		ldreqb  r6, [r4, #0x14]            @ get low priority
+		adreq   r5, irq_prio_l
+
+                teq     r6, #0                     @ If an IRQ happened...
+                ldrneb  r0, [r5, r6]               @ get IRQ number
+                movne   r1, sp                     @ get struct pt_regs
+                adrne   lr, 1b                     @ Set return address to 1b
+                orrne   lr, lr, #PSR_I_BIT | MODE_SVC26  @ (and force SVC mode)
+                bne     asm_do_IRQ                 @ process IRQ (if asserted)
+		.endm
+
+
+/*
+ * Interrupt table (incorporates priority)
+ */
+		.macro	irq_prio_table
+irq_prio_l:	.byte	 0, 0, 1, 0, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3
+		.byte	 4, 0, 1, 0, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3
+		.byte	 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
+		.byte	 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
+		.byte	 6, 6, 6, 6, 6, 6, 6, 6, 3, 3, 3, 3, 3, 3, 3, 3
+		.byte	 6, 6, 6, 6, 6, 6, 6, 6, 3, 3, 3, 3, 3, 3, 3, 3
+		.byte	 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
+		.byte	 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
+		.byte	 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+		.byte	 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+		.byte	 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+		.byte	 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+		.byte	 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+		.byte	 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+		.byte	 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+		.byte	 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+irq_prio_h:	.byte	 0, 8, 9, 8,10,10,10,10,11,11,11,11,10,10,10,10
+		.byte	12, 8, 9, 8,10,10,10,10,11,11,11,11,10,10,10,10
+		.byte	13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10
+		.byte	13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10
+		.byte	14,14,14,14,10,10,10,10,11,11,11,11,10,10,10,10
+		.byte	14,14,14,14,10,10,10,10,11,11,11,11,10,10,10,10
+		.byte	13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10
+		.byte	13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10
+		.byte	15,15,15,15,10,10,10,10,11,11,11,11,10,10,10,10
+		.byte	15,15,15,15,10,10,10,10,11,11,11,11,10,10,10,10
+		.byte	13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10
+		.byte	13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10
+		.byte	15,15,15,15,10,10,10,10,11,11,11,11,10,10,10,10
+		.byte	15,15,15,15,10,10,10,10,11,11,11,11,10,10,10,10
+		.byte	13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10
+		.byte	13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10
+		.endm
+
+#if 1
+/*
+ * Uncomment these if you wish to get more debugging into about data aborts.
+ * FIXME - I bet we can find a way to encode these and keep performance.
+ */
+#define FAULT_CODE_LDRSTRPOST	0x80
+#define FAULT_CODE_LDRSTRPRE	0x40
+#define FAULT_CODE_LDRSTRREG	0x20
+#define FAULT_CODE_LDMSTM	0x10
+#define FAULT_CODE_LDCSTC	0x08
+#endif
+#define FAULT_CODE_PREFETCH	0x04
+#define FAULT_CODE_WRITE	0x02
+#define FAULT_CODE_FORCECOW	0x01
+
+/*=============================================================================
+ * Undefined FIQs
+ *-----------------------------------------------------------------------------
+ */
+_unexp_fiq:	ldr     sp, .LCfiq
+		mov	r12, #IOC_BASE
+		strb	r12, [r12, #0x38]	@ Disable FIQ register
+		teqp	pc, #PSR_I_BIT | PSR_F_BIT | MODE_SVC26
+		mov	r0, r0
+		stmfd	sp!, {r0 - r3, ip, lr}
+		adr	r0, Lfiqmsg
+		bl	printk
+		ldmfd	sp!, {r0 - r3, ip, lr}
+		teqp	pc, #PSR_I_BIT | PSR_F_BIT | MODE_FIQ26
+		mov	r0, r0
+		movs	pc, lr
+
+Lfiqmsg:	.ascii	"*** Unexpected FIQ\n\0"
+		.align
+
+.LCfiq:		.word	__temp_fiq
+.LCirq:		.word	__temp_irq
+
+/*=============================================================================
+ * Undefined instruction handler
+ *-----------------------------------------------------------------------------
+ * Handles floating point instructions
+ */
+vector_undefinstr:
+		tst	lr, #MODE_SVC26          @ did we come from a non-user mode?
+		bne	__und_svc                @ yes - deal with it.
+/* Otherwise, fall through for the user-space (common) case. */
+		save_user_regs
+		zero_fp                                 @ zero frame pointer
+		teqp	pc, #PSR_I_BIT | MODE_SVC26     @ disable IRQs
+.Lbug_undef:
+		ldr	r4, .LC2
+                ldr     pc, [r4]         @ Call FP module entry point
+/* FIXME - should we trap for a null pointer here? */
+
+/* The SVC mode case */
+__und_svc:	save_svc_regs                           @ Non-user mode
+                mask_pc r0, lr
+                and     r2, lr, #3
+                sub     r0, r0, #4
+                mov     r1, sp
+                bl      do_undefinstr
+                restore_svc_regs
+
+/* We get here if the FP emulator doesnt handle the undef instr.
+ * If the insn WAS handled, the emulator jumps to ret_from_exception by itself/
+ */
+		.globl	fpundefinstr 
+fpundefinstr:
+		mov	r0, lr
+		mov	r1, sp
+		teqp	pc, #MODE_SVC26
+		bl	do_undefinstr
+		b	ret_from_exception		@ Normal FP exit
+
+#if defined CONFIG_FPE_NWFPE || defined CONFIG_FPE_FASTFPE
+		/* The FPE is always present */
+		.equ	fpe_not_present, 0
+#else
+/* We get here if an undefined instruction happens and the floating
+ * point emulator is not present.  If the offending instruction was
+ * a WFS, we just perform a normal return as if we had emulated the
+ * operation.  This is a hack to allow some basic userland binaries
+ * to run so that the emulator module proper can be loaded. --philb
+ * FIXME - probably a broken useless hack...
+ */
+fpe_not_present:
+		adr	r10, wfs_mask_data
+		ldmia	r10, {r4, r5, r6, r7, r8}
+		ldr	r10, [sp, #S_PC]		@ Load PC
+		sub	r10, r10, #4
+		mask_pc	r10, r10
+		ldrt	r10, [r10]			@ get instruction
+		and	r5, r10, r5
+		teq	r5, r4				@ Is it WFS?
+		beq	ret_from_exception
+		and	r5, r10, r8
+		teq	r5, r6				@ Is it LDF/STF on sp or fp?
+		teqne	r5, r7
+		bne	fpundefinstr
+		tst	r10, #0x00200000		@ Does it have WB
+		beq	ret_from_exception
+		and	r4, r10, #255			@ get offset
+		and	r6, r10, #0x000f0000
+		tst	r10, #0x00800000		@ +/-
+		ldr	r5, [sp, r6, lsr #14]		@ Load reg
+		rsbeq	r4, r4, #0
+		add	r5, r5, r4, lsl #2
+		str	r5, [sp, r6, lsr #14]		@ Save reg
+		b	ret_from_exception
+
+wfs_mask_data:	.word	0x0e200110			@ WFS/RFS
+		.word	0x0fef0fff
+		.word	0x0d0d0100			@ LDF [sp]/STF [sp]
+		.word	0x0d0b0100			@ LDF [fp]/STF [fp]
+		.word	0x0f0f0f00
+#endif
+
+.LC2:		.word	fp_enter
+
+/*=============================================================================
+ * Prefetch abort handler
+ *-----------------------------------------------------------------------------
+ */
+#define DEBUG_UNDEF
+/* remember: lr = USR pc */
+vector_prefetch:
+		sub	lr, lr, #4
+		tst	lr, #MODE_SVC26
+		bne	__pabt_invalid
+		save_user_regs
+		teqp	pc, #MODE_SVC26         @ Enable IRQs...
+		mask_pc	r0, lr			@ Address of abort
+		mov	r1, sp			@ Tasks registers
+		bl	do_PrefetchAbort
+		teq	r0, #0			@ If non-zero, we believe this abort..
+		bne	ret_from_exception
+#ifdef DEBUG_UNDEF
+		adr	r0, t
+		bl	printk
+#endif
+		ldr	lr, [sp,#S_PC]		@ FIXME program to test this on.  I think its
+		b	.Lbug_undef		@ broken at the moment though!)
+
+__pabt_invalid:	save_svc_regs
+		mov	r0, sp			@ Prefetch aborts are definitely *not*
+		mov	r1, #BAD_PREFETCH	@ allowed in non-user modes.  We cant
+		and	r2, lr, #3		@ recover from this problem.
+		b	bad_mode
+
+#ifdef DEBUG_UNDEF
+t:		.ascii "*** undef ***\r\n\0"
+		.align
+#endif
+
+/*=============================================================================
+ * Address exception handler
+ *-----------------------------------------------------------------------------
+ * These aren't too critical.
+ * (they're not supposed to happen).
+ * In order to debug the reason for address exceptions in non-user modes,
+ * we have to obtain all the registers so that we can see what's going on.
+ */
+
+vector_addrexcptn:
+		sub	lr, lr, #8
+		tst	lr, #3
+		bne	Laddrexcptn_not_user
+		save_user_regs
+		teq	pc, #MODE_SVC26
+		mask_pc	r0, lr			@ Point to instruction
+		mov	r1, sp			@ Point to registers
+		mov	r2, #0x400
+		mov	lr, pc
+		bl	do_excpt
+		b	ret_from_exception
+
+Laddrexcptn_not_user:
+		save_svc_regs
+		and	r2, lr, #3
+		teq	r2, #3
+		bne	Laddrexcptn_illegal_mode
+		teqp	pc, #MODE_SVC26
+		mask_pc	r0, lr
+		mov	r1, sp
+		orr	r2, r2, #0x400
+		bl	do_excpt
+		ldmia	sp, {r0 - lr}		@ I cant remember the reason I changed this...
+		add	sp, sp, #15*4
+		movs	pc, lr
+
+Laddrexcptn_illegal_mode:
+		mov	r0, sp
+		str	lr, [sp, #-4]!
+		orr	r1, r2, #PSR_I_BIT | PSR_F_BIT
+		teqp	r1, #0			@ change into mode (wont be user mode)
+		mov	r0, r0
+		mov	r1, r8			@ Any register from r8 - r14 can be banked
+		mov	r2, r9
+		mov	r3, r10
+		mov	r4, r11
+		mov	r5, r12
+		mov	r6, r13
+		mov	r7, r14
+		teqp	pc, #PSR_F_BIT | MODE_SVC26 @ back to svc
+		mov	r0, r0
+		stmfd	sp!, {r1-r7}
+		ldmia	r0, {r0-r7}
+		stmfd	sp!, {r0-r7}
+		mov	r0, sp
+		mov	r1, #BAD_ADDREXCPTN
+		b	bad_mode
+
+/*=============================================================================
+ * Interrupt (IRQ) handler
+ *-----------------------------------------------------------------------------
+ * Note: if the IRQ was taken whilst in user mode, then *no* kernel routine
+ * is running, so do not have to save svc lr.
+ *
+ * Entered in IRQ mode.
+ */
+
+vector_IRQ:	ldr     sp, .LCirq         @ Setup some temporary stack
+                sub     lr, lr, #4
+                str     lr, [sp]           @ push return address
+
+		tst     lr, #3
+		bne	__irq_non_usr
+
+__irq_usr:	teqp	pc, #PSR_I_BIT | MODE_SVC26     @ Enter SVC mode
+		mov	r0, r0
+
+		ldr	lr, .LCirq
+		ldr	lr, [lr]           @ Restore lr for jump back to USR
+
+		save_user_regs
+
+		handle_irq
+
+		mov	why, #0
+		get_thread_info tsk
+		b	ret_to_user
+
+@ Place the IRQ priority table here so that the handle_irq macros above
+@ and below here can access it.
+
+		irq_prio_table
+
+__irq_non_usr:	teqp	pc, #PSR_I_BIT | MODE_SVC26     @ Enter SVC mode
+		mov	r0, r0
+
+		save_svc_regs_irq
+
+                and	r2, lr, #3
+		teq	r2, #3
+		bne	__irq_invalid                @ IRQ not from SVC mode
+
+		handle_irq
+
+		restore_svc_regs
+
+__irq_invalid:	mov	r0, sp
+		mov	r1, #BAD_IRQ
+		b	bad_mode
+
+/*=============================================================================
+ * Data abort handler code
+ *-----------------------------------------------------------------------------
+ *
+ * This handles both exceptions from user and SVC modes, computes the address
+ *  range of the problem, and does any correction that is required.  It then
+ *  calls the kernel data abort routine.
+ *
+ * This is where I wish that the ARM would tell you which address aborted.
+ */
+
+vector_data:	sub	lr, lr, #8		@ Correct lr
+		tst	lr, #3
+		bne	Ldata_not_user
+		save_user_regs
+		teqp	pc, #MODE_SVC26
+		mask_pc	r0, lr
+		bl	Ldata_do
+		b	ret_from_exception
+
+Ldata_not_user:
+		save_svc_regs
+		and	r2, lr, #3
+		teq	r2, #3
+		bne	Ldata_illegal_mode
+		tst	lr, #PSR_I_BIT
+		teqeqp	pc, #MODE_SVC26
+		mask_pc	r0, lr
+		bl	Ldata_do
+		restore_svc_regs
+
+Ldata_illegal_mode:
+		mov	r0, sp
+		mov	r1, #BAD_DATA
+		b	bad_mode
+
+Ldata_do:	mov	r3, sp
+		ldr	r4, [r0]		@ Get instruction
+		mov	r2, #0
+		tst	r4, #1 << 20		@ Check to see if it is a write instruction
+		orreq	r2, r2, #FAULT_CODE_WRITE @ Indicate write instruction
+		mov	r1, r4, lsr #22		@ Now branch to the relevent processing routine
+		and	r1, r1, #15 << 2
+		add	pc, pc, r1
+		movs	pc, lr
+		b	Ldata_unknown
+		b	Ldata_unknown
+		b	Ldata_unknown
+		b	Ldata_unknown
+		b	Ldata_ldrstr_post	@ ldr	rd, [rn], #m
+		b	Ldata_ldrstr_numindex	@ ldr	rd, [rn, #m]	@ RegVal
+		b	Ldata_ldrstr_post	@ ldr	rd, [rn], rm
+		b	Ldata_ldrstr_regindex	@ ldr	rd, [rn, rm]
+		b	Ldata_ldmstm		@ ldm*a	rn, <rlist>
+		b	Ldata_ldmstm		@ ldm*b	rn, <rlist>
+		b	Ldata_unknown
+		b	Ldata_unknown
+		b	Ldata_ldrstr_post	@ ldc	rd, [rn], #m	@ Same as ldr	rd, [rn], #m
+		b	Ldata_ldcstc_pre	@ ldc	rd, [rn, #m]
+		b	Ldata_unknown
+Ldata_unknown:	@ Part of jumptable
+		mov	r0, r1
+		mov	r1, r4
+		mov	r2, r3
+		b	baddataabort
+
+Ldata_ldrstr_post:
+		mov	r0, r4, lsr #14		@ Get Rn
+		and	r0, r0, #15 << 2	@ Mask out reg.
+		teq	r0, #15 << 2
+		ldr	r0, [r3, r0]		@ Get register
+		biceq	r0, r0, #PCMASK
+		mov	r1, r0
+#ifdef FAULT_CODE_LDRSTRPOST
+		orr	r2, r2, #FAULT_CODE_LDRSTRPOST
+#endif
+		b	do_DataAbort
+
+Ldata_ldrstr_numindex:
+		mov	r0, r4, lsr #14		@ Get Rn
+		and	r0, r0, #15 << 2	@ Mask out reg.
+		teq	r0, #15 << 2
+		ldr	r0, [r3, r0]		@ Get register
+		mov	r1, r4, lsl #20
+		biceq	r0, r0, #PCMASK
+		tst	r4, #1 << 23
+		addne	r0, r0, r1, lsr #20
+		subeq	r0, r0, r1, lsr #20
+		mov	r1, r0
+#ifdef FAULT_CODE_LDRSTRPRE
+		orr	r2, r2, #FAULT_CODE_LDRSTRPRE
+#endif
+		b	do_DataAbort
+
+Ldata_ldrstr_regindex:
+		mov	r0, r4, lsr #14		@ Get Rn
+		and	r0, r0, #15 << 2	@ Mask out reg.
+		teq	r0, #15 << 2
+		ldr	r0, [r3, r0]		@ Get register
+		and	r7, r4, #15
+		biceq	r0, r0, #PCMASK
+		teq	r7, #15			@ Check for PC
+		ldr	r7, [r3, r7, lsl #2]	@ Get Rm
+		and	r8, r4, #0x60		@ Get shift types
+		biceq	r7, r7, #PCMASK
+		mov	r9, r4, lsr #7		@ Get shift amount
+		and	r9, r9, #31
+		teq	r8, #0
+		moveq	r7, r7, lsl r9
+		teq	r8, #0x20		@ LSR shift
+		moveq	r7, r7, lsr r9
+		teq	r8, #0x40		@ ASR shift
+		moveq	r7, r7, asr r9
+		teq	r8, #0x60		@ ROR shift
+		moveq	r7, r7, ror r9
+		tst	r4, #1 << 23
+		addne	r0, r0, r7
+		subeq	r0, r0, r7		@ Apply correction
+		mov	r1, r0
+#ifdef FAULT_CODE_LDRSTRREG
+		orr	r2, r2, #FAULT_CODE_LDRSTRREG
+#endif
+		b	do_DataAbort
+
+Ldata_ldmstm:
+		mov	r7, #0x11
+		orr	r7, r7, r7, lsl #8
+		and	r0, r4, r7
+		and	r1, r4, r7, lsl #1
+		add	r0, r0, r1, lsr #1
+		and	r1, r4, r7, lsl #2
+		add	r0, r0, r1, lsr #2
+		and	r1, r4, r7, lsl #3
+		add	r0, r0, r1, lsr #3
+		add	r0, r0, r0, lsr #8
+		add	r0, r0, r0, lsr #4
+		and	r7, r0, #15		@ r7 = no. of registers to transfer.
+		mov	r5, r4, lsr #14		@ Get Rn
+		and	r5, r5, #15 << 2
+		ldr	r0, [r3, r5]		@ Get reg
+		eor	r6, r4, r4, lsl #2
+		tst	r6, #1 << 23		@ Check inc/dec ^ writeback
+		rsbeq	r7, r7, #0
+		add	r7, r0, r7, lsl #2	@ Do correction (signed)
+		subne	r1, r7, #1
+		subeq	r1, r0, #1
+		moveq	r0, r7
+		tst	r4, #1 << 21		@ Check writeback
+		strne	r7, [r3, r5]
+		eor	r6, r4, r4, lsl #1
+		tst	r6, #1 << 24		@ Check Pre/Post ^ inc/dec
+		addeq	r0, r0, #4
+		addeq	r1, r1, #4
+		teq	r5, #15*4		@ CHECK FOR PC
+		biceq	r1, r1, #PCMASK
+		biceq	r0, r0, #PCMASK
+#ifdef FAULT_CODE_LDMSTM
+		orr	r2, r2, #FAULT_CODE_LDMSTM
+#endif
+		b	do_DataAbort
+
+Ldata_ldcstc_pre:
+		mov	r0, r4, lsr #14		@ Get Rn
+		and	r0, r0, #15 << 2	@ Mask out reg.
+		teq	r0, #15 << 2
+		ldr	r0, [r3, r0]		@ Get register
+		mov	r1, r4, lsl #24		@ Get offset
+		biceq	r0, r0, #PCMASK
+		tst	r4, #1 << 23
+		addne	r0, r0, r1, lsr #24
+		subeq	r0, r0, r1, lsr #24
+		mov	r1, r0
+#ifdef FAULT_CODE_LDCSTC
+		orr	r2, r2, #FAULT_CODE_LDCSTC
+#endif
+		b	do_DataAbort
+
+
+/*
+ * This is the return code to user mode for abort handlers
+ */
+ENTRY(ret_from_exception)
+		get_thread_info tsk
+		mov	why, #0
+		b	ret_to_user
+
+		.data
+ENTRY(fp_enter)
+		.word	fpe_not_present
+		.text
+/*
+ * Register switch for older 26-bit only ARMs
+ */
+ENTRY(__switch_to)
+		add	r0, r0, #TI_CPU_SAVE
+		stmia	r0, {r4 - sl, fp, sp, lr}
+		add	r1, r1, #TI_CPU_SAVE
+		ldmia	r1, {r4 - sl, fp, sp, pc}^
+
+/*
+ *=============================================================================
+ *		Low-level interface code
+ *-----------------------------------------------------------------------------
+ *		Trap initialisation
+ *-----------------------------------------------------------------------------
+ *
+ * Note - FIQ code has changed.  The default is a couple of words in 0x1c, 0x20
+ * that call _unexp_fiq.  Nowever, we now copy the FIQ routine to 0x1c (removes
+ * some excess cycles).
+ *
+ * What we need to put into 0-0x1c are branches to branch to the kernel.
+ */
+
+		.section ".init.text",#alloc,#execinstr
+
+.Ljump_addresses:
+		swi	SYS_ERROR0
+		.word	vector_undefinstr	- 12
+		.word	vector_swi		- 16
+		.word	vector_prefetch		- 20
+		.word	vector_data		- 24
+		.word	vector_addrexcptn	- 28
+		.word	vector_IRQ		- 32
+		.word	_unexp_fiq		- 36
+		b	. + 8
+/*
+ * initialise the trap system
+ */
+ENTRY(__trap_init)
+		stmfd	sp!, {r4 - r7, lr}
+		adr	r1, .Ljump_addresses
+		ldmia	r1, {r1 - r7, ip, lr}
+		orr	r2, lr, r2, lsr #2
+		orr	r3, lr, r3, lsr #2
+		orr	r4, lr, r4, lsr #2
+		orr	r5, lr, r5, lsr #2
+		orr	r6, lr, r6, lsr #2
+		orr	r7, lr, r7, lsr #2
+		orr	ip, lr, ip, lsr #2
+		mov	r0, #0
+		stmia	r0, {r1 - r7, ip}
+		ldmfd	sp!, {r4 - r7, pc}^
+
+		.bss
+__temp_irq:	.space	4				@ saved lr_irq
+__temp_fiq:	.space	128
diff --git a/arch/arm26/kernel/fiq.c b/arch/arm26/kernel/fiq.c
new file mode 100644
index 00000000000..08a97c9498f
--- /dev/null
+++ b/arch/arm26/kernel/fiq.c
@@ -0,0 +1,202 @@
+/*
+ *  linux/arch/arm26/kernel/fiq.c
+ *
+ *  Copyright (C) 1998 Russell King
+ *  Copyright (C) 1998, 1999 Phil Blundell
+ *  Copyright (C) 2003 Ian Molton
+ *
+ *  FIQ support written by Philip Blundell <philb@gnu.org>, 1998.
+ *
+ *  FIQ support re-written by Russell King to be more generic
+ *
+ * We now properly support a method by which the FIQ handlers can
+ * be stacked onto the vector.  We still do not support sharing
+ * the FIQ vector itself.
+ *
+ * Operation is as follows:
+ *  1. Owner A claims FIQ:
+ *     - default_fiq relinquishes control.
+ *  2. Owner A:
+ *     - inserts code.
+ *     - sets any registers,
+ *     - enables FIQ.
+ *  3. Owner B claims FIQ:
+ *     - if owner A has a relinquish function.
+ *       - disable FIQs.
+ *       - saves any registers.
+ *       - returns zero.
+ *  4. Owner B:
+ *     - inserts code.
+ *     - sets any registers,
+ *     - enables FIQ.
+ *  5. Owner B releases FIQ:
+ *     - Owner A is asked to reacquire FIQ:
+ *	 - inserts code.
+ *	 - restores saved registers.
+ *	 - enables FIQ.
+ *  6. Goto 3
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+
+#include <asm/fiq.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/pgalloc.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#define FIQ_VECTOR (vectors_base() + 0x1c)
+
+static unsigned long no_fiq_insn;
+
+#define unprotect_page_0()
+#define protect_page_0()
+
+/* Default reacquire function
+ * - we always relinquish FIQ control
+ * - we always reacquire FIQ control
+ */
+static int fiq_def_op(void *ref, int relinquish)
+{
+	if (!relinquish) {
+		unprotect_page_0();
+		*(unsigned long *)FIQ_VECTOR = no_fiq_insn;
+		protect_page_0();
+	}
+
+	return 0;
+}
+
+static struct fiq_handler default_owner = {
+	.name	= "default",
+	.fiq_op = fiq_def_op,
+};
+
+static struct fiq_handler *current_fiq = &default_owner;
+
+int show_fiq_list(struct seq_file *p, void *v)
+{
+	if (current_fiq != &default_owner)
+		seq_printf(p, "FIQ:              %s\n", current_fiq->name);
+
+	return 0;
+}
+
+void set_fiq_handler(void *start, unsigned int length)
+{
+	unprotect_page_0();
+
+	memcpy((void *)FIQ_VECTOR, start, length);
+
+	protect_page_0();
+}
+
+/*
+ * Taking an interrupt in FIQ mode is death, so both these functions
+ * disable irqs for the duration. 
+ */
+void set_fiq_regs(struct pt_regs *regs)
+{
+	register unsigned long tmp, tmp2;
+	__asm__ volatile (
+	"mov	%0, pc
+	bic	%1, %0, #0x3
+	orr	%1, %1, %3
+	teqp	%1, #0		@ select FIQ mode
+	mov	r0, r0
+	ldmia	%2, {r8 - r14}
+	teqp	%0, #0		@ return to SVC mode
+	mov	r0, r0"
+	: "=&r" (tmp), "=&r" (tmp2)
+	: "r" (&regs->ARM_r8), "I" (PSR_I_BIT | PSR_F_BIT | MODE_FIQ26)
+	/* These registers aren't modified by the above code in a way
+	   visible to the compiler, but we mark them as clobbers anyway
+	   so that GCC won't put any of the input or output operands in
+	   them.  */
+	: "r8", "r9", "r10", "r11", "r12", "r13", "r14");
+}
+
+void get_fiq_regs(struct pt_regs *regs)
+{
+	register unsigned long tmp, tmp2;
+	__asm__ volatile (
+	"mov	%0, pc
+	bic	%1, %0, #0x3
+	orr	%1, %1, %3
+	teqp	%1, #0		@ select FIQ mode
+	mov	r0, r0
+	stmia	%2, {r8 - r14}
+	teqp	%0, #0		@ return to SVC mode
+	mov	r0, r0"
+	: "=&r" (tmp), "=&r" (tmp2)
+	: "r" (&regs->ARM_r8), "I" (PSR_I_BIT | PSR_F_BIT | MODE_FIQ26)
+	/* These registers aren't modified by the above code in a way
+	   visible to the compiler, but we mark them as clobbers anyway
+	   so that GCC won't put any of the input or output operands in
+	   them.  */
+	: "r8", "r9", "r10", "r11", "r12", "r13", "r14");
+}
+
+int claim_fiq(struct fiq_handler *f)
+{
+	int ret = 0;
+
+	if (current_fiq) {
+		ret = -EBUSY;
+
+		if (current_fiq->fiq_op != NULL)
+			ret = current_fiq->fiq_op(current_fiq->dev_id, 1);
+	}
+
+	if (!ret) {
+		f->next = current_fiq;
+		current_fiq = f;
+	}
+
+	return ret;
+}
+
+void release_fiq(struct fiq_handler *f)
+{
+	if (current_fiq != f) {
+		printk(KERN_ERR "%s FIQ trying to release %s FIQ\n",
+		       f->name, current_fiq->name);
+#ifdef CONFIG_DEBUG_ERRORS
+		__backtrace();
+#endif
+		return;
+	}
+
+	do
+		current_fiq = current_fiq->next;
+	while (current_fiq->fiq_op(current_fiq->dev_id, 0));
+}
+
+void enable_fiq(int fiq)
+{
+	enable_irq(fiq + FIQ_START);
+}
+
+void disable_fiq(int fiq)
+{
+	disable_irq(fiq + FIQ_START);
+}
+
+EXPORT_SYMBOL(set_fiq_handler);
+EXPORT_SYMBOL(set_fiq_regs);
+EXPORT_SYMBOL(get_fiq_regs);
+EXPORT_SYMBOL(claim_fiq);
+EXPORT_SYMBOL(release_fiq);
+EXPORT_SYMBOL(enable_fiq);
+EXPORT_SYMBOL(disable_fiq);
+
+void __init init_FIQ(void)
+{
+	no_fiq_insn = *(unsigned long *)FIQ_VECTOR;
+	set_fs(get_fs());
+}
diff --git a/arch/arm26/kernel/head.S b/arch/arm26/kernel/head.S
new file mode 100644
index 00000000000..8bfc62539ba
--- /dev/null
+++ b/arch/arm26/kernel/head.S
@@ -0,0 +1,113 @@
+/*
+ *  linux/arch/arm26/kernel/head.S
+ *
+ *  Copyright (C) 1994-2000 Russell King
+ *  Copyright (C) 2003 Ian Molton
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  26-bit kernel startup code
+ */
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/mach-types.h>
+
+		.globl	swapper_pg_dir
+		.equ	swapper_pg_dir,	0x0207d000
+
+/*
+ * Entry point.
+ */
+		.section ".init.text",#alloc,#execinstr
+ENTRY(stext)
+
+__entry:
+		cmp	pc, #0x02000000
+		ldrlt	pc, LC0			@ if 0x01800000, call at 0x02080000
+		teq	r0, #0                  @ Check for old calling method
+		blne	oldparams		@ Move page if old
+
+		adr	r0, LC0
+		ldmib	r0, {r2-r5, sp}		@ Setup stack (and fetch other values)
+
+		mov	r0, #0			@ Clear BSS
+1:		cmp	r2, r3
+		strcc	r0, [r2], #4
+		bcc	1b
+
+		bl	detect_proc_type
+		str	r0, [r4]
+		bl	detect_arch_type
+		str	r0, [r5]
+
+#ifdef CONFIG_XIP_KERNEL
+               ldr     r3, ETEXT                       @ data section copy
+               ldr     r4, SDATA
+               ldr     r5, EDATA
+1:
+               ldr     r6, [r3], #4
+               str     r6, [r4], #4
+               cmp     r4, r5
+               blt     1b
+#endif
+		mov	fp, #0
+		b	start_kernel
+
+LC0:		.word	_stext
+		.word	__bss_start		@ r2
+		.word	_end			@ r3
+		.word	processor_id		@ r4
+		.word	__machine_arch_type	@ r5
+		.word	init_thread_union+8192	@ sp
+#ifdef CONFIG_XIP_KERNEL
+ETEXT:		.word   _endtext
+SDATA:		.word   _sdata
+EDATA:		.word   __bss_start
+#endif
+
+arm2_id:	.long	0x41560200  @ ARM2 and 250 dont have a CPUID
+arm250_id:	.long	0x41560250  @ So we create some after probing for them
+		.align
+
+oldparams:	mov     r4, #0x02000000
+		add	r3, r4, #0x00080000
+		add	r4, r4, #0x0007c000
+1:		ldmia	r0!, {r5 - r12}
+		stmia	r4!, {r5 - r12}
+		cmp	r4, r3
+		blt	1b
+		mov	pc, lr
+
+/*
+ * We need some way to automatically detect the difference between
+ * these two machines.  Unfortunately, it is not possible to detect
+ * the presence of the SuperIO chip, because that will hang the old
+ * Archimedes machines solid.
+ */
+/* DAG: Outdated, these have been combined !!!!!!! */
+detect_arch_type:
+#if defined(CONFIG_ARCH_ARC)
+		mov	r0, #MACH_TYPE_ARCHIMEDES
+#elif defined(CONFIG_ARCH_A5K)
+		mov	r0, #MACH_TYPE_A5K
+#endif
+		mov	pc, lr
+
+detect_proc_type:
+		mov	ip, lr
+		mov	r2, #0xea000000		@ Point undef instr to continuation
+		adr	r0, continue - 12
+		orr	r0, r2, r0, lsr #2
+		mov	r1, #0
+		str	r0, [r1, #4]
+		ldr	r0, arm2_id
+		swp	r2, r2, [r1]		@ check for swp (ARM2 cant)
+		ldr	r0, arm250_id
+		mrc	15, 0, r3, c0, c0	@ check for CP#15 (ARM250 cant)
+		mov	r0, r3
+continue:	mov	r2, #0xeb000000		@ Make undef vector loop
+		sub	r2, r2, #2
+		str	r2, [r1, #4]
+		mov	pc, ip
diff --git a/arch/arm26/kernel/init_task.c b/arch/arm26/kernel/init_task.c
new file mode 100644
index 00000000000..4191565b889
--- /dev/null
+++ b/arch/arm26/kernel/init_task.c
@@ -0,0 +1,49 @@
+/*
+ *  linux/arch/arm26/kernel/init_task.c
+ *
+ * Copyright (C) 2003 Ian Molton
+ *
+ */
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/init_task.h>
+#include <linux/mqueue.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+struct mm_struct init_mm = INIT_MM(init_mm);
+
+EXPORT_SYMBOL(init_mm);
+
+/*
+ * Initial thread structure.
+ *
+ * We need to make sure that this is 8192-byte aligned due to the
+ * way process stacks are handled. This is done by making sure
+ * the linker maps this in the .text segment right after head.S,
+ * and making the linker scripts ensure the proper alignment.
+ *
+ * FIXME - should this be 32K alignment on arm26?
+ *
+ * The things we do for performance...
+ */
+union thread_union init_thread_union
+	__attribute__((__section__(".init.task"))) =
+		{ INIT_THREAD_INFO(init_task) };
+
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+struct task_struct init_task = INIT_TASK(init_task);
+
+EXPORT_SYMBOL(init_task);
diff --git a/arch/arm26/kernel/irq.c b/arch/arm26/kernel/irq.c
new file mode 100644
index 00000000000..f3cc1036e5b
--- /dev/null
+++ b/arch/arm26/kernel/irq.c
@@ -0,0 +1,716 @@
+/*
+ *  linux/arch/arm/kernel/irq.c
+ *
+ *  Copyright (C) 1992 Linus Torvalds
+ *  Modifications for ARM processor Copyright (C) 1995-2000 Russell King.
+ *  'Borrowed' for ARM26 and (C) 2003 Ian Molton.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  This file contains the code used by various IRQ handling routines:
+ *  asking for different IRQ's should be done through these routines
+ *  instead of just grabbing them. Thus setups with different IRQ numbers
+ *  shouldn't result in any weird surprises, and installing new handlers
+ *  should be easier.
+ *
+ *  IRQ's are in fact implemented a bit like signal handlers for the kernel.
+ *  Naturally it's not a 1:1 relation, but there are similarities.
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/kernel_stat.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/errno.h>
+
+#include <asm/irq.h>
+#include <asm/system.h>
+#include <asm/irqchip.h>
+
+//FIXME - this ought to be in a header IMO
+void __init arc_init_irq(void);
+
+/*
+ * Maximum IRQ count.  Currently, this is arbitary.  However, it should
+ * not be set too low to prevent false triggering.  Conversely, if it
+ * is set too high, then you could miss a stuck IRQ.
+ *
+ * FIXME Maybe we ought to set a timer and re-enable the IRQ at a later time?
+ */
+#define MAX_IRQ_CNT	100000
+
+static volatile unsigned long irq_err_count;
+static DEFINE_SPINLOCK(irq_controller_lock);
+
+struct irqdesc irq_desc[NR_IRQS];
+
+/*
+ * Dummy mask/unmask handler
+ */
+void dummy_mask_unmask_irq(unsigned int irq)
+{
+}
+
+void do_bad_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
+{
+	irq_err_count += 1;
+	printk(KERN_ERR "IRQ: spurious interrupt %d\n", irq);
+}
+
+static struct irqchip bad_chip = {
+	.ack	= dummy_mask_unmask_irq,
+	.mask	= dummy_mask_unmask_irq,
+	.unmask = dummy_mask_unmask_irq,
+};
+
+static struct irqdesc bad_irq_desc = {
+	.chip	= &bad_chip,
+	.handle = do_bad_IRQ,
+	.depth	= 1,
+};
+
+/**
+ *	disable_irq - disable an irq and wait for completion
+ *	@irq: Interrupt to disable
+ *
+ *	Disable the selected interrupt line.  We do this lazily.
+ *
+ *	This function may be called from IRQ context.
+ */
+void disable_irq(unsigned int irq)
+{
+	struct irqdesc *desc = irq_desc + irq;
+	unsigned long flags;
+	spin_lock_irqsave(&irq_controller_lock, flags);
+	if (!desc->depth++)
+		desc->enabled = 0;
+	spin_unlock_irqrestore(&irq_controller_lock, flags);
+}
+
+/**
+ *	enable_irq - enable interrupt handling on an irq
+ *	@irq: Interrupt to enable
+ *
+ *	Re-enables the processing of interrupts on this IRQ line.
+ *	Note that this may call the interrupt handler, so you may
+ *	get unexpected results if you hold IRQs disabled.
+ *
+ *	This function may be called from IRQ context.
+ */
+void enable_irq(unsigned int irq)
+{
+	struct irqdesc *desc = irq_desc + irq;
+	unsigned long flags;
+	int pending = 0;
+
+	spin_lock_irqsave(&irq_controller_lock, flags);
+	if (unlikely(!desc->depth)) {
+		printk("enable_irq(%u) unbalanced from %p\n", irq,
+			__builtin_return_address(0)); //FIXME bum addresses reported - why?
+	} else if (!--desc->depth) {
+		desc->probing = 0;
+		desc->enabled = 1;
+		desc->chip->unmask(irq);
+		pending = desc->pending;
+		desc->pending = 0;
+		/*
+		 * If the interrupt was waiting to be processed,
+		 * retrigger it.
+		 */
+		if (pending)
+			desc->chip->rerun(irq);
+	}
+	spin_unlock_irqrestore(&irq_controller_lock, flags);
+}
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+	int i = *(loff_t *) v;
+	struct irqaction * action;
+
+	if (i < NR_IRQS) {
+	    	action = irq_desc[i].action;
+		if (!action)
+			continue;
+		seq_printf(p, "%3d: %10u ", i, kstat_irqs(i));
+		seq_printf(p, "  %s", action->name);
+		for (action = action->next; action; action = action->next) {
+			seq_printf(p, ", %s", action->name);
+		}
+		seq_putc(p, '\n');
+	} else if (i == NR_IRQS) {
+		show_fiq_list(p, v);
+		seq_printf(p, "Err: %10lu\n", irq_err_count);
+	}
+	return 0;
+}
+
+/*
+ * IRQ lock detection.
+ *
+ * Hopefully, this should get us out of a few locked situations.
+ * However, it may take a while for this to happen, since we need
+ * a large number if IRQs to appear in the same jiffie with the
+ * same instruction pointer (or within 2 instructions).
+ */
+static int check_irq_lock(struct irqdesc *desc, int irq, struct pt_regs *regs)
+{
+	unsigned long instr_ptr = instruction_pointer(regs);
+
+	if (desc->lck_jif == jiffies &&
+	    desc->lck_pc >= instr_ptr && desc->lck_pc < instr_ptr + 8) {
+		desc->lck_cnt += 1;
+
+		if (desc->lck_cnt > MAX_IRQ_CNT) {
+			printk(KERN_ERR "IRQ LOCK: IRQ%d is locking the system, disabled\n", irq);
+			return 1;
+		}
+	} else {
+		desc->lck_cnt = 0;
+		desc->lck_pc  = instruction_pointer(regs);
+		desc->lck_jif = jiffies;
+	}
+	return 0;
+}
+
+static void
+__do_irq(unsigned int irq, struct irqaction *action, struct pt_regs *regs)
+{
+	unsigned int status;
+	int ret;
+
+	spin_unlock(&irq_controller_lock);
+	if (!(action->flags & SA_INTERRUPT))
+		local_irq_enable();
+
+	status = 0;
+	do {
+		ret = action->handler(irq, action->dev_id, regs);
+		if (ret == IRQ_HANDLED)
+			status |= action->flags;
+		action = action->next;
+	} while (action);
+
+	if (status & SA_SAMPLE_RANDOM)
+		add_interrupt_randomness(irq);
+
+	spin_lock_irq(&irq_controller_lock);
+}
+
+/*
+ * This is for software-decoded IRQs.  The caller is expected to
+ * handle the ack, clear, mask and unmask issues.
+ */
+void
+do_simple_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
+{
+	struct irqaction *action;
+	const int cpu = smp_processor_id();
+
+	desc->triggered = 1;
+
+	kstat_cpu(cpu).irqs[irq]++;
+
+	action = desc->action;
+	if (action)
+		__do_irq(irq, desc->action, regs);
+}
+
+/*
+ * Most edge-triggered IRQ implementations seem to take a broken
+ * approach to this.  Hence the complexity.
+ */
+void
+do_edge_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
+{
+	const int cpu = smp_processor_id();
+
+	desc->triggered = 1;
+
+	/*
+	 * If we're currently running this IRQ, or its disabled,
+	 * we shouldn't process the IRQ.  Instead, turn on the
+	 * hardware masks.
+	 */
+	if (unlikely(desc->running || !desc->enabled))
+		goto running;
+
+	/*
+	 * Acknowledge and clear the IRQ, but don't mask it.
+	 */
+	desc->chip->ack(irq);
+
+	/*
+	 * Mark the IRQ currently in progress.
+	 */
+	desc->running = 1;
+
+	kstat_cpu(cpu).irqs[irq]++;
+
+	do {
+		struct irqaction *action;
+
+		action = desc->action;
+		if (!action)
+			break;
+
+		if (desc->pending && desc->enabled) {
+			desc->pending = 0;
+			desc->chip->unmask(irq);
+		}
+
+		__do_irq(irq, action, regs);
+	} while (desc->pending);
+
+	desc->running = 0;
+
+	/*
+	 * If we were disabled or freed, shut down the handler.
+	 */
+	if (likely(desc->action && !check_irq_lock(desc, irq, regs)))
+		return;
+
+ running:
+	/*
+	 * We got another IRQ while this one was masked or
+	 * currently running.  Delay it.
+	 */
+	desc->pending = 1;
+	desc->chip->mask(irq);
+	desc->chip->ack(irq);
+}
+
+/*
+ * Level-based IRQ handler.  Nice and simple.
+ */
+void
+do_level_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
+{
+	struct irqaction *action;
+	const int cpu = smp_processor_id();
+
+	desc->triggered = 1;
+
+	/*
+	 * Acknowledge, clear _AND_ disable the interrupt.
+	 */
+	desc->chip->ack(irq);
+
+	if (likely(desc->enabled)) {
+		kstat_cpu(cpu).irqs[irq]++;
+
+		/*
+		 * Return with this interrupt masked if no action
+		 */
+		action = desc->action;
+		if (action) {
+			__do_irq(irq, desc->action, regs);
+
+			if (likely(desc->enabled &&
+				   !check_irq_lock(desc, irq, regs)))
+				desc->chip->unmask(irq);
+		}
+	}
+}
+
+/*
+ * do_IRQ handles all hardware IRQ's.  Decoded IRQs should not
+ * come via this function.  Instead, they should provide their
+ * own 'handler'
+ */
+asmlinkage void asm_do_IRQ(int irq, struct pt_regs *regs)
+{
+	struct irqdesc *desc = irq_desc + irq;
+
+	/*
+	 * Some hardware gives randomly wrong interrupts.  Rather
+	 * than crashing, do something sensible.
+	 */
+	if (irq >= NR_IRQS)
+		desc = &bad_irq_desc;
+
+	irq_enter();
+	spin_lock(&irq_controller_lock);
+	desc->handle(irq, desc, regs);
+	spin_unlock(&irq_controller_lock);
+	irq_exit();
+}
+
+void __set_irq_handler(unsigned int irq, irq_handler_t handle, int is_chained)
+{
+	struct irqdesc *desc;
+	unsigned long flags;
+
+	if (irq >= NR_IRQS) {
+		printk(KERN_ERR "Trying to install handler for IRQ%d\n", irq);
+		return;
+	}
+
+	if (handle == NULL)
+		handle = do_bad_IRQ;
+
+	desc = irq_desc + irq;
+
+	if (is_chained && desc->chip == &bad_chip)
+		printk(KERN_WARNING "Trying to install chained handler for IRQ%d\n", irq);
+
+	spin_lock_irqsave(&irq_controller_lock, flags);
+	if (handle == do_bad_IRQ) {
+		desc->chip->mask(irq);
+		desc->chip->ack(irq);
+		desc->depth = 1;
+		desc->enabled = 0;
+	}
+	desc->handle = handle;
+	if (handle != do_bad_IRQ && is_chained) {
+		desc->valid = 0;
+		desc->probe_ok = 0;
+		desc->depth = 0;
+		desc->chip->unmask(irq);
+	}
+	spin_unlock_irqrestore(&irq_controller_lock, flags);
+}
+
+void set_irq_chip(unsigned int irq, struct irqchip *chip)
+{
+	struct irqdesc *desc;
+	unsigned long flags;
+
+	if (irq >= NR_IRQS) {
+		printk(KERN_ERR "Trying to install chip for IRQ%d\n", irq);
+		return;
+	}
+
+	if (chip == NULL)
+		chip = &bad_chip;
+
+	desc = irq_desc + irq;
+	spin_lock_irqsave(&irq_controller_lock, flags);
+	desc->chip = chip;
+	spin_unlock_irqrestore(&irq_controller_lock, flags);
+}
+
+int set_irq_type(unsigned int irq, unsigned int type)
+{
+	struct irqdesc *desc;
+	unsigned long flags;
+	int ret = -ENXIO;
+
+	if (irq >= NR_IRQS) {
+		printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq);
+		return -ENODEV;
+	}
+
+	desc = irq_desc + irq;
+	if (desc->chip->type) {
+		spin_lock_irqsave(&irq_controller_lock, flags);
+		ret = desc->chip->type(irq, type);
+		spin_unlock_irqrestore(&irq_controller_lock, flags);
+	}
+
+	return ret;
+}
+
+void set_irq_flags(unsigned int irq, unsigned int iflags)
+{
+	struct irqdesc *desc;
+	unsigned long flags;
+
+	if (irq >= NR_IRQS) {
+		printk(KERN_ERR "Trying to set irq flags for IRQ%d\n", irq);
+		return;
+	}
+
+	desc = irq_desc + irq;
+	spin_lock_irqsave(&irq_controller_lock, flags);
+	desc->valid = (iflags & IRQF_VALID) != 0;
+	desc->probe_ok = (iflags & IRQF_PROBE) != 0;
+	desc->noautoenable = (iflags & IRQF_NOAUTOEN) != 0;
+	spin_unlock_irqrestore(&irq_controller_lock, flags);
+}
+
+int setup_irq(unsigned int irq, struct irqaction *new)
+{
+	int shared = 0;
+	struct irqaction *old, **p;
+	unsigned long flags;
+	struct irqdesc *desc;
+
+	/*
+	 * Some drivers like serial.c use request_irq() heavily,
+	 * so we have to be careful not to interfere with a
+	 * running system.
+	 */
+	if (new->flags & SA_SAMPLE_RANDOM) {
+		/*
+		 * This function might sleep, we want to call it first,
+		 * outside of the atomic block.
+		 * Yes, this might clear the entropy pool if the wrong
+		 * driver is attempted to be loaded, without actually
+		 * installing a new handler, but is this really a problem,
+		 * only the sysadmin is able to do this.
+		 */
+	        rand_initialize_irq(irq);
+	}
+
+	/*
+	 * The following block of code has to be executed atomically
+	 */
+	desc = irq_desc + irq;
+	spin_lock_irqsave(&irq_controller_lock, flags);
+	p = &desc->action;
+	if ((old = *p) != NULL) {
+		/* Can't share interrupts unless both agree to */
+		if (!(old->flags & new->flags & SA_SHIRQ)) {
+			spin_unlock_irqrestore(&irq_controller_lock, flags);
+			return -EBUSY;
+		}
+
+		/* add new interrupt at end of irq queue */
+		do {
+			p = &old->next;
+			old = *p;
+		} while (old);
+		shared = 1;
+	}
+
+	*p = new;
+
+	if (!shared) {
+ 		desc->probing = 0;
+		desc->running = 0;
+		desc->pending = 0;
+		desc->depth = 1;
+		if (!desc->noautoenable) {
+			desc->depth = 0;
+			desc->enabled = 1;
+			desc->chip->unmask(irq);
+		}
+	}
+
+	spin_unlock_irqrestore(&irq_controller_lock, flags);
+	return 0;
+}
+
+/**
+ *	request_irq - allocate an interrupt line
+ *	@irq: Interrupt line to allocate
+ *	@handler: Function to be called when the IRQ occurs
+ *	@irqflags: Interrupt type flags
+ *	@devname: An ascii name for the claiming device
+ *	@dev_id: A cookie passed back to the handler function
+ *
+ *	This call allocates interrupt resources and enables the
+ *	interrupt line and IRQ handling. From the point this
+ *	call is made your handler function may be invoked. Since
+ *	your handler function must clear any interrupt the board
+ *	raises, you must take care both to initialise your hardware
+ *	and to set up the interrupt handler in the right order.
+ *
+ *	Dev_id must be globally unique. Normally the address of the
+ *	device data structure is used as the cookie. Since the handler
+ *	receives this value it makes sense to use it.
+ *
+ *	If your interrupt is shared you must pass a non NULL dev_id
+ *	as this is required when freeing the interrupt.
+ *
+ *	Flags:
+ *
+ *	SA_SHIRQ		Interrupt is shared
+ *
+ *	SA_INTERRUPT		Disable local interrupts while processing
+ *
+ *	SA_SAMPLE_RANDOM	The interrupt can be used for entropy
+ *
+ */
+
+//FIXME - handler used to return void - whats the significance of the change?
+int request_irq(unsigned int irq, irqreturn_t (*handler)(int, void *, struct pt_regs *),
+		 unsigned long irq_flags, const char * devname, void *dev_id)
+{
+	unsigned long retval;
+	struct irqaction *action;
+
+	if (irq >= NR_IRQS || !irq_desc[irq].valid || !handler ||
+	    (irq_flags & SA_SHIRQ && !dev_id))
+		return -EINVAL;
+
+	action = (struct irqaction *)kmalloc(sizeof(struct irqaction), GFP_KERNEL);
+	if (!action)
+		return -ENOMEM;
+
+	action->handler = handler;
+	action->flags = irq_flags;
+	cpus_clear(action->mask);
+	action->name = devname;
+	action->next = NULL;
+	action->dev_id = dev_id;
+
+	retval = setup_irq(irq, action);
+
+	if (retval)
+		kfree(action);
+	return retval;
+}
+
+EXPORT_SYMBOL(request_irq);
+
+/**
+ *	free_irq - free an interrupt
+ *	@irq: Interrupt line to free
+ *	@dev_id: Device identity to free
+ *
+ *	Remove an interrupt handler. The handler is removed and if the
+ *	interrupt line is no longer in use by any driver it is disabled.
+ *	On a shared IRQ the caller must ensure the interrupt is disabled
+ *	on the card it drives before calling this function.
+ *
+ *	This function may be called from interrupt context.
+ */
+void free_irq(unsigned int irq, void *dev_id)
+{
+	struct irqaction * action, **p;
+	unsigned long flags;
+
+	if (irq >= NR_IRQS || !irq_desc[irq].valid) {
+		printk(KERN_ERR "Trying to free IRQ%d\n",irq);
+#ifdef CONFIG_DEBUG_ERRORS
+		__backtrace();
+#endif
+		return;
+	}
+
+	spin_lock_irqsave(&irq_controller_lock, flags);
+	for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) {
+		if (action->dev_id != dev_id)
+			continue;
+
+	    	/* Found it - now free it */
+		*p = action->next;
+		kfree(action);
+		goto out;
+	}
+	printk(KERN_ERR "Trying to free free IRQ%d\n",irq);
+#ifdef CONFIG_DEBUG_ERRORS
+	__backtrace();
+#endif
+out:
+	spin_unlock_irqrestore(&irq_controller_lock, flags);
+}
+
+EXPORT_SYMBOL(free_irq);
+
+/* Start the interrupt probing.  Unlike other architectures,
+ * we don't return a mask of interrupts from probe_irq_on,
+ * but return the number of interrupts enabled for the probe.
+ * The interrupts which have been enabled for probing is
+ * instead recorded in the irq_desc structure.
+ */
+unsigned long probe_irq_on(void)
+{
+	unsigned int i, irqs = 0;
+	unsigned long delay;
+
+	/*
+	 * first snaffle up any unassigned but
+	 * probe-able interrupts
+	 */
+	spin_lock_irq(&irq_controller_lock);
+	for (i = 0; i < NR_IRQS; i++) {
+		if (!irq_desc[i].probe_ok || irq_desc[i].action)
+			continue;
+
+		irq_desc[i].probing = 1;
+		irq_desc[i].triggered = 0;
+		if (irq_desc[i].chip->type)
+			irq_desc[i].chip->type(i, IRQT_PROBE);
+		irq_desc[i].chip->unmask(i);
+		irqs += 1;
+	}
+	spin_unlock_irq(&irq_controller_lock);
+
+	/*
+	 * wait for spurious interrupts to mask themselves out again
+	 */
+	for (delay = jiffies + HZ/10; time_before(jiffies, delay); )
+		/* min 100ms delay */;
+
+	/*
+	 * now filter out any obviously spurious interrupts
+	 */
+	spin_lock_irq(&irq_controller_lock);
+	for (i = 0; i < NR_IRQS; i++) {
+		if (irq_desc[i].probing && irq_desc[i].triggered) {
+			irq_desc[i].probing = 0;
+			irqs -= 1;
+		}
+	}
+	spin_unlock_irq(&irq_controller_lock);
+
+	return irqs;
+}
+
+EXPORT_SYMBOL(probe_irq_on);
+
+/*
+ * Possible return values:
+ *  >= 0 - interrupt number
+ *    -1 - no interrupt/many interrupts
+ */
+int probe_irq_off(unsigned long irqs)
+{
+	unsigned int i;
+	int irq_found = NO_IRQ;
+
+	/*
+	 * look at the interrupts, and find exactly one
+	 * that we were probing has been triggered
+	 */
+	spin_lock_irq(&irq_controller_lock);
+	for (i = 0; i < NR_IRQS; i++) {
+		if (irq_desc[i].probing &&
+		    irq_desc[i].triggered) {
+			if (irq_found != NO_IRQ) {
+				irq_found = NO_IRQ;
+				goto out;
+			}
+			irq_found = i;
+		}
+	}
+
+	if (irq_found == -1)
+		irq_found = NO_IRQ;
+out:
+	spin_unlock_irq(&irq_controller_lock);
+
+	return irq_found;
+}
+
+EXPORT_SYMBOL(probe_irq_off);
+
+void __init init_irq_proc(void)
+{
+}
+
+void __init init_IRQ(void)
+{
+	struct irqdesc *desc;
+	extern void init_dma(void);
+	int irq;
+
+	for (irq = 0, desc = irq_desc; irq < NR_IRQS; irq++, desc++)
+		*desc = bad_irq_desc;
+
+	arc_init_irq();
+	init_dma();
+}
diff --git a/arch/arm26/kernel/process.c b/arch/arm26/kernel/process.c
new file mode 100644
index 00000000000..46aea6ac194
--- /dev/null
+++ b/arch/arm26/kernel/process.c
@@ -0,0 +1,401 @@
+/*
+ *  linux/arch/arm26/kernel/process.c
+ *
+ *  Copyright (C) 2003 Ian Molton - adapted for ARM26
+ *  Copyright (C) 1996-2000 Russell King - Converted to ARM.
+ *  Origional Copyright (C) 1995  Linus Torvalds
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <stdarg.h>
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/leds.h>
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+
+extern const char *processor_modes[];
+extern void setup_mm_for_reboot(char mode);
+
+static volatile int hlt_counter;
+
+void disable_hlt(void)
+{
+	hlt_counter++;
+}
+
+EXPORT_SYMBOL(disable_hlt);
+
+void enable_hlt(void)
+{
+	hlt_counter--;
+}
+
+EXPORT_SYMBOL(enable_hlt);
+
+static int __init nohlt_setup(char *__unused)
+{
+	hlt_counter = 1;
+	return 1;
+}
+
+static int __init hlt_setup(char *__unused)
+{
+	hlt_counter = 0;
+	return 1;
+}
+
+__setup("nohlt", nohlt_setup);
+__setup("hlt", hlt_setup);
+
+/*
+ * This is our default idle handler.  We need to disable
+ * interrupts here to ensure we don't miss a wakeup call.
+ */
+void cpu_idle(void)
+{
+	/* endless idle loop with no priority at all */
+	preempt_disable();
+	while (1) {
+		while (!need_resched()) {
+			local_irq_disable();
+			if (!need_resched() && !hlt_counter)
+				local_irq_enable();
+		}
+	}
+	schedule();
+}
+
+static char reboot_mode = 'h';
+
+int __init reboot_setup(char *str)
+{
+	reboot_mode = str[0];
+	return 1;
+}
+
+__setup("reboot=", reboot_setup);
+
+/* ARM26 cant do these but we still need to define them. */
+void machine_halt(void)
+{
+}
+void machine_power_off(void)
+{
+}
+
+EXPORT_SYMBOL(machine_halt);
+EXPORT_SYMBOL(machine_power_off);
+
+void machine_restart(char * __unused)
+{
+	/*
+	 * Clean and disable cache, and turn off interrupts
+	 */
+	cpu_proc_fin();
+
+	/*
+	 * Tell the mm system that we are going to reboot -
+	 * we may need it to insert some 1:1 mappings so that
+	 * soft boot works.
+	 */
+	setup_mm_for_reboot(reboot_mode);
+
+	/*
+         * copy branch instruction to reset location and call it
+         */
+
+        *(unsigned long *)0 = *(unsigned long *)0x03800000;
+        ((void(*)(void))0)();
+
+	/*
+	 * Whoops - the architecture was unable to reboot.
+	 * Tell the user! Should never happen...
+	 */
+	mdelay(1000);
+	printk("Reboot failed -- System halted\n");
+	while (1);
+}
+
+EXPORT_SYMBOL(machine_restart);
+
+void show_regs(struct pt_regs * regs)
+{
+	unsigned long flags;
+
+	flags = condition_codes(regs);
+
+	printk("pc : [<%08lx>]    lr : [<%08lx>]    %s\n"
+	       "sp : %08lx  ip : %08lx  fp : %08lx\n",
+		instruction_pointer(regs),
+		regs->ARM_lr, print_tainted(), regs->ARM_sp,
+		regs->ARM_ip, regs->ARM_fp);
+	printk("r10: %08lx  r9 : %08lx  r8 : %08lx\n",
+		regs->ARM_r10, regs->ARM_r9,
+		regs->ARM_r8);
+	printk("r7 : %08lx  r6 : %08lx  r5 : %08lx  r4 : %08lx\n",
+		regs->ARM_r7, regs->ARM_r6,
+		regs->ARM_r5, regs->ARM_r4);
+	printk("r3 : %08lx  r2 : %08lx  r1 : %08lx  r0 : %08lx\n",
+		regs->ARM_r3, regs->ARM_r2,
+		regs->ARM_r1, regs->ARM_r0);
+	printk("Flags: %c%c%c%c",
+		flags & PSR_N_BIT ? 'N' : 'n',
+		flags & PSR_Z_BIT ? 'Z' : 'z',
+		flags & PSR_C_BIT ? 'C' : 'c',
+		flags & PSR_V_BIT ? 'V' : 'v');
+	printk("  IRQs o%s  FIQs o%s  Mode %s  Segment %s\n",
+		interrupts_enabled(regs) ? "n" : "ff",
+		fast_interrupts_enabled(regs) ? "n" : "ff",
+		processor_modes[processor_mode(regs)],
+		get_fs() == get_ds() ? "kernel" : "user");
+}
+
+void show_fpregs(struct user_fp *regs)
+{
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		unsigned long *p;
+		char type;
+
+		p = (unsigned long *)(regs->fpregs + i);
+
+		switch (regs->ftype[i]) {
+			case 1: type = 'f'; break;
+			case 2: type = 'd'; break;
+			case 3: type = 'e'; break;
+			default: type = '?'; break;
+		}
+		if (regs->init_flag)
+			type = '?';
+
+		printk("  f%d(%c): %08lx %08lx %08lx%c",
+			i, type, p[0], p[1], p[2], i & 1 ? '\n' : ' ');
+	}
+			
+
+	printk("FPSR: %08lx FPCR: %08lx\n",
+		(unsigned long)regs->fpsr,
+		(unsigned long)regs->fpcr);
+}
+
+/*
+ * Task structure and kernel stack allocation.
+ */
+static unsigned long *thread_info_head;
+static unsigned int nr_thread_info;
+
+extern unsigned long get_page_8k(int priority);
+extern void free_page_8k(unsigned long page);
+
+// FIXME - is this valid?
+#define EXTRA_TASK_STRUCT	0
+#define ll_alloc_task_struct()	((struct thread_info *)get_page_8k(GFP_KERNEL))
+#define ll_free_task_struct(p)  free_page_8k((unsigned long)(p))
+
+//FIXME - do we use *task param below looks like we dont, which is ok?
+//FIXME - if EXTRA_TASK_STRUCT is zero we can optimise the below away permanently. *IF* its supposed to be zero.
+struct thread_info *alloc_thread_info(struct task_struct *task)
+{
+	struct thread_info *thread = NULL;
+
+	if (EXTRA_TASK_STRUCT) {
+		unsigned long *p = thread_info_head;
+
+		if (p) {
+			thread_info_head = (unsigned long *)p[0];
+			nr_thread_info -= 1;
+		}
+		thread = (struct thread_info *)p;
+	}
+
+	if (!thread)
+		thread = ll_alloc_task_struct();
+
+#ifdef CONFIG_MAGIC_SYSRQ
+	/*
+	 * The stack must be cleared if you want SYSRQ-T to
+	 * give sensible stack usage information
+	 */
+	if (thread) {
+		char *p = (char *)thread;
+		memzero(p+KERNEL_STACK_SIZE, KERNEL_STACK_SIZE);
+	}
+#endif
+	return thread;
+}
+
+void free_thread_info(struct thread_info *thread)
+{
+	if (EXTRA_TASK_STRUCT && nr_thread_info < EXTRA_TASK_STRUCT) {
+		unsigned long *p = (unsigned long *)thread;
+		p[0] = (unsigned long)thread_info_head;
+		thread_info_head = p;
+		nr_thread_info += 1;
+	} else
+		ll_free_task_struct(thread);
+}
+
+/*
+ * Free current thread data structures etc..
+ */
+void exit_thread(void)
+{
+}
+
+void flush_thread(void)
+{
+	struct thread_info *thread = current_thread_info();
+	struct task_struct *tsk = current;
+
+	memset(&tsk->thread.debug, 0, sizeof(struct debug_info));
+	memset(&thread->fpstate, 0, sizeof(union fp_state));
+
+	clear_used_math();
+}
+
+void release_thread(struct task_struct *dead_task)
+{
+}
+
+asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+
+int
+copy_thread(int nr, unsigned long clone_flags, unsigned long stack_start,
+	    unsigned long unused, struct task_struct *p, struct pt_regs *regs)
+{
+	struct thread_info *thread = p->thread_info;
+	struct pt_regs *childregs;
+
+	childregs = __get_user_regs(thread);
+	*childregs = *regs;
+	childregs->ARM_r0 = 0;
+	childregs->ARM_sp = stack_start;
+
+	memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save));
+	thread->cpu_context.sp = (unsigned long)childregs;
+	thread->cpu_context.pc = (unsigned long)ret_from_fork | MODE_SVC26 | PSR_I_BIT;
+
+	return 0;
+}
+
+/*
+ * fill in the fpe structure for a core dump...
+ */
+int dump_fpu (struct pt_regs *regs, struct user_fp *fp)
+{
+	struct thread_info *thread = current_thread_info();
+	int used_math = !!used_math();
+
+	if (used_math)
+		memcpy(fp, &thread->fpstate.soft, sizeof (*fp));
+
+	return used_math;
+}
+
+/*
+ * fill in the user structure for a core dump..
+ */
+void dump_thread(struct pt_regs * regs, struct user * dump)
+{
+	struct task_struct *tsk = current;
+
+	dump->magic = CMAGIC;
+	dump->start_code = tsk->mm->start_code;
+	dump->start_stack = regs->ARM_sp & ~(PAGE_SIZE - 1);
+
+	dump->u_tsize = (tsk->mm->end_code - tsk->mm->start_code) >> PAGE_SHIFT;
+	dump->u_dsize = (tsk->mm->brk - tsk->mm->start_data + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	dump->u_ssize = 0;
+
+	dump->u_debugreg[0] = tsk->thread.debug.bp[0].address;
+	dump->u_debugreg[1] = tsk->thread.debug.bp[1].address;
+	dump->u_debugreg[2] = tsk->thread.debug.bp[0].insn;
+	dump->u_debugreg[3] = tsk->thread.debug.bp[1].insn;
+	dump->u_debugreg[4] = tsk->thread.debug.nsaved;
+
+	if (dump->start_stack < 0x04000000)
+		dump->u_ssize = (0x04000000 - dump->start_stack) >> PAGE_SHIFT;
+
+	dump->regs = *regs;
+	dump->u_fpvalid = dump_fpu (regs, &dump->u_fp);
+}
+
+/*
+ * Shuffle the argument into the correct register before calling the
+ * thread function.  r1 is the thread argument, r2 is the pointer to
+ * the thread function, and r3 points to the exit function.
+ * FIXME - make sure this is right - the older code used to zero fp
+ * and cause the parent to call sys_exit (do_exit in this version)
+ */
+extern void kernel_thread_helper(void);
+
+asm(    ".section .text\n"
+"       .align\n"
+"       .type   kernel_thread_helper, #function\n"
+"kernel_thread_helper:\n"
+"       mov     r0, r1\n"
+"       mov     lr, r3\n"
+"       mov     pc, r2\n"
+"       .size   kernel_thread_helper, . - kernel_thread_helper\n"
+"       .previous");
+
+/*
+ * Create a kernel thread.
+ */
+pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
+{
+        struct pt_regs regs;
+
+        memset(&regs, 0, sizeof(regs));
+
+        regs.ARM_r1 = (unsigned long)arg;
+        regs.ARM_r2 = (unsigned long)fn;
+        regs.ARM_r3 = (unsigned long)do_exit;
+        regs.ARM_pc = (unsigned long)kernel_thread_helper | MODE_SVC26;
+
+        return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
+}
+EXPORT_SYMBOL(kernel_thread);
+
+
+unsigned long get_wchan(struct task_struct *p)
+{
+	unsigned long fp, lr;
+	unsigned long stack_page;
+	int count = 0;
+	if (!p || p == current || p->state == TASK_RUNNING)
+		return 0;
+
+	stack_page = 4096 + (unsigned long)p;
+	fp = thread_saved_fp(p);
+	do {
+		if (fp < stack_page || fp > 4092+stack_page)
+			return 0;
+		lr = pc_pointer (((unsigned long *)fp)[-1]);
+		if (!in_sched_functions(lr))
+			return lr;
+		fp = *(unsigned long *) (fp - 12);
+	} while (count ++ < 16);
+	return 0;
+}
diff --git a/arch/arm26/kernel/ptrace.c b/arch/arm26/kernel/ptrace.c
new file mode 100644
index 00000000000..2a137146a77
--- /dev/null
+++ b/arch/arm26/kernel/ptrace.c
@@ -0,0 +1,744 @@
+/*
+ *  linux/arch/arm26/kernel/ptrace.c
+ *
+ *  By Ross Biro 1/23/92
+ * edited by Linus Torvalds
+ * ARM modifications Copyright (C) 2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/security.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+//#include <asm/processor.h>
+
+#include "ptrace.h"
+
+#define REG_PC	15
+#define REG_PSR 15
+/*
+ * does not yet catch signals sent when the child dies.
+ * in exit.c or in signal.c.
+ */
+
+/*
+ * Breakpoint SWI instruction: SWI &9F0001
+ */
+#define BREAKINST_ARM	0xef9f0001
+
+/*
+ * Get the address of the live pt_regs for the specified task.
+ * These are saved onto the top kernel stack when the process
+ * is not running.
+ *
+ * Note: if a user thread is execve'd from kernel space, the
+ * kernel stack will not be empty on entry to the kernel, so
+ * ptracing these tasks will fail.
+ */
+static inline struct pt_regs *
+get_user_regs(struct task_struct *task)
+{
+	return __get_user_regs(task->thread_info);
+}
+
+/*
+ * this routine will get a word off of the processes privileged stack.
+ * the offset is how far from the base addr as stored in the THREAD.
+ * this routine assumes that all the privileged stacks are in our
+ * data space.
+ */
+static inline long get_user_reg(struct task_struct *task, int offset)
+{
+	return get_user_regs(task)->uregs[offset];
+}
+
+/*
+ * this routine will put a word on the processes privileged stack.
+ * the offset is how far from the base addr as stored in the THREAD.
+ * this routine assumes that all the privileged stacks are in our
+ * data space.
+ */
+static inline int
+put_user_reg(struct task_struct *task, int offset, long data)
+{
+	struct pt_regs newregs, *regs = get_user_regs(task);
+	int ret = -EINVAL;
+
+	newregs = *regs;
+	newregs.uregs[offset] = data;
+
+	if (valid_user_regs(&newregs)) {
+		regs->uregs[offset] = data;
+		ret = 0;
+	}
+
+	return ret;
+}
+
+static inline int
+read_u32(struct task_struct *task, unsigned long addr, u32 *res)
+{
+	int ret;
+
+	ret = access_process_vm(task, addr, res, sizeof(*res), 0);
+
+	return ret == sizeof(*res) ? 0 : -EIO;
+}
+
+static inline int
+read_instr(struct task_struct *task, unsigned long addr, u32 *res)
+{
+	int ret;
+	u32 val;
+	ret = access_process_vm(task, addr & ~3, &val, sizeof(val), 0);
+	ret = ret == sizeof(val) ? 0 : -EIO;
+	*res = val;
+	return ret;
+}
+
+/*
+ * Get value of register `rn' (in the instruction)
+ */
+static unsigned long
+ptrace_getrn(struct task_struct *child, unsigned long insn)
+{
+	unsigned int reg = (insn >> 16) & 15;
+	unsigned long val;
+
+	val = get_user_reg(child, reg);
+	if (reg == 15)
+		val = pc_pointer(val + 8); //FIXME - correct for arm26?
+
+	return val;
+}
+
+/*
+ * Get value of operand 2 (in an ALU instruction)
+ */
+static unsigned long
+ptrace_getaluop2(struct task_struct *child, unsigned long insn)
+{
+	unsigned long val;
+	int shift;
+	int type;
+
+	if (insn & 1 << 25) {
+		val = insn & 255;
+		shift = (insn >> 8) & 15;
+		type = 3;
+	} else {
+		val = get_user_reg (child, insn & 15);
+
+		if (insn & (1 << 4))
+			shift = (int)get_user_reg (child, (insn >> 8) & 15);
+		else
+			shift = (insn >> 7) & 31;
+
+		type = (insn >> 5) & 3;
+	}
+
+	switch (type) {
+	case 0:	val <<= shift;	break;
+	case 1:	val >>= shift;	break;
+	case 2:
+		val = (((signed long)val) >> shift);
+		break;
+	case 3:
+ 		val = (val >> shift) | (val << (32 - shift));
+		break;
+	}
+	return val;
+}
+
+/*
+ * Get value of operand 2 (in a LDR instruction)
+ */
+static unsigned long
+ptrace_getldrop2(struct task_struct *child, unsigned long insn)
+{
+	unsigned long val;
+	int shift;
+	int type;
+
+	val = get_user_reg(child, insn & 15);
+	shift = (insn >> 7) & 31;
+	type = (insn >> 5) & 3;
+
+	switch (type) {
+	case 0:	val <<= shift;	break;
+	case 1:	val >>= shift;	break;
+	case 2:
+		val = (((signed long)val) >> shift);
+		break;
+	case 3:
+ 		val = (val >> shift) | (val << (32 - shift));
+		break;
+	}
+	return val;
+}
+
+#define OP_MASK	0x01e00000
+#define OP_AND	0x00000000
+#define OP_EOR	0x00200000
+#define OP_SUB	0x00400000
+#define OP_RSB	0x00600000
+#define OP_ADD	0x00800000
+#define OP_ADC	0x00a00000
+#define OP_SBC	0x00c00000
+#define OP_RSC	0x00e00000
+#define OP_ORR	0x01800000
+#define OP_MOV	0x01a00000
+#define OP_BIC	0x01c00000
+#define OP_MVN	0x01e00000
+
+static unsigned long
+get_branch_address(struct task_struct *child, unsigned long pc, unsigned long insn)
+{
+	u32 alt = 0;
+
+	switch (insn & 0x0e000000) {
+	case 0x00000000:
+	case 0x02000000: {
+		/*
+		 * data processing
+		 */
+		long aluop1, aluop2, ccbit;
+
+		if ((insn & 0xf000) != 0xf000)
+			break;
+
+		aluop1 = ptrace_getrn(child, insn);
+		aluop2 = ptrace_getaluop2(child, insn);
+		ccbit  = get_user_reg(child, REG_PSR) & PSR_C_BIT ? 1 : 0;
+
+		switch (insn & OP_MASK) {
+		case OP_AND: alt = aluop1 & aluop2;		break;
+		case OP_EOR: alt = aluop1 ^ aluop2;		break;
+		case OP_SUB: alt = aluop1 - aluop2;		break;
+		case OP_RSB: alt = aluop2 - aluop1;		break;
+		case OP_ADD: alt = aluop1 + aluop2;		break;
+		case OP_ADC: alt = aluop1 + aluop2 + ccbit;	break;
+		case OP_SBC: alt = aluop1 - aluop2 + ccbit;	break;
+		case OP_RSC: alt = aluop2 - aluop1 + ccbit;	break;
+		case OP_ORR: alt = aluop1 | aluop2;		break;
+		case OP_MOV: alt = aluop2;			break;
+		case OP_BIC: alt = aluop1 & ~aluop2;		break;
+		case OP_MVN: alt = ~aluop2;			break;
+		}
+		break;
+	}
+
+	case 0x04000000:
+	case 0x06000000:
+		/*
+		 * ldr
+		 */
+		if ((insn & 0x0010f000) == 0x0010f000) {
+			unsigned long base;
+
+			base = ptrace_getrn(child, insn);
+			if (insn & 1 << 24) {
+				long aluop2;
+
+				if (insn & 0x02000000)
+					aluop2 = ptrace_getldrop2(child, insn);
+				else
+					aluop2 = insn & 0xfff;
+
+				if (insn & 1 << 23)
+					base += aluop2;
+				else
+					base -= aluop2;
+			}
+			if (read_u32(child, base, &alt) == 0)
+				alt = pc_pointer(alt);
+		}
+		break;
+
+	case 0x08000000:
+		/*
+		 * ldm
+		 */
+		if ((insn & 0x00108000) == 0x00108000) {
+			unsigned long base;
+			unsigned int nr_regs;
+
+			if (insn & (1 << 23)) {
+				nr_regs = hweight16(insn & 65535) << 2;
+
+				if (!(insn & (1 << 24)))
+					nr_regs -= 4;
+			} else {
+				if (insn & (1 << 24))
+					nr_regs = -4;
+				else
+					nr_regs = 0;
+			}
+
+			base = ptrace_getrn(child, insn);
+
+			if (read_u32(child, base + nr_regs, &alt) == 0)
+				alt = pc_pointer(alt);
+			break;
+		}
+		break;
+
+	case 0x0a000000: {
+		/*
+		 * bl or b
+		 */
+		signed long displ;
+		/* It's a branch/branch link: instead of trying to
+		 * figure out whether the branch will be taken or not,
+		 * we'll put a breakpoint at both locations.  This is
+		 * simpler, more reliable, and probably not a whole lot
+		 * slower than the alternative approach of emulating the
+		 * branch.
+		 */
+		displ = (insn & 0x00ffffff) << 8;
+		displ = (displ >> 6) + 8;
+		if (displ != 0 && displ != 4)
+			alt = pc + displ;
+	    }
+	    break;
+	}
+
+	return alt;
+}
+
+static int
+swap_insn(struct task_struct *task, unsigned long addr,
+	  void *old_insn, void *new_insn, int size)
+{
+	int ret;
+
+	ret = access_process_vm(task, addr, old_insn, size, 0);
+	if (ret == size)
+		ret = access_process_vm(task, addr, new_insn, size, 1);
+	return ret;
+}
+
+static void
+add_breakpoint(struct task_struct *task, struct debug_info *dbg, unsigned long addr)
+{
+	int nr = dbg->nsaved;
+
+	if (nr < 2) {
+		u32 new_insn = BREAKINST_ARM;
+		int res;
+
+		res = swap_insn(task, addr, &dbg->bp[nr].insn, &new_insn, 4);
+
+		if (res == 4) {
+			dbg->bp[nr].address = addr;
+			dbg->nsaved += 1;
+		}
+	} else
+		printk(KERN_ERR "ptrace: too many breakpoints\n");
+}
+
+/*
+ * Clear one breakpoint in the user program.  We copy what the hardware
+ * does and use bit 0 of the address to indicate whether this is a Thumb
+ * breakpoint or an ARM breakpoint.
+ */
+static void clear_breakpoint(struct task_struct *task, struct debug_entry *bp)
+{
+	unsigned long addr = bp->address;
+	u32 old_insn;
+	int ret;
+
+	ret = swap_insn(task, addr & ~3, &old_insn,
+			&bp->insn, 4);
+
+	if (ret != 4 || old_insn != BREAKINST_ARM)
+		printk(KERN_ERR "%s:%d: corrupted ARM breakpoint at "
+			"0x%08lx (0x%08x)\n", task->comm, task->pid,
+			addr, old_insn);
+}
+
+void ptrace_set_bpt(struct task_struct *child)
+{
+	struct pt_regs *regs;
+	unsigned long pc;
+	u32 insn;
+	int res;
+
+	regs = get_user_regs(child);
+	pc = instruction_pointer(regs);
+
+	res = read_instr(child, pc, &insn);
+	if (!res) {
+		struct debug_info *dbg = &child->thread.debug;
+		unsigned long alt;
+
+		dbg->nsaved = 0;
+
+		alt = get_branch_address(child, pc, insn);
+		if (alt)
+			add_breakpoint(child, dbg, alt);
+
+		/*
+		 * Note that we ignore the result of setting the above
+		 * breakpoint since it may fail.  When it does, this is
+		 * not so much an error, but a forewarning that we may
+		 * be receiving a prefetch abort shortly.
+		 *
+		 * If we don't set this breakpoint here, then we can
+		 * lose control of the thread during single stepping.
+		 */
+		if (!alt || predicate(insn) != PREDICATE_ALWAYS)
+			add_breakpoint(child, dbg, pc + 4);
+	}
+}
+
+/*
+ * Ensure no single-step breakpoint is pending.  Returns non-zero
+ * value if child was being single-stepped.
+ */
+void ptrace_cancel_bpt(struct task_struct *child)
+{
+	int i, nsaved = child->thread.debug.nsaved;
+
+	child->thread.debug.nsaved = 0;
+
+	if (nsaved > 2) {
+		printk("ptrace_cancel_bpt: bogus nsaved: %d!\n", nsaved);
+		nsaved = 2;
+	}
+
+	for (i = 0; i < nsaved; i++)
+		clear_breakpoint(child, &child->thread.debug.bp[i]);
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure the single step bit is not set.
+ */
+void ptrace_disable(struct task_struct *child)
+{
+	child->ptrace &= ~PT_SINGLESTEP;
+	ptrace_cancel_bpt(child);
+}
+
+/*
+ * Handle hitting a breakpoint.
+ */
+void ptrace_break(struct task_struct *tsk, struct pt_regs *regs)
+{
+	siginfo_t info;
+
+	/*
+	 * The PC is always left pointing at the next instruction.  Fix this.
+	 */
+	regs->ARM_pc -= 4;
+
+	if (tsk->thread.debug.nsaved == 0)
+		printk(KERN_ERR "ptrace: bogus breakpoint trap\n");
+
+	ptrace_cancel_bpt(tsk);
+
+	info.si_signo = SIGTRAP;
+	info.si_errno = 0;
+	info.si_code  = TRAP_BRKPT;
+	info.si_addr  = (void *)instruction_pointer(regs) - 4;
+
+	force_sig_info(SIGTRAP, &info, tsk);
+}
+
+/*
+ * Read the word at offset "off" into the "struct user".  We
+ * actually access the pt_regs stored on the kernel stack.
+ */
+static int ptrace_read_user(struct task_struct *tsk, unsigned long off,
+			    unsigned long *ret)
+{
+	unsigned long tmp;
+
+	if (off & 3 || off >= sizeof(struct user))
+		return -EIO;
+
+	tmp = 0;
+	if (off < sizeof(struct pt_regs))
+		tmp = get_user_reg(tsk, off >> 2);
+
+	return put_user(tmp, ret);
+}
+
+/*
+ * Write the word at offset "off" into "struct user".  We
+ * actually access the pt_regs stored on the kernel stack.
+ */
+static int ptrace_write_user(struct task_struct *tsk, unsigned long off,
+			     unsigned long val)
+{
+	if (off & 3 || off >= sizeof(struct user))
+		return -EIO;
+
+	if (off >= sizeof(struct pt_regs))
+		return 0;
+
+	return put_user_reg(tsk, off >> 2, val);
+}
+
+/*
+ * Get all user integer registers.
+ */
+static int ptrace_getregs(struct task_struct *tsk, void *uregs)
+{
+	struct pt_regs *regs = get_user_regs(tsk);
+
+	return copy_to_user(uregs, regs, sizeof(struct pt_regs)) ? -EFAULT : 0;
+}
+
+/*
+ * Set all user integer registers.
+ */
+static int ptrace_setregs(struct task_struct *tsk, void *uregs)
+{
+	struct pt_regs newregs;
+	int ret;
+
+	ret = -EFAULT;
+	if (copy_from_user(&newregs, uregs, sizeof(struct pt_regs)) == 0) {
+		struct pt_regs *regs = get_user_regs(tsk);
+
+		ret = -EINVAL;
+		if (valid_user_regs(&newregs)) {
+			*regs = newregs;
+			ret = 0;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Get the child FPU state.
+ */
+static int ptrace_getfpregs(struct task_struct *tsk, void *ufp)
+{
+	return copy_to_user(ufp, &tsk->thread_info->fpstate,
+			    sizeof(struct user_fp)) ? -EFAULT : 0;
+}
+
+/*
+ * Set the child FPU state.
+ */
+static int ptrace_setfpregs(struct task_struct *tsk, void *ufp)
+{
+	set_stopped_child_used_math(tsk);
+	return copy_from_user(&tsk->thread_info->fpstate, ufp,
+			      sizeof(struct user_fp)) ? -EFAULT : 0;
+}
+
+static int do_ptrace(int request, struct task_struct *child, long addr, long data)
+{
+	unsigned long tmp;
+	int ret;
+
+	switch (request) {
+		/*
+		 * read word at location "addr" in the child process.
+		 */
+		case PTRACE_PEEKTEXT:
+		case PTRACE_PEEKDATA:
+			ret = access_process_vm(child, addr, &tmp,
+						sizeof(unsigned long), 0);
+			if (ret == sizeof(unsigned long))
+				ret = put_user(tmp, (unsigned long *) data);
+			else
+				ret = -EIO;
+			break;
+
+		case PTRACE_PEEKUSR:
+			ret = ptrace_read_user(child, addr, (unsigned long *)data);
+			break;
+
+		/*
+		 * write the word at location addr.
+		 */
+		case PTRACE_POKETEXT:
+		case PTRACE_POKEDATA:
+			ret = access_process_vm(child, addr, &data,
+						sizeof(unsigned long), 1);
+			if (ret == sizeof(unsigned long))
+				ret = 0;
+			else
+				ret = -EIO;
+			break;
+
+		case PTRACE_POKEUSR:
+			ret = ptrace_write_user(child, addr, data);
+			break;
+
+		/*
+		 * continue/restart and stop at next (return from) syscall
+		 */
+		case PTRACE_SYSCALL:
+		case PTRACE_CONT:
+			ret = -EIO;
+			if ((unsigned long) data > _NSIG)
+				break;
+			if (request == PTRACE_SYSCALL)
+				set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+			else
+				clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+			child->exit_code = data;
+			/* make sure single-step breakpoint is gone. */
+			child->ptrace &= ~PT_SINGLESTEP;
+			ptrace_cancel_bpt(child);
+			wake_up_process(child);
+			ret = 0;
+			break;
+
+		/*
+		 * make the child exit.  Best I can do is send it a sigkill.
+		 * perhaps it should be put in the status that it wants to
+		 * exit.
+		 */
+		case PTRACE_KILL:
+			/* make sure single-step breakpoint is gone. */
+			child->ptrace &= ~PT_SINGLESTEP;
+			ptrace_cancel_bpt(child);
+			if (child->exit_state != EXIT_ZOMBIE) {
+				child->exit_code = SIGKILL;
+				wake_up_process(child);
+			}
+			ret = 0;
+			break;
+
+		/*
+		 * execute single instruction.
+		 */
+		case PTRACE_SINGLESTEP:
+			ret = -EIO;
+			if ((unsigned long) data > _NSIG)
+				break;
+			child->ptrace |= PT_SINGLESTEP;
+			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+			child->exit_code = data;
+			/* give it a chance to run. */
+			wake_up_process(child);
+			ret = 0;
+			break;
+
+		case PTRACE_DETACH:
+			ret = ptrace_detach(child, data);
+			break;
+
+		case PTRACE_GETREGS:
+			ret = ptrace_getregs(child, (void *)data);
+			break;
+
+		case PTRACE_SETREGS:
+			ret = ptrace_setregs(child, (void *)data);
+			break;
+
+		case PTRACE_GETFPREGS:
+			ret = ptrace_getfpregs(child, (void *)data);
+			break;
+		
+		case PTRACE_SETFPREGS:
+			ret = ptrace_setfpregs(child, (void *)data);
+			break;
+
+		default:
+			ret = ptrace_request(child, request, addr, data);
+			break;
+	}
+
+	return ret;
+}
+
+asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
+{
+	struct task_struct *child;
+	int ret;
+
+	lock_kernel();
+	ret = -EPERM;
+	if (request == PTRACE_TRACEME) {
+		/* are we already being traced? */
+		if (current->ptrace & PT_PTRACED)
+			goto out;
+		ret = security_ptrace(current->parent, current);
+		if (ret)
+			goto out;
+		/* set the ptrace bit in the process flags. */
+		current->ptrace |= PT_PTRACED;
+		ret = 0;
+		goto out;
+	}
+	ret = -ESRCH;
+	read_lock(&tasklist_lock);
+	child = find_task_by_pid(pid);
+	if (child)
+		get_task_struct(child);
+	read_unlock(&tasklist_lock);
+	if (!child)
+		goto out;
+
+	ret = -EPERM;
+	if (pid == 1)		/* you may not mess with init */
+		goto out_tsk;
+
+	if (request == PTRACE_ATTACH) {
+		ret = ptrace_attach(child);
+		goto out_tsk;
+	}
+	ret = ptrace_check_attach(child, request == PTRACE_KILL);
+	if (ret == 0)
+		ret = do_ptrace(request, child, addr, data);
+
+out_tsk:
+	put_task_struct(child);
+out:
+	unlock_kernel();
+	return ret;
+}
+
+asmlinkage void syscall_trace(int why, struct pt_regs *regs)
+{
+	unsigned long ip;
+
+	if (!test_thread_flag(TIF_SYSCALL_TRACE))
+		return;
+	if (!(current->ptrace & PT_PTRACED))
+		return;
+
+	/*
+	 * Save IP.  IP is used to denote syscall entry/exit:
+	 *  IP = 0 -> entry, = 1 -> exit
+	 */
+	ip = regs->ARM_ip;
+	regs->ARM_ip = why;
+
+	/* the 0x80 provides a way for the tracing parent to distinguish
+	   between a syscall stop and SIGTRAP delivery */
+	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+				 ? 0x80 : 0));
+	/*
+	 * this isn't the same as continuing with a signal, but it will do
+	 * for normal use.  strace only continues with a signal if the
+	 * stopping signal is not SIGTRAP.  -brl
+	 */
+	if (current->exit_code) {
+		send_sig(current->exit_code, current, 1);
+		current->exit_code = 0;
+	}
+	regs->ARM_ip = ip;
+}
diff --git a/arch/arm26/kernel/ptrace.h b/arch/arm26/kernel/ptrace.h
new file mode 100644
index 00000000000..846c9d8d36e
--- /dev/null
+++ b/arch/arm26/kernel/ptrace.h
@@ -0,0 +1,13 @@
+/*
+ *  linux/arch/arm26/kernel/ptrace.h
+ *
+ *  Copyright (C) 2000-2003 Russell King
+ *  Copyright (C) 2003 Ian Molton
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+extern void ptrace_cancel_bpt(struct task_struct *);
+extern void ptrace_set_bpt(struct task_struct *);
+extern void ptrace_break(struct task_struct *, struct pt_regs *);
diff --git a/arch/arm26/kernel/semaphore.c b/arch/arm26/kernel/semaphore.c
new file mode 100644
index 00000000000..3023a53431f
--- /dev/null
+++ b/arch/arm26/kernel/semaphore.c
@@ -0,0 +1,223 @@
+/*
+ *  ARM semaphore implementation, taken from
+ *
+ *  i386 semaphore implementation.
+ *
+ *  (C) Copyright 1999 Linus Torvalds
+ *  (C) Copyright 2003 Ian Molton (ARM26 mods)
+ *
+ *  Modified for ARM by Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+
+#include <asm/semaphore.h>
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to acquire the semaphore, while the "sleeping"
+ * variable is a count of such acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation.
+ *
+ * "sleeping" and the contention routine ordering is
+ * protected by the semaphore spinlock.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+
+/*
+ * Logic:
+ *  - only on a boundary condition do we need to care. When we go
+ *    from a negative count to a non-negative, we wake people up.
+ *  - when we go from a non-negative count to a negative do we
+ *    (a) synchronize with the "sleeper" count and (b) make sure
+ *    that we're on the wakeup list before we synchronize so that
+ *    we cannot lose wakeup events.
+ */
+
+void __up(struct semaphore *sem)
+{
+	wake_up(&sem->wait);
+}
+
+static DEFINE_SPINLOCK(semaphore_lock);
+
+void __sched __down(struct semaphore * sem)
+{
+	struct task_struct *tsk = current;
+	DECLARE_WAITQUEUE(wait, tsk);
+	tsk->state = TASK_UNINTERRUPTIBLE;
+	add_wait_queue_exclusive(&sem->wait, &wait);
+
+	spin_lock_irq(&semaphore_lock);
+	sem->sleepers++;
+	for (;;) {
+		int sleepers = sem->sleepers;
+
+		/*
+		 * Add "everybody else" into it. They aren't
+		 * playing, because we own the spinlock.
+		 */
+		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+			sem->sleepers = 0;
+			break;
+		}
+		sem->sleepers = 1;	/* us - see -1 above */
+		spin_unlock_irq(&semaphore_lock);
+
+		schedule();
+		tsk->state = TASK_UNINTERRUPTIBLE;
+		spin_lock_irq(&semaphore_lock);
+	}
+	spin_unlock_irq(&semaphore_lock);
+	remove_wait_queue(&sem->wait, &wait);
+	tsk->state = TASK_RUNNING;
+	wake_up(&sem->wait);
+}
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+	int retval = 0;
+	struct task_struct *tsk = current;
+	DECLARE_WAITQUEUE(wait, tsk);
+	tsk->state = TASK_INTERRUPTIBLE;
+	add_wait_queue_exclusive(&sem->wait, &wait);
+
+	spin_lock_irq(&semaphore_lock);
+	sem->sleepers ++;
+	for (;;) {
+		int sleepers = sem->sleepers;
+
+		/*
+		 * With signals pending, this turns into
+		 * the trylock failure case - we won't be
+		 * sleeping, and we* can't get the lock as
+		 * it has contention. Just correct the count
+		 * and exit.
+		 */
+		if (signal_pending(current)) {
+			retval = -EINTR;
+			sem->sleepers = 0;
+			atomic_add(sleepers, &sem->count);
+			break;
+		}
+
+		/*
+		 * Add "everybody else" into it. They aren't
+		 * playing, because we own the spinlock. The
+		 * "-1" is because we're still hoping to get
+		 * the lock.
+		 */
+		if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+			sem->sleepers = 0;
+			break;
+		}
+		sem->sleepers = 1;	/* us - see -1 above */
+		spin_unlock_irq(&semaphore_lock);
+
+		schedule();
+		tsk->state = TASK_INTERRUPTIBLE;
+		spin_lock_irq(&semaphore_lock);
+	}
+	spin_unlock_irq(&semaphore_lock);
+	tsk->state = TASK_RUNNING;
+	remove_wait_queue(&sem->wait, &wait);
+	wake_up(&sem->wait);
+	return retval;
+}
+
+/*
+ * Trylock failed - make sure we correct for
+ * having decremented the count.
+ *
+ * We could have done the trylock with a
+ * single "cmpxchg" without failure cases,
+ * but then it wouldn't work on a 386.
+ */
+int __down_trylock(struct semaphore * sem)
+{
+	int sleepers;
+	unsigned long flags;
+
+	spin_lock_irqsave(&semaphore_lock, flags);
+	sleepers = sem->sleepers + 1;
+	sem->sleepers = 0;
+
+	/*
+	 * Add "everybody else" and us into it. They aren't
+	 * playing, because we own the spinlock.
+	 */
+	if (!atomic_add_negative(sleepers, &sem->count))
+		wake_up(&sem->wait);
+
+	spin_unlock_irqrestore(&semaphore_lock, flags);
+	return 1;
+}
+
+/*
+ * The semaphore operations have a special calling sequence that
+ * allow us to do a simpler in-line version of them. These routines
+ * need to convert that sequence back into the C sequence when
+ * there is contention on the semaphore.
+ *
+ * ip contains the semaphore pointer on entry. Save the C-clobbered
+ * registers (r0 to r3 and lr), but not ip, as we use it as a return
+ * value in some cases..
+ */
+asm("	.section .sched.text , #alloc, #execinstr	\n\
+	.align	5				\n\
+	.globl	__down_failed			\n\
+__down_failed:					\n\
+	stmfd	sp!, {r0 - r3, lr}		\n\
+	mov	r0, ip				\n\
+	bl	__down				\n\
+	ldmfd	sp!, {r0 - r3, pc}^		\n\
+						\n\
+	.align	5				\n\
+	.globl	__down_interruptible_failed	\n\
+__down_interruptible_failed:			\n\
+	stmfd	sp!, {r0 - r3, lr}		\n\
+	mov	r0, ip				\n\
+	bl	__down_interruptible		\n\
+	mov	ip, r0				\n\
+	ldmfd	sp!, {r0 - r3, pc}^		\n\
+						\n\
+	.align	5				\n\
+	.globl	__down_trylock_failed		\n\
+__down_trylock_failed:				\n\
+	stmfd	sp!, {r0 - r3, lr}		\n\
+	mov	r0, ip				\n\
+	bl	__down_trylock			\n\
+	mov	ip, r0				\n\
+	ldmfd	sp!, {r0 - r3, pc}^		\n\
+						\n\
+	.align	5				\n\
+	.globl	__up_wakeup			\n\
+__up_wakeup:					\n\
+	stmfd	sp!, {r0 - r3, lr}		\n\
+	mov	r0, ip				\n\
+	bl	__up				\n\
+	ldmfd	sp!, {r0 - r3, pc}^		\n\
+	");
+
+EXPORT_SYMBOL(__down_failed);
+EXPORT_SYMBOL(__down_interruptible_failed);
+EXPORT_SYMBOL(__down_trylock_failed);
+EXPORT_SYMBOL(__up_wakeup);
+
diff --git a/arch/arm26/kernel/setup.c b/arch/arm26/kernel/setup.c
new file mode 100644
index 00000000000..4eb329e3828
--- /dev/null
+++ b/arch/arm26/kernel/setup.c
@@ -0,0 +1,573 @@
+/*
+ *  linux/arch/arm26/kernel/setup.c
+ *
+ *  Copyright (C) 1995-2001 Russell King
+ *  Copyright (C) 2003 Ian Molton
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/utsname.h>
+#include <linux/blkdev.h>
+#include <linux/console.h>
+#include <linux/bootmem.h>
+#include <linux/seq_file.h>
+#include <linux/tty.h>
+#include <linux/init.h>
+#include <linux/root_dev.h>
+
+#include <asm/elf.h>
+#include <asm/hardware.h>
+#include <asm/io.h>
+#include <asm/procinfo.h>
+#include <asm/setup.h>
+#include <asm/mach-types.h>
+#include <asm/tlbflush.h>
+
+#include <asm/irqchip.h>
+
+#ifndef MEM_SIZE
+#define MEM_SIZE	(16*1024*1024)
+#endif
+
+#ifdef CONFIG_PREEMPT
+DEFINE_SPINLOCK(kernel_flag);
+#endif
+
+#if defined(CONFIG_FPE_NWFPE)
+char fpe_type[8];
+
+static int __init fpe_setup(char *line)
+{
+	memcpy(fpe_type, line, 8);
+	return 1;
+}
+
+__setup("fpe=", fpe_setup);
+#endif
+
+extern void paging_init(struct meminfo *);
+extern void convert_to_tag_list(struct tag *tags);
+extern void squash_mem_tags(struct tag *tag);
+extern void bootmem_init(struct meminfo *);
+extern int root_mountflags;
+extern int _stext, _text, _etext, _edata, _end;
+#ifdef CONFIG_XIP_KERNEL
+extern int _endtext, _sdata;
+#endif
+
+
+unsigned int processor_id;
+unsigned int __machine_arch_type;
+unsigned int system_rev;
+unsigned int system_serial_low;
+unsigned int system_serial_high;
+unsigned int elf_hwcap;
+unsigned int memc_ctrl_reg;
+unsigned int number_mfm_drives;
+
+struct processor processor;
+
+char elf_platform[ELF_PLATFORM_SIZE];
+
+unsigned long phys_initrd_start __initdata = 0;
+unsigned long phys_initrd_size __initdata = 0;
+static struct meminfo meminfo __initdata = { 0, };
+static struct proc_info_item proc_info;
+static const char *machine_name;
+static char command_line[COMMAND_LINE_SIZE];
+
+static char default_command_line[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE;
+
+/*
+ * Standard memory resources
+ */
+static struct resource mem_res[] = {
+	{ "Video RAM",   0,     0,     IORESOURCE_MEM			},
+	{ "Kernel code", 0,     0,     IORESOURCE_MEM			},
+	{ "Kernel data", 0,     0,     IORESOURCE_MEM			}
+};
+
+#define video_ram   mem_res[0]
+#define kernel_code mem_res[1]
+#define kernel_data mem_res[2]
+
+static struct resource io_res[] = {
+	{ "reserved",    0x3bc, 0x3be, IORESOURCE_IO | IORESOURCE_BUSY },
+	{ "reserved",    0x378, 0x37f, IORESOURCE_IO | IORESOURCE_BUSY },
+	{ "reserved",    0x278, 0x27f, IORESOURCE_IO | IORESOURCE_BUSY }
+};
+
+#define lp0 io_res[0]
+#define lp1 io_res[1]
+#define lp2 io_res[2]
+
+#define dump_cpu_info() do { } while (0)
+
+static void __init setup_processor(void)
+{
+	extern struct proc_info_list __proc_info_begin, __proc_info_end;
+	struct proc_info_list *list;
+
+	/*
+	 * locate processor in the list of supported processor
+	 * types.  The linker builds this table for us from the
+	 * entries in arch/arm26/mm/proc-*.S
+	 */
+	for (list = &__proc_info_begin; list < &__proc_info_end ; list++)
+		if ((processor_id & list->cpu_mask) == list->cpu_val)
+			break;
+
+	/*
+	 * If processor type is unrecognised, then we
+	 * can do nothing...
+	 */
+	if (list >= &__proc_info_end) {
+		printk("CPU configuration botched (ID %08x), unable "
+		       "to continue.\n", processor_id);
+		while (1);
+	}
+
+	proc_info = *list->info;
+	processor = *list->proc;
+
+
+	printk("CPU: %s %s revision %d\n",
+	       proc_info.manufacturer, proc_info.cpu_name,
+	       (int)processor_id & 15);
+
+	dump_cpu_info();
+
+	sprintf(system_utsname.machine, "%s", list->arch_name);
+	sprintf(elf_platform, "%s", list->elf_name);
+	elf_hwcap = list->elf_hwcap;
+
+	cpu_proc_init();
+}
+
+/*
+ * Initial parsing of the command line.  We need to pick out the
+ * memory size.  We look for mem=size@start, where start and size
+ * are "size[KkMm]"
+ */
+static void __init
+parse_cmdline(struct meminfo *mi, char **cmdline_p, char *from)
+{
+	char c = ' ', *to = command_line;
+	int usermem = 0, len = 0;
+
+	for (;;) {
+		if (c == ' ' && !memcmp(from, "mem=", 4)) {
+			unsigned long size, start;
+
+			if (to != command_line)
+				to -= 1;
+
+			/*
+			 * If the user specifies memory size, we
+			 * blow away any automatically generated
+			 * size.
+			 */
+			if (usermem == 0) {
+				usermem = 1;
+				mi->nr_banks = 0;
+			}
+
+			start = PHYS_OFFSET;
+			size  = memparse(from + 4, &from);
+			if (*from == '@')
+				start = memparse(from + 1, &from);
+
+			mi->bank[mi->nr_banks].start = start;
+			mi->bank[mi->nr_banks].size  = size;
+			mi->bank[mi->nr_banks].node  = PHYS_TO_NID(start);
+			mi->nr_banks += 1;
+		}
+		c = *from++;
+		if (!c)
+			break;
+		if (COMMAND_LINE_SIZE <= ++len)
+			break;
+		*to++ = c;
+	}
+	*to = '\0';
+	*cmdline_p = command_line;
+}
+
+static void __init
+setup_ramdisk(int doload, int prompt, int image_start, unsigned int rd_sz)
+{
+#ifdef CONFIG_BLK_DEV_RAM
+	extern int rd_size, rd_image_start, rd_prompt, rd_doload;
+
+	rd_image_start = image_start;
+	rd_prompt = prompt;
+	rd_doload = doload;
+
+	if (rd_sz)
+		rd_size = rd_sz;
+#endif
+}
+
+static void __init
+request_standard_resources(struct meminfo *mi)
+{
+	struct resource *res;
+	int i;
+
+	kernel_code.start  = init_mm.start_code;
+	kernel_code.end    = init_mm.end_code - 1;
+#ifdef CONFIG_XIP_KERNEL
+	kernel_data.start  = init_mm.start_data;
+#else
+	kernel_data.start  = init_mm.end_code;
+#endif
+	kernel_data.end    = init_mm.brk - 1;
+
+	for (i = 0; i < mi->nr_banks; i++) {
+		unsigned long virt_start, virt_end;
+
+		if (mi->bank[i].size == 0)
+			continue;
+
+		virt_start = mi->bank[i].start;
+		virt_end   = virt_start + mi->bank[i].size - 1;
+
+		res = alloc_bootmem_low(sizeof(*res));
+		res->name  = "System RAM";
+		res->start = virt_start;
+		res->end   = virt_end;
+		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+
+		request_resource(&iomem_resource, res);
+
+		if (kernel_code.start >= res->start &&
+		    kernel_code.end <= res->end)
+			request_resource(res, &kernel_code);
+		if (kernel_data.start >= res->start &&
+		    kernel_data.end <= res->end)
+			request_resource(res, &kernel_data);
+	}
+
+/*	FIXME - needed? if (mdesc->video_start) {
+		video_ram.start = mdesc->video_start;
+		video_ram.end   = mdesc->video_end;
+		request_resource(&iomem_resource, &video_ram);
+	}*/
+
+	/*
+	 * Some machines don't have the possibility of ever
+	 * possessing lp1 or lp2
+	 */
+	if (0)  /* FIXME - need to do this for A5k at least */
+		request_resource(&ioport_resource, &lp0);
+}
+
+/*
+ *  Tag parsing.
+ *
+ * This is the new way of passing data to the kernel at boot time.  Rather
+ * than passing a fixed inflexible structure to the kernel, we pass a list
+ * of variable-sized tags to the kernel.  The first tag must be a ATAG_CORE
+ * tag for the list to be recognised (to distinguish the tagged list from
+ * a param_struct).  The list is terminated with a zero-length tag (this tag
+ * is not parsed in any way).
+ */
+static int __init parse_tag_core(const struct tag *tag)
+{
+	if (tag->hdr.size > 2) {
+		if ((tag->u.core.flags & 1) == 0)
+			root_mountflags &= ~MS_RDONLY;
+		ROOT_DEV = old_decode_dev(tag->u.core.rootdev);
+	}
+	return 0;
+}
+
+__tagtable(ATAG_CORE, parse_tag_core);
+
+static int __init parse_tag_mem32(const struct tag *tag)
+{
+	if (meminfo.nr_banks >= NR_BANKS) {
+		printk(KERN_WARNING
+		       "Ignoring memory bank 0x%08x size %dKB\n",
+			tag->u.mem.start, tag->u.mem.size / 1024);
+		return -EINVAL;
+	}
+	meminfo.bank[meminfo.nr_banks].start = tag->u.mem.start;
+	meminfo.bank[meminfo.nr_banks].size  = tag->u.mem.size;
+	meminfo.bank[meminfo.nr_banks].node  = PHYS_TO_NID(tag->u.mem.start);
+	meminfo.nr_banks += 1;
+
+	return 0;
+}
+
+__tagtable(ATAG_MEM, parse_tag_mem32);
+
+#if defined(CONFIG_DUMMY_CONSOLE)
+struct screen_info screen_info = {
+ .orig_video_lines	= 30,
+ .orig_video_cols	= 80,
+ .orig_video_mode	= 0,
+ .orig_video_ega_bx	= 0,
+ .orig_video_isVGA	= 1,
+ .orig_video_points	= 8
+};
+
+static int __init parse_tag_videotext(const struct tag *tag)
+{
+	screen_info.orig_x            = tag->u.videotext.x;
+	screen_info.orig_y            = tag->u.videotext.y;
+	screen_info.orig_video_page   = tag->u.videotext.video_page;
+	screen_info.orig_video_mode   = tag->u.videotext.video_mode;
+	screen_info.orig_video_cols   = tag->u.videotext.video_cols;
+	screen_info.orig_video_ega_bx = tag->u.videotext.video_ega_bx;
+	screen_info.orig_video_lines  = tag->u.videotext.video_lines;
+	screen_info.orig_video_isVGA  = tag->u.videotext.video_isvga;
+	screen_info.orig_video_points = tag->u.videotext.video_points;
+	return 0;
+}
+
+__tagtable(ATAG_VIDEOTEXT, parse_tag_videotext);
+#endif
+
+static int __init parse_tag_acorn(const struct tag *tag)
+{
+        memc_ctrl_reg = tag->u.acorn.memc_control_reg;
+        number_mfm_drives = tag->u.acorn.adfsdrives;
+        return 0;
+}
+
+__tagtable(ATAG_ACORN, parse_tag_acorn);
+
+static int __init parse_tag_ramdisk(const struct tag *tag)
+{
+	setup_ramdisk((tag->u.ramdisk.flags & 1) == 0,
+		      (tag->u.ramdisk.flags & 2) == 0,
+		      tag->u.ramdisk.start, tag->u.ramdisk.size);
+	return 0;
+}
+
+__tagtable(ATAG_RAMDISK, parse_tag_ramdisk);
+
+static int __init parse_tag_initrd(const struct tag *tag)
+{
+	printk(KERN_WARNING "ATAG_INITRD is deprecated; please update your bootloader. \n");
+        phys_initrd_start = (unsigned long)tag->u.initrd.start;
+        phys_initrd_size = (unsigned long)tag->u.initrd.size;
+	return 0;
+}
+
+__tagtable(ATAG_INITRD, parse_tag_initrd);
+
+static int __init parse_tag_initrd2(const struct tag *tag)
+{
+	printk(KERN_WARNING "ATAG_INITRD is deprecated; please update your bootloader. \n");
+	phys_initrd_start = (unsigned long)tag->u.initrd.start;
+	phys_initrd_size = (unsigned long)tag->u.initrd.size;
+	return 0;
+}
+
+__tagtable(ATAG_INITRD2, parse_tag_initrd2);
+
+static int __init parse_tag_serialnr(const struct tag *tag)
+{
+	system_serial_low = tag->u.serialnr.low;
+	system_serial_high = tag->u.serialnr.high;
+	return 0;
+}
+
+__tagtable(ATAG_SERIAL, parse_tag_serialnr);
+
+static int __init parse_tag_revision(const struct tag *tag)
+{
+	system_rev = tag->u.revision.rev;
+	return 0;
+}
+
+__tagtable(ATAG_REVISION, parse_tag_revision);
+
+static int __init parse_tag_cmdline(const struct tag *tag)
+{
+	strncpy(default_command_line, tag->u.cmdline.cmdline, COMMAND_LINE_SIZE);
+	default_command_line[COMMAND_LINE_SIZE - 1] = '\0';
+	return 0;
+}
+
+__tagtable(ATAG_CMDLINE, parse_tag_cmdline);
+
+/*
+ * Scan the tag table for this tag, and call its parse function.
+ * The tag table is built by the linker from all the __tagtable
+ * declarations.
+ */
+static int __init parse_tag(const struct tag *tag)
+{
+	extern struct tagtable __tagtable_begin, __tagtable_end;
+	struct tagtable *t;
+
+	for (t = &__tagtable_begin; t < &__tagtable_end; t++)
+		if (tag->hdr.tag == t->tag) {
+			t->parse(tag);
+			break;
+		}
+
+	return t < &__tagtable_end;
+}
+
+/*
+ * Parse all tags in the list, checking both the global and architecture
+ * specific tag tables.
+ */
+static void __init parse_tags(const struct tag *t)
+{
+	for (; t->hdr.size; t = tag_next(t))
+		if (!parse_tag(t))
+			printk(KERN_WARNING
+				"Ignoring unrecognised tag 0x%08x\n",
+				t->hdr.tag);
+}
+
+/*
+ * This holds our defaults.
+ */
+static struct init_tags {
+	struct tag_header hdr1;
+	struct tag_core   core;
+	struct tag_header hdr2;
+	struct tag_mem32  mem;
+	struct tag_header hdr3;
+} init_tags __initdata = {
+	{ tag_size(tag_core), ATAG_CORE },
+	{ 1, PAGE_SIZE, 0xff },
+	{ tag_size(tag_mem32), ATAG_MEM },
+	{ MEM_SIZE, PHYS_OFFSET },
+	{ 0, ATAG_NONE }
+};
+
+void __init setup_arch(char **cmdline_p)
+{
+	struct tag *tags = (struct tag *)&init_tags;
+	char *from = default_command_line;
+
+	setup_processor();
+	if(machine_arch_type == MACH_TYPE_A5K)
+		machine_name = "A5000";
+	else if(machine_arch_type == MACH_TYPE_ARCHIMEDES)
+		machine_name = "Archimedes";
+	else
+		machine_name = "UNKNOWN";
+
+	//FIXME - the tag struct is always copied here but this is a block
+	// of RAM that is accidentally reserved along with video RAM. perhaps
+	// it would be a good idea to explicitly reserve this?
+
+	tags = (struct tag *)0x0207c000;
+
+	/*
+	 * If we have the old style parameters, convert them to
+	 * a tag list.
+	 */
+	if (tags->hdr.tag != ATAG_CORE)
+		convert_to_tag_list(tags);
+	if (tags->hdr.tag != ATAG_CORE)
+		tags = (struct tag *)&init_tags;
+	if (tags->hdr.tag == ATAG_CORE) {
+		if (meminfo.nr_banks != 0)
+			squash_mem_tags(tags);
+		parse_tags(tags);
+	}
+
+	init_mm.start_code = (unsigned long) &_text;
+#ifndef CONFIG_XIP_KERNEL
+	init_mm.end_code   = (unsigned long) &_etext;
+#else
+	init_mm.end_code   = (unsigned long) &_endtext;
+	init_mm.start_data   = (unsigned long) &_sdata;
+#endif
+	init_mm.end_data   = (unsigned long) &_edata;
+	init_mm.brk	   = (unsigned long) &_end;
+
+	memcpy(saved_command_line, from, COMMAND_LINE_SIZE);
+	saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
+	parse_cmdline(&meminfo, cmdline_p, from);
+	bootmem_init(&meminfo);
+	paging_init(&meminfo);
+	request_standard_resources(&meminfo);
+
+#ifdef CONFIG_VT
+#if defined(CONFIG_DUMMY_CONSOLE)
+	conswitchp = &dummy_con;
+#endif
+#endif
+}
+
+static const char *hwcap_str[] = {
+	"swp",
+	"half",
+	"thumb",
+	"26bit",
+	"fastmult",
+	"fpa",
+	"vfp",
+	"edsp",
+	NULL
+};
+
+static int c_show(struct seq_file *m, void *v)
+{
+	int i;
+
+	seq_printf(m, "Processor\t: %s %s rev %d (%s)\n",
+		   proc_info.manufacturer, proc_info.cpu_name,
+		   (int)processor_id & 15, elf_platform);
+
+	seq_printf(m, "BogoMIPS\t: %lu.%02lu\n",
+		   loops_per_jiffy / (500000/HZ),
+		   (loops_per_jiffy / (5000/HZ)) % 100);
+
+	/* dump out the processor features */
+	seq_puts(m, "Features\t: ");
+
+	for (i = 0; hwcap_str[i]; i++)
+		if (elf_hwcap & (1 << i))
+			seq_printf(m, "%s ", hwcap_str[i]);
+
+	seq_puts(m, "\n");
+
+	seq_printf(m, "CPU part\t\t: %07x\n", processor_id >> 4);
+	seq_printf(m, "CPU revision\t: %d\n\n", processor_id & 15);
+	seq_printf(m, "Hardware\t: %s\n", machine_name);
+	seq_printf(m, "Revision\t: %04x\n", system_rev);
+	seq_printf(m, "Serial\t\t: %08x%08x\n",
+		   system_serial_high, system_serial_low);
+
+	return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	return *pos < 1 ? (void *)1 : NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return NULL;
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+struct seq_operations cpuinfo_op = {
+	.start	= c_start,
+	.next	= c_next,
+	.stop	= c_stop,
+	.show	= c_show
+};
diff --git a/arch/arm26/kernel/signal.c b/arch/arm26/kernel/signal.c
new file mode 100644
index 00000000000..356d9809cc0
--- /dev/null
+++ b/arch/arm26/kernel/signal.c
@@ -0,0 +1,540 @@
+/*
+ *  linux/arch/arm26/kernel/signal.c
+ *
+ *  Copyright (C) 1995-2002 Russell King
+ *  Copyright (C) 2003 Ian Molton (ARM26)
+ *
+ * FIXME!!! This is probably very broken (13/05/2003)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/personality.h>
+#include <linux/tty.h>
+#include <linux/binfmts.h>
+#include <linux/elf.h>
+
+#include <asm/pgalloc.h>
+#include <asm/ucontext.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+
+#include "ptrace.h"
+
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+/*
+ * For ARM syscalls, we encode the syscall number into the instruction.
+ */
+#define SWI_SYS_SIGRETURN	(0xef000000|(__NR_sigreturn))
+#define SWI_SYS_RT_SIGRETURN	(0xef000000|(__NR_rt_sigreturn))
+
+static int do_signal(sigset_t *oldset, struct pt_regs * regs, int syscall);
+
+/*
+ * atomically swap in the new signal mask, and wait for a signal.
+ */
+asmlinkage int sys_sigsuspend(int restart, unsigned long oldmask, old_sigset_t mask, struct pt_regs *regs)
+{
+	sigset_t saveset;
+
+	mask &= _BLOCKABLE;
+	spin_lock_irq(&current->sighand->siglock);
+	saveset = current->blocked;
+	siginitset(&current->blocked, mask);
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+	regs->ARM_r0 = -EINTR;
+
+	while (1) {
+		current->state = TASK_INTERRUPTIBLE;
+		schedule();
+		if (do_signal(&saveset, regs, 0))
+			return regs->ARM_r0;
+	}
+}
+
+asmlinkage int
+sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, struct pt_regs *regs)
+{
+	sigset_t saveset, newset;
+
+	/* XXX: Don't preclude handling different sized sigset_t's. */
+	if (sigsetsize != sizeof(sigset_t))
+		return -EINVAL;
+
+	if (copy_from_user(&newset, unewset, sizeof(newset)))
+		return -EFAULT;
+	sigdelsetmask(&newset, ~_BLOCKABLE);
+
+	spin_lock_irq(&current->sighand->siglock);
+	saveset = current->blocked;
+	current->blocked = newset;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+	regs->ARM_r0 = -EINTR;
+
+	while (1) {
+		current->state = TASK_INTERRUPTIBLE;
+		schedule();
+		if (do_signal(&saveset, regs, 0))
+			return regs->ARM_r0;
+	}
+}
+
+asmlinkage int 
+sys_sigaction(int sig, const struct old_sigaction *act,
+	      struct old_sigaction *oact)
+{
+	struct k_sigaction new_ka, old_ka;
+	int ret;
+
+	if (act) {
+		old_sigset_t mask;
+		if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
+		    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
+		    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
+			return -EFAULT;
+		__get_user(new_ka.sa.sa_flags, &act->sa_flags);
+		__get_user(mask, &act->sa_mask);
+		siginitset(&new_ka.sa.sa_mask, mask);
+	}
+
+	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+	if (!ret && oact) {
+		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
+		    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
+		    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
+			return -EFAULT;
+		__put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+		__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
+	}
+
+	return ret;
+}
+
+/*
+ * Do a signal return; undo the signal stack.
+ */
+struct sigframe
+{
+	struct sigcontext sc;
+	unsigned long extramask[_NSIG_WORDS-1];
+	unsigned long retcode;
+};
+
+struct rt_sigframe
+{
+	struct siginfo *pinfo;
+	void *puc;
+	struct siginfo info;
+	struct ucontext uc;
+	unsigned long retcode;
+};
+
+static int
+restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc)
+{
+	int err = 0;
+
+	__get_user_error(regs->ARM_r0, &sc->arm_r0, err);
+	__get_user_error(regs->ARM_r1, &sc->arm_r1, err);
+	__get_user_error(regs->ARM_r2, &sc->arm_r2, err);
+	__get_user_error(regs->ARM_r3, &sc->arm_r3, err);
+	__get_user_error(regs->ARM_r4, &sc->arm_r4, err);
+	__get_user_error(regs->ARM_r5, &sc->arm_r5, err);
+	__get_user_error(regs->ARM_r6, &sc->arm_r6, err);
+	__get_user_error(regs->ARM_r7, &sc->arm_r7, err);
+	__get_user_error(regs->ARM_r8, &sc->arm_r8, err);
+	__get_user_error(regs->ARM_r9, &sc->arm_r9, err);
+	__get_user_error(regs->ARM_r10, &sc->arm_r10, err);
+	__get_user_error(regs->ARM_fp, &sc->arm_fp, err);
+	__get_user_error(regs->ARM_ip, &sc->arm_ip, err);
+	__get_user_error(regs->ARM_sp, &sc->arm_sp, err);
+	__get_user_error(regs->ARM_lr, &sc->arm_lr, err);
+	__get_user_error(regs->ARM_pc, &sc->arm_pc, err);
+
+	err |= !valid_user_regs(regs);
+
+	return err;
+}
+
+asmlinkage int sys_sigreturn(struct pt_regs *regs)
+{
+	struct sigframe *frame;
+	sigset_t set;
+
+	/*
+	 * Since we stacked the signal on a 64-bit boundary,
+	 * then 'sp' should be word aligned here.  If it's
+	 * not, then the user is trying to mess with us.
+	 */
+	if (regs->ARM_sp & 7)
+		goto badframe;
+
+	frame = (struct sigframe *)regs->ARM_sp;
+
+	if (!access_ok(VERIFY_READ, frame, sizeof (*frame)))
+		goto badframe;
+	if (__get_user(set.sig[0], &frame->sc.oldmask)
+	    || (_NSIG_WORDS > 1
+	        && __copy_from_user(&set.sig[1], &frame->extramask,
+				    sizeof(frame->extramask))))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = set;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	if (restore_sigcontext(regs, &frame->sc))
+		goto badframe;
+
+	/* Send SIGTRAP if we're single-stepping */
+	if (current->ptrace & PT_SINGLESTEP) {
+                ptrace_cancel_bpt(current);
+                send_sig(SIGTRAP, current, 1);
+        }
+
+	return regs->ARM_r0;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
+{
+	struct rt_sigframe *frame;
+	sigset_t set;
+
+	/*
+	 * Since we stacked the signal on a 64-bit boundary,
+	 * then 'sp' should be word aligned here.  If it's
+	 * not, then the user is trying to mess with us.
+	 */
+	if (regs->ARM_sp & 7)
+		goto badframe;
+
+	frame = (struct rt_sigframe *)regs->ARM_sp;
+
+	if (!access_ok(VERIFY_READ, frame, sizeof (*frame)))
+		goto badframe;
+	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+		goto badframe;
+
+	sigdelsetmask(&set, ~_BLOCKABLE);
+	spin_lock_irq(&current->sighand->siglock);
+	current->blocked = set;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+		goto badframe;
+
+	/* Send SIGTRAP if we're single-stepping */
+	if (current->ptrace & PT_SINGLESTEP) {
+                ptrace_cancel_bpt(current);
+                send_sig(SIGTRAP, current, 1);
+        }
+
+	return regs->ARM_r0;
+
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+static int
+setup_sigcontext(struct sigcontext *sc, /*struct _fpstate *fpstate,*/
+		 struct pt_regs *regs, unsigned long mask)
+{
+	int err = 0;
+
+	__put_user_error(regs->ARM_r0, &sc->arm_r0, err);
+	__put_user_error(regs->ARM_r1, &sc->arm_r1, err);
+	__put_user_error(regs->ARM_r2, &sc->arm_r2, err);
+	__put_user_error(regs->ARM_r3, &sc->arm_r3, err);
+	__put_user_error(regs->ARM_r4, &sc->arm_r4, err);
+	__put_user_error(regs->ARM_r5, &sc->arm_r5, err);
+	__put_user_error(regs->ARM_r6, &sc->arm_r6, err);
+	__put_user_error(regs->ARM_r7, &sc->arm_r7, err);
+	__put_user_error(regs->ARM_r8, &sc->arm_r8, err);
+	__put_user_error(regs->ARM_r9, &sc->arm_r9, err);
+	__put_user_error(regs->ARM_r10, &sc->arm_r10, err);
+	__put_user_error(regs->ARM_fp, &sc->arm_fp, err);
+	__put_user_error(regs->ARM_ip, &sc->arm_ip, err);
+	__put_user_error(regs->ARM_sp, &sc->arm_sp, err);
+	__put_user_error(regs->ARM_lr, &sc->arm_lr, err);
+	__put_user_error(regs->ARM_pc, &sc->arm_pc, err);
+
+	__put_user_error(current->thread.trap_no, &sc->trap_no, err);
+	__put_user_error(current->thread.error_code, &sc->error_code, err);
+	__put_user_error(current->thread.address, &sc->fault_address, err);
+	__put_user_error(mask, &sc->oldmask, err);
+
+	return err;
+}
+
+static inline void *
+get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, int framesize)
+{
+	unsigned long sp = regs->ARM_sp;
+
+	/*
+	 * This is the X/Open sanctioned signal stack switching.
+	 */
+	if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(sp))
+		sp = current->sas_ss_sp + current->sas_ss_size;
+
+	/*
+	 * ATPCS B01 mandates 8-byte alignment
+	 */
+	return (void *)((sp - framesize) & ~7);
+}
+
+static int
+setup_return(struct pt_regs *regs, struct k_sigaction *ka,
+	     unsigned long *rc, void *frame, int usig)
+{
+	unsigned long handler = (unsigned long)ka->sa.sa_handler;
+	unsigned long retcode;
+
+	if (ka->sa.sa_flags & SA_RESTORER) {
+		retcode = (unsigned long)ka->sa.sa_restorer;
+	} else {
+
+		if (__put_user((ka->sa.sa_flags & SA_SIGINFO)?SWI_SYS_RT_SIGRETURN:SWI_SYS_SIGRETURN, rc))
+			return 1;
+
+		retcode = ((unsigned long)rc);
+	}
+
+	regs->ARM_r0 = usig;
+	regs->ARM_sp = (unsigned long)frame;
+	regs->ARM_lr = retcode;
+	regs->ARM_pc = handler & ~3;
+
+	return 0;
+}
+
+static int
+setup_frame(int usig, struct k_sigaction *ka, sigset_t *set, struct pt_regs *regs)
+{
+	struct sigframe *frame = get_sigframe(ka, regs, sizeof(*frame));
+	int err = 0;
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
+		return 1;
+
+	err |= setup_sigcontext(&frame->sc, /*&frame->fpstate,*/ regs, set->sig[0]);
+
+	if (_NSIG_WORDS > 1) {
+		err |= __copy_to_user(frame->extramask, &set->sig[1],
+				      sizeof(frame->extramask));
+	}
+
+	if (err == 0)
+		err = setup_return(regs, ka, &frame->retcode, frame, usig);
+
+	return err;
+}
+
+static int
+setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info,
+	       sigset_t *set, struct pt_regs *regs)
+{
+	struct rt_sigframe *frame = get_sigframe(ka, regs, sizeof(*frame));
+	int err = 0;
+
+	if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
+		return 1;
+
+	__put_user_error(&frame->info, &frame->pinfo, err);
+	__put_user_error(&frame->uc, &frame->puc, err);
+	err |= copy_siginfo_to_user(&frame->info, info);
+
+	/* Clear all the bits of the ucontext we don't use.  */
+	err |= __clear_user(&frame->uc, offsetof(struct ucontext, uc_mcontext));
+
+	err |= setup_sigcontext(&frame->uc.uc_mcontext, /*&frame->fpstate,*/
+				regs, set->sig[0]);
+	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
+	if (err == 0)
+		err = setup_return(regs, ka, &frame->retcode, frame, usig);
+
+	if (err == 0) {
+		/*
+		 * For realtime signals we must also set the second and third
+		 * arguments for the signal handler.
+		 *   -- Peter Maydell <pmaydell@chiark.greenend.org.uk> 2000-12-06
+		 */
+		regs->ARM_r1 = (unsigned long)frame->pinfo;
+		regs->ARM_r2 = (unsigned long)frame->puc;
+	}
+
+	return err;
+}
+
+static inline void restart_syscall(struct pt_regs *regs)
+{
+        regs->ARM_r0 = regs->ARM_ORIG_r0;
+        regs->ARM_pc -= 4;
+}
+
+/*
+ * OK, we're invoking a handler
+ */	
+static void
+handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset,
+	      struct pt_regs * regs, int syscall)
+{
+	struct thread_info *thread = current_thread_info();
+	struct task_struct *tsk = current;
+	struct k_sigaction *ka = &tsk->sighand->action[sig-1];
+	int usig = sig;
+	int ret;
+
+        /*
+         * If we were from a system call, check for system call restarting...
+         */
+        if (syscall) {
+                switch (regs->ARM_r0) {
+                case -ERESTART_RESTARTBLOCK:
+                        current_thread_info()->restart_block.fn =
+                                do_no_restart_syscall;
+                case -ERESTARTNOHAND:
+                        regs->ARM_r0 = -EINTR;
+                        break;
+                case -ERESTARTSYS:
+                        if (!(ka->sa.sa_flags & SA_RESTART)) {
+                                regs->ARM_r0 = -EINTR;
+                                break;
+                        }
+                        /* fallthrough */
+                case -ERESTARTNOINTR:
+                        restart_syscall(regs);
+                }
+        }
+
+	/*
+	 * translate the signal
+	 */
+	if (usig < 32 && thread->exec_domain && thread->exec_domain->signal_invmap)
+		usig = thread->exec_domain->signal_invmap[usig];
+
+	/*
+	 * Set up the stack frame
+	 */
+	if (ka->sa.sa_flags & SA_SIGINFO)
+		ret = setup_rt_frame(usig, ka, info, oldset, regs);
+	else
+		ret = setup_frame(usig, ka, oldset, regs);
+
+	/*
+	 * Check that the resulting registers are actually sane.
+	 */
+	ret |= !valid_user_regs(regs);
+
+	if (ret == 0) {
+		if (ka->sa.sa_flags & SA_ONESHOT)
+			ka->sa.sa_handler = SIG_DFL;
+
+		if (!(ka->sa.sa_flags & SA_NODEFER)) {
+			spin_lock_irq(&tsk->sighand->siglock);
+			sigorsets(&tsk->blocked, &tsk->blocked,
+				  &ka->sa.sa_mask);
+			sigaddset(&tsk->blocked, sig);
+			recalc_sigpending();
+			spin_unlock_irq(&tsk->sighand->siglock);
+		}
+		return;
+	}
+
+	force_sigsegv(sig, tsk);
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ *
+ * Note that we go through the signals twice: once to check the signals that
+ * the kernel can handle, and then we build all the user-level signal handling
+ * stack-frames in one go after that.
+ */
+static int do_signal(sigset_t *oldset, struct pt_regs *regs, int syscall)
+{
+	siginfo_t info;
+	int signr;
+
+	/*
+	 * We want the common case to go fast, which
+	 * is why we may in certain cases get here from
+	 * kernel mode. Just return without doing anything
+	 * if so.
+	 */
+	if (!user_mode(regs))
+		return 0;
+
+        if (current->ptrace & PT_SINGLESTEP)
+                ptrace_cancel_bpt(current);
+	
+        signr = get_signal_to_deliver(&info, regs, NULL);
+        if (signr > 0) {
+                handle_signal(signr, &info, oldset, regs, syscall);
+                if (current->ptrace & PT_SINGLESTEP)
+                        ptrace_set_bpt(current);
+                return 1;
+        }
+
+        /*
+         * No signal to deliver to the process - restart the syscall.
+         */
+        if (syscall) {
+                if (regs->ARM_r0 == -ERESTART_RESTARTBLOCK) {
+                        u32 *usp;
+
+                        regs->ARM_sp -= 12;
+                        usp = (u32 *)regs->ARM_sp;
+
+                        put_user(regs->ARM_pc, &usp[0]);
+                        /* swi __NR_restart_syscall */
+                        put_user(0xef000000 | __NR_restart_syscall, &usp[1]);
+                        /* ldr  pc, [sp], #12 */
+// FIXME!!! is #12 correct there?
+                        put_user(0xe49df00c, &usp[2]);
+
+                        regs->ARM_pc = regs->ARM_sp + 4;
+                }
+                if (regs->ARM_r0 == -ERESTARTNOHAND ||
+                    regs->ARM_r0 == -ERESTARTSYS ||
+                    regs->ARM_r0 == -ERESTARTNOINTR) {
+                        restart_syscall(regs);
+                }
+        }
+        if (current->ptrace & PT_SINGLESTEP)
+                ptrace_set_bpt(current);
+        return 0;
+}
+
+asmlinkage void
+do_notify_resume(struct pt_regs *regs, unsigned int thread_flags, int syscall)
+{
+	if (thread_flags & _TIF_SIGPENDING)
+		do_signal(&current->blocked, regs, syscall);
+}
diff --git a/arch/arm26/kernel/sys_arm.c b/arch/arm26/kernel/sys_arm.c
new file mode 100644
index 00000000000..e7edd201579
--- /dev/null
+++ b/arch/arm26/kernel/sys_arm.c
@@ -0,0 +1,324 @@
+/*
+ *  linux/arch/arm26/kernel/sys_arm.c
+ *
+ *  Copyright (C) People who wrote linux/arch/i386/kernel/sys_i386.c
+ *  Copyright (C) 1995, 1996 Russell King.
+ *  Copyright (C) 2003 Ian Molton.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  This file contains various random system calls that
+ *  have a non-standard calling sequence on the Linux/arm
+ *  platform.
+ */
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/stat.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/utsname.h>
+
+#include <asm/uaccess.h>
+#include <asm/ipc.h>
+
+extern unsigned long do_mremap(unsigned long addr, unsigned long old_len,
+			       unsigned long new_len, unsigned long flags,
+			       unsigned long new_addr);
+
+/*
+ * sys_pipe() is the normal C calling standard for creating
+ * a pipe. It's not the way unix traditionally does this, though.
+ */
+asmlinkage int sys_pipe(unsigned long * fildes)
+{
+	int fd[2];
+	int error;
+
+	error = do_pipe(fd);
+	if (!error) {
+		if (copy_to_user(fildes, fd, 2*sizeof(int)))
+			error = -EFAULT;
+	}
+	return error;
+}
+
+/* common code for old and new mmaps */
+inline long do_mmap2(
+	unsigned long addr, unsigned long len,
+	unsigned long prot, unsigned long flags,
+	unsigned long fd, unsigned long pgoff)
+{
+	int error = -EINVAL;
+	struct file * file = NULL;
+
+	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+
+	/*
+	 * If we are doing a fixed mapping, and address < PAGE_SIZE,
+	 * then deny it.
+	 */
+	if (flags & MAP_FIXED && addr < PAGE_SIZE && vectors_base() == 0)
+		goto out;
+
+	error = -EBADF;
+	if (!(flags & MAP_ANONYMOUS)) {
+		file = fget(fd);
+		if (!file)
+			goto out;
+	}
+
+	down_write(&current->mm->mmap_sem);
+	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+	up_write(&current->mm->mmap_sem);
+
+	if (file)
+		fput(file);
+out:
+	return error;
+}
+
+struct mmap_arg_struct {
+	unsigned long addr;
+	unsigned long len;
+	unsigned long prot;
+	unsigned long flags;
+	unsigned long fd;
+	unsigned long offset;
+};
+
+asmlinkage int old_mmap(struct mmap_arg_struct *arg)
+{
+	int error = -EFAULT;
+	struct mmap_arg_struct a;
+
+	if (copy_from_user(&a, arg, sizeof(a)))
+		goto out;
+
+	error = -EINVAL;
+	if (a.offset & ~PAGE_MASK)
+		goto out;
+
+	error = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT);
+out:
+	return error;
+}
+
+asmlinkage unsigned long
+sys_arm_mremap(unsigned long addr, unsigned long old_len,
+	       unsigned long new_len, unsigned long flags,
+	       unsigned long new_addr)
+{
+	unsigned long ret = -EINVAL;
+
+	/*
+	 * If we are doing a fixed mapping, and address < PAGE_SIZE,
+	 * then deny it.
+	 */
+	if (flags & MREMAP_FIXED && new_addr < PAGE_SIZE &&
+	    vectors_base() == 0)
+		goto out;
+
+	down_write(&current->mm->mmap_sem);
+	ret = do_mremap(addr, old_len, new_len, flags, new_addr);
+	up_write(&current->mm->mmap_sem);
+
+out:
+	return ret;
+}
+
+/*
+ * Perform the select(nd, in, out, ex, tv) and mmap() system
+ * calls.
+ */
+
+struct sel_arg_struct {
+	unsigned long n;
+	fd_set *inp, *outp, *exp;
+	struct timeval *tvp;
+};
+
+asmlinkage int old_select(struct sel_arg_struct *arg)
+{
+	struct sel_arg_struct a;
+
+	if (copy_from_user(&a, arg, sizeof(a)))
+		return -EFAULT;
+	/* sys_select() does the appropriate kernel locking */
+	return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp);
+}
+
+/*
+ * sys_ipc() is the de-multiplexer for the SysV IPC calls..
+ *
+ * This is really horribly ugly.
+ */
+asmlinkage int sys_ipc (uint call, int first, int second, int third, void *ptr, long fifth)
+{
+	int version, ret;
+
+	version = call >> 16; /* hack for backward compatibility */
+	call &= 0xffff;
+
+	switch (call) {
+	case SEMOP:
+		return sys_semop (first, (struct sembuf *)ptr, second);
+	case SEMGET:
+		return sys_semget (first, second, third);
+	case SEMCTL: {
+		union semun fourth;
+		if (!ptr)
+			return -EINVAL;
+		if (get_user(fourth.__pad, (void **) ptr))
+			return -EFAULT;
+		return sys_semctl (first, second, third, fourth);
+	}
+
+	case MSGSND:
+		return sys_msgsnd (first, (struct msgbuf *) ptr, 
+				   second, third);
+	case MSGRCV:
+		switch (version) {
+		case 0: {
+			struct ipc_kludge tmp;
+			if (!ptr)
+				return -EINVAL;
+			if (copy_from_user(&tmp,(struct ipc_kludge *) ptr,
+					   sizeof (tmp)))
+				return -EFAULT;
+			return sys_msgrcv (first, tmp.msgp, second,
+					   tmp.msgtyp, third);
+		}
+		default:
+			return sys_msgrcv (first,
+					   (struct msgbuf *) ptr,
+					   second, fifth, third);
+		}
+	case MSGGET:
+		return sys_msgget ((key_t) first, second);
+	case MSGCTL:
+		return sys_msgctl (first, second, (struct msqid_ds *) ptr);
+
+	case SHMAT:
+		switch (version) {
+		default: {
+			ulong raddr;
+			ret = do_shmat (first, (char *) ptr, second, &raddr);
+			if (ret)
+				return ret;
+			return put_user (raddr, (ulong *) third);
+		}
+		case 1:	/* iBCS2 emulator entry point */
+			if (!segment_eq(get_fs(), get_ds()))
+				return -EINVAL;
+			return do_shmat (first, (char *) ptr,
+					  second, (ulong *) third);
+		}
+	case SHMDT: 
+		return sys_shmdt ((char *)ptr);
+	case SHMGET:
+		return sys_shmget (first, second, third);
+	case SHMCTL:
+		return sys_shmctl (first, second,
+				   (struct shmid_ds *) ptr);
+	default:
+		return -EINVAL;
+	}
+}
+
+/* Fork a new task - this creates a new program thread.
+ * This is called indirectly via a small wrapper
+ */
+asmlinkage int sys_fork(struct pt_regs *regs)
+{
+	return do_fork(SIGCHLD, regs->ARM_sp, regs, 0, NULL, NULL);
+}
+
+/* Clone a task - this clones the calling program thread.
+ * This is called indirectly via a small wrapper
+ */
+asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp, struct pt_regs *regs)
+{
+	/*
+         * We don't support SETTID / CLEARTID  (FIXME!!! (nicked from arm32))
+         */
+        if (clone_flags & (CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID))
+                return -EINVAL;
+	
+	if (!newsp)
+		newsp = regs->ARM_sp;
+
+	return do_fork(clone_flags, newsp, regs, 0, NULL, NULL);
+}
+
+asmlinkage int sys_vfork(struct pt_regs *regs)
+{
+	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->ARM_sp, regs, 0, NULL, NULL);
+}
+
+/* sys_execve() executes a new program.
+ * This is called indirectly via a small wrapper
+ */
+asmlinkage int sys_execve(char *filenamei, char **argv, char **envp, struct pt_regs *regs)
+{
+	int error;
+	char * filename;
+
+	filename = getname(filenamei);
+	error = PTR_ERR(filename);
+	if (IS_ERR(filename))
+		goto out;
+	error = do_execve(filename, argv, envp, regs);
+	putname(filename);
+out:
+	return error;
+}
+
+/* FIXME - see if this is correct for arm26 */
+long execve(const char *filename, char **argv, char **envp)
+{
+	struct pt_regs regs;
+        int ret;
+         memset(&regs, 0, sizeof(struct pt_regs));
+        ret = do_execve((char *)filename, (char __user * __user *)argv,                         (char __user * __user *)envp, &regs);
+        if (ret < 0)
+                goto out;
+
+        /*
+         * Save argc to the register structure for userspace.
+         */
+        regs.ARM_r0 = ret;
+
+        /*
+         * We were successful.  We won't be returning to our caller, but
+         * instead to user space by manipulating the kernel stack.
+         */
+        asm(    "add    r0, %0, %1\n\t"
+                "mov    r1, %2\n\t"
+                "mov    r2, %3\n\t"
+                "bl     memmove\n\t"    /* copy regs to top of stack */
+                "mov    r8, #0\n\t"     /* not a syscall */
+                "mov    r9, %0\n\t"     /* thread structure */
+                "mov    sp, r0\n\t"     /* reposition stack pointer */
+                "b      ret_to_user"
+                :
+                : "r" (current_thread_info()),
+                  "Ir" (THREAD_SIZE - 8 - sizeof(regs)),
+                  "r" (&regs),
+                  "Ir" (sizeof(regs))
+                : "r0", "r1", "r2", "r3", "ip", "memory");
+
+ out:
+        return ret;
+}
+
+EXPORT_SYMBOL(execve);
diff --git a/arch/arm26/kernel/time.c b/arch/arm26/kernel/time.c
new file mode 100644
index 00000000000..549a6b2e177
--- /dev/null
+++ b/arch/arm26/kernel/time.c
@@ -0,0 +1,234 @@
+/*
+ *  linux/arch/arm26/kernel/time.c
+ *
+ *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
+ *  Modifications for ARM (C) 1994-2001 Russell King
+ *  Mods for ARM26 (C) 2003 Ian Molton
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  This file contains the ARM-specific time handling details:
+ *  reading the RTC at bootup, etc...
+ *
+ *  1994-07-02  Alan Modra
+ *              fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
+ *  1998-12-20  Updated NTP code according to technical memorandum Jan '96
+ *              "A Kernel Model for Precision Timekeeping" by Dave Mills
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/timex.h>
+#include <linux/errno.h>
+#include <linux/profile.h>
+
+#include <asm/hardware.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/ioc.h>
+
+u64 jiffies_64 = INITIAL_JIFFIES;
+
+EXPORT_SYMBOL(jiffies_64);
+
+extern unsigned long wall_jiffies;
+
+/* this needs a better home */
+DEFINE_SPINLOCK(rtc_lock);
+
+/* change this if you have some constant time drift */
+#define USECS_PER_JIFFY	(1000000/HZ)
+
+static int dummy_set_rtc(void)
+{
+	return 0;
+}
+
+/*
+ * hook for setting the RTC's idea of the current time.
+ */
+int (*set_rtc)(void) = dummy_set_rtc;
+
+/*
+ * Get time offset based on IOCs timer.
+ * FIXME - if this is called with interrutps off, why the shennanigans
+ * below ?
+ */
+static unsigned long gettimeoffset(void)
+{
+        unsigned int count1, count2, status;
+        long offset;
+
+        ioc_writeb (0, IOC_T0LATCH);
+        barrier ();
+        count1 = ioc_readb(IOC_T0CNTL) | (ioc_readb(IOC_T0CNTH) << 8);
+        barrier ();
+        status = ioc_readb(IOC_IRQREQA);
+        barrier ();
+        ioc_writeb (0, IOC_T0LATCH);
+        barrier ();
+        count2 = ioc_readb(IOC_T0CNTL) | (ioc_readb(IOC_T0CNTH) << 8);
+
+        offset = count2;
+        if (count2 < count1) {
+                /*
+                 * We have not had an interrupt between reading count1
+                 * and count2.
+                 */
+                if (status & (1 << 5))
+                        offset -= LATCH;
+        } else if (count2 > count1) {
+                /*
+                 * We have just had another interrupt between reading
+                 * count1 and count2.
+                 */
+                offset -= LATCH;
+        }
+
+        offset = (LATCH - offset) * (tick_nsec / 1000);
+        return (offset + LATCH/2) / LATCH;
+}
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ */
+unsigned long long sched_clock(void)
+{
+	return (unsigned long long)jiffies * (1000000000 / HZ);
+}
+
+static unsigned long next_rtc_update;
+
+/*
+ * If we have an externally synchronized linux clock, then update
+ * CMOS clock accordingly every ~11 minutes.  set_rtc() has to be
+ * called as close as possible to 500 ms before the new second
+ * starts.
+ */
+static inline void do_set_rtc(void)
+{
+	if (time_status & STA_UNSYNC || set_rtc == NULL)
+		return;
+
+//FIXME - timespec.tv_sec is a time_t not unsigned long
+	if (next_rtc_update &&
+	    time_before((unsigned long)xtime.tv_sec, next_rtc_update))
+		return;
+
+	if (xtime.tv_nsec < 500000000 - ((unsigned) tick_nsec >> 1) &&
+	    xtime.tv_nsec >= 500000000 + ((unsigned) tick_nsec >> 1))
+		return;
+
+	if (set_rtc())
+		/*
+		 * rtc update failed.  Try again in 60s
+		 */
+		next_rtc_update = xtime.tv_sec + 60;
+	else
+		next_rtc_update = xtime.tv_sec + 660;
+}
+
+#define do_leds()
+
+void do_gettimeofday(struct timeval *tv)
+{
+	unsigned long flags;
+	unsigned long seq;
+	unsigned long usec, sec, lost;
+
+	do {
+		seq = read_seqbegin_irqsave(&xtime_lock, flags);
+		usec = gettimeoffset();
+
+		lost = jiffies - wall_jiffies;
+		if (lost)
+			usec += lost * USECS_PER_JIFFY;
+
+		sec = xtime.tv_sec;
+		usec += xtime.tv_nsec / 1000;
+	} while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
+
+	/* usec may have gone up a lot: be safe */
+	while (usec >= 1000000) {
+		usec -= 1000000;
+		sec++;
+	}
+
+	tv->tv_sec = sec;
+	tv->tv_usec = usec;
+}
+
+EXPORT_SYMBOL(do_gettimeofday);
+
+int do_settimeofday(struct timespec *tv)
+{
+	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
+		return -EINVAL;
+
+	write_seqlock_irq(&xtime_lock);
+	/*
+	 * This is revolting. We need to set "xtime" correctly. However, the
+	 * value in this location is the value at the most recent update of
+	 * wall time.  Discover what correction gettimeofday() would have
+	 * done, and then undo it!
+	 */
+	tv->tv_nsec -= 1000 * (gettimeoffset() +
+			(jiffies - wall_jiffies) * USECS_PER_JIFFY);
+
+	while (tv->tv_nsec < 0) {
+		tv->tv_nsec += NSEC_PER_SEC;
+		tv->tv_sec--;
+	}
+
+	xtime.tv_sec = tv->tv_sec;
+	xtime.tv_nsec = tv->tv_nsec;
+	time_adjust = 0;		/* stop active adjtime() */
+	time_status |= STA_UNSYNC;
+	time_maxerror = NTP_PHASE_LIMIT;
+	time_esterror = NTP_PHASE_LIMIT;
+	write_sequnlock_irq(&xtime_lock);
+	clock_was_set();
+	return 0;
+}
+
+EXPORT_SYMBOL(do_settimeofday);
+
+static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+        do_timer(regs);
+#ifndef CONFIG_SMP
+	update_process_times(user_mode(regs));
+#endif
+        do_set_rtc(); //FIME - EVERY timer IRQ?
+        profile_tick(CPU_PROFILING, regs);
+	return IRQ_HANDLED; //FIXME - is this right?
+}
+
+static struct irqaction timer_irq = {
+	.name	= "timer",
+	.flags	= SA_INTERRUPT,
+	.handler = timer_interrupt,
+};
+
+extern void ioctime_init(void);
+
+/*
+ * Set up timer interrupt.
+ */
+void __init time_init(void)
+{
+	ioc_writeb(LATCH & 255, IOC_T0LTCHL);
+        ioc_writeb(LATCH >> 8, IOC_T0LTCHH);
+        ioc_writeb(0, IOC_T0GO);
+
+
+        setup_irq(IRQ_TIMER, &timer_irq);
+}
+
diff --git a/arch/arm26/kernel/traps.c b/arch/arm26/kernel/traps.c
new file mode 100644
index 00000000000..f64f5902239
--- /dev/null
+++ b/arch/arm26/kernel/traps.c
@@ -0,0 +1,548 @@
+/*
+ *  linux/arch/arm26/kernel/traps.c
+ *
+ *  Copyright (C) 1995-2002 Russell King
+ *  Fragments that appear the same as linux/arch/i386/kernel/traps.c (C) Linus Torvalds
+ *  Copyright (C) 2003 Ian Molton (ARM26)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  'traps.c' handles hardware exceptions after we have saved some state in
+ *  'linux/arch/arm26/lib/traps.S'.  Mostly a debugging aid, but will probably
+ *  kill the offending process.
+ */
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/personality.h>
+#include <linux/ptrace.h>
+#include <linux/elf.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+
+#include <asm/atomic.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+#include <asm/semaphore.h>
+
+#include "ptrace.h"
+
+extern void c_backtrace (unsigned long fp, int pmode);
+extern void show_pte(struct mm_struct *mm, unsigned long addr);
+
+const char *processor_modes[] = { "USER_26", "FIQ_26" , "IRQ_26" , "SVC_26" };
+
+static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" "*bad reason*"};
+
+/*
+ * Stack pointers should always be within the kernels view of
+ * physical memory.  If it is not there, then we can't dump
+ * out any information relating to the stack.
+ */
+static int verify_stack(unsigned long sp)
+{
+	if (sp < PAGE_OFFSET || (sp > (unsigned long)high_memory && high_memory != 0))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Dump out the contents of some memory nicely...
+ */
+static void dump_mem(const char *str, unsigned long bottom, unsigned long top)
+{
+	unsigned long p = bottom & ~31;
+	mm_segment_t fs;
+	int i;
+
+	/*
+	 * We need to switch to kernel mode so that we can use __get_user
+	 * to safely read from kernel space.  Note that we now dump the
+	 * code first, just in case the backtrace kills us.
+	 */
+	fs = get_fs();
+	set_fs(KERNEL_DS);
+
+	printk("%s", str);
+	printk("(0x%08lx to 0x%08lx)\n", bottom, top);
+
+	for (p = bottom & ~31; p < top;) {
+		printk("%04lx: ", p & 0xffff);
+
+		for (i = 0; i < 8; i++, p += 4) {
+			unsigned int val;
+
+			if (p < bottom || p >= top)
+				printk("         ");
+			else {
+				__get_user(val, (unsigned long *)p);
+				printk("%08x ", val);
+			}
+		}
+		printk ("\n");
+	}
+
+	set_fs(fs);
+}
+
+static void dump_instr(struct pt_regs *regs)
+{
+	unsigned long addr = instruction_pointer(regs);
+	const int width = 8;
+	mm_segment_t fs;
+	int i;
+
+	/*
+	 * We need to switch to kernel mode so that we can use __get_user
+	 * to safely read from kernel space.  Note that we now dump the
+	 * code first, just in case the backtrace kills us.
+	 */
+	fs = get_fs();
+	set_fs(KERNEL_DS);
+
+	printk("Code: ");
+	for (i = -4; i < 1; i++) {
+		unsigned int val, bad;
+
+		bad = __get_user(val, &((u32 *)addr)[i]);
+
+		if (!bad)
+			printk(i == 0 ? "(%0*x) " : "%0*x ", width, val);
+		else {
+			printk("bad PC value.");
+			break;
+		}
+	}
+	printk("\n");
+
+	set_fs(fs);
+}
+
+/*static*/ void __dump_stack(struct task_struct *tsk, unsigned long sp)
+{
+	dump_mem("Stack: ", sp, 8192+(unsigned long)tsk->thread_info);
+}
+
+void dump_stack(void)
+{
+#ifdef CONFIG_DEBUG_ERRORS
+        __backtrace();
+#endif
+}
+
+EXPORT_SYMBOL(dump_stack);
+
+//FIXME - was a static fn
+void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
+{
+	unsigned int fp;
+	int ok = 1;
+
+	printk("Backtrace: ");
+	fp = regs->ARM_fp;
+	if (!fp) {
+		printk("no frame pointer");
+		ok = 0;
+	} else if (verify_stack(fp)) {
+		printk("invalid frame pointer 0x%08x", fp);
+		ok = 0;
+	} else if (fp < (unsigned long)(tsk->thread_info + 1))
+		printk("frame pointer underflow");
+	printk("\n");
+
+	if (ok)
+		c_backtrace(fp, processor_mode(regs));
+}
+
+/* FIXME - this is probably wrong.. */
+void show_stack(struct task_struct *task, unsigned long *sp) {
+	dump_mem("Stack: ", (unsigned long)sp, 8192+(unsigned long)task->thread_info);
+}
+
+DEFINE_SPINLOCK(die_lock);
+
+/*
+ * This function is protected against re-entrancy.
+ */
+NORET_TYPE void die(const char *str, struct pt_regs *regs, int err)
+{
+	struct task_struct *tsk = current;
+
+	console_verbose();
+	spin_lock_irq(&die_lock);
+
+	printk("Internal error: %s: %x\n", str, err);
+	printk("CPU: %d\n", smp_processor_id());
+	show_regs(regs);
+	printk("Process %s (pid: %d, stack limit = 0x%p)\n",
+		current->comm, current->pid, tsk->thread_info + 1);
+
+	if (!user_mode(regs) || in_interrupt()) {
+		__dump_stack(tsk, (unsigned long)(regs + 1));
+		dump_backtrace(regs, tsk);
+		dump_instr(regs);
+	}
+while(1);
+	spin_unlock_irq(&die_lock);
+	do_exit(SIGSEGV);
+}
+
+void die_if_kernel(const char *str, struct pt_regs *regs, int err)
+{
+	if (user_mode(regs))
+    		return;
+
+    	die(str, regs, err);
+}
+
+static DECLARE_MUTEX(undef_sem);
+static int (*undef_hook)(struct pt_regs *);
+
+int request_undef_hook(int (*fn)(struct pt_regs *))
+{
+	int ret = -EBUSY;
+
+	down(&undef_sem);
+	if (undef_hook == NULL) {
+		undef_hook = fn;
+		ret = 0;
+	}
+	up(&undef_sem);
+
+	return ret;
+}
+
+int release_undef_hook(int (*fn)(struct pt_regs *))
+{
+	int ret = -EINVAL;
+
+	down(&undef_sem);
+	if (undef_hook == fn) {
+		undef_hook = NULL;
+		ret = 0;
+	}
+	up(&undef_sem);
+
+	return ret;
+}
+
+static int undefined_extension(struct pt_regs *regs, unsigned int op)
+{
+	switch (op) {
+	case 1:	/* 0xde01 / 0x?7f001f0 */
+		ptrace_break(current, regs);
+		return 0;
+	}
+	return 1;
+}
+
+asmlinkage void do_undefinstr(struct pt_regs *regs)
+{
+	siginfo_t info;
+	void *pc;
+
+	regs->ARM_pc -= 4;
+
+	pc = (unsigned long *)instruction_pointer(regs); /* strip PSR */
+
+	if (user_mode(regs)) {
+		u32 instr;
+
+		get_user(instr, (u32 *)pc);
+
+		if ((instr & 0x0fff00ff) == 0x07f000f0 &&
+		    undefined_extension(regs, (instr >> 8) & 255) == 0) {
+			regs->ARM_pc += 4;
+			return;
+		}
+	} else {
+		if (undef_hook && undef_hook(regs) == 0) {
+			regs->ARM_pc += 4;
+			return;
+		}
+	}
+
+#ifdef CONFIG_DEBUG_USER
+	printk(KERN_INFO "%s (%d): undefined instruction: pc=%p\n",
+		current->comm, current->pid, pc);
+	dump_instr(regs);
+#endif
+
+	current->thread.error_code = 0;
+	current->thread.trap_no = 6;
+
+	info.si_signo = SIGILL;
+	info.si_errno = 0;
+	info.si_code  = ILL_ILLOPC;
+	info.si_addr  = pc;
+
+	force_sig_info(SIGILL, &info, current);
+
+	die_if_kernel("Oops - undefined instruction", regs, 0);
+}
+
+asmlinkage void do_excpt(unsigned long address, struct pt_regs *regs, int mode)
+{
+	siginfo_t info;
+
+#ifdef CONFIG_DEBUG_USER
+	printk(KERN_INFO "%s (%d): address exception: pc=%08lx\n",
+		current->comm, current->pid, instruction_pointer(regs));
+	dump_instr(regs);
+#endif
+
+	current->thread.error_code = 0;
+	current->thread.trap_no = 11;
+
+	info.si_signo = SIGBUS;
+	info.si_errno = 0;
+	info.si_code  = BUS_ADRERR;
+	info.si_addr  = (void *)address;
+
+	force_sig_info(SIGBUS, &info, current);
+
+	die_if_kernel("Oops - address exception", regs, mode);
+}
+
+asmlinkage void do_unexp_fiq (struct pt_regs *regs)
+{
+#ifndef CONFIG_IGNORE_FIQ
+	printk("Hmm.  Unexpected FIQ received, but trying to continue\n");
+	printk("You may have a hardware problem...\n");
+#endif
+}
+
+/*
+ * bad_mode handles the impossible case in the vectors.  If you see one of
+ * these, then it's extremely serious, and could mean you have buggy hardware.
+ * It never returns, and never tries to sync.  We hope that we can at least
+ * dump out some state information...
+ */
+asmlinkage void bad_mode(struct pt_regs *regs, int reason, int proc_mode)
+{
+	unsigned int vectors = vectors_base();
+
+	console_verbose();
+
+	printk(KERN_CRIT "Bad mode in %s handler detected: mode %s\n",
+		handler[reason<5?reason:4], processor_modes[proc_mode]);
+
+	/*
+	 * Dump out the vectors and stub routines.  Maybe a better solution
+	 * would be to dump them out only if we detect that they are corrupted.
+	 */
+	dump_mem(KERN_CRIT "Vectors: ", vectors, vectors + 0x40);
+	dump_mem(KERN_CRIT "Stubs: ", vectors + 0x200, vectors + 0x4b8);
+
+	die("Oops", regs, 0);
+	local_irq_disable();
+	panic("bad mode");
+}
+
+static int bad_syscall(int n, struct pt_regs *regs)
+{
+	struct thread_info *thread = current_thread_info();
+	siginfo_t info;
+
+	if (current->personality != PER_LINUX && thread->exec_domain->handler) {
+		thread->exec_domain->handler(n, regs);
+		return regs->ARM_r0;
+	}
+
+#ifdef CONFIG_DEBUG_USER
+	printk(KERN_ERR "[%d] %s: obsolete system call %08x.\n",
+		current->pid, current->comm, n);
+	dump_instr(regs);
+#endif
+
+	info.si_signo = SIGILL;
+	info.si_errno = 0;
+	info.si_code  = ILL_ILLTRP;
+	info.si_addr  = (void *)instruction_pointer(regs) - 4;
+
+	force_sig_info(SIGILL, &info, current);
+	die_if_kernel("Oops", regs, n);
+	return regs->ARM_r0;
+}
+
+static inline void
+do_cache_op(unsigned long start, unsigned long end, int flags)
+{
+	struct vm_area_struct *vma;
+
+	if (end < start)
+		return;
+
+	vma = find_vma(current->active_mm, start);
+	if (vma && vma->vm_start < end) {
+		if (start < vma->vm_start)
+			start = vma->vm_start;
+		if (end > vma->vm_end)
+			end = vma->vm_end;
+	}
+}
+
+/*
+ * Handle all unrecognised system calls.
+ *  0x9f0000 - 0x9fffff are some more esoteric system calls
+ */
+#define NR(x) ((__ARM_NR_##x) - __ARM_NR_BASE)
+asmlinkage int arm_syscall(int no, struct pt_regs *regs)
+{
+	siginfo_t info;
+
+	if ((no >> 16) != 0x9f)
+		return bad_syscall(no, regs);
+
+	switch (no & 0xffff) {
+	case 0: /* branch through 0 */
+		info.si_signo = SIGSEGV;
+		info.si_errno = 0;
+		info.si_code  = SEGV_MAPERR;
+		info.si_addr  = NULL;
+
+		force_sig_info(SIGSEGV, &info, current);
+
+		die_if_kernel("branch through zero", regs, 0);
+		return 0;
+
+	case NR(breakpoint): /* SWI BREAK_POINT */
+		ptrace_break(current, regs);
+		return regs->ARM_r0;
+
+	case NR(cacheflush):
+		return 0;
+
+	case NR(usr26):
+		break;
+
+	default:
+		/* Calls 9f00xx..9f07ff are defined to return -ENOSYS
+		   if not implemented, rather than raising SIGILL.  This
+		   way the calling program can gracefully determine whether
+		   a feature is supported.  */
+		if (no <= 0x7ff)
+			return -ENOSYS;
+		break;
+	}
+#ifdef CONFIG_DEBUG_USER
+	/*
+	 * experience shows that these seem to indicate that
+	 * something catastrophic has happened
+	 */
+	printk("[%d] %s: arm syscall %d\n", current->pid, current->comm, no);
+	dump_instr(regs);
+	if (user_mode(regs)) {
+		show_regs(regs);
+		c_backtrace(regs->ARM_fp, processor_mode(regs));
+	}
+#endif
+	info.si_signo = SIGILL;
+	info.si_errno = 0;
+	info.si_code  = ILL_ILLTRP;
+	info.si_addr  = (void *)instruction_pointer(regs) - 4;
+
+	force_sig_info(SIGILL, &info, current);
+	die_if_kernel("Oops", regs, no);
+	return 0;
+}
+
+void __bad_xchg(volatile void *ptr, int size)
+{
+	printk("xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n",
+		__builtin_return_address(0), ptr, size);
+	BUG();
+}
+
+/*
+ * A data abort trap was taken, but we did not handle the instruction.
+ * Try to abort the user program, or panic if it was the kernel.
+ */
+asmlinkage void
+baddataabort(int code, unsigned long instr, struct pt_regs *regs)
+{
+	unsigned long addr = instruction_pointer(regs);
+	siginfo_t info;
+
+#ifdef CONFIG_DEBUG_USER
+	printk(KERN_ERR "[%d] %s: bad data abort: code %d instr 0x%08lx\n",
+		current->pid, current->comm, code, instr);
+	dump_instr(regs);
+	show_pte(current->mm, addr);
+#endif
+
+	info.si_signo = SIGILL;
+	info.si_errno = 0;
+	info.si_code  = ILL_ILLOPC;
+	info.si_addr  = (void *)addr;
+
+	force_sig_info(SIGILL, &info, current);
+	die_if_kernel("unknown data abort code", regs, instr);
+}
+
+volatile void __bug(const char *file, int line, void *data)
+{
+	printk(KERN_CRIT"kernel BUG at %s:%d!", file, line);
+	if (data)
+		printk(KERN_CRIT" - extra data = %p", data);
+	printk("\n");
+	*(int *)0 = 0;
+}
+
+void __readwrite_bug(const char *fn)
+{
+	printk("%s called, but not implemented", fn);
+	BUG();
+}
+
+void __pte_error(const char *file, int line, unsigned long val)
+{
+	printk("%s:%d: bad pte %08lx.\n", file, line, val);
+}
+
+void __pmd_error(const char *file, int line, unsigned long val)
+{
+	printk("%s:%d: bad pmd %08lx.\n", file, line, val);
+}
+
+void __pgd_error(const char *file, int line, unsigned long val)
+{
+	printk("%s:%d: bad pgd %08lx.\n", file, line, val);
+}
+
+asmlinkage void __div0(void)
+{
+	printk("Division by zero in kernel.\n");
+	dump_stack();
+}
+
+void abort(void)
+{
+	BUG();
+
+	/* if that doesn't kill us, halt */
+	panic("Oops failed to kill thread");
+}
+
+void __init trap_init(void)
+{
+	extern void __trap_init(unsigned long);
+	unsigned long base = vectors_base();
+
+	__trap_init(base);
+	if (base != 0)
+		printk(KERN_DEBUG "Relocating machine vectors to 0x%08lx\n",
+			base);
+}
diff --git a/arch/arm26/kernel/vmlinux-arm26-xip.lds.in b/arch/arm26/kernel/vmlinux-arm26-xip.lds.in
new file mode 100644
index 00000000000..ca61ec8218f
--- /dev/null
+++ b/arch/arm26/kernel/vmlinux-arm26-xip.lds.in
@@ -0,0 +1,134 @@
+/* ld script to make ARM Linux kernel
+ * taken from the i386 version by Russell King
+ * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ * borrowed from Russels ARM port by Ian Molton
+ */
+
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_ARCH(arm)
+ENTRY(stext)
+jiffies = jiffies_64;
+SECTIONS
+{
+	. = TEXTADDR;
+	.init : {			/* Init code and data		*/
+		_stext = .;
+		__init_begin = .;
+			_sinittext = .;
+			*(.init.text)
+			_einittext = .;
+		__proc_info_begin = .;
+			*(.proc.info)
+		__proc_info_end = .;
+		__arch_info_begin = .;
+			*(.arch.info)
+		__arch_info_end = .;
+		__tagtable_begin = .;
+			*(.taglist)
+		__tagtable_end = .;
+		. = ALIGN(16);
+		__setup_start = .;
+			*(.init.setup)
+		__setup_end = .;
+		__early_begin = .;
+			*(__early_param)
+		__early_end = .;
+		__initcall_start = .;
+			*(.initcall1.init)
+			*(.initcall2.init)
+			*(.initcall3.init)
+			*(.initcall4.init)
+			*(.initcall5.init)
+			*(.initcall6.init)
+			*(.initcall7.init)
+		__initcall_end = .;
+		__con_initcall_start = .;
+			*(.con_initcall.init)
+		__con_initcall_end = .;
+		. = ALIGN(32);
+		__initramfs_start = .;
+			usr/built-in.o(.init.ramfs)
+		__initramfs_end = .;
+		. = ALIGN(32768);
+		__init_end = .;
+	}
+
+	/DISCARD/ : {			/* Exit code and data		*/
+		*(.exit.text)
+		*(.exit.data)
+		*(.exitcall.exit)
+	}
+
+	.text : {			/* Real text segment		*/
+		_text = .;		/* Text and read-only data	*/
+			*(.text)
+			SCHED_TEXT
+			LOCK_TEXT       /* FIXME - borrowed from arm32 - check*/
+			*(.fixup)
+			*(.gnu.warning)
+			*(.rodata)
+			*(.rodata.*)
+			*(.glue_7)
+			*(.glue_7t)
+		*(.got)			/* Global offset table		*/
+
+		_etext = .;		/* End of text section		*/
+	}
+
+	. = ALIGN(16);
+	__ex_table : {			/* Exception table		*/
+		__start___ex_table = .;
+			*(__ex_table)
+		__stop___ex_table = .;
+	}
+
+	RODATA
+
+	_endtext = .;
+
+	. = DATAADDR;
+
+	_sdata = .;
+
+	.data : {
+		. = ALIGN(8192);
+		/*
+		 * first, the init thread union, aligned
+		 * to an 8192 byte boundary. (see arm26/kernel/init_task.c)
+		 * FIXME - sould this be 32K aligned on arm26?
+		 */
+		*(.init.task)
+
+		/*
+		 * The cacheline aligned data
+		 */
+		. = ALIGN(32);
+		*(.data.cacheline_aligned)
+
+		/*
+		 * and the usual data section
+		 */
+		*(.data)
+		CONSTRUCTORS
+
+		*(.init.data)
+
+		_edata = .;
+	}
+
+	.bss : {
+		__bss_start = .;	/* BSS				*/
+		*(.bss)
+		*(COMMON)
+		_end = . ;
+	}
+					/* Stabs debugging sections.	*/
+	.stab 0 : { *(.stab) }
+	.stabstr 0 : { *(.stabstr) }
+	.stab.excl 0 : { *(.stab.excl) }
+	.stab.exclstr 0 : { *(.stab.exclstr) }
+	.stab.index 0 : { *(.stab.index) }
+	.stab.indexstr 0 : { *(.stab.indexstr) }
+	.comment 0 : { *(.comment) }
+}
diff --git a/arch/arm26/kernel/vmlinux-arm26.lds.in b/arch/arm26/kernel/vmlinux-arm26.lds.in
new file mode 100644
index 00000000000..d1d3418d7eb
--- /dev/null
+++ b/arch/arm26/kernel/vmlinux-arm26.lds.in
@@ -0,0 +1,127 @@
+/* ld script to make ARM Linux kernel
+ * taken from the i386 version by Russell King
+ * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ * borrowed from Russels ARM port by Ian Molton and subsequently modified.
+ */
+
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_ARCH(arm)
+ENTRY(stext)
+jiffies = jiffies_64;
+SECTIONS
+{
+	. = TEXTADDR;
+	.init : {			/* Init code and data		*/
+		_stext = .;
+		__init_begin = .;
+			_sinittext = .;
+			*(.init.text)
+			_einittext = .;
+		__proc_info_begin = .;
+			*(.proc.info)
+		__proc_info_end = .;
+		__arch_info_begin = .;
+			*(.arch.info)
+		__arch_info_end = .;
+		__tagtable_begin = .;
+			*(.taglist)
+		__tagtable_end = .;
+			*(.init.data)
+		. = ALIGN(16);
+		__setup_start = .;
+			*(.init.setup)
+		__setup_end = .;
+		__early_begin = .;
+			*(__early_param)
+		__early_end = .;
+		__initcall_start = .;
+			*(.initcall1.init)
+			*(.initcall2.init)
+			*(.initcall3.init)
+			*(.initcall4.init)
+			*(.initcall5.init)
+			*(.initcall6.init)
+			*(.initcall7.init)
+		__initcall_end = .;
+		__con_initcall_start = .;
+			*(.con_initcall.init)
+		__con_initcall_end = .;
+		. = ALIGN(32);
+		__initramfs_start = .;
+			usr/built-in.o(.init.ramfs)
+		__initramfs_end = .;
+		. = ALIGN(32768);
+		__init_end = .;
+	}
+
+	/DISCARD/ : {			/* Exit code and data		*/
+		*(.exit.text)
+		*(.exit.data)
+		*(.exitcall.exit)
+	}
+
+	.text : {			/* Real text segment		*/
+		_text = .;		/* Text and read-only data	*/
+			*(.text)
+			SCHED_TEXT
+			LOCK_TEXT
+			*(.fixup)
+			*(.gnu.warning)
+			*(.rodata)
+			*(.rodata.*)
+			*(.glue_7)
+			*(.glue_7t)
+		*(.got)			/* Global offset table		*/
+
+		_etext = .;		/* End of text section		*/
+	}
+
+	. = ALIGN(16);
+	__ex_table : {			/* Exception table		*/
+		__start___ex_table = .;
+			*(__ex_table)
+		__stop___ex_table = .;
+	}
+
+	RODATA
+
+	. = ALIGN(8192);
+
+	.data : {
+		/*
+		 * first, the init task union, aligned
+		 * to an 8192 byte boundary. (see arm26/kernel/init_task.c)
+		 */
+		*(.init.task)
+
+		/*
+		 * The cacheline aligned data
+		 */
+		. = ALIGN(32);
+		*(.data.cacheline_aligned)
+
+		/*
+		 * and the usual data section
+		 */
+		*(.data)
+		CONSTRUCTORS
+
+		_edata = .;
+	}
+
+	.bss : {
+		__bss_start = .;	/* BSS				*/
+		*(.bss)
+		*(COMMON)
+		_end = . ;
+	}
+					/* Stabs debugging sections.	*/
+	.stab 0 : { *(.stab) }
+	.stabstr 0 : { *(.stabstr) }
+	.stab.excl 0 : { *(.stab.excl) }
+	.stab.exclstr 0 : { *(.stab.exclstr) }
+	.stab.index 0 : { *(.stab.index) }
+	.stab.indexstr 0 : { *(.stab.indexstr) }
+	.comment 0 : { *(.comment) }
+}
diff --git a/arch/arm26/kernel/vmlinux.lds.S b/arch/arm26/kernel/vmlinux.lds.S
new file mode 100644
index 00000000000..811a6904801
--- /dev/null
+++ b/arch/arm26/kernel/vmlinux.lds.S
@@ -0,0 +1,12 @@
+#include <linux/config.h>
+
+#ifdef CONFIG_XIP_KERNEL
+
+#include "vmlinux-arm26-xip.lds.in"
+
+#else
+
+#include "vmlinux-arm26.lds.in"
+
+#endif
+
diff --git a/arch/arm26/lib/Makefile b/arch/arm26/lib/Makefile
new file mode 100644
index 00000000000..6df2b793d36
--- /dev/null
+++ b/arch/arm26/lib/Makefile
@@ -0,0 +1,26 @@
+#
+# linux/arch/arm26/lib/Makefile
+#
+# Copyright (C) 1995-2000 Russell King
+#
+
+lib-y		:= backtrace.o changebit.o csumipv6.o csumpartial.o   \
+		   csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
+		   copy_page.o delay.o findbit.o memchr.o memcpy.o    \
+		   memset.o memzero.o setbit.o                        \
+		   strchr.o strrchr.o testchangebit.o                 \
+		   testclearbit.o testsetbit.o getuser.o              \
+		   putuser.o ashldi3.o ashrdi3.o lshrdi3.o muldi3.o   \
+		   ucmpdi2.o udivdi3.o lib1funcs.o ecard.o io-acorn.o \
+		   floppydma.o io-readsb.o io-writesb.o io-writesl.o  \
+		   uaccess-kernel.o uaccess-user.o io-readsw.o        \
+		   io-writesw.o io-readsl.o ecard.o io-acorn.o        \
+		   floppydma.o
+
+lib-n		:=
+
+lib-$(CONFIG_VT)+= kbd.o
+
+csumpartialcopy.o: csumpartialcopygeneric.S
+csumpartialcopyuser.o: csumpartialcopygeneric.S
+
diff --git a/arch/arm26/lib/ashldi3.c b/arch/arm26/lib/ashldi3.c
new file mode 100644
index 00000000000..130f5a83966
--- /dev/null
+++ b/arch/arm26/lib/ashldi3.c
@@ -0,0 +1,61 @@
+/* More subroutines needed by GCC output code on some machines.  */
+/* Compile this one with gcc.  */
+/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.
+ */
+/* support functions required by the kernel. based on code from gcc-2.95.3 */
+/* I Molton	29/07/01 */
+
+#include "gcclib.h"
+
+DItype
+__ashldi3 (DItype u, word_type b)
+{
+  DIunion w;
+  word_type bm;
+  DIunion uu;
+
+  if (b == 0)
+    return u;
+
+  uu.ll = u;
+
+  bm = (sizeof (SItype) * BITS_PER_UNIT) - b;
+  if (bm <= 0)
+    {
+      w.s.low = 0;
+      w.s.high = (USItype)uu.s.low << -bm;
+    }
+  else
+    {
+      USItype carries = (USItype)uu.s.low >> bm;
+      w.s.low = (USItype)uu.s.low << b;
+      w.s.high = ((USItype)uu.s.high << b) | carries;
+    }
+
+  return w.ll;
+}
+
diff --git a/arch/arm26/lib/ashrdi3.c b/arch/arm26/lib/ashrdi3.c
new file mode 100644
index 00000000000..71625d218f8
--- /dev/null
+++ b/arch/arm26/lib/ashrdi3.c
@@ -0,0 +1,61 @@
+/* More subroutines needed by GCC output code on some machines.  */
+/* Compile this one with gcc.  */
+/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.
+ */
+/* support functions required by the kernel. based on code from gcc-2.95.3 */
+/* I Molton     29/07/01 */
+
+#include "gcclib.h"
+
+DItype
+__ashrdi3 (DItype u, word_type b)
+{
+  DIunion w;
+  word_type bm;
+  DIunion uu;
+
+  if (b == 0)
+    return u;
+
+  uu.ll = u;
+
+  bm = (sizeof (SItype) * BITS_PER_UNIT) - b;
+  if (bm <= 0)
+    {
+      /* w.s.high = 1..1 or 0..0 */
+      w.s.high = uu.s.high >> (sizeof (SItype) * BITS_PER_UNIT - 1);
+      w.s.low = uu.s.high >> -bm;
+    }
+  else
+    {
+      USItype carries = (USItype)uu.s.high << bm;
+      w.s.high = uu.s.high >> b;
+      w.s.low = ((USItype)uu.s.low >> b) | carries;
+    }
+
+  return w.ll;
+}
diff --git a/arch/arm26/lib/backtrace.S b/arch/arm26/lib/backtrace.S
new file mode 100644
index 00000000000..d793fe4339f
--- /dev/null
+++ b/arch/arm26/lib/backtrace.S
@@ -0,0 +1,145 @@
+/*
+ *  linux/arch/arm26/lib/backtrace.S
+ *
+ *  Copyright (C) 1995, 1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+		.text
+
+@ fp is 0 or stack frame
+
+#define frame	r4
+#define next	r5
+#define save	r6
+#define mask	r7
+#define offset	r8
+
+ENTRY(__backtrace)
+		mov	r1, #0x10
+		mov	r0, fp
+
+ENTRY(c_backtrace)
+
+#ifdef CONFIG_NO_FRAME_POINTER
+		mov	pc, lr
+#else
+
+		stmfd	sp!, {r4 - r8, lr}	@ Save an extra register so we have a location...
+		mov	mask, #0xfc000003
+		tst	mask, r0
+		movne	r0, #0
+		movs	frame, r0
+1:		moveq	r0, #-2
+		LOADREGS(eqfd, sp!, {r4 - r8, pc})
+
+2:		stmfd	sp!, {pc}		@ calculate offset of PC in STMIA instruction
+		ldr	r0, [sp], #4
+		adr	r1, 2b - 4
+		sub	offset, r0, r1
+
+3:		tst	frame, mask		@ Check for address exceptions...
+		bne	1b
+
+1001:		ldr	next, [frame, #-12]	@ get fp
+1002:		ldr	r2, [frame, #-4]	@ get lr
+1003:		ldr	r3, [frame, #0]		@ get pc
+		sub	save, r3, offset	@ Correct PC for prefetching
+		bic	save, save, mask
+1004:		ldr	r1, [save, #0]		@ get instruction at function
+		mov	r1, r1, lsr #10
+		ldr	r3, .Ldsi+4
+		teq	r1, r3
+		subeq	save, save, #4
+		adr	r0, .Lfe
+		mov	r1, save
+		bic	r2, r2, mask
+		bl	printk			@ print pc and link register
+
+		ldr	r0, [frame, #-8]	@ get sp
+		sub	r0, r0, #4
+1005:		ldr	r1, [save, #4]		@ get instruction at function+4
+		mov	r3, r1, lsr #10
+		ldr	r2, .Ldsi+4
+		teq	r3, r2			@ Check for stmia sp!, {args}
+		addeq	save, save, #4		@ next instruction
+		bleq	.Ldumpstm
+
+		sub	r0, frame, #16
+1006:		ldr	r1, [save, #4]		@ Get 'stmia sp!, {rlist, fp, ip, lr, pc}' instruction
+		mov	r3, r1, lsr #10
+		ldr	r2, .Ldsi
+		teq	r3, r2
+		bleq	.Ldumpstm
+
+		teq	frame, next
+		movne	frame, next
+		teqne	frame, #0
+		bne	3b
+		LOADREGS(fd, sp!, {r4 - r8, pc})
+
+/*
+ * Fixup for LDMDB
+ */
+		.section .fixup,"ax"
+		.align	0
+1007:		ldr	r0, =.Lbad
+		mov	r1, frame
+		bl	printk
+		LOADREGS(fd, sp!, {r4 - r8, pc})
+		.ltorg
+		.previous
+		
+		.section __ex_table,"a"
+		.align	3
+		.long	1001b, 1007b
+		.long	1002b, 1007b
+		.long	1003b, 1007b
+		.long	1004b, 1007b
+		.long	1005b, 1007b
+		.long	1006b, 1007b
+		.previous
+
+#define instr r4
+#define reg   r5
+#define stack r6
+
+.Ldumpstm:	stmfd	sp!, {instr, reg, stack, r7, lr}
+		mov	stack, r0
+		mov	instr, r1
+		mov	reg, #9
+		mov	r7, #0
+1:		mov	r3, #1
+		tst	instr, r3, lsl reg
+		beq	2f
+		add	r7, r7, #1
+		teq	r7, #4
+		moveq	r7, #0
+		moveq	r3, #'\n'
+		movne	r3, #' '
+		ldr	r2, [stack], #-4
+		mov	r1, reg
+		adr	r0, .Lfp
+		bl	printk
+2:		subs	reg, reg, #1
+		bpl	1b
+		teq	r7, #0
+		adrne	r0, .Lcr
+		blne	printk
+		mov	r0, stack
+		LOADREGS(fd, sp!, {instr, reg, stack, r7, pc})
+
+.Lfe:		.asciz	"Function entered at [<%p>] from [<%p>]\n"
+.Lfp:		.asciz	" r%d = %08X%c"
+.Lcr:		.asciz	"\n"
+.Lbad:		.asciz	"Backtrace aborted due to bad frame pointer <%p>\n"
+		.align
+.Ldsi:		.word	0x00e92dd8 >> 2
+		.word	0x00e92d00 >> 2
+
+#endif
diff --git a/arch/arm26/lib/changebit.S b/arch/arm26/lib/changebit.S
new file mode 100644
index 00000000000..1b6a077be5a
--- /dev/null
+++ b/arch/arm26/lib/changebit.S
@@ -0,0 +1,28 @@
+/*
+ *  linux/arch/arm26/lib/changebit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+
+/* Purpose  : Function to change a bit
+ * Prototype: int change_bit(int bit, void *addr)
+ */
+ENTRY(_change_bit_be)
+		eor	r0, r0, #0x18		@ big endian byte ordering
+ENTRY(_change_bit_le)
+		and	r2, r0, #7
+		mov	r3, #1
+		mov	r3, r3, lsl r2
+		save_and_disable_irqs ip, r2
+		ldrb	r2, [r1, r0, lsr #3]
+		eor	r2, r2, r3
+		strb	r2, [r1, r0, lsr #3]
+		restore_irqs ip
+		RETINSTR(mov,pc,lr)
diff --git a/arch/arm26/lib/clearbit.S b/arch/arm26/lib/clearbit.S
new file mode 100644
index 00000000000..0a895b0c759
--- /dev/null
+++ b/arch/arm26/lib/clearbit.S
@@ -0,0 +1,31 @@
+/*
+ *  linux/arch/arm26/lib/clearbit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+
+/*
+ * Purpose  : Function to clear a bit
+ * Prototype: int clear_bit(int bit, void *addr)
+ */
+ENTRY(_clear_bit_be)
+		eor	r0, r0, #0x18		@ big endian byte ordering
+ENTRY(_clear_bit_le)
+		and	r2, r0, #7
+		mov	r3, #1
+		mov	r3, r3, lsl r2
+		save_and_disable_irqs ip, r2
+		ldrb	r2, [r1, r0, lsr #3]
+		bic	r2, r2, r3
+		strb	r2, [r1, r0, lsr #3]
+		restore_irqs ip
+		RETINSTR(mov,pc,lr)
+
+
diff --git a/arch/arm26/lib/copy_page.S b/arch/arm26/lib/copy_page.S
new file mode 100644
index 00000000000..2d79ee12ea1
--- /dev/null
+++ b/arch/arm26/lib/copy_page.S
@@ -0,0 +1,62 @@
+/*
+ *  linux/arch/arm26/lib/copypage.S
+ *
+ *  Copyright (C) 1995-1999 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/asm_offsets.h>
+
+		.text
+		.align	5
+/*
+ * ARMv3 optimised copy_user_page
+ *
+ * FIXME: rmk do we need to handle cache stuff...
+ * FIXME: im is this right on ARM26?
+ */
+ENTRY(__copy_user_page)
+	stmfd	sp!, {r4, lr}			@	2
+	mov	r2, #PAGE_SZ/64			@	1
+	ldmia	r1!, {r3, r4, ip, lr}		@	4+1
+1:	stmia	r0!, {r3, r4, ip, lr}		@	4
+	ldmia	r1!, {r3, r4, ip, lr}		@	4+1
+	stmia	r0!, {r3, r4, ip, lr}		@	4
+	ldmia	r1!, {r3, r4, ip, lr}		@	4+1
+	stmia	r0!, {r3, r4, ip, lr}		@	4
+	ldmia	r1!, {r3, r4, ip, lr}		@	4
+	subs	r2, r2, #1			@	1
+	stmia	r0!, {r3, r4, ip, lr}		@	4
+	ldmneia	r1!, {r3, r4, ip, lr}		@	4
+	bne	1b				@	1
+	LOADREGS(fd, sp!, {r4, pc})		@	3
+
+	.align	5
+/*
+ * ARMv3 optimised clear_user_page
+ *
+ * FIXME: rmk do we need to handle cache stuff...
+ */
+ENTRY(__clear_user_page)
+	str	lr, [sp, #-4]!
+	mov	r1, #PAGE_SZ/64			@ 1
+	mov	r2, #0				@ 1
+	mov	r3, #0				@ 1
+	mov	ip, #0				@ 1
+	mov	lr, #0				@ 1
+1:	stmia	r0!, {r2, r3, ip, lr}		@ 4
+	stmia	r0!, {r2, r3, ip, lr}		@ 4
+	stmia	r0!, {r2, r3, ip, lr}		@ 4
+	stmia	r0!, {r2, r3, ip, lr}		@ 4
+	subs	r1, r1, #1			@ 1
+	bne	1b				@ 1
+	ldr	pc, [sp], #4
+
+	.section ".init.text", #alloc, #execinstr
+
diff --git a/arch/arm26/lib/csumipv6.S b/arch/arm26/lib/csumipv6.S
new file mode 100644
index 00000000000..62831155acd
--- /dev/null
+++ b/arch/arm26/lib/csumipv6.S
@@ -0,0 +1,32 @@
+/*
+ *  linux/arch/arm26/lib/csumipv6.S
+ *
+ *  Copyright (C) 1995-1998 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+		.text
+
+ENTRY(__csum_ipv6_magic)
+		str	lr, [sp, #-4]!
+		adds	ip, r2, r3
+		ldmia	r1, {r1 - r3, lr}
+		adcs	ip, ip, r1
+		adcs	ip, ip, r2
+		adcs	ip, ip, r3
+		adcs	ip, ip, lr
+		ldmia	r0, {r0 - r3}
+		adcs	r0, ip, r0
+		adcs	r0, r0, r1
+		adcs	r0, r0, r2
+		ldr	r2, [sp, #4]
+		adcs	r0, r0, r3
+		adcs	r0, r0, r2
+		adcs	r0, r0, #0
+		LOADREGS(fd, sp!, {pc})
+
diff --git a/arch/arm26/lib/csumpartial.S b/arch/arm26/lib/csumpartial.S
new file mode 100644
index 00000000000..e53e7109e62
--- /dev/null
+++ b/arch/arm26/lib/csumpartial.S
@@ -0,0 +1,130 @@
+/*
+ *  linux/arch/arm26/lib/csumpartial.S
+ *
+ *  Copyright (C) 1995-1998 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+		.text
+
+/*
+ * Function: __u32 csum_partial(const char *src, int len, __u32 sum)
+ * Params  : r0 = buffer, r1 = len, r2 = checksum
+ * Returns : r0 = new checksum
+ */
+
+buf	.req	r0
+len	.req	r1
+sum	.req	r2
+td0	.req	r3
+td1	.req	r4	@ save before use
+td2	.req	r5	@ save before use
+td3	.req	lr
+
+.zero:		mov	r0, sum
+		add	sp, sp, #4
+		ldr	pc, [sp], #4
+
+		/*
+		 * Handle 0 to 7 bytes, with any alignment of source and
+		 * destination pointers.  Note that when we get here, C = 0
+		 */
+.less8:		teq	len, #0			@ check for zero count
+		beq	.zero
+
+		/* we must have at least one byte. */
+		tst	buf, #1			@ odd address?
+		ldrneb	td0, [buf], #1
+		subne	len, len, #1
+		adcnes	sum, sum, td0, lsl #byte(1)
+
+.less4:		tst	len, #6
+		beq	.less8_byte
+
+		/* we are now half-word aligned */
+
+.less8_wordlp:
+#if __LINUX_ARM_ARCH__ >= 4
+		ldrh	td0, [buf], #2
+		sub	len, len, #2
+#else
+		ldrb	td0, [buf], #1
+		ldrb	td3, [buf], #1
+		sub	len, len, #2
+		orr	td0, td0, td3, lsl #8
+#endif
+		adcs	sum, sum, td0
+		tst	len, #6
+		bne	.less8_wordlp
+
+.less8_byte:	tst	len, #1			@ odd number of bytes
+		ldrneb	td0, [buf], #1		@ include last byte
+		adcnes	sum, sum, td0, lsl #byte(0)	@ update checksum
+
+.done:		adc	r0, sum, #0		@ collect up the last carry
+		ldr	td0, [sp], #4
+		tst	td0, #1			@ check buffer alignment
+		movne	td0, r0, lsl #8		@ rotate checksum by 8 bits
+		orrne	r0, td0, r0, lsr #24
+		ldr	pc, [sp], #4		@ return
+
+.not_aligned:	tst	buf, #1			@ odd address
+		ldrneb	td0, [buf], #1		@ make even
+		subne	len, len, #1
+		adcnes	sum, sum, td0, lsl #byte(1)	@ update checksum
+
+		tst	buf, #2			@ 32-bit aligned?
+#if __LINUX_ARM_ARCH__ >= 4
+		ldrneh	td0, [buf], #2		@ make 32-bit aligned
+		subne	len, len, #2
+#else
+		ldrneb	td0, [buf], #1
+		ldrneb	ip, [buf], #1
+		subne	len, len, #2
+		orrne	td0, td0, ip, lsl #8
+#endif
+		adcnes	sum, sum, td0		@ update checksum
+		mov	pc, lr
+
+ENTRY(csum_partial)
+		stmfd	sp!, {buf, lr}
+		cmp	len, #8			@ Ensure that we have at least
+		blo	.less8			@ 8 bytes to copy.
+
+		adds	sum, sum, #0		@ C = 0
+		tst	buf, #3			@ Test destination alignment
+		blne	.not_aligned		@ aligh destination, return here
+
+1:		bics	ip, len, #31
+		beq	3f
+
+		stmfd	sp!, {r4 - r5}
+2:		ldmia	buf!, {td0, td1, td2, td3}
+		adcs	sum, sum, td0
+		adcs	sum, sum, td1
+		adcs	sum, sum, td2
+		adcs	sum, sum, td3
+		ldmia	buf!, {td0, td1, td2, td3}
+		adcs	sum, sum, td0
+		adcs	sum, sum, td1
+		adcs	sum, sum, td2
+		adcs	sum, sum, td3
+		sub	ip, ip, #32
+		teq	ip, #0
+		bne	2b
+		ldmfd	sp!, {r4 - r5}
+
+3:		tst	len, #0x1c		@ should not change C
+		beq	.less4
+
+4:		ldr	td0, [buf], #4
+		sub	len, len, #4
+		adcs	sum, sum, td0
+		tst	len, #0x1c
+		bne	4b
+		b	.less4
diff --git a/arch/arm26/lib/csumpartialcopy.S b/arch/arm26/lib/csumpartialcopy.S
new file mode 100644
index 00000000000..a1c4b5fdd49
--- /dev/null
+++ b/arch/arm26/lib/csumpartialcopy.S
@@ -0,0 +1,52 @@
+/*
+ *  linux/arch/arm26/lib/csumpartialcopy.S
+ *
+ *  Copyright (C) 1995-1998 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+		.text
+
+/* Function: __u32 csum_partial_copy_nocheck(const char *src, char *dst, int len, __u32 sum)
+ * Params  : r0 = src, r1 = dst, r2 = len, r3 = checksum
+ * Returns : r0 = new checksum
+ */
+
+		.macro	save_regs
+		stmfd	sp!, {r1, r4 - r8, fp, ip, lr, pc}
+		.endm
+
+		.macro	load_regs,flags
+		LOADREGS(\flags,fp,{r1, r4 - r8, fp, sp, pc})
+		.endm
+
+		.macro	load1b, reg1
+		ldrb	\reg1, [r0], #1
+		.endm
+
+		.macro	load2b, reg1, reg2
+		ldrb	\reg1, [r0], #1
+		ldrb	\reg2, [r0], #1
+		.endm
+
+		.macro	load1l, reg1
+		ldr	\reg1, [r0], #4
+		.endm
+
+		.macro	load2l, reg1, reg2
+		ldr	\reg1, [r0], #4
+		ldr	\reg2, [r0], #4
+		.endm
+
+		.macro	load4l, reg1, reg2, reg3, reg4
+		ldmia	r0!, {\reg1, \reg2, \reg3, \reg4}
+		.endm
+
+#define FN_ENTRY	ENTRY(csum_partial_copy_nocheck)
+
+#include "csumpartialcopygeneric.S"
diff --git a/arch/arm26/lib/csumpartialcopygeneric.S b/arch/arm26/lib/csumpartialcopygeneric.S
new file mode 100644
index 00000000000..5249c3ad11d
--- /dev/null
+++ b/arch/arm26/lib/csumpartialcopygeneric.S
@@ -0,0 +1,352 @@
+/*
+ *  linux/arch/arm26/lib/csumpartialcopygeneric.S
+ *
+ *  Copyright (C) 1995-2001 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * JMA 01/06/03 Commented out some shl0s; probobly irrelevant to arm26 
+ *
+ */
+
+/*
+ * unsigned int
+ * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
+ *  r0 = src, r1 = dst, r2 = len, r3 = sum
+ *  Returns : r0 = checksum
+ *
+ * Note that 'tst' and 'teq' preserve the carry flag.
+ */
+
+/* Quick hack */
+                .macro  save_regs
+                stmfd   sp!, {r1, r4 - r8, fp, ip, lr, pc}
+                .endm
+
+/* end Quick Hack */
+
+src	.req	r0
+dst	.req	r1
+len	.req	r2
+sum	.req	r3
+
+.zero:		mov	r0, sum
+		load_regs	ea
+
+		/*
+		 * Align an unaligned destination pointer.  We know that
+		 * we have >= 8 bytes here, so we don't need to check
+		 * the length.  Note that the source pointer hasn't been
+		 * aligned yet.
+		 */
+.dst_unaligned:	tst	dst, #1
+		beq	.dst_16bit
+
+		load1b	ip
+		sub	len, len, #1
+		adcs	sum, sum, ip, lsl #byte(1)	@ update checksum
+		strb	ip, [dst], #1
+		tst	dst, #2
+		moveq	pc, lr			@ dst is now 32bit aligned
+
+.dst_16bit:	load2b	r8, ip
+		sub	len, len, #2
+		adcs	sum, sum, r8, lsl #byte(0)
+		strb	r8, [dst], #1
+		adcs	sum, sum, ip, lsl #byte(1)
+		strb	ip, [dst], #1
+		mov	pc, lr			@ dst is now 32bit aligned
+
+		/*
+		 * Handle 0 to 7 bytes, with any alignment of source and
+		 * destination pointers.  Note that when we get here, C = 0
+		 */
+.less8:		teq	len, #0			@ check for zero count
+		beq	.zero
+
+		/* we must have at least one byte. */
+		tst	dst, #1			@ dst 16-bit aligned
+		beq	.less8_aligned
+
+		/* Align dst */
+		load1b	ip
+		sub	len, len, #1
+		adcs	sum, sum, ip, lsl #byte(1)	@ update checksum
+		strb	ip, [dst], #1
+		tst	len, #6
+		beq	.less8_byteonly
+
+1:		load2b	r8, ip
+		sub	len, len, #2
+		adcs	sum, sum, r8, lsl #byte(0)
+		strb	r8, [dst], #1
+		adcs	sum, sum, ip, lsl #byte(1)
+		strb	ip, [dst], #1
+.less8_aligned:	tst	len, #6
+		bne	1b
+.less8_byteonly:
+		tst	len, #1
+		beq	.done
+		load1b	r8
+		adcs	sum, sum, r8, lsl #byte(0)	@ update checksum
+		strb	r8, [dst], #1
+		b	.done
+
+FN_ENTRY
+		mov	ip, sp
+		save_regs
+		sub	fp, ip, #4
+
+		cmp	len, #8			@ Ensure that we have at least
+		blo	.less8			@ 8 bytes to copy.
+
+		adds	sum, sum, #0		@ C = 0
+		tst	dst, #3			@ Test destination alignment
+		blne	.dst_unaligned		@ align destination, return here
+
+		/*
+		 * Ok, the dst pointer is now 32bit aligned, and we know
+		 * that we must have more than 4 bytes to copy.  Note
+		 * that C contains the carry from the dst alignment above.
+		 */
+
+		tst	src, #3			@ Test source alignment
+		bne	.src_not_aligned
+
+		/* Routine for src & dst aligned */
+
+		bics	ip, len, #15
+		beq	2f
+
+1:		load4l	r4, r5, r6, r7
+		stmia	dst!, {r4, r5, r6, r7}
+		adcs	sum, sum, r4
+		adcs	sum, sum, r5
+		adcs	sum, sum, r6
+		adcs	sum, sum, r7
+		sub	ip, ip, #16
+		teq	ip, #0
+		bne	1b
+
+2:		ands	ip, len, #12
+		beq	4f
+		tst	ip, #8
+		beq	3f
+		load2l	r4, r5
+		stmia	dst!, {r4, r5}
+		adcs	sum, sum, r4
+		adcs	sum, sum, r5
+		tst	ip, #4
+		beq	4f
+
+3:		load1l	r4
+		str	r4, [dst], #4
+		adcs	sum, sum, r4
+
+4:		ands	len, len, #3
+		beq	.done
+		load1l	r4
+		tst	len, #2
+/*		mov	r5, r4, lsr #byte(0)
+FIXME? 0 Shift anyhow!
+*/
+		beq	.exit
+		adcs	sum, sum, r4, push #16
+		strb	r5, [dst], #1
+		mov	r5, r4, lsr #byte(1)
+		strb	r5, [dst], #1
+		mov	r5, r4, lsr #byte(2)
+.exit:		tst	len, #1
+		strneb	r5, [dst], #1
+		andne	r5, r5, #255
+		adcnes	sum, sum, r5, lsl #byte(0)
+
+		/*
+		 * If the dst pointer was not 16-bit aligned, we
+		 * need to rotate the checksum here to get around
+		 * the inefficient byte manipulations in the
+		 * architecture independent code.
+		 */
+.done:		adc	r0, sum, #0
+		ldr	sum, [sp, #0]		@ dst
+		tst	sum, #1
+		movne	sum, r0, lsl #8
+		orrne	r0, sum, r0, lsr #24
+		load_regs	ea
+
+.src_not_aligned:
+		adc	sum, sum, #0		@ include C from dst alignment
+		and	ip, src, #3
+		bic	src, src, #3
+		load1l	r5
+		cmp	ip, #2
+		beq	.src2_aligned
+		bhi	.src3_aligned
+		mov	r4, r5, pull #8		@ C = 0
+		bics	ip, len, #15
+		beq	2f
+1:		load4l	r5, r6, r7, r8
+		orr	r4, r4, r5, push #24
+		mov	r5, r5, pull #8
+		orr	r5, r5, r6, push #24
+		mov	r6, r6, pull #8
+		orr	r6, r6, r7, push #24
+		mov	r7, r7, pull #8
+		orr	r7, r7, r8, push #24
+		stmia	dst!, {r4, r5, r6, r7}
+		adcs	sum, sum, r4
+		adcs	sum, sum, r5
+		adcs	sum, sum, r6
+		adcs	sum, sum, r7
+		mov	r4, r8, pull #8
+		sub	ip, ip, #16
+		teq	ip, #0
+		bne	1b
+2:		ands	ip, len, #12
+		beq	4f
+		tst	ip, #8
+		beq	3f
+		load2l	r5, r6
+		orr	r4, r4, r5, push #24
+		mov	r5, r5, pull #8
+		orr	r5, r5, r6, push #24
+		stmia	dst!, {r4, r5}
+		adcs	sum, sum, r4
+		adcs	sum, sum, r5
+		mov	r4, r6, pull #8
+		tst	ip, #4
+		beq	4f
+3:		load1l	r5
+		orr	r4, r4, r5, push #24
+		str	r4, [dst], #4
+		adcs	sum, sum, r4
+		mov	r4, r5, pull #8
+4:		ands	len, len, #3
+		beq	.done
+/*		mov	r5, r4, lsr #byte(0)
+FIXME? 0 Shift anyhow
+*/
+		tst	len, #2
+		beq	.exit
+		adcs	sum, sum, r4, push #16
+		strb	r5, [dst], #1
+		mov	r5, r4, lsr #byte(1)
+		strb	r5, [dst], #1
+		mov	r5, r4, lsr #byte(2)
+		b	.exit
+
+.src2_aligned:	mov	r4, r5, pull #16
+		adds	sum, sum, #0
+		bics	ip, len, #15
+		beq	2f
+1:		load4l	r5, r6, r7, r8
+		orr	r4, r4, r5, push #16
+		mov	r5, r5, pull #16
+		orr	r5, r5, r6, push #16
+		mov	r6, r6, pull #16
+		orr	r6, r6, r7, push #16
+		mov	r7, r7, pull #16
+		orr	r7, r7, r8, push #16
+		stmia	dst!, {r4, r5, r6, r7}
+		adcs	sum, sum, r4
+		adcs	sum, sum, r5
+		adcs	sum, sum, r6
+		adcs	sum, sum, r7
+		mov	r4, r8, pull #16
+		sub	ip, ip, #16
+		teq	ip, #0
+		bne	1b
+2:		ands	ip, len, #12
+		beq	4f
+		tst	ip, #8
+		beq	3f
+		load2l	r5, r6
+		orr	r4, r4, r5, push #16
+		mov	r5, r5, pull #16
+		orr	r5, r5, r6, push #16
+		stmia	dst!, {r4, r5}
+		adcs	sum, sum, r4
+		adcs	sum, sum, r5
+		mov	r4, r6, pull #16
+		tst	ip, #4
+		beq	4f
+3:		load1l	r5
+		orr	r4, r4, r5, push #16
+		str	r4, [dst], #4
+		adcs	sum, sum, r4
+		mov	r4, r5, pull #16
+4:		ands	len, len, #3
+		beq	.done
+/*		mov	r5, r4, lsr #byte(0)
+FIXME? 0 Shift anyhow
+*/
+		tst	len, #2
+		beq	.exit
+		adcs	sum, sum, r4
+		strb	r5, [dst], #1
+		mov	r5, r4, lsr #byte(1)
+		strb	r5, [dst], #1
+		tst	len, #1
+		beq	.done
+		load1b	r5
+		b	.exit
+
+.src3_aligned:	mov	r4, r5, pull #24
+		adds	sum, sum, #0
+		bics	ip, len, #15
+		beq	2f
+1:		load4l	r5, r6, r7, r8
+		orr	r4, r4, r5, push #8
+		mov	r5, r5, pull #24
+		orr	r5, r5, r6, push #8
+		mov	r6, r6, pull #24
+		orr	r6, r6, r7, push #8
+		mov	r7, r7, pull #24
+		orr	r7, r7, r8, push #8
+		stmia	dst!, {r4, r5, r6, r7}
+		adcs	sum, sum, r4
+		adcs	sum, sum, r5
+		adcs	sum, sum, r6
+		adcs	sum, sum, r7
+		mov	r4, r8, pull #24
+		sub	ip, ip, #16
+		teq	ip, #0
+		bne	1b
+2:		ands	ip, len, #12
+		beq	4f
+		tst	ip, #8
+		beq	3f
+		load2l	r5, r6
+		orr	r4, r4, r5, push #8
+		mov	r5, r5, pull #24
+		orr	r5, r5, r6, push #8
+		stmia	dst!, {r4, r5}
+		adcs	sum, sum, r4
+		adcs	sum, sum, r5
+		mov	r4, r6, pull #24
+		tst	ip, #4
+		beq	4f
+3:		load1l	r5
+		orr	r4, r4, r5, push #8
+		str	r4, [dst], #4
+		adcs	sum, sum, r4
+		mov	r4, r5, pull #24
+4:		ands	len, len, #3
+		beq	.done
+/*		mov	r5, r4, lsr #byte(0)
+FIXME? 0 Shift anyhow
+*/
+		tst	len, #2
+		beq	.exit
+		strb	r5, [dst], #1
+		adcs	sum, sum, r4
+		load1l	r4
+/*		mov	r5, r4, lsr #byte(0)
+FIXME? 0 Shift anyhow
+*/
+		strb	r5, [dst], #1
+		adcs	sum, sum, r4, push #24
+		mov	r5, r4, lsr #byte(1)
+		b	.exit
diff --git a/arch/arm26/lib/csumpartialcopyuser.S b/arch/arm26/lib/csumpartialcopyuser.S
new file mode 100644
index 00000000000..5b821188e47
--- /dev/null
+++ b/arch/arm26/lib/csumpartialcopyuser.S
@@ -0,0 +1,115 @@
+/*
+ *  linux/arch/arm26/lib/csumpartialcopyuser.S
+ *
+ *  Copyright (C) 1995-1998 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/errno.h>
+#include <asm/asm_offsets.h>
+
+		.text
+
+		.macro	save_regs
+		stmfd	sp!, {r1 - r2, r4 - r9, fp, ip, lr, pc}
+		mov	r9, sp, lsr #13
+		mov	r9, r9, lsl #13
+		ldr	r9, [r9, #TSK_ADDR_LIMIT]
+		mov	r9, r9, lsr #24
+		.endm
+
+		.macro	load_regs,flags
+		ldm\flags	fp, {r1, r2, r4-r9, fp, sp, pc}^
+		.endm
+
+		.macro	load1b,	reg1
+		tst	r9, #0x01
+9999:		ldreqbt	\reg1, [r0], #1
+		ldrneb	\reg1, [r0], #1
+		.section __ex_table, "a"
+		.align	3
+		.long	9999b, 6001f
+		.previous
+		.endm
+
+		.macro	load2b, reg1, reg2
+		tst	r9, #0x01
+9999:		ldreqbt	\reg1, [r0], #1
+		ldrneb	\reg1, [r0], #1
+9998:		ldreqbt	\reg2, [r0], #1
+		ldrneb	\reg2, [r0], #1
+		.section __ex_table, "a"
+		.long	9999b, 6001f
+		.long	9998b, 6001f
+		.previous
+		.endm
+
+		.macro	load1l, reg1
+		tst	r9, #0x01
+9999:		ldreqt	\reg1, [r0], #4
+		ldrne	\reg1, [r0], #4
+		.section __ex_table, "a"
+		.align	3
+		.long	9999b, 6001f
+		.previous
+		.endm
+
+		.macro	load2l, reg1, reg2
+		tst	r9, #0x01
+		ldmneia	r0!, {\reg1, \reg2}
+9999:		ldreqt	\reg1, [r0], #4
+9998:		ldreqt	\reg2, [r0], #4
+		.section __ex_table, "a"
+		.long	9999b, 6001f
+		.long	9998b, 6001f
+		.previous
+		.endm
+
+		.macro	load4l, reg1, reg2, reg3, reg4
+		tst	r9, #0x01
+		ldmneia	r0!, {\reg1, \reg2, \reg3, \reg4}
+9999:		ldreqt	\reg1, [r0], #4
+9998:		ldreqt	\reg2, [r0], #4
+9997:		ldreqt	\reg3, [r0], #4
+9996:		ldreqt	\reg4, [r0], #4
+		.section __ex_table, "a"
+		.long	9999b, 6001f
+		.long	9998b, 6001f
+		.long	9997b, 6001f
+		.long	9996b, 6001f
+		.previous
+		.endm
+
+/*
+ * unsigned int
+ * csum_partial_copy_from_user(const char *src, char *dst, int len, int sum, int *err_ptr)
+ *  r0 = src, r1 = dst, r2 = len, r3 = sum, [sp] = *err_ptr
+ *  Returns : r0 = checksum, [[sp, #0], #0] = 0 or -EFAULT
+ */
+
+#define FN_ENTRY	ENTRY(csum_partial_copy_from_user)
+
+#include "csumpartialcopygeneric.S"
+
+/*
+ * FIXME: minor buglet here
+ * We don't return the checksum for the data present in the buffer.  To do
+ * so properly, we would have to add in whatever registers were loaded before
+ * the fault, which, with the current asm above is not predictable.
+ */
+		.align	4
+6001:		mov	r4, #-EFAULT
+		ldr	r5, [fp, #4]		@ *err_ptr
+		str	r4, [r5]
+		ldmia	sp, {r1, r2}		@ retrieve dst, len
+		add	r2, r2, r1
+		mov	r0, #0			@ zero the buffer
+6002:		teq	r2, r1
+		strneb	r0, [r1], #1
+		bne	6002b
+		load_regs	ea
diff --git a/arch/arm26/lib/delay.S b/arch/arm26/lib/delay.S
new file mode 100644
index 00000000000..66f2b68e1b1
--- /dev/null
+++ b/arch/arm26/lib/delay.S
@@ -0,0 +1,57 @@
+/*
+ *  linux/arch/arm26/lib/delay.S
+ *
+ *  Copyright (C) 1995, 1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+		.text
+
+LC0:		.word	loops_per_jiffy
+
+/*
+ * 0 <= r0 <= 2000
+ */
+ENTRY(udelay)
+		mov	r2,     #0x6800
+		orr	r2, r2, #0x00db
+		mul	r1, r0, r2
+		ldr	r2, LC0
+		ldr	r2, [r2]
+		mov	r1, r1, lsr #11
+		mov	r2, r2, lsr #11
+		mul	r0, r1, r2
+		movs	r0, r0, lsr #6
+		RETINSTR(moveq,pc,lr)
+
+/*
+ * loops = (r0 * 0x10c6 * 100 * loops_per_jiffy) / 2^32
+ *
+ * Oh, if only we had a cycle counter...
+ */
+
+@ Delay routine
+ENTRY(__delay)
+		subs	r0, r0, #1
+#if 0
+		RETINSTR(movls,pc,lr)
+		subs	r0, r0, #1
+		RETINSTR(movls,pc,lr)
+		subs	r0, r0, #1
+		RETINSTR(movls,pc,lr)
+		subs	r0, r0, #1
+		RETINSTR(movls,pc,lr)
+		subs	r0, r0, #1
+		RETINSTR(movls,pc,lr)
+		subs	r0, r0, #1
+		RETINSTR(movls,pc,lr)
+		subs	r0, r0, #1
+		RETINSTR(movls,pc,lr)
+		subs	r0, r0, #1
+#endif
+		bhi	__delay
+		RETINSTR(mov,pc,lr)
diff --git a/arch/arm26/lib/ecard.S b/arch/arm26/lib/ecard.S
new file mode 100644
index 00000000000..b4633150f01
--- /dev/null
+++ b/arch/arm26/lib/ecard.S
@@ -0,0 +1,41 @@
+/*
+ *  linux/arch/arm26/lib/ecard.S
+ *
+ *  Copyright (C) 1995, 1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/config.h> /* for CONFIG_CPU_nn */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware.h>
+
+#define CPSR2SPSR(rt)
+
+@ Purpose: call an expansion card loader to read bytes.
+@ Proto  : char read_loader(int offset, char *card_base, char *loader);
+@ Returns: byte read
+
+ENTRY(ecard_loader_read)
+		stmfd	sp!, {r4 - r12, lr}
+		mov	r11, r1
+		mov	r1, r0
+		CPSR2SPSR(r0)
+		mov	lr, pc
+		mov	pc, r2
+		LOADREGS(fd, sp!, {r4 - r12, pc})
+
+@ Purpose: call an expansion card loader to reset the card
+@ Proto  : void read_loader(int card_base, char *loader);
+@ Returns: byte read
+
+ENTRY(ecard_loader_reset)
+		stmfd	sp!, {r4 - r12, lr}
+		mov	r11, r0
+		CPSR2SPSR(r0)
+		mov	lr, pc
+		add	pc, r1, #8
+		LOADREGS(fd, sp!, {r4 - r12, pc})
+
diff --git a/arch/arm26/lib/findbit.S b/arch/arm26/lib/findbit.S
new file mode 100644
index 00000000000..26f67cccc37
--- /dev/null
+++ b/arch/arm26/lib/findbit.S
@@ -0,0 +1,67 @@
+/*
+ *  linux/arch/arm/lib/findbit.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 16th March 2001 - John Ripley <jripley@sonicblue.com>
+ *   Fixed so that "size" is an exclusive not an inclusive quantity.
+ *   All users of these functions expect exclusive sizes, and may
+ *   also call with zero size.
+ * Reworked by rmk.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+
+/*
+ * Purpose  : Find a 'zero' bit
+ * Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit);
+ */
+ENTRY(_find_first_zero_bit_le)
+		teq	r1, #0	
+		beq	3f
+		mov	r2, #0
+1:		ldrb	r3, [r0, r2, lsr #3]
+		eors	r3, r3, #0xff		@ invert bits
+		bne	.found			@ any now set - found zero bit
+		add	r2, r2, #8		@ next bit pointer
+2:		cmp	r2, r1			@ any more?
+		blo	1b
+3:		mov	r0, r1			@ no free bits
+		RETINSTR(mov,pc,lr)
+
+/*
+ * Purpose  : Find next 'zero' bit
+ * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
+ */
+ENTRY(_find_next_zero_bit_le)
+		teq	r1, #0
+		beq	2b
+		ands	ip, r2, #7
+		beq	1b			@ If new byte, goto old routine
+		ldrb	r3, [r0, r2, lsr #3]
+		eor	r3, r3, #0xff		@ now looking for a 1 bit
+		movs	r3, r3, lsr ip		@ shift off unused bits
+		bne	.found
+		orr	r2, r2, #7		@ if zero, then no bits here
+		add	r2, r2, #1		@ align bit pointer
+		b	2b			@ loop for next bit
+
+/*
+ * One or more bits in the LSB of r3 are assumed to be set.
+ */
+.found:		tst	r3, #0x0f
+		addeq	r2, r2, #4
+		movne	r3, r3, lsl #4
+		tst	r3, #0x30
+		addeq	r2, r2, #2
+		movne	r3, r3, lsl #2
+		tst	r3, #0x40
+		addeq	r2, r2, #1
+		mov	r0, r2
+		RETINSTR(mov,pc,lr)
+
diff --git a/arch/arm26/lib/floppydma.S b/arch/arm26/lib/floppydma.S
new file mode 100644
index 00000000000..e99ebbb2035
--- /dev/null
+++ b/arch/arm26/lib/floppydma.S
@@ -0,0 +1,32 @@
+/*
+ *  linux/arch/arm26/lib/floppydma.S
+ *
+ *  Copyright (C) 1995, 1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+		.text
+
+		.global	floppy_fiqin_end
+ENTRY(floppy_fiqin_start)
+		subs	r9, r9, #1
+		ldrgtb	r12, [r11, #-4]
+		ldrleb	r12, [r11], #0
+		strb	r12, [r10], #1
+		subs	pc, lr, #4
+floppy_fiqin_end:
+
+		.global	floppy_fiqout_end
+ENTRY(floppy_fiqout_start)
+		subs	r9, r9, #1
+		ldrgeb	r12, [r10], #1
+		movlt	r12, #0
+		strleb	r12, [r11], #0
+		subles	pc, lr, #4
+		strb	r12, [r11, #-4]
+		subs	pc, lr, #4
+floppy_fiqout_end:
diff --git a/arch/arm26/lib/gcclib.h b/arch/arm26/lib/gcclib.h
new file mode 100644
index 00000000000..9895e78904b
--- /dev/null
+++ b/arch/arm26/lib/gcclib.h
@@ -0,0 +1,21 @@
+/* gcclib.h -- definitions for various functions 'borrowed' from gcc-2.95.3 */
+/* I Molton     29/07/01 */
+
+#define BITS_PER_UNIT  8
+#define SI_TYPE_SIZE (sizeof (SItype) * BITS_PER_UNIT)
+
+typedef unsigned int UQItype    __attribute__ ((mode (QI)));
+typedef          int SItype     __attribute__ ((mode (SI)));
+typedef unsigned int USItype    __attribute__ ((mode (SI)));
+typedef          int DItype     __attribute__ ((mode (DI)));
+typedef          int word_type 	__attribute__ ((mode (__word__)));
+typedef unsigned int UDItype    __attribute__ ((mode (DI)));
+
+struct DIstruct {SItype low, high;};
+
+typedef union
+{
+  struct DIstruct s;
+  DItype ll;
+} DIunion;
+
diff --git a/arch/arm26/lib/getuser.S b/arch/arm26/lib/getuser.S
new file mode 100644
index 00000000000..e6d59b33485
--- /dev/null
+++ b/arch/arm26/lib/getuser.S
@@ -0,0 +1,112 @@
+/*
+ *  linux/arch/arm26/lib/getuser.S
+ *
+ *  Copyright (C) 2001 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Idea from x86 version, (C) Copyright 1998 Linus Torvalds
+ *
+ * These functions have a non-standard call interface to make them more
+ * efficient, especially as they return an error value in addition to
+ * the "real" return value.
+ *
+ * __get_user_X
+ *
+ * Inputs:	r0 contains the address
+ * Outputs:	r0 is the error code
+ *		r1, r2 contains the zero-extended value
+ *		lr corrupted
+ *
+ * No other registers must be altered.  (see include/asm-arm/uaccess.h
+ * for specific ASM register usage).
+ *
+ * Note that ADDR_LIMIT is either 0 or 0xc0000000.
+ * Note also that it is intended that __get_user_bad is not global.
+ */
+#include <asm/asm_offsets.h>
+#include <asm/thread_info.h>
+#include <asm/errno.h>
+
+        .global __get_user_1
+__get_user_1:
+        bic     r1, sp, #0x1f00
+        bic     r1, r1, #0x00ff
+        str     lr, [sp, #-4]!
+        ldr     r1, [r1, #TI_ADDR_LIMIT]
+        sub     r1, r1, #1
+        cmp     r0, r1
+        bge     __get_user_bad
+        cmp     r0, #0x02000000
+1:      ldrlsbt r1, [r0]
+        ldrgeb  r1, [r0]
+        mov     r0, #0
+        ldmfd   sp!, {pc}^
+
+        .global __get_user_2
+__get_user_2:
+        bic     r2, sp, #0x1f00
+        bic     r2, r2, #0x00ff
+        str     lr, [sp, #-4]!
+        ldr     r2, [r2, #TI_ADDR_LIMIT]
+        sub     r2, r2, #2
+        cmp     r0, r2
+        bge     __get_user_bad
+        cmp     r0, #0x02000000
+2:      ldrlsbt r1, [r0], #1
+3:      ldrlsbt r2, [r0]
+        ldrgeb  r1, [r0], #1
+        ldrgeb  r2, [r0]
+        orr     r1, r1, r2, lsl #8
+        mov     r0, #0
+        ldmfd   sp!, {pc}^
+
+        .global __get_user_4
+__get_user_4:
+        bic     r1, sp, #0x1f00
+        bic     r1, r1, #0x00ff
+        str     lr, [sp, #-4]!
+        ldr     r1, [r1, #TI_ADDR_LIMIT]
+        sub     r1, r1, #4
+        cmp     r0, r1
+        bge     __get_user_bad
+        cmp     r0, #0x02000000
+4:      ldrlst  r1, [r0]
+        ldrge   r1, [r0]
+        mov     r0, #0
+        ldmfd   sp!, {pc}^
+
+        .global __get_user_8
+__get_user_8:
+        bic     r2, sp, #0x1f00
+        bic     r2, r2, #0x00ff
+        str     lr, [sp, #-4]!
+        ldr     r2, [r2, #TI_ADDR_LIMIT]
+        sub     r2, r2, #8
+        cmp     r0, r2
+        bge     __get_user_bad_8
+        cmp     r0, #0x02000000
+5:      ldrlst  r1, [r0], #4
+6:      ldrlst  r2, [r0]
+        ldrge   r1, [r0], #4
+        ldrge   r2, [r0]
+        mov     r0, #0
+        ldmfd   sp!, {pc}^
+
+__get_user_bad_8:
+        mov     r2, #0
+__get_user_bad:
+        mov     r1, #0
+        mov     r0, #-EFAULT
+        ldmfd   sp!, {pc}^
+
+.section __ex_table, "a"
+	.long	1b, __get_user_bad
+	.long	2b, __get_user_bad
+	.long	3b, __get_user_bad
+	.long	4b, __get_user_bad
+	.long	5b, __get_user_bad_8
+	.long	6b, __get_user_bad_8
+.previous
diff --git a/arch/arm26/lib/io-acorn.S b/arch/arm26/lib/io-acorn.S
new file mode 100644
index 00000000000..f6c3e30b1b4
--- /dev/null
+++ b/arch/arm26/lib/io-acorn.S
@@ -0,0 +1,71 @@
+/*
+ *  linux/arch/arm26/lib/io-acorn.S
+ *
+ *  Copyright (C) 1995, 1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/config.h> /* for CONFIG_CPU_nn */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware.h>
+
+		.text
+		.align
+
+		.equ	diff_pcio_base, PCIO_BASE - IO_BASE
+
+		.macro	outw2	rd
+		mov	r8, \rd, lsl #16
+		orr	r8, r8, r8, lsr #16
+		str	r8, [r3, r0, lsl #2]
+		mov	r8, \rd, lsr #16
+		orr	r8, r8, r8, lsl #16
+		str	r8, [r3, r0, lsl #2]
+		.endm
+
+		.macro	inw2	rd, mask, temp
+		ldr	\rd, [r0]
+		and	\rd, \rd, \mask
+		ldr	\temp, [r0]
+		orr	\rd, \rd, \temp, lsl #16
+		.endm
+
+		.macro	addr	rd
+		tst	\rd, #0x80000000
+		mov	\rd, \rd, lsl #2
+		add	\rd, \rd, #IO_BASE
+		addeq	\rd, \rd, #diff_pcio_base
+		.endm
+
+.iosl_warning:
+		.ascii	"<4>insl/outsl not implemented, called from %08lX\0"
+		.align
+
+/*
+ * These make no sense on Acorn machines.
+ * Print a warning message.
+ */
+ENTRY(insl)
+ENTRY(outsl)
+		adr	r0, .iosl_warning
+		mov	r1, lr
+		b	printk
+
+@ Purpose: write a memc register
+@ Proto  : void memc_write(int register, int value);
+@ Returns: nothing
+
+ENTRY(memc_write)
+		cmp	r0, #7
+		RETINSTR(movgt,pc,lr)
+		mov	r0, r0, lsl #17
+		mov	r1, r1, lsl #15
+		mov	r1, r1, lsr #17
+		orr	r0, r0, r1, lsl #2
+		add	r0, r0, #0x03600000
+		strb	r0, [r0]
+		RETINSTR(mov,pc,lr)
+
diff --git a/arch/arm26/lib/io-readsb.S b/arch/arm26/lib/io-readsb.S
new file mode 100644
index 00000000000..4c4d99c0585
--- /dev/null
+++ b/arch/arm26/lib/io-readsb.S
@@ -0,0 +1,116 @@
+/*
+ *  linux/arch/arm26/lib/io-readsb.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware.h>
+
+.insb_align:	rsb	ip, ip, #4
+		cmp	ip, r2
+		movgt	ip, r2
+		cmp	ip, #2
+		ldrb	r3, [r0]
+		strb	r3, [r1], #1
+		ldrgeb	r3, [r0]
+		strgeb	r3, [r1], #1
+		ldrgtb	r3, [r0]
+		strgtb	r3, [r1], #1
+		subs	r2, r2, ip
+		bne	.insb_aligned
+
+ENTRY(__raw_readsb)
+		teq	r2, #0		@ do we have to check for the zero len?
+		moveq	pc, lr
+		ands	ip, r1, #3
+		bne	.insb_align
+
+.insb_aligned:	stmfd	sp!, {r4 - r6, lr}
+
+		subs	r2, r2, #16
+		bmi	.insb_no_16
+
+.insb_16_lp:	ldrb	r3, [r0]
+		ldrb	r4, [r0]
+		orr	r3, r3, r4, lsl #8
+		ldrb	r4, [r0]
+		orr	r3, r3, r4, lsl #16
+		ldrb	r4, [r0]
+		orr	r3, r3, r4, lsl #24
+		ldrb	r4, [r0]
+		ldrb	r5, [r0]
+		orr	r4, r4, r5, lsl #8
+		ldrb	r5, [r0]
+		orr	r4, r4, r5, lsl #16
+		ldrb	r5, [r0]
+		orr	r4, r4, r5, lsl #24
+		ldrb	r5, [r0]
+		ldrb	r6, [r0]
+		orr	r5, r5, r6, lsl #8
+		ldrb	r6, [r0]
+		orr	r5, r5, r6, lsl #16
+		ldrb	r6, [r0]
+		orr	r5, r5, r6, lsl #24
+		ldrb	r6, [r0]
+		ldrb	ip, [r0]
+		orr	r6, r6, ip, lsl #8
+		ldrb	ip, [r0]
+		orr	r6, r6, ip, lsl #16
+		ldrb	ip, [r0]
+		orr	r6, r6, ip, lsl #24
+		stmia	r1!, {r3 - r6}
+
+		subs	r2, r2, #16
+		bpl	.insb_16_lp
+
+		tst	r2, #15
+		LOADREGS(eqfd, sp!, {r4 - r6, pc})
+
+.insb_no_16:	tst	r2, #8
+		beq	.insb_no_8
+
+		ldrb	r3, [r0]
+		ldrb	r4, [r0]
+		orr	r3, r3, r4, lsl #8
+		ldrb	r4, [r0]
+		orr	r3, r3, r4, lsl #16
+		ldrb	r4, [r0]
+		orr	r3, r3, r4, lsl #24
+		ldrb	r4, [r0]
+		ldrb	r5, [r0]
+		orr	r4, r4, r5, lsl #8
+		ldrb	r5, [r0]
+		orr	r4, r4, r5, lsl #16
+		ldrb	r5, [r0]
+		orr	r4, r4, r5, lsl #24
+		stmia	r1!, {r3, r4}
+
+.insb_no_8:	tst	r2, #4
+		beq	.insb_no_4
+
+		ldrb	r3, [r0]
+		ldrb	r4, [r0]
+		orr	r3, r3, r4, lsl #8
+		ldrb	r4, [r0]
+		orr	r3, r3, r4, lsl #16
+		ldrb	r4, [r0]
+		orr	r3, r3, r4, lsl #24
+		str	r3, [r1], #4
+
+.insb_no_4:	ands	r2, r2, #3
+		LOADREGS(eqfd, sp!, {r4 - r6, pc})
+
+		cmp	r2, #2
+		ldrb	r3, [r0]
+		strb	r3, [r1], #1
+		ldrgeb	r3, [r0]
+		strgeb	r3, [r1], #1
+		ldrgtb	r3, [r0]
+		strgtb	r3, [r1]
+
+		LOADREGS(fd, sp!, {r4 - r6, pc})
diff --git a/arch/arm26/lib/io-readsl.S b/arch/arm26/lib/io-readsl.S
new file mode 100644
index 00000000000..7be208bd23c
--- /dev/null
+++ b/arch/arm26/lib/io-readsl.S
@@ -0,0 +1,78 @@
+/*
+ *  linux/arch/arm26/lib/io-readsl.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware.h>
+
+/*
+ * Note that some reads can be aligned on half-word boundaries.
+ */
+ENTRY(__raw_readsl)
+		teq	r2, #0		@ do we have to check for the zero len?
+		moveq	pc, lr
+		ands	ip, r1, #3
+		bne	2f
+
+1:		ldr	r3, [r0]
+		str	r3, [r1], #4
+		subs	r2, r2, #1
+		bne	1b
+		mov	pc, lr
+
+2:		cmp	ip, #2
+		ldr	ip, [r0]
+		blt	4f
+		bgt	6f
+
+		strb	ip, [r1], #1
+		mov	ip, ip, lsr #8
+		strb	ip, [r1], #1
+		mov	ip, ip, lsr #8
+3:		subs	r2, r2, #1
+		ldrne	r3, [r0]
+		orrne	ip, ip, r3, lsl #16
+		strne	ip, [r1], #4
+		movne	ip, r3, lsr #16
+		bne	3b
+		strb	ip, [r1], #1
+		mov	ip, ip, lsr #8
+		strb	ip, [r1], #1
+		mov	pc, lr
+
+4:		strb	ip, [r1], #1
+		mov	ip, ip, lsr #8
+		strb	ip, [r1], #1
+		mov	ip, ip, lsr #8
+		strb	ip, [r1], #1
+		mov	ip, ip, lsr #8
+5:		subs	r2, r2, #1
+		ldrne	r3, [r0]
+		orrne	ip, ip, r3, lsl #8
+		strne	ip, [r1], #4
+		movne	ip, r3, lsr #24
+		bne	5b
+		strb	ip, [r1], #1
+		mov	pc, lr
+
+6:		strb	ip, [r1], #1
+		mov	ip, ip, lsr #8
+7:		subs	r2, r2, #1
+		ldrne	r3, [r0]
+		orrne	ip, ip, r3, lsl #24
+		strne	ip, [r1], #4
+		movne	ip, r3, lsr #8
+		bne	7b
+		strb	ip, [r1], #1
+		mov	ip, ip, lsr #8
+		strb	ip, [r1], #1
+		mov	ip, ip, lsr #8
+		strb	ip, [r1], #1
+		mov	pc, lr
+
diff --git a/arch/arm26/lib/io-readsw.S b/arch/arm26/lib/io-readsw.S
new file mode 100644
index 00000000000..c65c1f28fcf
--- /dev/null
+++ b/arch/arm26/lib/io-readsw.S
@@ -0,0 +1,107 @@
+/*
+ *  linux/arch/arm26/lib/io-readsw.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware.h>
+
+.insw_bad_alignment:
+		adr	r0, .insw_bad_align_msg
+		mov	r2, lr
+		b	panic
+.insw_bad_align_msg:
+		.asciz	"insw: bad buffer alignment (0x%p, lr=0x%08lX)\n"
+		.align
+
+.insw_align:	tst	r1, #1
+		bne	.insw_bad_alignment
+
+		ldr	r3, [r0]
+		strb	r3, [r1], #1
+		mov	r3, r3, lsr #8
+		strb	r3, [r1], #1
+
+		subs	r2, r2, #1
+		RETINSTR(moveq, pc, lr)
+
+ENTRY(__raw_readsw)
+		teq	r2, #0		@ do we have to check for the zero len?
+		moveq	pc, lr
+		tst	r1, #3
+		bne	.insw_align
+
+.insw_aligned:	mov	ip, #0xff
+		orr	ip, ip, ip, lsl #8
+		stmfd	sp!, {r4, r5, r6, lr}
+
+		subs	r2, r2, #8
+		bmi	.no_insw_8
+
+.insw_8_lp:	ldr	r3, [r0]
+		and	r3, r3, ip
+		ldr	r4, [r0]
+		orr	r3, r3, r4, lsl #16
+
+		ldr	r4, [r0]
+		and	r4, r4, ip
+		ldr	r5, [r0]
+		orr	r4, r4, r5, lsl #16
+
+		ldr	r5, [r0]
+		and	r5, r5, ip
+		ldr	r6, [r0]
+		orr	r5, r5, r6, lsl #16
+
+		ldr	r6, [r0]
+		and	r6, r6, ip
+		ldr	lr, [r0]
+		orr	r6, r6, lr, lsl #16
+
+		stmia	r1!, {r3 - r6}
+
+		subs	r2, r2, #8
+		bpl	.insw_8_lp
+
+		tst	r2, #7
+		LOADREGS(eqfd, sp!, {r4, r5, r6, pc})
+
+.no_insw_8:	tst	r2, #4
+		beq	.no_insw_4
+
+		ldr	r3, [r0]
+		and	r3, r3, ip
+		ldr	r4, [r0]
+		orr	r3, r3, r4, lsl #16
+
+		ldr	r4, [r0]
+		and	r4, r4, ip
+		ldr	r5, [r0]
+		orr	r4, r4, r5, lsl #16
+
+		stmia	r1!, {r3, r4}
+
+.no_insw_4:	tst	r2, #2
+		beq	.no_insw_2
+
+		ldr	r3, [r0]
+		and	r3, r3, ip
+		ldr	r4, [r0]
+		orr	r3, r3, r4, lsl #16
+
+		str	r3, [r1], #4
+
+.no_insw_2:	tst	r2, #1
+		ldrne	r3, [r0]
+		strneb	r3, [r1], #1
+		movne	r3, r3, lsr #8
+		strneb	r3, [r1]
+
+		LOADREGS(fd, sp!, {r4, r5, r6, pc})
+
+
diff --git a/arch/arm26/lib/io-writesb.S b/arch/arm26/lib/io-writesb.S
new file mode 100644
index 00000000000..16251b4d510
--- /dev/null
+++ b/arch/arm26/lib/io-writesb.S
@@ -0,0 +1,122 @@
+/*
+ *  linux/arch/arm26/lib/io-writesb.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware.h>
+
+.outsb_align:	rsb	ip, ip, #4
+		cmp	ip, r2
+		movgt	ip, r2
+		cmp	ip, #2
+		ldrb	r3, [r1], #1
+		strb	r3, [r0]
+		ldrgeb	r3, [r1], #1
+		strgeb	r3, [r0]
+		ldrgtb	r3, [r1], #1
+		strgtb	r3, [r0]
+		subs	r2, r2, ip
+		bne	.outsb_aligned
+
+ENTRY(__raw_writesb)
+		teq	r2, #0		@ do we have to check for the zero len?
+		moveq	pc, lr
+		ands	ip, r1, #3
+		bne	.outsb_align
+
+.outsb_aligned:	stmfd	sp!, {r4 - r6, lr}
+
+		subs	r2, r2, #16
+		bmi	.outsb_no_16
+
+.outsb_16_lp:	ldmia	r1!, {r3 - r6}
+
+		strb	r3, [r0]
+		mov	r3, r3, lsr #8
+		strb	r3, [r0]
+		mov	r3, r3, lsr #8
+		strb	r3, [r0]
+		mov	r3, r3, lsr #8
+		strb	r3, [r0]
+
+		strb	r4, [r0]
+		mov	r4, r4, lsr #8
+		strb	r4, [r0]
+		mov	r4, r4, lsr #8
+		strb	r4, [r0]
+		mov	r4, r4, lsr #8
+		strb	r4, [r0]
+
+		strb	r5, [r0]
+		mov	r5, r5, lsr #8
+		strb	r5, [r0]
+		mov	r5, r5, lsr #8
+		strb	r5, [r0]
+		mov	r5, r5, lsr #8
+		strb	r5, [r0]
+
+		strb	r6, [r0]
+		mov	r6, r6, lsr #8
+		strb	r6, [r0]
+		mov	r6, r6, lsr #8
+		strb	r6, [r0]
+		mov	r6, r6, lsr #8
+		strb	r6, [r0]
+
+		subs	r2, r2, #16
+		bpl	.outsb_16_lp
+
+		tst	r2, #15
+		LOADREGS(eqfd, sp!, {r4 - r6, pc})
+
+.outsb_no_16:	tst	r2, #8
+		beq	.outsb_no_8
+
+		ldmia	r1!, {r3, r4}
+
+		strb	r3, [r0]
+		mov	r3, r3, lsr #8
+		strb	r3, [r0]
+		mov	r3, r3, lsr #8
+		strb	r3, [r0]
+		mov	r3, r3, lsr #8
+		strb	r3, [r0]
+
+		strb	r4, [r0]
+		mov	r4, r4, lsr #8
+		strb	r4, [r0]
+		mov	r4, r4, lsr #8
+		strb	r4, [r0]
+		mov	r4, r4, lsr #8
+		strb	r4, [r0]
+
+.outsb_no_8:	tst	r2, #4
+		beq	.outsb_no_4
+
+		ldr	r3, [r1], #4
+		strb	r3, [r0]
+		mov	r3, r3, lsr #8
+		strb	r3, [r0]
+		mov	r3, r3, lsr #8
+		strb	r3, [r0]
+		mov	r3, r3, lsr #8
+		strb	r3, [r0]
+
+.outsb_no_4:	ands	r2, r2, #3
+		LOADREGS(eqfd, sp!, {r4 - r6, pc})
+
+		cmp	r2, #2
+		ldrb	r3, [r1], #1
+		strb	r3, [r0]
+		ldrgeb	r3, [r1], #1
+		strgeb	r3, [r0]
+		ldrgtb	r3, [r1]
+		strgtb	r3, [r0]
+
+		LOADREGS(fd, sp!, {r4 - r6, pc})
diff --git a/arch/arm26/lib/io-writesl.S b/arch/arm26/lib/io-writesl.S
new file mode 100644
index 00000000000..4d6049b16e7
--- /dev/null
+++ b/arch/arm26/lib/io-writesl.S
@@ -0,0 +1,56 @@
+/*
+ *  linux/arch/arm26/lib/io-writesl.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware.h>
+
+ENTRY(__raw_writesl)
+		teq	r2, #0		@ do we have to check for the zero len?
+		moveq	pc, lr
+		ands	ip, r1, #3
+		bne	2f
+
+1:		ldr	r3, [r1], #4
+		str	r3, [r0]
+		subs	r2, r2, #1
+		bne	1b
+		mov	pc, lr
+
+2:		bic	r1, r1, #3
+		cmp	ip, #2
+		ldr	r3, [r1], #4
+		bgt	4f
+		blt	5f
+
+3:		mov	ip, r3, lsr #16
+		ldr	r3, [r1], #4
+		orr	ip, ip, r3, lsl #16
+		str	ip, [r0]
+		subs	r2, r2, #1
+		bne	3b
+		mov	pc, lr
+
+4:		mov	ip, r3, lsr #24
+		ldr	r3, [r1], #4
+		orr	ip, ip, r3, lsl #8
+		str	ip, [r0]
+		subs	r2, r2, #1
+		bne	4b
+		mov	pc, lr
+
+5:		mov	ip, r3, lsr #8
+		ldr	r3, [r1], #4
+		orr	ip, ip, r3, lsl #24
+		str	ip, [r0]
+		subs	r2, r2, #1
+		bne	5b
+		mov	pc, lr
+
+
diff --git a/arch/arm26/lib/io-writesw.S b/arch/arm26/lib/io-writesw.S
new file mode 100644
index 00000000000..a24f891f6b1
--- /dev/null
+++ b/arch/arm26/lib/io-writesw.S
@@ -0,0 +1,127 @@
+/*
+ *  linux/arch/arm26/lib/io-writesw.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware.h>
+
+.outsw_bad_alignment:
+		adr	r0, .outsw_bad_align_msg
+		mov	r2, lr
+		b	panic
+.outsw_bad_align_msg:
+		.asciz	"outsw: bad buffer alignment (0x%p, lr=0x%08lX)\n"
+		.align
+
+.outsw_align:	tst	r1, #1
+		bne	.outsw_bad_alignment
+
+		add	r1, r1, #2
+
+		ldr	r3, [r1, #-4]
+		mov	r3, r3, lsr #16
+		orr	r3, r3, r3, lsl #16
+		str	r3, [r0]
+		subs	r2, r2, #1
+		RETINSTR(moveq, pc, lr)
+
+ENTRY(__raw_writesw)
+		teq	r2, #0		@ do we have to check for the zero len?
+		moveq	pc, lr
+		tst	r1, #3
+		bne	.outsw_align
+
+.outsw_aligned:	stmfd	sp!, {r4, r5, r6, lr}
+
+		subs	r2, r2, #8
+		bmi	.no_outsw_8
+
+.outsw_8_lp:	ldmia	r1!, {r3, r4, r5, r6}
+
+		mov	ip, r3, lsl #16
+		orr	ip, ip, ip, lsr #16
+		str	ip, [r0]
+
+		mov	ip, r3, lsr #16
+		orr	ip, ip, ip, lsl #16
+		str	ip, [r0]
+
+		mov	ip, r4, lsl #16
+		orr	ip, ip, ip, lsr #16
+		str	ip, [r0]
+
+		mov	ip, r4, lsr #16
+		orr	ip, ip, ip, lsl #16
+		str	ip, [r0]
+
+		mov	ip, r5, lsl #16
+		orr	ip, ip, ip, lsr #16
+		str	ip, [r0]
+
+		mov	ip, r5, lsr #16
+		orr	ip, ip, ip, lsl #16
+		str	ip, [r0]
+
+		mov	ip, r6, lsl #16
+		orr	ip, ip, ip, lsr #16
+		str	ip, [r0]
+
+		mov	ip, r6, lsr #16
+		orr	ip, ip, ip, lsl #16
+		str	ip, [r0]
+
+		subs	r2, r2, #8
+		bpl	.outsw_8_lp
+
+		tst	r2, #7
+		LOADREGS(eqfd, sp!, {r4, r5, r6, pc})
+
+.no_outsw_8:	tst	r2, #4
+		beq	.no_outsw_4
+
+		ldmia	r1!, {r3, r4}
+
+		mov	ip, r3, lsl #16
+		orr	ip, ip, ip, lsr #16
+		str	ip, [r0]
+
+		mov	ip, r3, lsr #16
+		orr	ip, ip, ip, lsl #16
+		str	ip, [r0]
+
+		mov	ip, r4, lsl #16
+		orr	ip, ip, ip, lsr #16
+		str	ip, [r0]
+
+		mov	ip, r4, lsr #16
+		orr	ip, ip, ip, lsl #16
+		str	ip, [r0]
+
+.no_outsw_4:	tst	r2, #2
+		beq	.no_outsw_2
+
+		ldr	r3, [r1], #4
+
+		mov	ip, r3, lsl #16
+		orr	ip, ip, ip, lsr #16
+		str	ip, [r0]
+
+		mov	ip, r3, lsr #16
+		orr	ip, ip, ip, lsl #16
+		str	ip, [r0]
+
+.no_outsw_2:	tst	r2, #1
+
+		ldrne	r3, [r1]
+
+		movne	ip, r3, lsl #16
+		orrne	ip, ip, ip, lsr #16
+		strne	ip, [r0]
+
+		LOADREGS(fd, sp!, {r4, r5, r6, pc})
diff --git a/arch/arm26/lib/kbd.c b/arch/arm26/lib/kbd.c
new file mode 100644
index 00000000000..22d2c93aaf1
--- /dev/null
+++ b/arch/arm26/lib/kbd.c
@@ -0,0 +1,279 @@
+#include <linux/config.h>
+#include <linux/kd.h>
+//#include <linux/kbd_ll.h>
+#include <linux/kbd_kern.h>
+
+/*
+ * Translation of escaped scancodes to keycodes.
+ * This is now user-settable.
+ * The keycodes 1-88,96-111,119 are fairly standard, and
+ * should probably not be changed - changing might confuse X.
+ * X also interprets scancode 0x5d (KEY_Begin).
+ *
+ * For 1-88 keycode equals scancode.
+ */
+
+#define E0_KPENTER 96
+#define E0_RCTRL   97
+#define E0_KPSLASH 98
+#define E0_PRSCR   99
+#define E0_RALT    100
+#define E0_BREAK   101		/* (control-pause) */
+#define E0_HOME    102
+#define E0_UP      103
+#define E0_PGUP    104
+#define E0_LEFT    105
+#define E0_RIGHT   106
+#define E0_END     107
+#define E0_DOWN    108
+#define E0_PGDN    109
+#define E0_INS     110
+#define E0_DEL     111
+
+/* for USB 106 keyboard */
+#define E0_YEN         124
+#define E0_BACKSLASH   89
+
+
+#define E1_PAUSE   119
+
+/*
+ * The keycodes below are randomly located in 89-95,112-118,120-127.
+ * They could be thrown away (and all occurrences below replaced by 0),
+ * but that would force many users to use the `setkeycodes' utility, where
+ * they needed not before. It does not matter that there are duplicates, as
+ * long as no duplication occurs for any single keyboard.
+ */
+#define SC_LIM 89
+
+#define FOCUS_PF1 85		/* actual code! */
+#define FOCUS_PF2 89
+#define FOCUS_PF3 90
+#define FOCUS_PF4 91
+#define FOCUS_PF5 92
+#define FOCUS_PF6 93
+#define FOCUS_PF7 94
+#define FOCUS_PF8 95
+#define FOCUS_PF9 120
+#define FOCUS_PF10 121
+#define FOCUS_PF11 122
+#define FOCUS_PF12 123
+
+#define JAP_86     124
+/* tfj@olivia.ping.dk:
+ * The four keys are located over the numeric keypad, and are
+ * labelled A1-A4. It's an rc930 keyboard, from
+ * Regnecentralen/RC International, Now ICL.
+ * Scancodes: 59, 5a, 5b, 5c.
+ */
+#define RGN1 124
+#define RGN2 125
+#define RGN3 126
+#define RGN4 127
+
+static unsigned char high_keys[128 - SC_LIM] = {
+	RGN1, RGN2, RGN3, RGN4, 0, 0, 0,	/* 0x59-0x5f */
+	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x60-0x67 */
+	0, 0, 0, 0, 0, FOCUS_PF11, 0, FOCUS_PF12,	/* 0x68-0x6f */
+	0, 0, 0, FOCUS_PF2, FOCUS_PF9, 0, 0, FOCUS_PF3,	/* 0x70-0x77 */
+	FOCUS_PF4, FOCUS_PF5, FOCUS_PF6, FOCUS_PF7,	/* 0x78-0x7b */
+	FOCUS_PF8, JAP_86, FOCUS_PF10, 0	/* 0x7c-0x7f */
+};
+
+/* BTC */
+#define E0_MACRO   112
+/* LK450 */
+#define E0_F13     113
+#define E0_F14     114
+#define E0_HELP    115
+#define E0_DO      116
+#define E0_F17     117
+#define E0_KPMINPLUS 118
+/*
+ * My OmniKey generates e0 4c for  the "OMNI" key and the
+ * right alt key does nada. [kkoller@nyx10.cs.du.edu]
+ */
+#define E0_OK  124
+/*
+ * New microsoft keyboard is rumoured to have
+ * e0 5b (left window button), e0 5c (right window button),
+ * e0 5d (menu button). [or: LBANNER, RBANNER, RMENU]
+ * [or: Windows_L, Windows_R, TaskMan]
+ */
+#define E0_MSLW        125
+#define E0_MSRW        126
+#define E0_MSTM        127
+
+static unsigned char e0_keys[128] = {
+	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x00-0x07 */
+	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x08-0x0f */
+	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x10-0x17 */
+	0, 0, 0, 0, E0_KPENTER, E0_RCTRL, 0, 0,	/* 0x18-0x1f */
+	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x20-0x27 */
+	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x28-0x2f */
+	0, 0, 0, 0, 0, E0_KPSLASH, 0, E0_PRSCR,	/* 0x30-0x37 */
+	E0_RALT, 0, 0, 0, 0, E0_F13, E0_F14, E0_HELP,	/* 0x38-0x3f */
+	E0_DO, E0_F17, 0, 0, 0, 0, E0_BREAK, E0_HOME,	/* 0x40-0x47 */
+	E0_UP, E0_PGUP, 0, E0_LEFT, E0_OK, E0_RIGHT, E0_KPMINPLUS, E0_END,	/* 0x48-0x4f */
+	E0_DOWN, E0_PGDN, E0_INS, E0_DEL, 0, 0, 0, 0,	/* 0x50-0x57 */
+	0, 0, 0, E0_MSLW, E0_MSRW, E0_MSTM, 0, 0,	/* 0x58-0x5f */
+	0, 0, 0, 0, 0, 0, 0, 0,	/* 0x60-0x67 */
+	0, 0, 0, 0, 0, 0, 0, E0_MACRO,	/* 0x68-0x6f */
+	//0, 0, 0, 0, 0, 0, 0, 0,                          /* 0x70-0x77 */
+	0, 0, 0, 0, 0, E0_BACKSLASH, 0, 0,	/* 0x70-0x77 */
+	0, 0, 0, E0_YEN, 0, 0, 0, 0	/* 0x78-0x7f */
+};
+
+static int gen_setkeycode(unsigned int scancode, unsigned int keycode)
+{
+	if (scancode < SC_LIM || scancode > 255 || keycode > 127)
+		return -EINVAL;
+	if (scancode < 128)
+		high_keys[scancode - SC_LIM] = keycode;
+	else
+		e0_keys[scancode - 128] = keycode;
+	return 0;
+}
+
+static int gen_getkeycode(unsigned int scancode)
+{
+	return
+	    (scancode < SC_LIM || scancode > 255) ? -EINVAL :
+	    (scancode <
+	     128) ? high_keys[scancode - SC_LIM] : e0_keys[scancode - 128];
+}
+
+static int
+gen_translate(unsigned char scancode, unsigned char *keycode, char raw_mode)
+{
+	static int prev_scancode;
+
+	/* special prefix scancodes.. */
+	if (scancode == 0xe0 || scancode == 0xe1) {
+		prev_scancode = scancode;
+		return 0;
+	}
+
+	/* 0xFF is sent by a few keyboards, ignore it. 0x00 is error */
+	if (scancode == 0x00 || scancode == 0xff) {
+		prev_scancode = 0;
+		return 0;
+	}
+
+	scancode &= 0x7f;
+
+	if (prev_scancode) {
+		/*
+		 * usually it will be 0xe0, but a Pause key generates
+		 * e1 1d 45 e1 9d c5 when pressed, and nothing when released
+		 */
+		if (prev_scancode != 0xe0) {
+			if (prev_scancode == 0xe1 && scancode == 0x1d) {
+				prev_scancode = 0x100;
+				return 0;
+			}
+				else if (prev_scancode == 0x100
+					 && scancode == 0x45) {
+				*keycode = E1_PAUSE;
+				prev_scancode = 0;
+			} else {
+#ifdef KBD_REPORT_UNKN
+				if (!raw_mode)
+					printk(KERN_INFO
+					       "keyboard: unknown e1 escape sequence\n");
+#endif
+				prev_scancode = 0;
+				return 0;
+			}
+		} else {
+			prev_scancode = 0;
+			/*
+			 *  The keyboard maintains its own internal caps lock and
+			 *  num lock statuses. In caps lock mode E0 AA precedes make
+			 *  code and E0 2A follows break code. In num lock mode,
+			 *  E0 2A precedes make code and E0 AA follows break code.
+			 *  We do our own book-keeping, so we will just ignore these.
+			 */
+			/*
+			 *  For my keyboard there is no caps lock mode, but there are
+			 *  both Shift-L and Shift-R modes. The former mode generates
+			 *  E0 2A / E0 AA pairs, the latter E0 B6 / E0 36 pairs.
+			 *  So, we should also ignore the latter. - aeb@cwi.nl
+			 */
+			if (scancode == 0x2a || scancode == 0x36)
+				return 0;
+
+			if (e0_keys[scancode])
+				*keycode = e0_keys[scancode];
+			else {
+#ifdef KBD_REPORT_UNKN
+				if (!raw_mode)
+					printk(KERN_INFO
+					       "keyboard: unknown scancode e0 %02x\n",
+					       scancode);
+#endif
+				return 0;
+			}
+		}
+	} else if (scancode >= SC_LIM) {
+		/* This happens with the FOCUS 9000 keyboard
+		   Its keys PF1..PF12 are reported to generate
+		   55 73 77 78 79 7a 7b 7c 74 7e 6d 6f
+		   Moreover, unless repeated, they do not generate
+		   key-down events, so we have to zero up_flag below */
+		/* Also, Japanese 86/106 keyboards are reported to
+		   generate 0x73 and 0x7d for \ - and \ | respectively. */
+		/* Also, some Brazilian keyboard is reported to produce
+		   0x73 and 0x7e for \ ? and KP-dot, respectively. */
+
+		*keycode = high_keys[scancode - SC_LIM];
+
+		if (!*keycode) {
+			if (!raw_mode) {
+#ifdef KBD_REPORT_UNKN
+				printk(KERN_INFO
+				       "keyboard: unrecognized scancode (%02x)"
+				       " - ignored\n", scancode);
+#endif
+			}
+			return 0;
+		}
+	} else
+		*keycode = scancode;
+	return 1;
+}
+
+static char gen_unexpected_up(unsigned char keycode)
+{
+	/* unexpected, but this can happen: maybe this was a key release for a
+	   FOCUS 9000 PF key; if we want to see it, we have to clear up_flag */
+	if (keycode >= SC_LIM || keycode == 85)
+		return 0;
+	else
+		return 0200;
+}
+
+/*
+ * These are the default mappings
+ */
+int  (*k_setkeycode)(unsigned int, unsigned int) = gen_setkeycode;
+int  (*k_getkeycode)(unsigned int) = gen_getkeycode;
+int  (*k_translate)(unsigned char, unsigned char *, char) = gen_translate;
+char (*k_unexpected_up)(unsigned char) = gen_unexpected_up;
+void (*k_leds)(unsigned char);
+
+/* Simple translation table for the SysRq keys */
+
+#ifdef CONFIG_MAGIC_SYSRQ
+static unsigned char gen_sysrq_xlate[128] =
+	"\000\0331234567890-=\177\t"	/* 0x00 - 0x0f */
+	"qwertyuiop[]\r\000as"	/* 0x10 - 0x1f */
+	"dfghjkl;'`\000\\zxcv"	/* 0x20 - 0x2f */
+	"bnm,./\000*\000 \000\201\202\203\204\205"	/* 0x30 - 0x3f */
+	"\206\207\210\211\212\000\000789-456+1"	/* 0x40 - 0x4f */
+	"230\177\000\000\213\214\000\000\000\000\000\000\000\000\000\000"	/* 0x50 - 0x5f */
+	"\r\000/";			/* 0x60 - 0x6f */
+
+unsigned char *k_sysrq_xlate = gen_sysrq_xlate;
+int k_sysrq_key = 0x54;
+#endif
diff --git a/arch/arm26/lib/lib1funcs.S b/arch/arm26/lib/lib1funcs.S
new file mode 100644
index 00000000000..b8f9518db87
--- /dev/null
+++ b/arch/arm26/lib/lib1funcs.S
@@ -0,0 +1,314 @@
+@ libgcc1 routines for ARM cpu.
+@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
+
+/* Copyright (C) 1995, 1996, 1998 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file with other programs, and to distribute
+those programs without any restriction coming from the use of this
+file.  (The General Public License restrictions do apply in other
+respects; for example, they cover modification of the file, and
+distribution when not linked into another program.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.
+ */
+/* This code is derived from gcc 2.95.3 */
+/* I Molton     29/07/01 */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware.h>
+#include <linux/config.h>
+
+#define RET	movs
+#define RETc(x)	mov##x##s
+#define RETCOND ^
+
+dividend	.req	r0
+divisor		.req	r1
+result		.req	r2
+overdone        .req    r2
+curbit		.req	r3
+ip		.req	r12
+sp		.req	r13
+lr		.req	r14
+pc		.req	r15
+	
+ENTRY(__udivsi3)
+	cmp	divisor, #0
+	beq	Ldiv0
+	mov	curbit, #1
+	mov	result, #0
+	cmp	dividend, divisor
+	bcc	Lgot_result_udivsi3
+1:
+	@ Unless the divisor is very big, shift it up in multiples of
+	@ four bits, since this is the amount of unwinding in the main
+	@ division loop.  Continue shifting until the divisor is 
+	@ larger than the dividend.
+	cmp	divisor, #0x10000000
+	cmpcc	divisor, dividend
+	movcc	divisor, divisor, lsl #4
+	movcc	curbit, curbit, lsl #4
+	bcc	1b
+
+2:
+	@ For very big divisors, we must shift it a bit at a time, or
+	@ we will be in danger of overflowing.
+	cmp	divisor, #0x80000000
+	cmpcc	divisor, dividend
+	movcc	divisor, divisor, lsl #1
+	movcc	curbit, curbit, lsl #1
+	bcc	2b
+
+3:
+	@ Test for possible subtractions, and note which bits
+	@ are done in the result.  On the final pass, this may subtract
+	@ too much from the dividend, but the result will be ok, since the
+	@ "bit" will have been shifted out at the bottom.
+	cmp	dividend, divisor
+	subcs	dividend, dividend, divisor
+	orrcs	result, result, curbit
+	cmp	dividend, divisor, lsr #1
+	subcs	dividend, dividend, divisor, lsr #1
+	orrcs	result, result, curbit, lsr #1
+	cmp	dividend, divisor, lsr #2
+	subcs	dividend, dividend, divisor, lsr #2
+	orrcs	result, result, curbit, lsr #2
+	cmp	dividend, divisor, lsr #3
+	subcs	dividend, dividend, divisor, lsr #3
+	orrcs	result, result, curbit, lsr #3
+	cmp	dividend, #0			@ Early termination?
+	movnes	curbit, curbit, lsr #4		@ No, any more bits to do?
+	movne	divisor, divisor, lsr #4
+	bne	3b
+Lgot_result_udivsi3:
+	mov	r0, result
+	RET	pc, lr
+
+Ldiv0:
+	str	lr, [sp, #-4]!
+	bl	__div0
+	mov	r0, #0			@ about as wrong as it could be
+	ldmia	sp!, {pc}RETCOND
+
+/* __umodsi3 ----------------------- */
+
+ENTRY(__umodsi3)
+	cmp	divisor, #0
+	beq	Ldiv0
+	mov	curbit, #1
+	cmp	dividend, divisor
+	RETc(cc)	pc, lr
+1:
+	@ Unless the divisor is very big, shift it up in multiples of
+	@ four bits, since this is the amount of unwinding in the main
+	@ division loop.  Continue shifting until the divisor is 
+	@ larger than the dividend.
+	cmp	divisor, #0x10000000
+	cmpcc	divisor, dividend
+	movcc	divisor, divisor, lsl #4
+	movcc	curbit, curbit, lsl #4
+	bcc	1b
+
+2:
+	@ For very big divisors, we must shift it a bit at a time, or
+	@ we will be in danger of overflowing.
+	cmp	divisor, #0x80000000
+	cmpcc	divisor, dividend
+	movcc	divisor, divisor, lsl #1
+	movcc	curbit, curbit, lsl #1
+	bcc	2b
+
+3:
+	@ Test for possible subtractions.  On the final pass, this may 
+	@ subtract too much from the dividend, so keep track of which
+	@ subtractions are done, we can fix them up afterwards...
+	mov	overdone, #0
+	cmp	dividend, divisor
+	subcs	dividend, dividend, divisor
+	cmp	dividend, divisor, lsr #1
+	subcs	dividend, dividend, divisor, lsr #1
+	orrcs	overdone, overdone, curbit, ror #1
+	cmp	dividend, divisor, lsr #2
+	subcs	dividend, dividend, divisor, lsr #2
+	orrcs	overdone, overdone, curbit, ror #2
+	cmp	dividend, divisor, lsr #3
+	subcs	dividend, dividend, divisor, lsr #3
+	orrcs	overdone, overdone, curbit, ror #3
+	mov	ip, curbit
+	cmp	dividend, #0			@ Early termination?
+	movnes	curbit, curbit, lsr #4		@ No, any more bits to do?
+	movne	divisor, divisor, lsr #4
+	bne	3b
+
+	@ Any subtractions that we should not have done will be recorded in
+	@ the top three bits of "overdone".  Exactly which were not needed
+	@ are governed by the position of the bit, stored in ip.
+	@ If we terminated early, because dividend became zero,
+	@ then none of the below will match, since the bit in ip will not be
+	@ in the bottom nibble.
+	ands	overdone, overdone, #0xe0000000
+	RETc(eq)	pc, lr				@ No fixups needed
+	tst	overdone, ip, ror #3
+	addne	dividend, dividend, divisor, lsr #3
+	tst	overdone, ip, ror #2
+	addne	dividend, dividend, divisor, lsr #2
+	tst	overdone, ip, ror #1
+	addne	dividend, dividend, divisor, lsr #1
+	RET	pc, lr
+
+ENTRY(__divsi3)
+	eor	ip, dividend, divisor		@ Save the sign of the result.
+	mov	curbit, #1
+	mov	result, #0
+	cmp	divisor, #0
+	rsbmi	divisor, divisor, #0		@ Loops below use unsigned.
+	beq	Ldiv0
+	cmp	dividend, #0
+	rsbmi	dividend, dividend, #0
+	cmp	dividend, divisor
+	bcc	Lgot_result_divsi3
+
+1:
+	@ Unless the divisor is very big, shift it up in multiples of
+	@ four bits, since this is the amount of unwinding in the main
+	@ division loop.  Continue shifting until the divisor is 
+	@ larger than the dividend.
+	cmp	divisor, #0x10000000
+	cmpcc	divisor, dividend
+	movcc	divisor, divisor, lsl #4
+	movcc	curbit, curbit, lsl #4
+	bcc	1b
+
+2:
+	@ For very big divisors, we must shift it a bit at a time, or
+	@ we will be in danger of overflowing.
+	cmp	divisor, #0x80000000
+	cmpcc	divisor, dividend
+	movcc	divisor, divisor, lsl #1
+	movcc	curbit, curbit, lsl #1
+	bcc	2b
+
+3:
+	@ Test for possible subtractions, and note which bits
+	@ are done in the result.  On the final pass, this may subtract
+	@ too much from the dividend, but the result will be ok, since the
+	@ "bit" will have been shifted out at the bottom.
+	cmp	dividend, divisor
+	subcs	dividend, dividend, divisor
+	orrcs	result, result, curbit
+	cmp	dividend, divisor, lsr #1
+	subcs	dividend, dividend, divisor, lsr #1
+	orrcs	result, result, curbit, lsr #1
+	cmp	dividend, divisor, lsr #2
+	subcs	dividend, dividend, divisor, lsr #2
+	orrcs	result, result, curbit, lsr #2
+	cmp	dividend, divisor, lsr #3
+	subcs	dividend, dividend, divisor, lsr #3
+	orrcs	result, result, curbit, lsr #3
+	cmp	dividend, #0			@ Early termination?
+	movnes	curbit, curbit, lsr #4		@ No, any more bits to do?
+	movne	divisor, divisor, lsr #4
+	bne	3b
+Lgot_result_divsi3:
+	mov	r0, result
+	cmp	ip, #0
+	rsbmi	r0, r0, #0
+	RET	pc, lr
+
+ENTRY(__modsi3)
+	mov	curbit, #1
+	cmp	divisor, #0
+	rsbmi	divisor, divisor, #0		@ Loops below use unsigned.
+	beq	Ldiv0
+	@ Need to save the sign of the dividend, unfortunately, we need
+	@ ip later on; this is faster than pushing lr and using that.
+	str	dividend, [sp, #-4]!
+	cmp	dividend, #0
+	rsbmi	dividend, dividend, #0
+	cmp	dividend, divisor
+	bcc	Lgot_result_modsi3
+
+1:
+	@ Unless the divisor is very big, shift it up in multiples of
+	@ four bits, since this is the amount of unwinding in the main
+	@ division loop.  Continue shifting until the divisor is 
+	@ larger than the dividend.
+	cmp	divisor, #0x10000000
+	cmpcc	divisor, dividend
+	movcc	divisor, divisor, lsl #4
+	movcc	curbit, curbit, lsl #4
+	bcc	1b
+
+2:
+	@ For very big divisors, we must shift it a bit at a time, or
+	@ we will be in danger of overflowing.
+	cmp	divisor, #0x80000000
+	cmpcc	divisor, dividend
+	movcc	divisor, divisor, lsl #1
+	movcc	curbit, curbit, lsl #1
+	bcc	2b
+
+3:
+	@ Test for possible subtractions.  On the final pass, this may 
+	@ subtract too much from the dividend, so keep track of which
+	@ subtractions are done, we can fix them up afterwards...
+	mov	overdone, #0
+	cmp	dividend, divisor
+	subcs	dividend, dividend, divisor
+	cmp	dividend, divisor, lsr #1
+	subcs	dividend, dividend, divisor, lsr #1
+	orrcs	overdone, overdone, curbit, ror #1
+	cmp	dividend, divisor, lsr #2
+	subcs	dividend, dividend, divisor, lsr #2
+	orrcs	overdone, overdone, curbit, ror #2
+	cmp	dividend, divisor, lsr #3
+	subcs	dividend, dividend, divisor, lsr #3
+	orrcs	overdone, overdone, curbit, ror #3
+	mov	ip, curbit
+	cmp	dividend, #0			@ Early termination?
+	movnes	curbit, curbit, lsr #4		@ No, any more bits to do?
+	movne	divisor, divisor, lsr #4
+	bne	3b
+
+	@ Any subtractions that we should not have done will be recorded in
+	@ the top three bits of "overdone".  Exactly which were not needed
+	@ are governed by the position of the bit, stored in ip.
+	@ If we terminated early, because dividend became zero,
+	@ then none of the below will match, since the bit in ip will not be
+	@ in the bottom nibble.
+	ands	overdone, overdone, #0xe0000000
+	beq	Lgot_result_modsi3
+	tst	overdone, ip, ror #3
+	addne	dividend, dividend, divisor, lsr #3
+	tst	overdone, ip, ror #2
+	addne	dividend, dividend, divisor, lsr #2
+	tst	overdone, ip, ror #1
+	addne	dividend, dividend, divisor, lsr #1
+Lgot_result_modsi3:
+	ldr	ip, [sp], #4
+	cmp	ip, #0
+	rsbmi	dividend, dividend, #0
+	RET	pc, lr
diff --git a/arch/arm26/lib/longlong.h b/arch/arm26/lib/longlong.h
new file mode 100644
index 00000000000..05ec1abd6a2
--- /dev/null
+++ b/arch/arm26/lib/longlong.h
@@ -0,0 +1,184 @@
+/* longlong.h -- based on code from gcc-2.95.3
+
+   definitions for mixed size 32/64 bit arithmetic.
+   Copyright (C) 1991, 92, 94, 95, 96, 1997, 1998 Free Software Foundation, Inc.
+
+   This definition file is free software; you can redistribute it
+   and/or modify it under the terms of the GNU General Public
+   License as published by the Free Software Foundation; either
+   version 2, or (at your option) any later version.
+
+   This definition file is distributed in the hope that it will be
+   useful, but WITHOUT ANY WARRANTY; without even the implied
+   warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+   See the GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+/* Borrowed from GCC 2.95.3, I Molton 29/07/01 */
+
+#ifndef SI_TYPE_SIZE
+#define SI_TYPE_SIZE 32
+#endif
+
+#define __BITS4 (SI_TYPE_SIZE / 4)
+#define __ll_B (1L << (SI_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((USItype) (t) % __ll_B)
+#define __ll_highpart(t) ((USItype) (t) / __ll_B)
+
+/* Define auxiliary asm macros.
+
+   1) umul_ppmm(high_prod, low_prod, multipler, multiplicand)
+   multiplies two USItype integers MULTIPLER and MULTIPLICAND,
+   and generates a two-part USItype product in HIGH_PROD and
+   LOW_PROD.
+
+   2) __umulsidi3(a,b) multiplies two USItype integers A and B,
+   and returns a UDItype product.  This is just a variant of umul_ppmm.
+
+   3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+   denominator) divides a two-word unsigned integer, composed by the
+   integers HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and
+   places the quotient in QUOTIENT and the remainder in REMAINDER.
+   HIGH_NUMERATOR must be less than DENOMINATOR for correct operation.
+   If, in addition, the most significant bit of DENOMINATOR must be 1,
+   then the pre-processor symbol UDIV_NEEDS_NORMALIZATION is defined to 1.
+
+   4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+   denominator).  Like udiv_qrnnd but the numbers are signed.  The
+   quotient is rounded towards 0.
+
+   5) count_leading_zeros(count, x) counts the number of zero-bits from
+   the msb to the first non-zero bit.  This is the number of steps X
+   needs to be shifted left to set the msb.  Undefined for X == 0.
+
+   6) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
+   high_addend_2, low_addend_2) adds two two-word unsigned integers,
+   composed by HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and
+   LOW_ADDEND_2 respectively.  The result is placed in HIGH_SUM and
+   LOW_SUM.  Overflow (i.e. carry out) is not stored anywhere, and is
+   lost.
+
+   7) sub_ddmmss(high_difference, low_difference, high_minuend,
+   low_minuend, high_subtrahend, low_subtrahend) subtracts two
+   two-word unsigned integers, composed by HIGH_MINUEND_1 and
+   LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and LOW_SUBTRAHEND_2
+   respectively.  The result is placed in HIGH_DIFFERENCE and
+   LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
+   and is lost.
+
+   If any of these macros are left undefined for a particular CPU,
+   C macros are used.  */
+
+#if defined (__arm__)
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("adds	%1, %4, %5					\n\
+	adc	%0, %2, %3"						\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "%r" ((USItype) (ah)),					\
+	     "rI" ((USItype) (bh)),					\
+	     "%r" ((USItype) (al)),					\
+	     "rI" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subs	%1, %4, %5					\n\
+	sbc	%0, %2, %3"						\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "r" ((USItype) (ah)),					\
+	     "rI" ((USItype) (bh)),					\
+	     "r" ((USItype) (al)),					\
+	     "rI" ((USItype) (bl)))
+#define umul_ppmm(xh, xl, a, b) \
+{register USItype __t0, __t1, __t2;					\
+  __asm__ ("%@ Inlined umul_ppmm					\n\
+	mov	%2, %5, lsr #16						\n\
+	mov	%0, %6, lsr #16						\n\
+	bic	%3, %5, %2, lsl #16					\n\
+	bic	%4, %6, %0, lsl #16					\n\
+	mul	%1, %3, %4						\n\
+	mul	%4, %2, %4						\n\
+	mul	%3, %0, %3						\n\
+	mul	%0, %2, %0						\n\
+	adds	%3, %4, %3						\n\
+	addcs	%0, %0, #65536						\n\
+	adds	%1, %1, %3, lsl #16					\n\
+	adc	%0, %0, %3, lsr #16"					\
+	   : "=&r" ((USItype) (xh)),					\
+	     "=r" ((USItype) (xl)),					\
+	     "=&r" (__t0), "=&r" (__t1), "=r" (__t2)			\
+	   : "r" ((USItype) (a)),					\
+	     "r" ((USItype) (b)));}
+#define UMUL_TIME 20
+#define UDIV_TIME 100
+#endif /* __arm__ */
+
+#define __umulsidi3(u, v) \
+  ({DIunion __w;							\
+    umul_ppmm (__w.s.high, __w.s.low, u, v);				\
+    __w.ll; })
+
+#define __udiv_qrnnd_c(q, r, n1, n0, d) \
+  do {									\
+    USItype __d1, __d0, __q1, __q0;					\
+    USItype __r1, __r0, __m;						\
+    __d1 = __ll_highpart (d);						\
+    __d0 = __ll_lowpart (d);						\
+									\
+    __r1 = (n1) % __d1;							\
+    __q1 = (n1) / __d1;							\
+    __m = (USItype) __q1 * __d0;					\
+    __r1 = __r1 * __ll_B | __ll_highpart (n0);				\
+    if (__r1 < __m)							\
+      {									\
+	__q1--, __r1 += (d);						\
+	if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
+	  if (__r1 < __m)						\
+	    __q1--, __r1 += (d);					\
+      }									\
+    __r1 -= __m;							\
+									\
+    __r0 = __r1 % __d1;							\
+    __q0 = __r1 / __d1;							\
+    __m = (USItype) __q0 * __d0;					\
+    __r0 = __r0 * __ll_B | __ll_lowpart (n0);				\
+    if (__r0 < __m)							\
+      {									\
+	__q0--, __r0 += (d);						\
+	if (__r0 >= (d))						\
+	  if (__r0 < __m)						\
+	    __q0--, __r0 += (d);					\
+      }									\
+    __r0 -= __m;							\
+									\
+    (q) = (USItype) __q1 * __ll_B | __q0;				\
+    (r) = __r0;								\
+  } while (0)
+
+#define UDIV_NEEDS_NORMALIZATION 1
+#define udiv_qrnnd __udiv_qrnnd_c
+
+extern const UQItype __clz_tab[];
+#define count_leading_zeros(count, x) \
+  do {									\
+    USItype __xr = (x);							\
+    USItype __a;							\
+									\
+    if (SI_TYPE_SIZE <= 32)						\
+      {									\
+	__a = __xr < ((USItype)1<<2*__BITS4)				\
+	  ? (__xr < ((USItype)1<<__BITS4) ? 0 : __BITS4)		\
+	  : (__xr < ((USItype)1<<3*__BITS4) ?  2*__BITS4 : 3*__BITS4);	\
+      }									\
+    else								\
+      {									\
+	for (__a = SI_TYPE_SIZE - 8; __a > 0; __a -= 8)			\
+	  if (((__xr >> __a) & 0xff) != 0)				\
+	    break;							\
+      }									\
+									\
+    (count) = SI_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a);		\
+  } while (0)
diff --git a/arch/arm26/lib/lshrdi3.c b/arch/arm26/lib/lshrdi3.c
new file mode 100644
index 00000000000..b666f1bad45
--- /dev/null
+++ b/arch/arm26/lib/lshrdi3.c
@@ -0,0 +1,61 @@
+/* More subroutines needed by GCC output code on some machines.  */
+/* Compile this one with gcc.  */
+/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.
+ */
+/* support functions required by the kernel. based on code from gcc-2.95.3 */
+/* I Molton     29/07/01 */
+
+#include "gcclib.h"
+
+DItype
+__lshrdi3 (DItype u, word_type b)
+{
+  DIunion w;
+  word_type bm;
+  DIunion uu;
+
+  if (b == 0)
+    return u;
+
+  uu.ll = u;
+
+  bm = (sizeof (SItype) * BITS_PER_UNIT) - b;
+  if (bm <= 0)
+    {
+      w.s.high = 0;
+      w.s.low = (USItype)uu.s.high >> -bm;
+    }
+  else
+    {
+      USItype carries = (USItype)uu.s.high << bm;
+      w.s.high = (USItype)uu.s.high >> b;
+      w.s.low = ((USItype)uu.s.low >> b) | carries;
+    }
+
+  return w.ll;
+}
+
diff --git a/arch/arm26/lib/memchr.S b/arch/arm26/lib/memchr.S
new file mode 100644
index 00000000000..34e7c14c08a
--- /dev/null
+++ b/arch/arm26/lib/memchr.S
@@ -0,0 +1,25 @@
+/*
+ *  linux/arch/arm26/lib/memchr.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.text
+	.align	5
+ENTRY(memchr)
+1:	subs	r2, r2, #1
+	bmi	2f
+	ldrb	r3, [r0], #1
+	teq	r3, r1
+	bne	1b
+	sub	r0, r0, #1
+2:	movne	r0, #0
+	RETINSTR(mov,pc,lr)
diff --git a/arch/arm26/lib/memcpy.S b/arch/arm26/lib/memcpy.S
new file mode 100644
index 00000000000..3f719e41206
--- /dev/null
+++ b/arch/arm26/lib/memcpy.S
@@ -0,0 +1,318 @@
+/*
+ *  linux/arch/arm26/lib/memcpy.S
+ *
+ *  Copyright (C) 1995-1999 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+		.text
+
+#define ENTER	\
+		mov	ip,sp	;\
+		stmfd	sp!,{r4-r9,fp,ip,lr,pc}	;\
+		sub	fp,ip,#4
+
+#define EXIT	\
+		LOADREGS(ea, fp, {r4 - r9, fp, sp, pc})
+
+#define EXITEQ	\
+		LOADREGS(eqea, fp, {r4 - r9, fp, sp, pc})
+
+/*
+ * Prototype: void memcpy(void *to,const void *from,unsigned long n);
+ * ARM3: cant use memcopy here!!!
+ */
+ENTRY(memcpy)
+ENTRY(memmove)
+		ENTER
+		cmp	r1, r0
+		bcc	19f
+		subs	r2, r2, #4
+		blt	6f
+		ands	ip, r0, #3
+		bne	7f
+		ands	ip, r1, #3
+		bne	8f
+
+1:		subs	r2, r2, #8
+		blt	5f
+		subs	r2, r2, #0x14
+		blt	3f
+2:		ldmia	r1!,{r3 - r9, ip}
+		stmia	r0!,{r3 - r9, ip}
+		subs	r2, r2, #32
+		bge	2b
+		cmn	r2, #16
+		ldmgeia	r1!, {r3 - r6}
+		stmgeia	r0!, {r3 - r6}
+		subge	r2, r2, #0x10
+3:		adds	r2, r2, #0x14
+4:		ldmgeia	r1!, {r3 - r5}
+		stmgeia	r0!, {r3 - r5}
+		subges	r2, r2, #12
+		bge	4b
+5:		adds	r2, r2, #8
+		blt	6f
+		subs	r2, r2, #4
+		ldrlt	r3, [r1], #4
+		ldmgeia	r1!, {r4, r5}
+		strlt	r3, [r0], #4
+		stmgeia	r0!, {r4, r5}
+		subge	r2, r2, #4
+
+6:		adds	r2, r2, #4
+		EXITEQ
+		cmp	r2, #2
+		ldrb	r3, [r1], #1
+		ldrgeb	r4, [r1], #1
+		ldrgtb	r5, [r1], #1
+		strb	r3, [r0], #1
+		strgeb	r4, [r0], #1
+		strgtb	r5, [r0], #1
+		EXIT
+
+7:		rsb	ip, ip, #4
+		cmp	ip, #2
+		ldrb	r3, [r1], #1
+		ldrgeb	r4, [r1], #1
+		ldrgtb	r5, [r1], #1
+		strb	r3, [r0], #1
+		strgeb	r4, [r0], #1
+		strgtb	r5, [r0], #1
+		subs	r2, r2, ip
+		blt	6b
+		ands	ip, r1, #3
+		beq	1b
+
+8:		bic	r1, r1, #3
+		ldr	r7, [r1], #4
+		cmp	ip, #2
+		bgt	15f
+		beq	11f
+		cmp	r2, #12
+		blt	10f
+		sub	r2, r2, #12
+9:		mov	r3, r7, pull #8
+		ldmia	r1!, {r4 - r7}
+		orr	r3, r3, r4, push #24
+		mov	r4, r4, pull #8
+		orr	r4, r4, r5, push #24
+		mov	r5, r5, pull #8
+		orr	r5, r5, r6, push #24
+		mov	r6, r6, pull #8
+		orr	r6, r6, r7, push #24
+		stmia	r0!, {r3 - r6}
+		subs	r2, r2, #16
+		bge	9b
+		adds	r2, r2, #12
+		blt	100f
+10:		mov	r3, r7, pull #8
+		ldr	r7, [r1], #4
+		subs	r2, r2, #4
+		orr	r3, r3, r7, push #24
+		str	r3, [r0], #4
+		bge	10b
+100:		sub	r1, r1, #3
+		b	6b
+
+11:		cmp	r2, #12
+		blt	13f		/* */
+		sub	r2, r2, #12
+12:		mov	r3, r7, pull #16
+		ldmia	r1!, {r4 - r7}
+		orr	r3, r3, r4, push #16
+		mov	r4, r4, pull #16
+		orr	r4, r4, r5, push #16
+		mov	r5, r5, pull #16
+		orr	r5, r5, r6, push #16
+		mov	r6, r6, pull #16
+		orr	r6, r6, r7, push #16
+		stmia	r0!, {r3 - r6}
+		subs	r2, r2, #16
+		bge	12b
+		adds	r2, r2, #12
+		blt	14f
+13:		mov	r3, r7, pull #16
+		ldr	r7, [r1], #4
+		subs	r2, r2, #4
+		orr	r3, r3, r7, push #16
+		str	r3, [r0], #4
+		bge	13b
+14:		sub	r1, r1, #2
+		b	6b
+
+15:		cmp	r2, #12
+		blt	17f
+		sub	r2, r2, #12
+16:		mov	r3, r7, pull #24
+		ldmia	r1!, {r4 - r7}
+		orr	r3, r3, r4, push #8
+		mov	r4, r4, pull #24
+		orr	r4, r4, r5, push #8
+		mov	r5, r5, pull #24
+		orr	r5, r5, r6, push #8
+		mov	r6, r6, pull #24
+		orr	r6, r6, r7, push #8
+		stmia	r0!, {r3 - r6}
+		subs	r2, r2, #16
+		bge	16b
+		adds	r2, r2, #12
+		blt	18f
+17:		mov	r3, r7, pull #24
+		ldr	r7, [r1], #4
+		subs	r2, r2, #4
+		orr	r3, r3, r7, push #8
+		str	r3, [r0], #4
+		bge	17b
+18:		sub	r1, r1, #1
+		b	6b
+
+
+19:		add	r1, r1, r2
+		add	r0, r0, r2
+		subs	r2, r2, #4
+		blt	24f
+		ands	ip, r0, #3
+		bne	25f
+		ands	ip, r1, #3
+		bne	26f
+
+20:		subs	r2, r2, #8
+		blt	23f
+		subs	r2, r2, #0x14
+		blt	22f
+21:		ldmdb	r1!, {r3 - r9, ip}
+		stmdb	r0!, {r3 - r9, ip}
+		subs	r2, r2, #32
+		bge	21b
+22:		cmn	r2, #16
+		ldmgedb	r1!, {r3 - r6}
+		stmgedb	r0!, {r3 - r6}
+		subge	r2, r2, #16
+		adds	r2, r2, #20
+		ldmgedb	r1!, {r3 - r5}
+		stmgedb	r0!, {r3 - r5}
+		subge	r2, r2, #12
+23:		adds	r2, r2, #8
+		blt	24f
+		subs	r2, r2, #4
+		ldrlt	r3, [r1, #-4]!
+		ldmgedb	r1!, {r4, r5}
+		strlt	r3, [r0, #-4]!
+		stmgedb	r0!, {r4, r5}
+		subge	r2, r2, #4
+
+24:		adds	r2, r2, #4
+		EXITEQ
+		cmp	r2, #2
+		ldrb	r3, [r1, #-1]!
+		ldrgeb	r4, [r1, #-1]!
+		ldrgtb	r5, [r1, #-1]!
+		strb	r3, [r0, #-1]!
+		strgeb	r4, [r0, #-1]!
+		strgtb	r5, [r0, #-1]!
+		EXIT
+
+25:		cmp	ip, #2
+		ldrb	r3, [r1, #-1]!
+		ldrgeb	r4, [r1, #-1]!
+		ldrgtb	r5, [r1, #-1]!
+		strb	r3, [r0, #-1]!
+		strgeb	r4, [r0, #-1]!
+		strgtb	r5, [r0, #-1]!
+		subs	r2, r2, ip
+		blt	24b
+		ands	ip, r1, #3
+		beq	20b
+
+26:		bic	r1, r1, #3
+		ldr	r3, [r1], #0
+		cmp	ip, #2
+		blt	34f
+		beq	30f
+		cmp	r2, #12
+		blt	28f
+		sub	r2, r2, #12
+27:		mov	r7, r3, push #8
+		ldmdb	r1!, {r3, r4, r5, r6}
+		orr	r7, r7, r6, pull #24
+		mov	r6, r6, push #8
+		orr	r6, r6, r5, pull #24
+		mov	r5, r5, push #8
+		orr	r5, r5, r4, pull #24
+		mov	r4, r4, push #8
+		orr	r4, r4, r3, pull #24
+		stmdb	r0!, {r4, r5, r6, r7}
+		subs	r2, r2, #16
+		bge	27b
+		adds	r2, r2, #12
+		blt	29f
+28:		mov	ip, r3, push #8
+		ldr	r3, [r1, #-4]!
+		subs	r2, r2, #4
+		orr	ip, ip, r3, pull #24
+		str	ip, [r0, #-4]!
+		bge	28b
+29:		add	r1, r1, #3
+		b	24b
+
+30:		cmp	r2, #12
+		blt	32f
+		sub	r2, r2, #12
+31:		mov	r7, r3, push #16
+		ldmdb	r1!, {r3, r4, r5, r6}
+		orr	r7, r7, r6, pull #16
+		mov	r6, r6, push #16
+		orr	r6, r6, r5, pull #16
+		mov	r5, r5, push #16
+		orr	r5, r5, r4, pull #16
+		mov	r4, r4, push #16
+		orr	r4, r4, r3, pull #16
+		stmdb	r0!, {r4, r5, r6, r7}
+		subs	r2, r2, #16
+		bge	31b
+		adds	r2, r2, #12
+		blt	33f
+32:		mov	ip, r3, push #16
+		ldr	r3, [r1, #-4]!
+		subs	r2, r2, #4
+		orr	ip, ip, r3, pull #16
+		str	ip, [r0, #-4]!
+		bge	32b
+33:		add	r1, r1, #2
+		b	24b
+
+34:		cmp	r2, #12
+		blt	36f
+		sub	r2, r2, #12
+35:		mov	r7, r3, push #24
+		ldmdb	r1!, {r3, r4, r5, r6}
+		orr	r7, r7, r6, pull #8
+		mov	r6, r6, push #24
+		orr	r6, r6, r5, pull #8
+		mov	r5, r5, push #24
+		orr	r5, r5, r4, pull #8
+		mov	r4, r4, push #24
+		orr	r4, r4, r3, pull #8
+		stmdb	r0!, {r4, r5, r6, r7}
+		subs	r2, r2, #16
+		bge	35b
+		adds	r2, r2, #12
+		blt	37f
+36:		mov	ip, r3, push #24
+		ldr	r3, [r1, #-4]!
+		subs	r2, r2, #4
+		orr	ip, ip, r3, pull #8
+		str	ip, [r0, #-4]!
+		bge	36b
+37:		add	r1, r1, #1
+		b	24b
+
+		.align
diff --git a/arch/arm26/lib/memset.S b/arch/arm26/lib/memset.S
new file mode 100644
index 00000000000..aedec10b58f
--- /dev/null
+++ b/arch/arm26/lib/memset.S
@@ -0,0 +1,80 @@
+/*
+ *  linux/arch/arm26/lib/memset.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.text
+	.align	5
+	.word	0
+
+1:	subs	r2, r2, #4		@ 1 do we have enough
+	blt	5f			@ 1 bytes to align with?
+	cmp	r3, #2			@ 1
+	strltb	r1, [r0], #1		@ 1
+	strleb	r1, [r0], #1		@ 1
+	strb	r1, [r0], #1		@ 1
+	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
+/*
+ * The pointer is now aligned and the length is adjusted.  Try doing the
+ * memzero again.
+ */
+
+ENTRY(memset)
+	ands	r3, r0, #3		@ 1 unaligned?
+	bne	1b			@ 1
+/*
+ * we know that the pointer in r0 is aligned to a word boundary.
+ */
+	orr	r1, r1, r1, lsl #8
+	orr	r1, r1, r1, lsl #16
+	mov	r3, r1
+	cmp	r2, #16
+	blt	4f
+/*
+ * We need an extra register for this loop - save the return address and
+ * use the LR
+ */
+	str	lr, [sp, #-4]!
+	mov	ip, r1
+	mov	lr, r1
+
+2:	subs	r2, r2, #64
+	stmgeia	r0!, {r1, r3, ip, lr}	@ 64 bytes at a time.
+	stmgeia	r0!, {r1, r3, ip, lr}
+	stmgeia	r0!, {r1, r3, ip, lr}
+	stmgeia	r0!, {r1, r3, ip, lr}
+	bgt	2b
+	LOADREGS(eqfd, sp!, {pc})	@ Now <64 bytes to go.
+/*
+ * No need to correct the count; we're only testing bits from now on
+ */
+	tst	r2, #32
+	stmneia	r0!, {r1, r3, ip, lr}
+	stmneia	r0!, {r1, r3, ip, lr}
+	tst	r2, #16
+	stmneia	r0!, {r1, r3, ip, lr}
+	ldr	lr, [sp], #4
+
+4:	tst	r2, #8
+	stmneia	r0!, {r1, r3}
+	tst	r2, #4
+	strne	r1, [r0], #4
+/*
+ * When we get here, we've got less than 4 bytes to zero.  We
+ * may have an unaligned pointer as well.
+ */
+5:	tst	r2, #2
+	strneb	r1, [r0], #1
+	strneb	r1, [r0], #1
+	tst	r2, #1
+	strneb	r1, [r0], #1
+	RETINSTR(mov,pc,lr)
diff --git a/arch/arm26/lib/memzero.S b/arch/arm26/lib/memzero.S
new file mode 100644
index 00000000000..cc5bf686006
--- /dev/null
+++ b/arch/arm26/lib/memzero.S
@@ -0,0 +1,80 @@
+/*
+ *  linux/arch/arm26/lib/memzero.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.text
+	.align	5
+	.word	0
+/*
+ * Align the pointer in r0.  r3 contains the number of bytes that we are
+ * mis-aligned by, and r1 is the number of bytes.  If r1 < 4, then we
+ * don't bother; we use byte stores instead.
+ */
+1:	subs	r1, r1, #4		@ 1 do we have enough
+	blt	5f			@ 1 bytes to align with?
+	cmp	r3, #2			@ 1
+	strltb	r2, [r0], #1		@ 1
+	strleb	r2, [r0], #1		@ 1
+	strb	r2, [r0], #1		@ 1
+	add	r1, r1, r3		@ 1 (r1 = r1 - (4 - r3))
+/*
+ * The pointer is now aligned and the length is adjusted.  Try doing the
+ * memzero again.
+ */
+
+ENTRY(__memzero)
+	mov	r2, #0			@ 1
+	ands	r3, r0, #3		@ 1 unaligned?
+	bne	1b			@ 1
+/*
+ * r3 = 0, and we know that the pointer in r0 is aligned to a word boundary.
+ */
+	cmp	r1, #16			@ 1 we can skip this chunk if we
+	blt	4f			@ 1 have < 16 bytes
+/*
+ * We need an extra register for this loop - save the return address and
+ * use the LR
+ */
+	str	lr, [sp, #-4]!		@ 1
+	mov	ip, r2			@ 1
+	mov	lr, r2			@ 1
+
+3:	subs	r1, r1, #64		@ 1 write 32 bytes out per loop
+	stmgeia	r0!, {r2, r3, ip, lr}	@ 4
+	stmgeia	r0!, {r2, r3, ip, lr}	@ 4
+	stmgeia	r0!, {r2, r3, ip, lr}	@ 4
+	stmgeia	r0!, {r2, r3, ip, lr}	@ 4
+	bgt	3b			@ 1
+	LOADREGS(eqfd, sp!, {pc})	@ 1/2 quick exit
+/*
+ * No need to correct the count; we're only testing bits from now on
+ */
+	tst	r1, #32			@ 1
+	stmneia	r0!, {r2, r3, ip, lr}	@ 4
+	stmneia	r0!, {r2, r3, ip, lr}	@ 4
+	tst	r1, #16			@ 1 16 bytes or more?
+	stmneia	r0!, {r2, r3, ip, lr}	@ 4
+	ldr	lr, [sp], #4		@ 1
+
+4:	tst	r1, #8			@ 1 8 bytes or more?
+	stmneia	r0!, {r2, r3}		@ 2
+	tst	r1, #4			@ 1 4 bytes or more?
+	strne	r2, [r0], #4		@ 1
+/*
+ * When we get here, we've got less than 4 bytes to zero.  We
+ * may have an unaligned pointer as well.
+ */
+5:	tst	r1, #2			@ 1 2 bytes or more?
+	strneb	r2, [r0], #1		@ 1
+	strneb	r2, [r0], #1		@ 1
+	tst	r1, #1			@ 1 a byte left over
+	strneb	r2, [r0], #1		@ 1
+	RETINSTR(mov,pc,lr)		@ 1
diff --git a/arch/arm26/lib/muldi3.c b/arch/arm26/lib/muldi3.c
new file mode 100644
index 00000000000..44d611b1cfd
--- /dev/null
+++ b/arch/arm26/lib/muldi3.c
@@ -0,0 +1,77 @@
+/* More subroutines needed by GCC output code on some machines.  */
+/* Compile this one with gcc.  */
+/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.
+ */
+/* support functions required by the kernel. based on code from gcc-2.95.3 */
+/* I Molton     29/07/01 */
+
+#include "gcclib.h"
+
+#define umul_ppmm(xh, xl, a, b) \
+{register USItype __t0, __t1, __t2;                                     \
+  __asm__ ("%@ Inlined umul_ppmm					\n\
+        mov     %2, %5, lsr #16						\n\
+        mov     %0, %6, lsr #16						\n\
+        bic     %3, %5, %2, lsl #16					\n\
+        bic     %4, %6, %0, lsl #16					\n\
+        mul     %1, %3, %4						\n\
+        mul     %4, %2, %4						\n\
+        mul     %3, %0, %3						\n\
+        mul     %0, %2, %0						\n\
+        adds    %3, %4, %3						\n\
+        addcs   %0, %0, #65536						\n\
+        adds    %1, %1, %3, lsl #16					\n\
+        adc     %0, %0, %3, lsr #16"                                    \
+           : "=&r" ((USItype) (xh)),                                    \
+             "=r" ((USItype) (xl)),                                     \
+             "=&r" (__t0), "=&r" (__t1), "=r" (__t2)                    \
+           : "r" ((USItype) (a)),                                       \
+             "r" ((USItype) (b)));}
+
+
+#define __umulsidi3(u, v) \
+  ({DIunion __w;                                                        \
+    umul_ppmm (__w.s.high, __w.s.low, u, v);                            \
+    __w.ll; })
+
+
+DItype
+__muldi3 (DItype u, DItype v)
+{
+  DIunion w;
+  DIunion uu, vv;
+
+  uu.ll = u,
+  vv.ll = v;
+
+  w.ll = __umulsidi3 (uu.s.low, vv.s.low);
+  w.s.high += ((USItype) uu.s.low * (USItype) vv.s.high
+               + (USItype) uu.s.high * (USItype) vv.s.low);
+
+  return w.ll;
+}
+
diff --git a/arch/arm26/lib/putuser.S b/arch/arm26/lib/putuser.S
new file mode 100644
index 00000000000..87588cbe46a
--- /dev/null
+++ b/arch/arm26/lib/putuser.S
@@ -0,0 +1,109 @@
+/*
+ *  linux/arch/arm26/lib/putuser.S
+ *
+ *  Copyright (C) 2001 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Idea from x86 version, (C) Copyright 1998 Linus Torvalds
+ *
+ * These functions have a non-standard call interface to make
+ * them more efficient, especially as they return an error
+ * value in addition to the "real" return value.
+ *
+ * __put_user_X
+ *
+ * Inputs:	r0 contains the address
+ *		r1, r2 contains the value
+ * Outputs:	r0 is the error code
+ *		lr corrupted
+ *
+ * No other registers must be altered.  (see include/asm-arm/uaccess.h
+ * for specific ASM register usage).
+ *
+ * Note that ADDR_LIMIT is either 0 or 0xc0000000
+ * Note also that it is intended that __put_user_bad is not global.
+ */
+#include <asm/asm_offsets.h>
+#include <asm/thread_info.h>
+#include <asm/errno.h>
+
+        .global __put_user_1
+__put_user_1:
+        bic     r2, sp, #0x1f00
+        bic     r2, r2, #0x00ff
+        str     lr, [sp, #-4]!
+        ldr     r2, [r2, #TI_ADDR_LIMIT]
+        sub     r2, r2, #1
+        cmp     r0, r2
+        bge     __put_user_bad
+1:      cmp     r0, #0x02000000
+        strlsbt r1, [r0]
+        strgeb  r1, [r0]
+        mov     r0, #0
+        ldmfd   sp!, {pc}^
+
+        .global __put_user_2
+__put_user_2:
+        bic     r2, sp, #0x1f00
+        bic     r2, r2, #0x00ff
+        str     lr, [sp, #-4]!
+        ldr     r2, [r2, #TI_ADDR_LIMIT]
+        sub     r2, r2, #2
+        cmp     r0, r2
+        bge     __put_user_bad
+2:      cmp     r0, #0x02000000
+        strlsbt r1, [r0], #1
+        strgeb  r1, [r0], #1
+        mov     r1, r1, lsr #8
+3:      strlsbt r1, [r0]
+        strgeb  r1, [r0]
+        mov     r0, #0
+        ldmfd   sp!, {pc}^
+
+        .global __put_user_4
+__put_user_4:
+        bic     r2, sp, #0x1f00
+        bic     r2, r2, #0x00ff
+        str     lr, [sp, #-4]!
+        ldr     r2, [r2, #TI_ADDR_LIMIT]
+        sub     r2, r2, #4
+        cmp     r0, r2
+4:      bge     __put_user_bad
+        cmp     r0, #0x02000000
+        strlst  r1, [r0]
+        strge   r1, [r0]
+        mov     r0, #0
+        ldmfd   sp!, {pc}^
+
+        .global __put_user_8
+__put_user_8:
+        bic     ip, sp, #0x1f00
+        bic     ip, ip, #0x00ff
+        str     lr, [sp, #-4]!
+        ldr     ip, [ip, #TI_ADDR_LIMIT]
+        sub     ip, ip, #8
+        cmp     r0, ip
+        bge     __put_user_bad
+        cmp     r0, #0x02000000
+5:      strlst  r1, [r0], #4
+6:      strlst  r2, [r0]
+        strge   r1, [r0], #4
+        strge   r2, [r0]
+        mov     r0, #0
+        ldmfd   sp!, {pc}^
+
+__put_user_bad:
+	mov	r0, #-EFAULT
+	mov	pc, lr
+
+.section __ex_table, "a"
+	.long	1b, __put_user_bad
+	.long	2b, __put_user_bad
+	.long	3b, __put_user_bad
+	.long	4b, __put_user_bad
+	.long	5b, __put_user_bad
+	.long	6b, __put_user_bad
+.previous
diff --git a/arch/arm26/lib/setbit.S b/arch/arm26/lib/setbit.S
new file mode 100644
index 00000000000..e180c1a1b2f
--- /dev/null
+++ b/arch/arm26/lib/setbit.S
@@ -0,0 +1,29 @@
+/*
+ *  linux/arch/arm26/lib/setbit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+		.text
+
+/*
+ * Purpose  : Function to set a bit
+ * Prototype: int set_bit(int bit, void *addr)
+ */
+ENTRY(_set_bit_be)
+		eor	r0, r0, #0x18		@ big endian byte ordering
+ENTRY(_set_bit_le)
+		and	r2, r0, #7
+		mov	r3, #1
+		mov	r3, r3, lsl r2
+		save_and_disable_irqs ip, r2
+		ldrb	r2, [r1, r0, lsr #3]
+		orr	r2, r2, r3
+		strb	r2, [r1, r0, lsr #3]
+		restore_irqs ip
+		RETINSTR(mov,pc,lr)
diff --git a/arch/arm26/lib/strchr.S b/arch/arm26/lib/strchr.S
new file mode 100644
index 00000000000..ecfff21aa7c
--- /dev/null
+++ b/arch/arm26/lib/strchr.S
@@ -0,0 +1,25 @@
+/*
+ *  linux/arch/arm26/lib/strchr.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+		.text
+		.align	5
+ENTRY(strchr)
+1:		ldrb	r2, [r0], #1
+		teq	r2, r1
+		teqne	r2, #0
+		bne	1b
+		teq	r2, #0
+		moveq	r0, #0
+		subne	r0, r0, #1
+		RETINSTR(mov,pc,lr)
diff --git a/arch/arm26/lib/strrchr.S b/arch/arm26/lib/strrchr.S
new file mode 100644
index 00000000000..db43b28e78d
--- /dev/null
+++ b/arch/arm26/lib/strrchr.S
@@ -0,0 +1,25 @@
+/*
+ *  linux/arch/arm26/lib/strrchr.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+		.text
+		.align	5
+ENTRY(strrchr)
+		mov	r3, #0
+1:		ldrb	r2, [r0], #1
+		teq	r2, r1
+		subeq	r3, r0, #1
+		teq	r2, #0
+		bne	1b
+		mov	r0, r3
+		RETINSTR(mov,pc,lr)
diff --git a/arch/arm26/lib/testchangebit.S b/arch/arm26/lib/testchangebit.S
new file mode 100644
index 00000000000..17049a2d93a
--- /dev/null
+++ b/arch/arm26/lib/testchangebit.S
@@ -0,0 +1,29 @@
+/*
+ *  linux/arch/arm26/lib/testchangebit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+
+ENTRY(_test_and_change_bit_be)
+		eor	r0, r0, #0x18		@ big endian byte ordering
+ENTRY(_test_and_change_bit_le)
+		add	r1, r1, r0, lsr #3
+		and	r3, r0, #7
+		mov	r0, #1
+		save_and_disable_irqs ip, r2
+		ldrb	r2, [r1]
+		tst	r2, r0, lsl r3
+		eor	r2, r2, r0, lsl r3
+		strb	r2, [r1]
+		restore_irqs ip
+		moveq	r0, #0
+		RETINSTR(mov,pc,lr)
+
+
diff --git a/arch/arm26/lib/testclearbit.S b/arch/arm26/lib/testclearbit.S
new file mode 100644
index 00000000000..2506bd743ab
--- /dev/null
+++ b/arch/arm26/lib/testclearbit.S
@@ -0,0 +1,29 @@
+/*
+ *  linux/arch/arm26/lib/testclearbit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+
+ENTRY(_test_and_clear_bit_be)
+		eor	r0, r0, #0x18		@ big endian byte ordering
+ENTRY(_test_and_clear_bit_le)
+		add	r1, r1, r0, lsr #3	@ Get byte offset
+		and	r3, r0, #7		@ Get bit offset
+		mov	r0, #1
+		save_and_disable_irqs ip, r2
+		ldrb	r2, [r1]
+		tst	r2, r0, lsl r3
+		bic	r2, r2, r0, lsl r3
+		strb	r2, [r1]
+		restore_irqs ip
+		moveq	r0, #0
+		RETINSTR(mov,pc,lr)
+
+
diff --git a/arch/arm26/lib/testsetbit.S b/arch/arm26/lib/testsetbit.S
new file mode 100644
index 00000000000..f827de64b22
--- /dev/null
+++ b/arch/arm26/lib/testsetbit.S
@@ -0,0 +1,29 @@
+/*
+ *  linux/arch/arm26/lib/testsetbit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+
+ENTRY(_test_and_set_bit_be)
+		eor	r0, r0, #0x18		@ big endian byte ordering
+ENTRY(_test_and_set_bit_le)
+		add	r1, r1, r0, lsr #3	@ Get byte offset
+		and	r3, r0, #7		@ Get bit offset
+		mov	r0, #1
+		save_and_disable_irqs ip, r2
+		ldrb	r2, [r1]
+		tst	r2, r0, lsl r3
+		orr	r2, r2, r0, lsl r3
+		strb	r2, [r1]
+		restore_irqs ip
+		moveq	r0, #0
+		RETINSTR(mov,pc,lr)
+
+
diff --git a/arch/arm26/lib/uaccess-kernel.S b/arch/arm26/lib/uaccess-kernel.S
new file mode 100644
index 00000000000..3950a1f6bc9
--- /dev/null
+++ b/arch/arm26/lib/uaccess-kernel.S
@@ -0,0 +1,173 @@
+/*
+ *  linux/arch/arm26/lib/uaccess-kernel.S
+ *
+ *  Copyright (C) 1998 Russell King
+ *
+ *  Note!  Some code fragments found in here have a special calling
+ *  convention - they are not APCS compliant!
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+		.text
+
+//FIXME - surely this can be done in C not asm, removing the problem of keeping C and asm in sync? (this is a struct uaccess_t)
+
+		.globl	uaccess_kernel
+uaccess_kernel:
+		.word	uaccess_kernel_put_byte
+		.word	uaccess_kernel_get_byte
+		.word	uaccess_kernel_put_half
+		.word	uaccess_kernel_get_half
+		.word	uaccess_kernel_put_word
+		.word	uaccess_kernel_get_word
+		.word	uaccess_kernel_put_dword
+		.word	uaccess_kernel_copy
+		.word	uaccess_kernel_copy
+		.word	uaccess_kernel_clear
+		.word	uaccess_kernel_strncpy
+		.word	uaccess_kernel_strnlen
+
+@ In : r0 = x, r1 = addr, r2 = error
+@ Out: r2 = error
+uaccess_kernel_put_byte:
+		stmfd	sp!, {lr}
+		strb	r0, [r1]
+		ldmfd	sp!, {pc}^
+
+@ In : r0 = x, r1 = addr, r2 = error
+@ Out: r2 = error
+uaccess_kernel_put_half:
+		stmfd	sp!, {lr}
+		strb	r0, [r1]
+		mov	r0, r0, lsr #8
+		strb	r0, [r1, #1]
+		ldmfd	sp!, {pc}^
+
+@ In : r0 = x, r1 = addr, r2 = error
+@ Out: r2 = error
+uaccess_kernel_put_word:
+		stmfd	sp!, {lr}
+		str	r0, [r1]
+		ldmfd	sp!, {pc}^
+
+@ In : r0 = x, r1 = addr, r2 = error
+@ Out: r2 = error
+uaccess_kernel_put_dword:
+                stmfd   sp!, {lr}
+                str     r0, [r1], #4
+		str	r0, [r1], #0
+                ldmfd   sp!, {pc}^
+
+@ In : r0 = addr, r1 = error
+@ Out: r0 = x, r1 = error
+uaccess_kernel_get_byte:
+		stmfd	sp!, {lr}
+		ldrb	r0, [r0]
+		ldmfd	sp!, {pc}^
+
+@ In : r0 = addr, r1 = error
+@ Out: r0 = x, r1 = error
+uaccess_kernel_get_half:
+		stmfd	sp!, {lr}
+		ldr	r0, [r0]
+		mov	r0, r0, lsl #16
+		mov	r0, r0, lsr #16
+		ldmfd	sp!, {pc}^
+
+@ In : r0 = addr, r1 = error
+@ Out: r0 = x, r1 = error
+uaccess_kernel_get_word:
+		stmfd	sp!, {lr}
+		ldr	r0, [r0]
+		ldmfd	sp!, {pc}^
+
+
+/* Prototype: int uaccess_kernel_copy(void *to, const char *from, size_t n)
+ * Purpose  : copy a block to kernel memory from kernel memory
+ * Params   : to   - kernel memory
+ *          : from - kernel memory
+ *          : n    - number of bytes to copy
+ * Returns  : Number of bytes NOT copied.
+ */
+uaccess_kernel_copy:
+		stmfd	sp!, {lr}
+		bl	memcpy
+		mov	r0, #0
+		ldmfd	sp!, {pc}^
+
+/* Prototype: int uaccess_kernel_clear(void *addr, size_t sz)
+ * Purpose  : clear some kernel memory
+ * Params   : addr - kernel memory address to clear
+ *          : sz   - number of bytes to clear
+ * Returns  : number of bytes NOT cleared
+ */
+uaccess_kernel_clear:
+		stmfd	sp!, {lr}
+		mov	r2, #0
+		cmp	r1, #4
+		blt	2f
+		ands	ip, r0, #3
+		beq	1f
+		cmp	ip, #1
+		strb	r2, [r0], #1
+		strleb	r2, [r0], #1
+		strltb	r2, [r0], #1
+		rsb	ip, ip, #4
+		sub	r1, r1, ip		@  7  6  5  4  3  2  1
+1:		subs	r1, r1, #8		@ -1 -2 -3 -4 -5 -6 -7
+		bmi	2f
+		str	r2, [r0], #4
+		str	r2, [r0], #4
+		b	1b
+2:		adds	r1, r1, #4		@  3  2  1  0 -1 -2 -3
+		strpl	r2, [r0], #4
+		tst	r1, #2			@ 1x 1x 0x 0x 1x 1x 0x
+		strneb	r2, [r0], #1
+		strneb	r2, [r0], #1
+		tst	r1, #1			@ x1 x0 x1 x0 x1 x0 x1
+		strneb	r2, [r0], #1
+		mov	r0, #0
+		ldmfd	sp!, {pc}^
+
+/* Prototype: size_t uaccess_kernel_strncpy(char *dst, char *src, size_t len)
+ * Purpose  : copy a string from kernel memory to kernel memory
+ * Params   : dst - kernel memory destination
+ *          : src - kernel memory source
+ *          : len - maximum length of string
+ * Returns  : number of characters copied
+ */
+uaccess_kernel_strncpy:
+		stmfd	sp!, {lr}
+		mov	ip, r2
+1:		subs	r2, r2, #1
+		bmi	2f
+		ldrb	r3, [r1], #1
+		strb	r3, [r0], #1
+		teq	r3, #0
+		bne	1b
+2:		subs	r0, ip, r2
+		ldmfd	sp!, {pc}^
+
+/* Prototype: int uaccess_kernel_strlen(char *str, long n)
+ * Purpose  : get length of a string in kernel memory
+ * Params   : str - address of string in kernel memory
+ * Returns  : length of string *including terminator*,
+ *            or zero on exception, or n + 1 if too long
+ */
+uaccess_kernel_strnlen:
+		stmfd	sp!, {lr}
+		mov	r2, r0
+1:		ldrb	r1, [r0], #1
+		teq	r1, #0
+		beq	2f
+		subs	r1, r1, #1
+		bne	1b
+		add	r0, r0, #1
+2:		sub	r0, r0, r2
+		ldmfd	sp!, {pc}^
+
diff --git a/arch/arm26/lib/uaccess-user.S b/arch/arm26/lib/uaccess-user.S
new file mode 100644
index 00000000000..130b8f28610
--- /dev/null
+++ b/arch/arm26/lib/uaccess-user.S
@@ -0,0 +1,718 @@
+/*
+ *  linux/arch/arm26/lib/uaccess-user.S
+ *
+ *  Copyright (C) 1995, 1996,1997,1998 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Routines to block copy data to/from user memory
+ *   These are highly optimised both for the 4k page size
+ *   and for various alignments.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/errno.h>
+#include <asm/page.h>
+
+		.text
+
+//FIXME - surely this can be done in C not asm, removing the problem of keeping C and asm in sync? (this is a struct uaccess_t)
+                .globl  uaccess_user
+uaccess_user:
+                .word   uaccess_user_put_byte
+                .word   uaccess_user_get_byte
+                .word   uaccess_user_put_half
+                .word   uaccess_user_get_half
+                .word   uaccess_user_put_word
+                .word   uaccess_user_get_word
+		.word   uaccess_user_put_dword
+                .word   uaccess_user_copy_from_user
+                .word   uaccess_user_copy_to_user
+                .word   uaccess_user_clear_user
+                .word   uaccess_user_strncpy_from_user
+                .word   uaccess_user_strnlen_user
+
+
+@ In : r0 = x, r1 = addr, r2 = error
+@ Out: r2 = error
+uaccess_user_put_byte:
+                stmfd   sp!, {lr}
+USER(           strbt   r0, [r1])
+                ldmfd   sp!, {pc}^
+
+@ In : r0 = x, r1 = addr, r2 = error
+@ Out: r2 = error
+uaccess_user_put_half:
+                stmfd   sp!, {lr}
+USER(           strbt   r0, [r1], #1)
+                mov     r0, r0, lsr #8
+USER(           strbt   r0, [r1])
+                ldmfd   sp!, {pc}^
+
+@ In : r0 = x, r1 = addr, r2 = error
+@ Out: r2 = error
+uaccess_user_put_word:
+                stmfd   sp!, {lr}
+USER(           strt    r0, [r1])
+                ldmfd   sp!, {pc}^
+
+@ In : r0 = x, r1 = addr, r2 = error
+@ Out: r2 = error
+uaccess_user_put_dword:
+                stmfd   sp!, {lr}
+USER(           strt    r0, [r1], #4)
+USER(           strt    r0, [r1], #0)
+                ldmfd   sp!, {pc}^
+
+9001:           mov     r2, #-EFAULT
+                ldmfd   sp!, {pc}^
+
+
+@ In : r0 = addr, r1 = error
+@ Out: r0 = x, r1 = error
+uaccess_user_get_byte:
+                stmfd   sp!, {lr}
+USER(           ldrbt   r0, [r0])
+                ldmfd   sp!, {pc}^
+
+@ In : r0 = addr, r1 = error
+@ Out: r0 = x, r1 = error
+uaccess_user_get_half:
+                stmfd   sp!, {lr}
+USER(           ldrt    r0, [r0])
+                mov     r0, r0, lsl #16
+                mov     r0, r0, lsr #16
+                ldmfd   sp!, {pc}^
+
+@ In : r0 = addr, r1 = error
+@ Out: r0 = x, r1 = error
+uaccess_user_get_word:
+                stmfd   sp!, {lr}
+USER(           ldrt    r0, [r0])
+                ldmfd   sp!, {pc}^
+
+9001:           mov     r1, #-EFAULT
+                ldmfd   sp!, {pc}^
+
+/* Prototype: int uaccess_user_copy_to_user(void *to, const char *from, size_t n)
+ * Purpose  : copy a block to user memory from kernel memory
+ * Params   : to   - user memory
+ *          : from - kernel memory
+ *          : n    - number of bytes to copy
+ * Returns  : Number of bytes NOT copied.
+ */
+
+.c2u_dest_not_aligned:
+		rsb	ip, ip, #4
+		cmp	ip, #2
+		ldrb	r3, [r1], #1
+USER(		strbt	r3, [r0], #1)			@ May fault
+		ldrgeb	r3, [r1], #1
+USER(		strgebt	r3, [r0], #1)			@ May fault
+		ldrgtb	r3, [r1], #1
+USER(		strgtbt	r3, [r0], #1)			@ May fault
+		sub	r2, r2, ip
+		b	.c2u_dest_aligned
+
+ENTRY(uaccess_user_copy_to_user)
+		stmfd	sp!, {r2, r4 - r7, lr}
+		cmp	r2, #4
+		blt	.c2u_not_enough
+		ands	ip, r0, #3
+		bne	.c2u_dest_not_aligned
+.c2u_dest_aligned:
+
+		ands	ip, r1, #3
+		bne	.c2u_src_not_aligned
+/*
+ * Seeing as there has to be at least 8 bytes to copy, we can
+ * copy one word, and force a user-mode page fault...
+ */
+
+.c2u_0fupi:	subs	r2, r2, #4
+		addmi	ip, r2, #4
+		bmi	.c2u_0nowords
+		ldr	r3, [r1], #4
+USER(		strt	r3, [r0], #4)			@ May fault
+		mov	ip, r0, lsl #32 - PAGE_SHIFT	@ On each page, use a ld/st??t instruction
+		rsb	ip, ip, #0
+		movs	ip, ip, lsr #32 - PAGE_SHIFT
+		beq	.c2u_0fupi
+/*
+ * ip = max no. of bytes to copy before needing another "strt" insn
+ */
+		cmp	r2, ip
+		movlt	ip, r2
+		sub	r2, r2, ip
+		subs	ip, ip, #32
+		blt	.c2u_0rem8lp
+
+.c2u_0cpy8lp:	ldmia	r1!, {r3 - r6}
+		stmia	r0!, {r3 - r6}			@ Shouldnt fault
+		ldmia	r1!, {r3 - r6}
+		stmia	r0!, {r3 - r6}			@ Shouldnt fault
+		subs	ip, ip, #32
+		bpl	.c2u_0cpy8lp
+.c2u_0rem8lp:	cmn	ip, #16
+		ldmgeia	r1!, {r3 - r6}
+		stmgeia	r0!, {r3 - r6}			@ Shouldnt fault
+		tst	ip, #8
+		ldmneia	r1!, {r3 - r4}
+		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
+		tst	ip, #4
+		ldrne	r3, [r1], #4
+		strnet	r3, [r0], #4			@ Shouldnt fault
+		ands	ip, ip, #3
+		beq	.c2u_0fupi
+.c2u_0nowords:	teq	ip, #0
+		beq	.c2u_finished
+.c2u_nowords:	cmp	ip, #2
+		ldrb	r3, [r1], #1
+USER(		strbt	r3, [r0], #1)			@ May fault
+		ldrgeb	r3, [r1], #1
+USER(		strgebt	r3, [r0], #1)			@ May fault
+		ldrgtb	r3, [r1], #1
+USER(		strgtbt	r3, [r0], #1)			@ May fault
+		b	.c2u_finished
+
+.c2u_not_enough:
+		movs	ip, r2
+		bne	.c2u_nowords
+.c2u_finished:	mov	r0, #0
+		LOADREGS(fd,sp!,{r2, r4 - r7, pc})
+
+.c2u_src_not_aligned:
+		bic	r1, r1, #3
+		ldr	r7, [r1], #4
+		cmp	ip, #2
+		bgt	.c2u_3fupi
+		beq	.c2u_2fupi
+.c2u_1fupi:	subs	r2, r2, #4
+		addmi	ip, r2, #4
+		bmi	.c2u_1nowords
+		mov	r3, r7, pull #8
+		ldr	r7, [r1], #4
+		orr	r3, r3, r7, push #24
+USER(		strt	r3, [r0], #4)			@ May fault
+		mov	ip, r0, lsl #32 - PAGE_SHIFT
+		rsb	ip, ip, #0
+		movs	ip, ip, lsr #32 - PAGE_SHIFT
+		beq	.c2u_1fupi
+		cmp	r2, ip
+		movlt	ip, r2
+		sub	r2, r2, ip
+		subs	ip, ip, #16
+		blt	.c2u_1rem8lp
+
+.c2u_1cpy8lp:	mov	r3, r7, pull #8
+		ldmia	r1!, {r4 - r7}
+		orr	r3, r3, r4, push #24
+		mov	r4, r4, pull #8
+		orr	r4, r4, r5, push #24
+		mov	r5, r5, pull #8
+		orr	r5, r5, r6, push #24
+		mov	r6, r6, pull #8
+		orr	r6, r6, r7, push #24
+		stmia	r0!, {r3 - r6}			@ Shouldnt fault
+		subs	ip, ip, #16
+		bpl	.c2u_1cpy8lp
+.c2u_1rem8lp:	tst	ip, #8
+		movne	r3, r7, pull #8
+		ldmneia	r1!, {r4, r7}
+		orrne	r3, r3, r4, push #24
+		movne	r4, r4, pull #8
+		orrne	r4, r4, r7, push #24
+		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
+		tst	ip, #4
+		movne	r3, r7, pull #8
+		ldrne	r7, [r1], #4
+		orrne	r3, r3, r7, push #24
+		strnet	r3, [r0], #4			@ Shouldnt fault
+		ands	ip, ip, #3
+		beq	.c2u_1fupi
+.c2u_1nowords:	mov	r3, r7, lsr #byte(1)
+		teq	ip, #0
+		beq	.c2u_finished
+		cmp	ip, #2
+USER(		strbt	r3, [r0], #1)			@ May fault
+		movge	r3, r7, lsr #byte(2)
+USER(		strgebt	r3, [r0], #1)			@ May fault
+		movgt	r3, r7, lsr #byte(3)
+USER(		strgtbt	r3, [r0], #1)			@ May fault
+		b	.c2u_finished
+
+.c2u_2fupi:	subs	r2, r2, #4
+		addmi	ip, r2, #4
+		bmi	.c2u_2nowords
+		mov	r3, r7, pull #16
+		ldr	r7, [r1], #4
+		orr	r3, r3, r7, push #16
+USER(		strt	r3, [r0], #4)			@ May fault
+		mov	ip, r0, lsl #32 - PAGE_SHIFT
+		rsb	ip, ip, #0
+		movs	ip, ip, lsr #32 - PAGE_SHIFT
+		beq	.c2u_2fupi
+		cmp	r2, ip
+		movlt	ip, r2
+		sub	r2, r2, ip
+		subs	ip, ip, #16
+		blt	.c2u_2rem8lp
+
+.c2u_2cpy8lp:	mov	r3, r7, pull #16
+		ldmia	r1!, {r4 - r7}
+		orr	r3, r3, r4, push #16
+		mov	r4, r4, pull #16
+		orr	r4, r4, r5, push #16
+		mov	r5, r5, pull #16
+		orr	r5, r5, r6, push #16
+		mov	r6, r6, pull #16
+		orr	r6, r6, r7, push #16
+		stmia	r0!, {r3 - r6}			@ Shouldnt fault
+		subs	ip, ip, #16
+		bpl	.c2u_2cpy8lp
+.c2u_2rem8lp:	tst	ip, #8
+		movne	r3, r7, pull #16
+		ldmneia	r1!, {r4, r7}
+		orrne	r3, r3, r4, push #16
+		movne	r4, r4, pull #16
+		orrne	r4, r4, r7, push #16
+		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
+		tst	ip, #4
+		movne	r3, r7, pull #16
+		ldrne	r7, [r1], #4
+		orrne	r3, r3, r7, push #16
+		strnet	r3, [r0], #4			@ Shouldnt fault
+		ands	ip, ip, #3
+		beq	.c2u_2fupi
+.c2u_2nowords:	mov	r3, r7, lsr #byte(2)
+		teq	ip, #0
+		beq	.c2u_finished
+		cmp	ip, #2
+USER(		strbt	r3, [r0], #1)			@ May fault
+		movge	r3, r7, lsr #byte(3)
+USER(		strgebt	r3, [r0], #1)			@ May fault
+		ldrgtb	r3, [r1], #0
+USER(		strgtbt	r3, [r0], #1)			@ May fault
+		b	.c2u_finished
+
+.c2u_3fupi:	subs	r2, r2, #4
+		addmi	ip, r2, #4
+		bmi	.c2u_3nowords
+		mov	r3, r7, pull #24
+		ldr	r7, [r1], #4
+		orr	r3, r3, r7, push #8
+USER(		strt	r3, [r0], #4)			@ May fault
+		mov	ip, r0, lsl #32 - PAGE_SHIFT
+		rsb	ip, ip, #0
+		movs	ip, ip, lsr #32 - PAGE_SHIFT
+		beq	.c2u_3fupi
+		cmp	r2, ip
+		movlt	ip, r2
+		sub	r2, r2, ip
+		subs	ip, ip, #16
+		blt	.c2u_3rem8lp
+
+.c2u_3cpy8lp:	mov	r3, r7, pull #24
+		ldmia	r1!, {r4 - r7}
+		orr	r3, r3, r4, push #8
+		mov	r4, r4, pull #24
+		orr	r4, r4, r5, push #8
+		mov	r5, r5, pull #24
+		orr	r5, r5, r6, push #8
+		mov	r6, r6, pull #24
+		orr	r6, r6, r7, push #8
+		stmia	r0!, {r3 - r6}			@ Shouldnt fault
+		subs	ip, ip, #16
+		bpl	.c2u_3cpy8lp
+.c2u_3rem8lp:	tst	ip, #8
+		movne	r3, r7, pull #24
+		ldmneia	r1!, {r4, r7}
+		orrne	r3, r3, r4, push #8
+		movne	r4, r4, pull #24
+		orrne	r4, r4, r7, push #8
+		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
+		tst	ip, #4
+		movne	r3, r7, pull #24
+		ldrne	r7, [r1], #4
+		orrne	r3, r3, r7, push #8
+		strnet	r3, [r0], #4			@ Shouldnt fault
+		ands	ip, ip, #3
+		beq	.c2u_3fupi
+.c2u_3nowords:	mov	r3, r7, lsr #byte(3)
+		teq	ip, #0
+		beq	.c2u_finished
+		cmp	ip, #2
+USER(		strbt	r3, [r0], #1)			@ May fault
+		ldrgeb	r3, [r1], #1
+USER(		strgebt	r3, [r0], #1)			@ May fault
+		ldrgtb	r3, [r1], #0
+USER(		strgtbt	r3, [r0], #1)			@ May fault
+		b	.c2u_finished
+
+		.section .fixup,"ax"
+		.align	0
+9001:		LOADREGS(fd,sp!, {r0, r4 - r7, pc})
+		.previous
+
+/* Prototype: unsigned long uaccess_user_copy_from_user(void *to,const void *from,unsigned long n);
+ * Purpose  : copy a block from user memory to kernel memory
+ * Params   : to   - kernel memory
+ *          : from - user memory
+ *          : n    - number of bytes to copy
+ * Returns  : Number of bytes NOT copied.
+ */
+.cfu_dest_not_aligned:
+		rsb	ip, ip, #4
+		cmp	ip, #2
+USER(		ldrbt	r3, [r1], #1)			@ May fault
+		strb	r3, [r0], #1
+USER(		ldrgebt	r3, [r1], #1)			@ May fault
+		strgeb	r3, [r0], #1
+USER(		ldrgtbt	r3, [r1], #1)			@ May fault
+		strgtb	r3, [r0], #1
+		sub	r2, r2, ip
+		b	.cfu_dest_aligned
+
+ENTRY(uaccess_user_copy_from_user)
+		stmfd	sp!, {r0, r2, r4 - r7, lr}
+		cmp	r2, #4
+		blt	.cfu_not_enough
+		ands	ip, r0, #3
+		bne	.cfu_dest_not_aligned
+.cfu_dest_aligned:
+		ands	ip, r1, #3
+		bne	.cfu_src_not_aligned
+/*
+ * Seeing as there has to be at least 8 bytes to copy, we can
+ * copy one word, and force a user-mode page fault...
+ */
+
+.cfu_0fupi:	subs	r2, r2, #4
+		addmi	ip, r2, #4
+		bmi	.cfu_0nowords
+USER(		ldrt	r3, [r1], #4)
+		str	r3, [r0], #4
+		mov	ip, r1, lsl #32 - PAGE_SHIFT	@ On each page, use a ld/st??t instruction
+		rsb	ip, ip, #0
+		movs	ip, ip, lsr #32 - PAGE_SHIFT
+		beq	.cfu_0fupi
+/*
+ * ip = max no. of bytes to copy before needing another "strt" insn
+ */
+		cmp	r2, ip
+		movlt	ip, r2
+		sub	r2, r2, ip
+		subs	ip, ip, #32
+		blt	.cfu_0rem8lp
+
+.cfu_0cpy8lp:	ldmia	r1!, {r3 - r6}			@ Shouldnt fault
+		stmia	r0!, {r3 - r6}
+		ldmia	r1!, {r3 - r6}			@ Shouldnt fault
+		stmia	r0!, {r3 - r6}
+		subs	ip, ip, #32
+		bpl	.cfu_0cpy8lp
+.cfu_0rem8lp:	cmn	ip, #16
+		ldmgeia	r1!, {r3 - r6}			@ Shouldnt fault
+		stmgeia	r0!, {r3 - r6}
+		tst	ip, #8
+		ldmneia	r1!, {r3 - r4}			@ Shouldnt fault
+		stmneia	r0!, {r3 - r4}
+		tst	ip, #4
+		ldrnet	r3, [r1], #4			@ Shouldnt fault
+		strne	r3, [r0], #4
+		ands	ip, ip, #3
+		beq	.cfu_0fupi
+.cfu_0nowords:	teq	ip, #0
+		beq	.cfu_finished
+.cfu_nowords:	cmp	ip, #2
+USER(		ldrbt	r3, [r1], #1)			@ May fault
+		strb	r3, [r0], #1
+USER(		ldrgebt	r3, [r1], #1)			@ May fault
+		strgeb	r3, [r0], #1
+USER(		ldrgtbt	r3, [r1], #1)			@ May fault
+		strgtb	r3, [r0], #1
+		b	.cfu_finished
+
+.cfu_not_enough:
+		movs	ip, r2
+		bne	.cfu_nowords
+.cfu_finished:	mov	r0, #0
+		add	sp, sp, #8
+		LOADREGS(fd,sp!,{r4 - r7, pc})
+
+.cfu_src_not_aligned:
+		bic	r1, r1, #3
+USER(		ldrt	r7, [r1], #4)			@ May fault
+		cmp	ip, #2
+		bgt	.cfu_3fupi
+		beq	.cfu_2fupi
+.cfu_1fupi:	subs	r2, r2, #4
+		addmi	ip, r2, #4
+		bmi	.cfu_1nowords
+		mov	r3, r7, pull #8
+USER(		ldrt	r7, [r1], #4)			@ May fault
+		orr	r3, r3, r7, push #24
+		str	r3, [r0], #4
+		mov	ip, r1, lsl #32 - PAGE_SHIFT
+		rsb	ip, ip, #0
+		movs	ip, ip, lsr #32 - PAGE_SHIFT
+		beq	.cfu_1fupi
+		cmp	r2, ip
+		movlt	ip, r2
+		sub	r2, r2, ip
+		subs	ip, ip, #16
+		blt	.cfu_1rem8lp
+
+.cfu_1cpy8lp:	mov	r3, r7, pull #8
+		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
+		orr	r3, r3, r4, push #24
+		mov	r4, r4, pull #8
+		orr	r4, r4, r5, push #24
+		mov	r5, r5, pull #8
+		orr	r5, r5, r6, push #24
+		mov	r6, r6, pull #8
+		orr	r6, r6, r7, push #24
+		stmia	r0!, {r3 - r6}
+		subs	ip, ip, #16
+		bpl	.cfu_1cpy8lp
+.cfu_1rem8lp:	tst	ip, #8
+		movne	r3, r7, pull #8
+		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
+		orrne	r3, r3, r4, push #24
+		movne	r4, r4, pull #8
+		orrne	r4, r4, r7, push #24
+		stmneia	r0!, {r3 - r4}
+		tst	ip, #4
+		movne	r3, r7, pull #8
+USER(		ldrnet	r7, [r1], #4)			@ May fault
+		orrne	r3, r3, r7, push #24
+		strne	r3, [r0], #4
+		ands	ip, ip, #3
+		beq	.cfu_1fupi
+.cfu_1nowords:	mov	r3, r7, lsr #byte(1)
+		teq	ip, #0
+		beq	.cfu_finished
+		cmp	ip, #2
+		strb	r3, [r0], #1
+		movge	r3, r7, lsr #byte(2)
+		strgeb	r3, [r0], #1
+		movgt	r3, r7, lsr #byte(3)
+		strgtb	r3, [r0], #1
+		b	.cfu_finished
+
+.cfu_2fupi:	subs	r2, r2, #4
+		addmi	ip, r2, #4
+		bmi	.cfu_2nowords
+		mov	r3, r7, pull #16
+USER(		ldrt	r7, [r1], #4)			@ May fault
+		orr	r3, r3, r7, push #16
+		str	r3, [r0], #4
+		mov	ip, r1, lsl #32 - PAGE_SHIFT
+		rsb	ip, ip, #0
+		movs	ip, ip, lsr #32 - PAGE_SHIFT
+		beq	.cfu_2fupi
+		cmp	r2, ip
+		movlt	ip, r2
+		sub	r2, r2, ip
+		subs	ip, ip, #16
+		blt	.cfu_2rem8lp
+
+.cfu_2cpy8lp:	mov	r3, r7, pull #16
+		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
+		orr	r3, r3, r4, push #16
+		mov	r4, r4, pull #16
+		orr	r4, r4, r5, push #16
+		mov	r5, r5, pull #16
+		orr	r5, r5, r6, push #16
+		mov	r6, r6, pull #16
+		orr	r6, r6, r7, push #16
+		stmia	r0!, {r3 - r6}
+		subs	ip, ip, #16
+		bpl	.cfu_2cpy8lp
+.cfu_2rem8lp:	tst	ip, #8
+		movne	r3, r7, pull #16
+		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
+		orrne	r3, r3, r4, push #16
+		movne	r4, r4, pull #16
+		orrne	r4, r4, r7, push #16
+		stmneia	r0!, {r3 - r4}
+		tst	ip, #4
+		movne	r3, r7, pull #16
+USER(		ldrnet	r7, [r1], #4)			@ May fault
+		orrne	r3, r3, r7, push #16
+		strne	r3, [r0], #4
+		ands	ip, ip, #3
+		beq	.cfu_2fupi
+.cfu_2nowords:	mov	r3, r7, lsr #byte(2)
+		teq	ip, #0
+		beq	.cfu_finished
+		cmp	ip, #2
+		strb	r3, [r0], #1
+		movge	r3, r7, lsr #byte(3)
+		strgeb	r3, [r0], #1
+USER(		ldrgtbt	r3, [r1], #0)			@ May fault
+		strgtb	r3, [r0], #1
+		b	.cfu_finished
+
+.cfu_3fupi:	subs	r2, r2, #4
+		addmi	ip, r2, #4
+		bmi	.cfu_3nowords
+		mov	r3, r7, pull #24
+USER(		ldrt	r7, [r1], #4)			@ May fault
+		orr	r3, r3, r7, push #8
+		str	r3, [r0], #4
+		mov	ip, r1, lsl #32 - PAGE_SHIFT
+		rsb	ip, ip, #0
+		movs	ip, ip, lsr #32 - PAGE_SHIFT
+		beq	.cfu_3fupi
+		cmp	r2, ip
+		movlt	ip, r2
+		sub	r2, r2, ip
+		subs	ip, ip, #16
+		blt	.cfu_3rem8lp
+
+.cfu_3cpy8lp:	mov	r3, r7, pull #24
+		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
+		orr	r3, r3, r4, push #8
+		mov	r4, r4, pull #24
+		orr	r4, r4, r5, push #8
+		mov	r5, r5, pull #24
+		orr	r5, r5, r6, push #8
+		mov	r6, r6, pull #24
+		orr	r6, r6, r7, push #8
+		stmia	r0!, {r3 - r6}
+		subs	ip, ip, #16
+		bpl	.cfu_3cpy8lp
+.cfu_3rem8lp:	tst	ip, #8
+		movne	r3, r7, pull #24
+		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
+		orrne	r3, r3, r4, push #8
+		movne	r4, r4, pull #24
+		orrne	r4, r4, r7, push #8
+		stmneia	r0!, {r3 - r4}
+		tst	ip, #4
+		movne	r3, r7, pull #24
+USER(		ldrnet	r7, [r1], #4)			@ May fault
+		orrne	r3, r3, r7, push #8
+		strne	r3, [r0], #4
+		ands	ip, ip, #3
+		beq	.cfu_3fupi
+.cfu_3nowords:	mov	r3, r7, lsr #byte(3)
+		teq	ip, #0
+		beq	.cfu_finished
+		cmp	ip, #2
+		strb	r3, [r0], #1
+USER(		ldrgebt	r3, [r1], #1)			@ May fault
+		strgeb	r3, [r0], #1
+USER(		ldrgtbt	r3, [r1], #1)			@ May fault
+		strgtb	r3, [r0], #1
+		b	.cfu_finished
+
+		.section .fixup,"ax"
+		.align	0
+		/*
+		 * We took an exception.  r0 contains a pointer to
+		 * the byte not copied.
+		 */
+9001:		ldr	r2, [sp], #4			@ void *to
+		sub	r2, r0, r2			@ bytes copied
+		ldr	r1, [sp], #4			@ unsigned long count
+		subs	r4, r1, r2			@ bytes left to copy
+		movne	r1, r4
+		blne	__memzero
+		mov	r0, r4
+		LOADREGS(fd,sp!, {r4 - r7, pc})
+		.previous
+
+/* Prototype: int uaccess_user_clear_user(void *addr, size_t sz)
+ * Purpose  : clear some user memory
+ * Params   : addr - user memory address to clear
+ *          : sz   - number of bytes to clear
+ * Returns  : number of bytes NOT cleared
+ */
+ENTRY(uaccess_user_clear_user)
+		stmfd	sp!, {r1, lr}
+		mov	r2, #0
+		cmp	r1, #4
+		blt	2f
+		ands	ip, r0, #3
+		beq	1f
+		cmp	ip, #2
+USER(		strbt	r2, [r0], #1)
+USER(		strlebt	r2, [r0], #1)
+USER(		strltbt	r2, [r0], #1)
+		rsb	ip, ip, #4
+		sub	r1, r1, ip		@  7  6  5  4  3  2  1
+1:		subs	r1, r1, #8		@ -1 -2 -3 -4 -5 -6 -7
+USER(		strplt	r2, [r0], #4)
+USER(		strplt	r2, [r0], #4)
+		bpl	1b
+		adds	r1, r1, #4		@  3  2  1  0 -1 -2 -3
+USER(		strplt	r2, [r0], #4)
+2:		tst	r1, #2			@ 1x 1x 0x 0x 1x 1x 0x
+USER(		strnebt	r2, [r0], #1)
+USER(		strnebt	r2, [r0], #1)
+		tst	r1, #1			@ x1 x0 x1 x0 x1 x0 x1
+USER(		strnebt	r2, [r0], #1)
+		mov	r0, #0
+		LOADREGS(fd,sp!, {r1, pc})
+
+		.section .fixup,"ax"
+		.align	0
+9001:		LOADREGS(fd,sp!, {r0, pc})
+		.previous
+
+/*
+ * Copy a string from user space to kernel space.
+ *  r0 = dst, r1 = src, r2 = byte length
+ * returns the number of characters copied (strlen of copied string),
+ *  -EFAULT on exception, or "len" if we fill the whole buffer
+ */
+ENTRY(uaccess_user_strncpy_from_user)
+        save_lr
+        mov     ip, r1
+1:      subs    r2, r2, #1
+USER(   ldrplbt r3, [r1], #1)
+        bmi     2f
+        strb    r3, [r0], #1
+        teq     r3, #0
+        bne     1b
+        sub     r1, r1, #1      @ take NUL character out of count
+2:      sub     r0, r1, ip
+        restore_pc
+
+        .section .fixup,"ax"
+        .align  0
+9001:   mov     r3, #0
+        strb    r3, [r0, #0]    @ null terminate
+        mov     r0, #-EFAULT
+        restore_pc
+        .previous
+
+/* Prototype: unsigned long uaccess_user_strnlen_user(const char *str, long n)
+ * Purpose  : get length of a string in user memory
+ * Params   : str - address of string in user memory
+ * Returns  : length of string *including terminator*
+ *            or zero on exception, or n + 1 if too long
+ */
+ENTRY(uaccess_user_strnlen_user)
+        save_lr
+        mov     r2, r0
+1:
+USER(   ldrbt   r3, [r0], #1)
+        teq     r3, #0
+        beq     2f
+        subs    r1, r1, #1
+        bne     1b
+        add     r0, r0, #1
+2:      sub     r0, r0, r2
+        restore_pc
+
+        .section .fixup,"ax"
+        .align  0
+9001:   mov     r0, #0
+        restore_pc
+        .previous
+
diff --git a/arch/arm26/lib/ucmpdi2.c b/arch/arm26/lib/ucmpdi2.c
new file mode 100644
index 00000000000..6c6ae63efa0
--- /dev/null
+++ b/arch/arm26/lib/ucmpdi2.c
@@ -0,0 +1,51 @@
+/* More subroutines needed by GCC output code on some machines.  */
+/* Compile this one with gcc.  */
+/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.
+ */
+/* support functions required by the kernel. based on code from gcc-2.95.3 */
+/* I Molton     29/07/01 */
+
+#include "gcclib.h"
+
+word_type
+__ucmpdi2 (DItype a, DItype b)
+{
+  DIunion au, bu;
+
+  au.ll = a, bu.ll = b;
+
+  if ((USItype) au.s.high < (USItype) bu.s.high)
+    return 0;
+  else if ((USItype) au.s.high > (USItype) bu.s.high)
+    return 2;
+  if ((USItype) au.s.low < (USItype) bu.s.low)
+    return 0;
+  else if ((USItype) au.s.low > (USItype) bu.s.low)
+    return 2;
+  return 1;
+}
+
diff --git a/arch/arm26/lib/udivdi3.c b/arch/arm26/lib/udivdi3.c
new file mode 100644
index 00000000000..d25195f673f
--- /dev/null
+++ b/arch/arm26/lib/udivdi3.c
@@ -0,0 +1,242 @@
+/* More subroutines needed by GCC output code on some machines.  */
+/* Compile this one with gcc.  */
+/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.
+ */
+/* support functions required by the kernel. based on code from gcc-2.95.3 */
+/* I Molton     29/07/01 */
+
+#include "gcclib.h"
+#include "longlong.h"
+
+static const UQItype __clz_tab[] =
+{
+  0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+};
+
+UDItype
+__udivmoddi4 (UDItype n, UDItype d, UDItype *rp)
+{
+  DIunion ww;
+  DIunion nn, dd;
+  DIunion rr;
+  USItype d0, d1, n0, n1, n2;
+  USItype q0, q1;
+  USItype b, bm;
+
+  nn.ll = n;
+  dd.ll = d;
+
+  d0 = dd.s.low;
+  d1 = dd.s.high;
+  n0 = nn.s.low;
+  n1 = nn.s.high;
+
+  if (d1 == 0)
+    {
+      if (d0 > n1)
+        {
+          /* 0q = nn / 0D */
+
+          count_leading_zeros (bm, d0);
+
+          if (bm != 0)
+            {
+              /* Normalize, i.e. make the most significant bit of the
+                 denominator set.  */
+
+              d0 = d0 << bm;
+              n1 = (n1 << bm) | (n0 >> (SI_TYPE_SIZE - bm));
+              n0 = n0 << bm;
+            }
+
+          udiv_qrnnd (q0, n0, n1, n0, d0);
+          q1 = 0;
+
+          /* Remainder in n0 >> bm.  */
+        }
+      else
+        {
+          /* qq = NN / 0d */
+
+          if (d0 == 0)
+            d0 = 1 / d0;        /* Divide intentionally by zero.  */
+
+          count_leading_zeros (bm, d0);
+
+          if (bm == 0)
+            {
+              /* From (n1 >= d0) /\ (the most significant bit of d0 is set),
+                 conclude (the most significant bit of n1 is set) /\ (the
+                 leading quotient digit q1 = 1).
+
+                 This special case is necessary, not an optimization.
+                 (Shifts counts of SI_TYPE_SIZE are undefined.)  */
+
+              n1 -= d0;
+              q1 = 1;
+            }
+          else
+            {
+              /* Normalize.  */
+
+              b = SI_TYPE_SIZE - bm;
+
+              d0 = d0 << bm;
+              n2 = n1 >> b;
+              n1 = (n1 << bm) | (n0 >> b);
+              n0 = n0 << bm;
+
+              udiv_qrnnd (q1, n1, n2, n1, d0);
+            }
+
+          /* n1 != d0...  */
+
+          udiv_qrnnd (q0, n0, n1, n0, d0);
+
+          /* Remainder in n0 >> bm.  */
+        }
+
+      if (rp != 0)
+        {
+          rr.s.low = n0 >> bm;
+          rr.s.high = 0;
+          *rp = rr.ll;
+        }
+    }
+  else
+    {
+      if (d1 > n1)
+        {
+          /* 00 = nn / DD */
+
+          q0 = 0;
+          q1 = 0;
+
+          /* Remainder in n1n0.  */
+          if (rp != 0)
+            {
+              rr.s.low = n0;
+              rr.s.high = n1;
+              *rp = rr.ll;
+            }
+        }
+      else
+        {
+          /* 0q = NN / dd */
+
+          count_leading_zeros (bm, d1);
+          if (bm == 0)
+            {
+              /* From (n1 >= d1) /\ (the most significant bit of d1 is set),
+                 conclude (the most significant bit of n1 is set) /\ (the
+                 quotient digit q0 = 0 or 1).
+
+                 This special case is necessary, not an optimization.  */
+
+              /* The condition on the next line takes advantage of that
+                 n1 >= d1 (true due to program flow).  */
+              if (n1 > d1 || n0 >= d0)
+                {
+                  q0 = 1;
+                  sub_ddmmss (n1, n0, n1, n0, d1, d0);
+                }
+              else
+                q0 = 0;
+
+              q1 = 0;
+
+              if (rp != 0)
+                {
+                  rr.s.low = n0;
+                  rr.s.high = n1;
+                  *rp = rr.ll;
+                }
+            }
+          else
+            {
+              USItype m1, m0;
+              /* Normalize.  */
+
+              b = SI_TYPE_SIZE - bm;
+
+              d1 = (d1 << bm) | (d0 >> b);
+              d0 = d0 << bm;
+              n2 = n1 >> b;
+              n1 = (n1 << bm) | (n0 >> b);
+              n0 = n0 << bm;
+
+              udiv_qrnnd (q0, n1, n2, n1, d1);
+              umul_ppmm (m1, m0, q0, d0);
+
+              if (m1 > n1 || (m1 == n1 && m0 > n0))
+                {
+                  q0--;
+                  sub_ddmmss (m1, m0, m1, m0, d1, d0);
+                }
+
+              q1 = 0;
+
+              /* Remainder in (n1n0 - m1m0) >> bm.  */
+              if (rp != 0)
+                {
+                  sub_ddmmss (n1, n0, n1, n0, m1, m0);
+                  rr.s.low = (n1 << b) | (n0 >> bm);
+                  rr.s.high = n1 >> bm;
+                  *rp = rr.ll;
+                }
+            }
+        }
+    }
+
+  ww.s.low = q0;
+  ww.s.high = q1;
+  return ww.ll;
+}
+
+UDItype
+__udivdi3 (UDItype n, UDItype d)
+{
+  return __udivmoddi4 (n, d, (UDItype *) 0);
+}
+
+UDItype
+__umoddi3 (UDItype u, UDItype v)
+{
+  UDItype w;
+
+  (void) __udivmoddi4 (u ,v, &w);
+
+  return w;
+}
+
diff --git a/arch/arm26/machine/Makefile b/arch/arm26/machine/Makefile
new file mode 100644
index 00000000000..86ea97cc07f
--- /dev/null
+++ b/arch/arm26/machine/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the linux kernel.
+#
+
+# Object file lists.
+
+obj-y			:= dma.o irq.o latches.o
+
diff --git a/arch/arm26/machine/dma.c b/arch/arm26/machine/dma.c
new file mode 100644
index 00000000000..cbc7c61d5b3
--- /dev/null
+++ b/arch/arm26/machine/dma.c
@@ -0,0 +1,215 @@
+/*
+ *  linux/arch/arm26/kernel/dma.c
+ *
+ *  Copyright (C) 1998-1999 Dave Gilbert / Russell King
+ *  Copyright (C) 2003 Ian Molton
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  DMA functions specific to Archimedes and A5000 architecture
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+
+#include <asm/dma.h>
+#include <asm/fiq.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/hardware.h>
+#include <asm/mach-types.h>
+
+#define DPRINTK(x...) printk(KERN_DEBUG x)
+
+#if defined(CONFIG_BLK_DEV_FD1772) || defined(CONFIG_BLK_DEV_FD1772_MODULE)
+
+extern unsigned char fdc1772_dma_read, fdc1772_dma_read_end;
+extern unsigned char fdc1772_dma_write, fdc1772_dma_write_end;
+extern void fdc1772_setupdma(unsigned int count,unsigned int addr);
+
+static void arc_floppy_data_enable_dma(dmach_t channel, dma_t *dma)
+{
+	DPRINTK("arc_floppy_data_enable_dma\n");
+
+	if (dma->using_sg)
+		BUG();
+
+	switch (dma->dma_mode) {
+	case DMA_MODE_READ: { /* read */
+		unsigned long flags;
+		DPRINTK("enable_dma fdc1772 data read\n");
+		local_save_flags_cli(flags);
+		clf();
+			
+		memcpy ((void *)0x1c, (void *)&fdc1772_dma_read,
+			&fdc1772_dma_read_end - &fdc1772_dma_read);
+		fdc1772_setupdma(dma->buf.length, dma->buf.__address); /* Sets data pointer up */
+		enable_fiq(FIQ_FLOPPYDATA);
+		local_irq_restore(flags);
+	   }
+	   break;
+
+	case DMA_MODE_WRITE: { /* write */
+		unsigned long flags;
+		DPRINTK("enable_dma fdc1772 data write\n");
+		local_save_flags_cli(flags);
+		clf();
+		memcpy ((void *)0x1c, (void *)&fdc1772_dma_write,
+			&fdc1772_dma_write_end - &fdc1772_dma_write);
+		fdc1772_setupdma(dma->buf.length, dma->buf.__address); /* Sets data pointer up */
+		enable_fiq(FIQ_FLOPPYDATA);
+
+		local_irq_restore(flags);
+	    }
+	    break;
+	default:
+		printk ("enable_dma: dma%d not initialised\n", channel);
+	}
+}
+
+static int arc_floppy_data_get_dma_residue(dmach_t channel, dma_t *dma)
+{
+	extern unsigned int fdc1772_bytestogo;
+
+	/* 10/1/1999 DAG - I presume its the number of bytes left? */
+	return fdc1772_bytestogo;
+}
+
+static void arc_floppy_cmdend_enable_dma(dmach_t channel, dma_t *dma)
+{
+	/* Need to build a branch at the FIQ address */
+	extern void fdc1772_comendhandler(void);
+	unsigned long flags;
+
+	DPRINTK("arc_floppy_cmdend_enable_dma\n");
+	/*printk("enable_dma fdc1772 command end FIQ\n");*/
+	save_flags(flags);
+	clf();
+	
+	/* B fdc1772_comendhandler */
+	*((unsigned int *)0x1c)=0xea000000 |
+			(((unsigned int)fdc1772_comendhandler-(0x1c+8))/4);
+
+	local_irq_restore(flags);
+}
+
+static int arc_floppy_cmdend_get_dma_residue(dmach_t channel, dma_t *dma)
+{
+	/* 10/1/1999 DAG - Presume whether there is an outstanding command? */
+	extern unsigned int fdc1772_fdc_int_done;
+
+	/* Explicit! If the int done is 0 then 1 int to go */
+	return (fdc1772_fdc_int_done==0)?1:0;
+}
+
+static void arc_disable_dma(dmach_t channel, dma_t *dma)
+{
+	disable_fiq(dma->dma_irq);
+}
+
+static struct dma_ops arc_floppy_data_dma_ops = {
+	.type		= "FIQDMA",
+	.enable		= arc_floppy_data_enable_dma,
+	.disable	= arc_disable_dma,
+	.residue	= arc_floppy_data_get_dma_residue,
+};
+
+static struct dma_ops arc_floppy_cmdend_dma_ops = {
+	.type		= "FIQCMD",
+	.enable		= arc_floppy_cmdend_enable_dma,
+	.disable	= arc_disable_dma,
+	.residue	= arc_floppy_cmdend_get_dma_residue,
+};
+#endif
+
+#ifdef CONFIG_ARCH_A5K
+static struct fiq_handler fh = {
+	.name	= "floppydata"
+};
+
+static int a5k_floppy_get_dma_residue(dmach_t channel, dma_t *dma)
+{
+	struct pt_regs regs;
+	get_fiq_regs(&regs);
+	return regs.ARM_r9;
+}
+
+static void a5k_floppy_enable_dma(dmach_t channel, dma_t *dma)
+{
+	struct pt_regs regs;
+	void *fiqhandler_start;
+	unsigned int fiqhandler_length;
+	extern void floppy_fiqsetup(unsigned long len, unsigned long addr,
+				     unsigned long port);
+
+	if (dma->using_sg)
+		BUG();
+
+	if (dma->dma_mode == DMA_MODE_READ) {
+		extern unsigned char floppy_fiqin_start, floppy_fiqin_end;
+		fiqhandler_start = &floppy_fiqin_start;
+		fiqhandler_length = &floppy_fiqin_end - &floppy_fiqin_start;
+	} else {
+		extern unsigned char floppy_fiqout_start, floppy_fiqout_end;
+		fiqhandler_start = &floppy_fiqout_start;
+		fiqhandler_length = &floppy_fiqout_end - &floppy_fiqout_start;
+	}
+	if (claim_fiq(&fh)) {
+		printk("floppydma: couldn't claim FIQ.\n");
+		return;
+	}
+	memcpy((void *)0x1c, fiqhandler_start, fiqhandler_length);
+	regs.ARM_r9 = dma->buf.length;
+	regs.ARM_r10 = (unsigned long)dma->buf.__address;
+	regs.ARM_fp = FLOPPYDMA_BASE;
+	set_fiq_regs(&regs);
+	enable_fiq(dma->dma_irq);
+}
+
+static void a5k_floppy_disable_dma(dmach_t channel, dma_t *dma)
+{
+	disable_fiq(dma->dma_irq);
+	release_fiq(&fh);
+}
+
+static struct dma_ops a5k_floppy_dma_ops = {
+	.type		= "FIQDMA",
+	.enable		= a5k_floppy_enable_dma,
+	.disable	= a5k_floppy_disable_dma,
+	.residue	= a5k_floppy_get_dma_residue,
+};
+#endif
+
+/*
+ * This is virtual DMA - we don't need anything here
+ */
+static void sound_enable_disable_dma(dmach_t channel, dma_t *dma)
+{
+}
+
+static struct dma_ops sound_dma_ops = {
+	.type		= "VIRTUAL",
+	.enable		= sound_enable_disable_dma,
+	.disable	= sound_enable_disable_dma,
+};
+
+void __init arch_dma_init(dma_t *dma)
+{
+#if defined(CONFIG_BLK_DEV_FD1772) || defined(CONFIG_BLK_DEV_FD1772_MODULE)
+	if (machine_is_archimedes()) {
+		dma[DMA_VIRTUAL_FLOPPY0].dma_irq = FIQ_FLOPPYDATA;
+		dma[DMA_VIRTUAL_FLOPPY0].d_ops   = &arc_floppy_data_dma_ops;
+		dma[DMA_VIRTUAL_FLOPPY1].dma_irq = 1;
+		dma[DMA_VIRTUAL_FLOPPY1].d_ops   = &arc_floppy_cmdend_dma_ops;
+	}
+#endif
+#ifdef CONFIG_ARCH_A5K
+	if (machine_is_a5k()) {
+		dma[DMA_VIRTUAL_FLOPPY0].dma_irq = FIQ_FLOPPYDATA;
+		dma[DMA_VIRTUAL_FLOPPY0].d_ops   = &a5k_floppy_dma_ops;
+	}
+#endif
+	dma[DMA_VIRTUAL_SOUND].d_ops = &sound_dma_ops;
+}
diff --git a/arch/arm26/machine/irq.c b/arch/arm26/machine/irq.c
new file mode 100644
index 00000000000..4361863f7ed
--- /dev/null
+++ b/arch/arm26/machine/irq.c
@@ -0,0 +1,165 @@
+/*
+ *  linux/arch/arm26/mach-arc/irq.c
+ *
+ *  Copyright (C) 1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Changelog:
+ *   24-09-1996	RMK	Created
+ *   10-10-1996	RMK	Brought up to date with arch-sa110eval
+ *   22-10-1996	RMK	Changed interrupt numbers & uses new inb/outb macros
+ *   11-01-1998	RMK	Added mask_and_ack_irq
+ *   22-08-1998	RMK	Restructured IRQ routines
+ *   08-09-2002 IM	Brought up to date for 2.5
+ *   01-06-2003 JMA     Removed arc_fiq_chip
+ */
+#include <linux/config.h>
+#include <linux/init.h>
+
+#include <asm/irq.h>
+#include <asm/irqchip.h>
+#include <asm/ioc.h>
+#include <asm/io.h>
+#include <asm/system.h>
+
+extern void init_FIQ(void);
+
+#define a_clf()	clf()
+#define a_stf()	stf()
+
+static void arc_ack_irq_a(unsigned int irq)
+{
+	unsigned int val, mask;
+
+	mask = 1 << irq;
+	a_clf();
+	val = ioc_readb(IOC_IRQMASKA);
+	ioc_writeb(val & ~mask, IOC_IRQMASKA);
+	ioc_writeb(mask, IOC_IRQCLRA);
+	a_stf();
+}
+
+static void arc_mask_irq_a(unsigned int irq)
+{
+	unsigned int val, mask;
+
+	mask = 1 << irq;
+	a_clf();
+	val = ioc_readb(IOC_IRQMASKA);
+	ioc_writeb(val & ~mask, IOC_IRQMASKA);
+	a_stf();
+}
+
+static void arc_unmask_irq_a(unsigned int irq)
+{
+	unsigned int val, mask;
+
+	mask = 1 << irq;
+	a_clf();
+	val = ioc_readb(IOC_IRQMASKA);
+	ioc_writeb(val | mask, IOC_IRQMASKA);
+	a_stf();
+}
+
+static struct irqchip arc_a_chip = {
+        .ack    = arc_ack_irq_a,
+        .mask   = arc_mask_irq_a,
+        .unmask = arc_unmask_irq_a,
+};
+
+static void arc_mask_irq_b(unsigned int irq)
+{
+	unsigned int val, mask;
+	mask = 1 << (irq & 7);
+	val = ioc_readb(IOC_IRQMASKB);
+	ioc_writeb(val & ~mask, IOC_IRQMASKB);
+}
+
+static void arc_unmask_irq_b(unsigned int irq)
+{
+	unsigned int val, mask;
+
+	mask = 1 << (irq & 7);
+	val = ioc_readb(IOC_IRQMASKB);
+	ioc_writeb(val | mask, IOC_IRQMASKB);
+}
+
+static struct irqchip arc_b_chip = {
+        .ack    = arc_mask_irq_b,
+        .mask   = arc_mask_irq_b,
+        .unmask = arc_unmask_irq_b,
+};
+
+/* FIXME - JMA none of these functions are used in arm26 currently
+static void arc_mask_irq_fiq(unsigned int irq)
+{
+	unsigned int val, mask;
+
+	mask = 1 << (irq & 7);
+	val = ioc_readb(IOC_FIQMASK);
+	ioc_writeb(val & ~mask, IOC_FIQMASK);
+}
+
+static void arc_unmask_irq_fiq(unsigned int irq)
+{
+	unsigned int val, mask;
+
+	mask = 1 << (irq & 7);
+	val = ioc_readb(IOC_FIQMASK);
+	ioc_writeb(val | mask, IOC_FIQMASK);
+}
+
+static struct irqchip arc_fiq_chip = {
+        .ack    = arc_mask_irq_fiq,
+        .mask   = arc_mask_irq_fiq,
+        .unmask = arc_unmask_irq_fiq,
+};
+*/
+
+void __init arc_init_irq(void)
+{
+	unsigned int irq, flags;
+
+	/* Disable all IOC interrupt sources */
+	ioc_writeb(0, IOC_IRQMASKA);
+	ioc_writeb(0, IOC_IRQMASKB);
+	ioc_writeb(0, IOC_FIQMASK);
+
+	for (irq = 0; irq < NR_IRQS; irq++) {
+		flags = IRQF_VALID;
+		
+		if (irq <= 6 || (irq >= 9 && irq <= 15))
+                        flags |= IRQF_PROBE;
+	
+		if (irq == IRQ_KEYBOARDTX)
+                        flags |= IRQF_NOAUTOEN;	
+		
+		switch (irq) {
+		case 0 ... 7:
+			set_irq_chip(irq, &arc_a_chip);
+                        set_irq_handler(irq, do_level_IRQ);
+                        set_irq_flags(irq, flags);
+			break;
+
+		case 8 ... 15:
+			set_irq_chip(irq, &arc_b_chip);
+                        set_irq_handler(irq, do_level_IRQ);
+                        set_irq_flags(irq, flags);
+
+/*		case 64 ... 72:
+			set_irq_chip(irq, &arc_fiq_chip);
+                        set_irq_flags(irq, flags);
+			break;
+*/
+
+		}
+	}
+
+	irq_desc[IRQ_KEYBOARDTX].noautoenable = 1;
+
+	init_FIQ();
+}
+
diff --git a/arch/arm26/machine/latches.c b/arch/arm26/machine/latches.c
new file mode 100644
index 00000000000..94f05d2a3b2
--- /dev/null
+++ b/arch/arm26/machine/latches.c
@@ -0,0 +1,72 @@
+/*
+ *  linux/arch/arm26/kernel/latches.c
+ *
+ *  Copyright (C) David Alan Gilbert 1995/1996,2000
+ *  Copyright (C) Ian Molton 2003
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Support for the latches on the old Archimedes which control the floppy,
+ *  hard disc and printer
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+
+#include <asm/io.h>
+#include <asm/hardware.h>
+#include <asm/mach-types.h>
+#include <asm/oldlatches.h>
+
+static unsigned char latch_a_copy;
+static unsigned char latch_b_copy;
+
+/* newval=(oldval & ~mask)|newdata */
+void oldlatch_aupdate(unsigned char mask,unsigned char newdata)
+{
+	unsigned long flags;
+
+	BUG_ON(!machine_is_archimedes());
+
+	local_irq_save(flags); //FIXME: was local_save_flags
+	latch_a_copy = (latch_a_copy & ~mask) | newdata;
+	__raw_writeb(latch_a_copy, LATCHA_BASE);
+	local_irq_restore(flags);
+
+	printk("Latch: A = 0x%02x\n", latch_a_copy);
+}
+
+
+/* newval=(oldval & ~mask)|newdata */
+void oldlatch_bupdate(unsigned char mask,unsigned char newdata)
+{
+	unsigned long flags;
+
+	BUG_ON(!machine_is_archimedes());
+
+
+	local_irq_save(flags);//FIXME: was local_save_flags
+	latch_b_copy = (latch_b_copy & ~mask) | newdata;
+	__raw_writeb(latch_b_copy, LATCHB_BASE);
+	local_irq_restore(flags);
+
+	printk("Latch: B = 0x%02x\n", latch_b_copy);
+}
+
+static int __init oldlatch_init(void)
+{
+	if (machine_is_archimedes()) {
+		oldlatch_aupdate(0xff, 0xff);
+		/* Thats no FDC reset...*/
+		oldlatch_bupdate(0xff, LATCHB_FDCRESET);
+	}
+	return 0;
+}
+
+arch_initcall(oldlatch_init);
+
+EXPORT_SYMBOL(oldlatch_aupdate);
+EXPORT_SYMBOL(oldlatch_bupdate);
diff --git a/arch/arm26/mm/Makefile b/arch/arm26/mm/Makefile
new file mode 100644
index 00000000000..a8fb166d5c6
--- /dev/null
+++ b/arch/arm26/mm/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for the linux arm26-specific parts of the memory manager.
+#
+
+obj-y		:= init.o extable.o proc-funcs.o memc.o fault.o \
+		   small_page.o
diff --git a/arch/arm26/mm/extable.c b/arch/arm26/mm/extable.c
new file mode 100644
index 00000000000..2d9f5b5a78d
--- /dev/null
+++ b/arch/arm26/mm/extable.c
@@ -0,0 +1,25 @@
+/*
+ *  linux/arch/arm26/mm/extable.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+
+int fixup_exception(struct pt_regs *regs)
+{
+        const struct exception_table_entry *fixup;
+
+        fixup = search_exception_tables(instruction_pointer(regs));
+
+	/*
+	 * The kernel runs in SVC mode - make sure we keep running in SVC mode
+	 * by frobbing the PSR appropriately (PSR and PC are in the same reg.
+	 * on ARM26)
+	 */
+        if (fixup)
+                regs->ARM_pc = fixup->fixup | PSR_I_BIT | MODE_SVC26;
+
+        return fixup != NULL;
+}
+
diff --git a/arch/arm26/mm/fault.c b/arch/arm26/mm/fault.c
new file mode 100644
index 00000000000..dacca8bb774
--- /dev/null
+++ b/arch/arm26/mm/fault.c
@@ -0,0 +1,318 @@
+/*
+ *  linux/arch/arm26/mm/fault.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *  Modifications for ARM processor (c) 1995-2001 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+
+#include <asm/system.h>
+#include <asm/pgtable.h>
+#include <asm/uaccess.h> //FIXME this header may be bogusly included
+
+#include "fault.h"
+
+#define FAULT_CODE_LDRSTRPOST   0x80
+#define FAULT_CODE_LDRSTRPRE    0x40
+#define FAULT_CODE_LDRSTRREG    0x20
+#define FAULT_CODE_LDMSTM       0x10
+#define FAULT_CODE_LDCSTC       0x08
+#define FAULT_CODE_PREFETCH     0x04
+#define FAULT_CODE_WRITE        0x02
+#define FAULT_CODE_FORCECOW     0x01
+
+#define DO_COW(m)               ((m) & (FAULT_CODE_WRITE|FAULT_CODE_FORCECOW))
+#define READ_FAULT(m)           (!((m) & FAULT_CODE_WRITE))
+#define DEBUG
+/*
+ * This is useful to dump out the page tables associated with
+ * 'addr' in mm 'mm'.
+ */
+void show_pte(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+
+	if (!mm)
+		mm = &init_mm;
+
+	printk(KERN_ALERT "pgd = %p\n", mm->pgd);
+	pgd = pgd_offset(mm, addr);
+	printk(KERN_ALERT "[%08lx] *pgd=%08lx", addr, pgd_val(*pgd));
+
+	do {
+		pmd_t *pmd;
+		pte_t *pte;
+
+		pmd = pmd_offset(pgd, addr);
+
+		if (pmd_none(*pmd))
+			break;
+
+		if (pmd_bad(*pmd)) {
+			printk("(bad)");
+			break;
+		}
+
+		/* We must not map this if we have highmem enabled */
+		/* FIXME */
+		pte = pte_offset_map(pmd, addr);
+		printk(", *pte=%08lx", pte_val(*pte));
+		pte_unmap(pte);
+	} while(0);
+
+	printk("\n");
+}
+
+/*
+ * Oops.  The kernel tried to access some page that wasn't present.
+ */
+static void
+__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
+		  struct pt_regs *regs)
+{
+	/*
+         * Are we prepared to handle this kernel fault?
+         */
+        if (fixup_exception(regs))
+                return;
+
+	/*
+	 * No handler, we'll have to terminate things with extreme prejudice.
+	 */
+	bust_spinlocks(1);
+	printk(KERN_ALERT
+		"Unable to handle kernel %s at virtual address %08lx\n",
+		(addr < PAGE_SIZE) ? "NULL pointer dereference" :
+		"paging request", addr);
+
+	show_pte(mm, addr);
+	die("Oops", regs, fsr);
+	bust_spinlocks(0);
+	do_exit(SIGKILL);
+}
+
+/*
+ * Something tried to access memory that isn't in our memory map..
+ * User mode accesses just cause a SIGSEGV
+ */
+static void
+__do_user_fault(struct task_struct *tsk, unsigned long addr,
+		unsigned int fsr, int code, struct pt_regs *regs)
+{
+	struct siginfo si;
+
+#ifdef CONFIG_DEBUG_USER
+	printk("%s: unhandled page fault at 0x%08lx, code 0x%03x\n",
+	       tsk->comm, addr, fsr);
+	show_pte(tsk->mm, addr);
+	show_regs(regs);
+	//dump_backtrace(regs, tsk); // FIXME ARM32 dropped this - why?
+	while(1); //FIXME - hack to stop debug going nutso
+#endif
+
+	tsk->thread.address = addr;
+	tsk->thread.error_code = fsr;
+	tsk->thread.trap_no = 14;
+	si.si_signo = SIGSEGV;
+	si.si_errno = 0;
+	si.si_code = code;
+	si.si_addr = (void *)addr;
+	force_sig_info(SIGSEGV, &si, tsk);
+}
+
+static int
+__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
+		struct task_struct *tsk)
+{
+	struct vm_area_struct *vma;
+	int fault, mask;
+
+	vma = find_vma(mm, addr);
+	fault = -2; /* bad map area */
+	if (!vma)
+		goto out;
+	if (vma->vm_start > addr)
+		goto check_stack;
+
+	/*
+	 * Ok, we have a good vm_area for this
+	 * memory access, so we can handle it.
+	 */
+good_area:
+	if (READ_FAULT(fsr)) /* read? */
+		mask = VM_READ|VM_EXEC;
+	else
+		mask = VM_WRITE;
+
+	fault = -1; /* bad access type */
+	if (!(vma->vm_flags & mask))
+		goto out;
+
+	/*
+	 * If for any reason at all we couldn't handle
+	 * the fault, make sure we exit gracefully rather
+	 * than endlessly redo the fault.
+	 */
+survive:
+	fault = handle_mm_fault(mm, vma, addr & PAGE_MASK, DO_COW(fsr));
+
+	/*
+	 * Handle the "normal" cases first - successful and sigbus
+	 */
+	switch (fault) {
+	case 2:
+		tsk->maj_flt++;
+		return fault;
+	case 1:
+		tsk->min_flt++;
+	case 0:
+		return fault;
+	}
+
+	fault = -3; /* out of memory */
+	if (tsk->pid != 1)
+		goto out;
+
+	/*
+	 * If we are out of memory for pid1,
+	 * sleep for a while and retry
+	 */
+	yield();
+	goto survive;
+
+check_stack:
+	if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
+		goto good_area;
+out:
+	return fault;
+}
+
+int do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+{
+	struct task_struct *tsk;
+	struct mm_struct *mm;
+	int fault;
+
+	tsk = current;
+	mm  = tsk->mm;
+
+	/*
+	 * If we're in an interrupt or have no user
+	 * context, we must not take the fault..
+	 */
+	if (in_interrupt() || !mm)
+		goto no_context;
+
+	down_read(&mm->mmap_sem);
+	fault = __do_page_fault(mm, addr, fsr, tsk);
+	up_read(&mm->mmap_sem);
+
+	/*
+	 * Handle the "normal" case first
+	 */
+	if (fault > 0)
+		return 0;
+
+	/*
+	 * We had some memory, but were unable to
+	 * successfully fix up this page fault.
+	 */
+	if (fault == 0){
+		goto do_sigbus;
+	}
+
+	/*
+	 * If we are in kernel mode at this point, we
+	 * have no context to handle this fault with.
+         * FIXME - is this test right?
+	 */
+	if (!user_mode(regs)){
+		goto no_context;
+	}
+
+	if (fault == -3) {
+		/*
+		 * We ran out of memory, or some other thing happened to
+		 * us that made us unable to handle the page fault gracefully.
+		 */
+		printk("VM: killing process %s\n", tsk->comm);
+		do_exit(SIGKILL);
+	}
+	else{
+		__do_user_fault(tsk, addr, fsr, fault == -1 ? SEGV_ACCERR : SEGV_MAPERR, regs);
+	}
+
+	return 0;
+
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+do_sigbus:
+	/*
+	 * Send a sigbus, regardless of whether we were in kernel
+	 * or user mode.
+	 */
+	tsk->thread.address = addr;  //FIXME - need other bits setting?
+	tsk->thread.error_code = fsr;
+	tsk->thread.trap_no = 14;
+	force_sig(SIGBUS, tsk);
+#ifdef CONFIG_DEBUG_USER
+	printk(KERN_DEBUG "%s: sigbus at 0x%08lx, pc=0x%08lx\n",
+		current->comm, addr, instruction_pointer(regs));
+#endif
+
+	/* Kernel mode? Handle exceptions or die */
+	if (user_mode(regs))
+		return 0;
+
+no_context:
+	__do_kernel_fault(mm, addr, fsr, regs);
+	return 0;
+}
+
+/*
+ * Handle a data abort.  Note that we have to handle a range of addresses
+ * on ARM2/3 for ldm.  If both pages are zero-mapped, then we have to force
+ * a copy-on-write.  However, on the second page, we always force COW.
+ */
+asmlinkage void
+do_DataAbort(unsigned long min_addr, unsigned long max_addr, int mode, struct pt_regs *regs)
+{
+        do_page_fault(min_addr, mode, regs);
+
+        if ((min_addr ^ max_addr) >> PAGE_SHIFT){
+               do_page_fault(max_addr, mode | FAULT_CODE_FORCECOW, regs);
+	}
+}
+
+asmlinkage int
+do_PrefetchAbort(unsigned long addr, struct pt_regs *regs)
+{
+#if 0
+        if (the memc mapping for this page exists) {
+                printk ("Page in, but got abort (undefined instruction?)\n");
+                return 0;
+        }
+#endif
+        do_page_fault(addr, FAULT_CODE_PREFETCH, regs);
+        return 1;
+}
+
diff --git a/arch/arm26/mm/fault.h b/arch/arm26/mm/fault.h
new file mode 100644
index 00000000000..4442d00d86a
--- /dev/null
+++ b/arch/arm26/mm/fault.h
@@ -0,0 +1,5 @@
+void show_pte(struct mm_struct *mm, unsigned long addr);
+
+int do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs);
+
+unsigned long search_extable(unsigned long addr); //FIXME - is it right?
diff --git a/arch/arm26/mm/init.c b/arch/arm26/mm/init.c
new file mode 100644
index 00000000000..1f09a9d0fb8
--- /dev/null
+++ b/arch/arm26/mm/init.c
@@ -0,0 +1,412 @@
+/*
+ *  linux/arch/arm26/mm/init.c
+ *
+ *  Copyright (C) 1995-2002 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/initrd.h>
+#include <linux/bootmem.h>
+#include <linux/blkdev.h>
+
+#include <asm/segment.h>
+#include <asm/mach-types.h>
+#include <asm/dma.h>
+#include <asm/hardware.h>
+#include <asm/setup.h>
+#include <asm/tlb.h>
+
+#include <asm/map.h>
+
+
+#define TABLE_SIZE	PTRS_PER_PTE * sizeof(pte_t))
+
+struct mmu_gather mmu_gathers[NR_CPUS];
+
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+extern char _stext, _text, _etext, _end, __init_begin, __init_end;
+#ifdef CONFIG_XIP_KERNEL
+extern char _endtext, _sdata;
+#endif
+extern unsigned long phys_initrd_start;
+extern unsigned long phys_initrd_size;
+
+/*
+ * The sole use of this is to pass memory configuration
+ * data from paging_init to mem_init.
+ */
+static struct meminfo meminfo __initdata = { 0, };
+
+/*
+ * empty_zero_page is a special page that is used for
+ * zero-initialized data and COW.
+ */
+struct page *empty_zero_page;
+
+void show_mem(void)
+{
+	int free = 0, total = 0, reserved = 0;
+	int shared = 0, cached = 0, slab = 0;
+	struct page *page, *end;
+
+	printk("Mem-info:\n");
+	show_free_areas();
+	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+
+
+	page = NODE_MEM_MAP(0);
+	end  = page + NODE_DATA(0)->node_spanned_pages;
+
+	do {
+		total++;
+		if (PageReserved(page))
+			reserved++;
+		else if (PageSwapCache(page))
+			cached++;
+		else if (PageSlab(page))
+			slab++;
+		else if (!page_count(page))
+			free++;
+		else
+			shared += page_count(page) - 1;
+		page++;
+	} while (page < end);
+
+	printk("%d pages of RAM\n", total);
+	printk("%d free pages\n", free);
+	printk("%d reserved pages\n", reserved);
+	printk("%d slab pages\n", slab);
+	printk("%d pages shared\n", shared);
+	printk("%d pages swap cached\n", cached);
+}
+
+struct node_info {
+	unsigned int start;
+	unsigned int end;
+	int bootmap_pages;
+};
+
+#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
+#define PFN_UP(x)	(PAGE_ALIGN(x) >> PAGE_SHIFT)
+#define PFN_SIZE(x)	((x) >> PAGE_SHIFT)
+#define PFN_RANGE(s,e)	PFN_SIZE(PAGE_ALIGN((unsigned long)(e)) - \
+				(((unsigned long)(s)) & PAGE_MASK))
+
+/*
+ * FIXME: We really want to avoid allocating the bootmap bitmap
+ * over the top of the initrd.  Hopefully, this is located towards
+ * the start of a bank, so if we allocate the bootmap bitmap at
+ * the end, we won't clash.
+ */
+static unsigned int __init
+find_bootmap_pfn(struct meminfo *mi, unsigned int bootmap_pages)
+{
+	unsigned int start_pfn, bootmap_pfn;
+	unsigned int start, end;
+
+	start_pfn   = PFN_UP((unsigned long)&_end);
+	bootmap_pfn = 0;
+
+	/* ARM26 machines only have one node */
+	if (mi->bank->node != 0)
+		BUG();
+
+	start = PFN_UP(mi->bank->start);
+	end   = PFN_DOWN(mi->bank->size + mi->bank->start);
+
+	if (start < start_pfn)
+		start = start_pfn;
+
+	if (end <= start)
+		BUG();
+
+	if (end - start >= bootmap_pages) 
+		bootmap_pfn = start;
+	else
+		BUG();
+
+	return bootmap_pfn;
+}
+
+/*
+ * Scan the memory info structure and pull out:
+ *  - the end of memory
+ *  - the number of nodes
+ *  - the pfn range of each node
+ *  - the number of bootmem bitmap pages
+ */
+static void __init
+find_memend_and_nodes(struct meminfo *mi, struct node_info *np)
+{
+	unsigned int memend_pfn = 0;
+
+	nodes_clear(node_online_map);
+	node_set_online(0);
+
+	np->bootmap_pages = 0;
+
+	if (mi->bank->size == 0) {
+		BUG();
+	}
+
+	/*
+	 * Get the start and end pfns for this bank
+	 */
+	np->start = PFN_UP(mi->bank->start);
+	np->end   = PFN_DOWN(mi->bank->start + mi->bank->size);
+
+	if (memend_pfn < np->end)
+		memend_pfn = np->end;
+
+	/*
+	 * Calculate the number of pages we require to
+	 * store the bootmem bitmaps.
+	 */
+	np->bootmap_pages = bootmem_bootmap_pages(np->end - np->start);
+
+	/*
+	 * This doesn't seem to be used by the Linux memory
+	 * manager any more.  If we can get rid of it, we
+	 * also get rid of some of the stuff above as well.
+	 */
+	max_low_pfn = memend_pfn - PFN_DOWN(PHYS_OFFSET);
+	max_pfn = memend_pfn - PFN_DOWN(PHYS_OFFSET);
+	mi->end = memend_pfn << PAGE_SHIFT;
+
+}
+
+/*
+ * Initialise the bootmem allocator for all nodes.  This is called
+ * early during the architecture specific initialisation.
+ */
+void __init bootmem_init(struct meminfo *mi)
+{
+	struct node_info node_info;
+	unsigned int bootmap_pfn;
+	pg_data_t *pgdat = NODE_DATA(0);
+
+	find_memend_and_nodes(mi, &node_info);
+
+	bootmap_pfn   = find_bootmap_pfn(mi, node_info.bootmap_pages);
+
+	/*
+	 * Note that node 0 must always have some pages.
+	 */
+	if (node_info.end == 0)
+		BUG();
+
+	/*
+	 * Initialise the bootmem allocator.
+	 */
+	init_bootmem_node(pgdat, bootmap_pfn, node_info.start, node_info.end);
+
+ 	/*
+	 * Register all available RAM in this node with the bootmem allocator. 
+	 */
+	free_bootmem_node(pgdat, mi->bank->start, mi->bank->size);
+
+        /*
+         * Register the kernel text and data with bootmem.
+         * Note: with XIP we dont register .text since
+         * its in ROM.
+         */
+#ifdef CONFIG_XIP_KERNEL
+        reserve_bootmem_node(pgdat, __pa(&_sdata), &_end - &_sdata);
+#else
+        reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext);
+#endif
+
+        /*
+         * And don't forget to reserve the allocator bitmap,
+         * which will be freed later.
+         */
+        reserve_bootmem_node(pgdat, bootmap_pfn << PAGE_SHIFT,
+                             node_info.bootmap_pages << PAGE_SHIFT);
+
+        /*
+         * These should likewise go elsewhere.  They pre-reserve
+         * the screen memory region at the start of main system
+         * memory. FIXME - screen RAM is not 512K!
+         */
+        reserve_bootmem_node(pgdat, 0x02000000, 0x00080000);
+
+#ifdef CONFIG_BLK_DEV_INITRD
+        initrd_start = phys_initrd_start;
+        initrd_end = initrd_start + phys_initrd_size;
+
+        /* Achimedes machines only have one node, so initrd is in node 0 */
+#ifdef CONFIG_XIP_KERNEL
+	/* Only reserve initrd space if it is in RAM */
+        if(initrd_start && initrd_start < 0x03000000){
+#else
+        if(initrd_start){
+#endif
+                reserve_bootmem_node(pgdat, __pa(initrd_start),
+                                             initrd_end - initrd_start);
+	}
+#endif   /* CONFIG_BLK_DEV_INITRD */
+
+
+}
+
+/*
+ * paging_init() sets up the page tables, initialises the zone memory
+ * maps, and sets up the zero page, bad page and bad page tables.
+ */
+void __init paging_init(struct meminfo *mi)
+{
+	void *zero_page;
+	unsigned long zone_size[MAX_NR_ZONES];
+        unsigned long zhole_size[MAX_NR_ZONES];
+        struct bootmem_data *bdata;
+        pg_data_t *pgdat;
+	int i;
+
+	memcpy(&meminfo, mi, sizeof(meminfo));
+
+	/*
+	 * allocate the zero page.  Note that we count on this going ok.
+	 */
+	zero_page = alloc_bootmem_low_pages(PAGE_SIZE);
+
+	/*
+	 * initialise the page tables.
+	 */
+	memtable_init(mi);
+	flush_tlb_all();
+
+	/*
+	 * initialise the zones in node 0 (archimedes have only 1 node)
+	 */
+
+	for (i = 0; i < MAX_NR_ZONES; i++) {
+		zone_size[i]  = 0;
+		zhole_size[i] = 0;
+	}
+
+	pgdat = NODE_DATA(0);
+	bdata = pgdat->bdata;
+	zone_size[0] = bdata->node_low_pfn -
+			(bdata->node_boot_start >> PAGE_SHIFT);
+	if (!zone_size[0])
+		BUG();
+	pgdat->node_mem_map = NULL;
+	free_area_init_node(0, pgdat, zone_size,
+			bdata->node_boot_start >> PAGE_SHIFT, zhole_size);
+
+	/*
+	 * finish off the bad pages once
+	 * the mem_map is initialised
+	 */
+	memzero(zero_page, PAGE_SIZE);
+	empty_zero_page = virt_to_page(zero_page);
+}
+
+static inline void free_area(unsigned long addr, unsigned long end, char *s)
+{
+	unsigned int size = (end - addr) >> 10;
+
+	for (; addr < end; addr += PAGE_SIZE) {
+		struct page *page = virt_to_page(addr);
+		ClearPageReserved(page);
+		set_page_count(page, 1);
+		free_page(addr);
+		totalram_pages++;
+	}
+
+	if (size && s)
+		printk(KERN_INFO "Freeing %s memory: %dK\n", s, size);
+}
+
+/*
+ * mem_init() marks the free areas in the mem_map and tells us how much
+ * memory is free.  This is done after various parts of the system have
+ * claimed their memory after the kernel image.
+ */
+void __init mem_init(void)
+{
+	unsigned int codepages, datapages, initpages;
+	pg_data_t *pgdat = NODE_DATA(0);
+	extern int sysctl_overcommit_memory;
+
+
+	/* Note: data pages includes BSS */
+#ifdef CONFIG_XIP_KERNEL
+	codepages = &_endtext - &_text;
+	datapages = &_end - &_sdata;
+#else
+	codepages = &_etext - &_text;
+	datapages = &_end - &_etext;
+#endif
+	initpages = &__init_end - &__init_begin;
+
+	high_memory = (void *)__va(meminfo.end);
+	max_mapnr   = virt_to_page(high_memory) - mem_map;
+
+	/* this will put all unused low memory onto the freelists */
+	if (pgdat->node_spanned_pages != 0)
+		totalram_pages += free_all_bootmem_node(pgdat);
+
+	num_physpages = meminfo.bank[0].size >> PAGE_SHIFT;
+
+	printk(KERN_INFO "Memory: %luMB total\n", num_physpages >> (20 - PAGE_SHIFT));
+	printk(KERN_NOTICE "Memory: %luKB available (%dK code, "
+		"%dK data, %dK init)\n",
+		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+		codepages >> 10, datapages >> 10, initpages >> 10);
+
+	/*
+	 * Turn on overcommit on tiny machines
+	 */
+	if (PAGE_SIZE >= 16384 && num_physpages <= 128) {
+		sysctl_overcommit_memory = OVERCOMMIT_ALWAYS;
+		printk("Turning on overcommit\n");
+	}
+}
+
+void free_initmem(void){
+#ifndef CONFIG_XIP_KERNEL
+	free_area((unsigned long)(&__init_begin),
+		  (unsigned long)(&__init_end),
+		  "init");
+#endif
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+
+static int keep_initrd;
+
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+#ifdef CONFIG_XIP_KERNEL
+	/* Only bin initrd if it is in RAM... */
+	if(!keep_initrd && start < 0x03000000)
+#else
+	if (!keep_initrd)
+#endif
+		free_area(start, end, "initrd");
+}
+
+static int __init keepinitrd_setup(char *__unused)
+{
+	keep_initrd = 1;
+	return 1;
+}
+
+__setup("keepinitrd", keepinitrd_setup);
+#endif
diff --git a/arch/arm26/mm/memc.c b/arch/arm26/mm/memc.c
new file mode 100644
index 00000000000..8e8a2bb2487
--- /dev/null
+++ b/arch/arm26/mm/memc.c
@@ -0,0 +1,202 @@
+/*
+ *  linux/arch/arm26/mm/memc.c
+ *
+ *  Copyright (C) 1998-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Page table sludge for older ARM processor architectures.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/page.h>
+#include <asm/memory.h>
+#include <asm/hardware.h>
+
+#include <asm/map.h>
+
+#define MEMC_TABLE_SIZE (256*sizeof(unsigned long))
+
+kmem_cache_t *pte_cache, *pgd_cache;
+int page_nr;
+
+/*
+ * Allocate space for a page table and a MEMC table.
+ * Note that we place the MEMC
+ * table before the page directory.  This means we can
+ * easily get to both tightly-associated data structures
+ * with a single pointer.
+ */
+static inline pgd_t *alloc_pgd_table(void)
+{
+	void *pg2k = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
+
+	if (pg2k)
+		pg2k += MEMC_TABLE_SIZE;
+
+	return (pgd_t *)pg2k;
+}
+
+/*
+ * Free a page table. this function is the counterpart to get_pgd_slow
+ * below, not alloc_pgd_table above.
+ */
+void free_pgd_slow(pgd_t *pgd)
+{
+	unsigned long tbl = (unsigned long)pgd;
+
+	tbl -= MEMC_TABLE_SIZE;
+
+	kmem_cache_free(pgd_cache, (void *)tbl);
+}
+
+/*
+ * Allocate a new pgd and fill it in ready for use
+ *
+ * A new tasks pgd is completely empty (all pages !present) except for:
+ *
+ * o The machine vectors at virtual address 0x0
+ * o The vmalloc region at the top of address space
+ *
+ */
+#define FIRST_KERNEL_PGD_NR     (FIRST_USER_PGD_NR + USER_PTRS_PER_PGD)
+
+pgd_t *get_pgd_slow(struct mm_struct *mm)
+{
+	pgd_t *new_pgd, *init_pgd;
+	pmd_t *new_pmd, *init_pmd;
+	pte_t *new_pte, *init_pte;
+
+	new_pgd = alloc_pgd_table();
+	if (!new_pgd)
+		goto no_pgd;
+
+	/*
+	 * This lock is here just to satisfy pmd_alloc and pte_lock
+         * FIXME: I bet we could avoid taking it pretty much altogether
+	 */
+	spin_lock(&mm->page_table_lock);
+
+	/*
+	 * On ARM, first page must always be allocated since it contains
+	 * the machine vectors.
+	 */
+	new_pmd = pmd_alloc(mm, new_pgd, 0);
+	if (!new_pmd)
+		goto no_pmd;
+
+	new_pte = pte_alloc_kernel(mm, new_pmd, 0);
+	if (!new_pte)
+		goto no_pte;
+
+	init_pgd = pgd_offset(&init_mm, 0);
+	init_pmd = pmd_offset(init_pgd, 0);
+	init_pte = pte_offset(init_pmd, 0);
+
+	set_pte(new_pte, *init_pte);
+
+	/*
+	 * the page table entries are zeroed
+	 * when the table is created. (see the cache_ctor functions below)
+	 * Now we need to plonk the kernel (vmalloc) area at the end of
+	 * the address space. We copy this from the init thread, just like
+	 * the init_pte we copied above...
+	 */
+	memcpy(new_pgd + FIRST_KERNEL_PGD_NR, init_pgd + FIRST_KERNEL_PGD_NR,
+		(PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t));
+
+	spin_unlock(&mm->page_table_lock);
+
+	/* update MEMC tables */
+	cpu_memc_update_all(new_pgd);
+	return new_pgd;
+
+no_pte:
+	spin_unlock(&mm->page_table_lock);
+	pmd_free(new_pmd);
+	free_pgd_slow(new_pgd);
+	return NULL;
+
+no_pmd:
+	spin_unlock(&mm->page_table_lock);
+	free_pgd_slow(new_pgd);
+	return NULL;
+
+no_pgd:
+	return NULL;
+}
+
+/*
+ * No special code is required here.
+ */
+void setup_mm_for_reboot(char mode)
+{
+}
+
+/*
+ * This contains the code to setup the memory map on an ARM2/ARM250/ARM3
+ *  o swapper_pg_dir = 0x0207d000
+ *  o kernel proper starts at 0x0208000
+ *  o create (allocate) a pte to contain the machine vectors
+ *  o populate the pte (points to 0x02078000) (FIXME - is it zeroed?)
+ *  o populate the init tasks page directory (pgd) with the new pte
+ *  o zero the rest of the init tasks pgdir (FIXME - what about vmalloc?!)
+ */
+void __init memtable_init(struct meminfo *mi)
+{
+	pte_t *pte;
+	int i;
+
+	page_nr = max_low_pfn;
+
+	pte = alloc_bootmem_low_pages(PTRS_PER_PTE * sizeof(pte_t));
+	pte[0] = mk_pte_phys(PAGE_OFFSET + SCREEN_SIZE, PAGE_READONLY);
+	pmd_populate(&init_mm, pmd_offset(swapper_pg_dir, 0), pte);
+
+	for (i = 1; i < PTRS_PER_PGD; i++)
+		pgd_val(swapper_pg_dir[i]) = 0;
+}
+
+void __init iotable_init(struct map_desc *io_desc)
+{
+	/* nothing to do */
+}
+
+/*
+ * We never have holes in the memmap
+ */
+void __init create_memmap_holes(struct meminfo *mi)
+{
+}
+
+static void pte_cache_ctor(void *pte, kmem_cache_t *cache, unsigned long flags)
+{
+	memzero(pte, sizeof(pte_t) * PTRS_PER_PTE);
+}
+
+static void pgd_cache_ctor(void *pgd, kmem_cache_t *cache, unsigned long flags)
+{
+	memzero(pgd + MEMC_TABLE_SIZE, USER_PTRS_PER_PGD * sizeof(pgd_t));
+}
+
+void __init pgtable_cache_init(void)
+{
+	pte_cache = kmem_cache_create("pte-cache",
+				sizeof(pte_t) * PTRS_PER_PTE,
+				0, 0, pte_cache_ctor, NULL);
+	if (!pte_cache)
+		BUG();
+
+	pgd_cache = kmem_cache_create("pgd-cache", MEMC_TABLE_SIZE +
+				sizeof(pgd_t) * PTRS_PER_PGD,
+				0, 0, pgd_cache_ctor, NULL);
+	if (!pgd_cache)
+		BUG();
+}
diff --git a/arch/arm26/mm/proc-funcs.S b/arch/arm26/mm/proc-funcs.S
new file mode 100644
index 00000000000..c3d4cd3f457
--- /dev/null
+++ b/arch/arm26/mm/proc-funcs.S
@@ -0,0 +1,359 @@
+/*
+ *  linux/arch/arm26/mm/proc-arm2,3.S
+ *
+ *  Copyright (C) 1997-1999 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  MMU functions for ARM2,3
+ *
+ *  These are the low level assembler for performing cache
+ *  and memory functions on ARM2, ARM250 and ARM3 processors.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/asm_offsets.h>
+#include <asm/procinfo.h>
+#include <asm/ptrace.h>
+
+/*
+ * MEMC workhorse code.  It's both a horse which things it's a pig.
+ */
+/*
+ * Function: cpu_memc_update_entry(pgd_t *pgd, unsigned long phys_pte, unsigned long addr)
+ * Params  : pgd	Page tables/MEMC mapping
+ *         : phys_pte	physical address, or PTE
+ *         : addr	virtual address
+ */
+ENTRY(cpu_memc_update_entry)
+		tst	r1, #PAGE_PRESENT		@ is the page present
+		orreq	r1, r1, #PAGE_OLD | PAGE_CLEAN
+		moveq	r2, #0x01f00000
+		mov	r3, r1, lsr #13			@ convert to physical page nr
+		and	r3, r3, #0x3fc
+		adr	ip, memc_phys_table_32
+		ldr	r3, [ip, r3]
+		tst	r1, #PAGE_OLD | PAGE_NOT_USER
+		biceq	r3, r3, #0x200
+		tsteq	r1, #PAGE_READONLY | PAGE_CLEAN
+		biceq	r3, r3, #0x300
+		mov	r2, r2, lsr #15			@ virtual -> nr
+		orr	r3, r3, r2, lsl #15
+		and	r2, r2, #0x300
+		orr	r3, r3, r2, lsl #2
+		and	r2, r3, #255
+		sub	r0, r0, #256 * 4
+		str	r3, [r0, r2, lsl #2]
+		strb	r3, [r3]
+		movs	pc, lr
+/*
+ * Params  : r0 = preserved
+ *         : r1 = memc table base (preserved)
+ *         : r2 = page table entry
+ *         : r3 = preserved
+ *         : r4 = unused
+ *         : r5 = memc physical address translation table
+ *         : ip = virtual address (preserved)
+ */
+update_pte:
+		mov	r4, r2, lsr #13
+		and	r4, r4, #0x3fc
+		ldr	r4, [r5, r4]			@ covert to MEMC page
+
+		tst	r2, #PAGE_OLD | PAGE_NOT_USER	@ check for MEMC read
+		biceq	r4, r4, #0x200
+		tsteq	r2, #PAGE_READONLY | PAGE_CLEAN	@ check for MEMC write
+		biceq	r4, r4, #0x300
+
+		orr	r4, r4, ip
+		and	r2, ip, #0x01800000
+		orr	r4, r4, r2, lsr #13
+
+		and	r2, r4, #255
+		str	r4, [r1, r2, lsl #2]
+		movs	pc, lr
+
+/*
+ * Params  : r0 = preserved
+ *         : r1 = memc table base (preserved)
+ *         : r2 = page table base
+ *         : r3 = preserved
+ *         : r4 = unused
+ *         : r5 = memc physical address translation table
+ *         : ip = virtual address (updated)
+ */
+update_pte_table:
+		stmfd	sp!, {r0, lr}
+		bic	r0, r2, #3
+1:		ldr	r2, [r0], #4			@ get entry
+		tst	r2, #PAGE_PRESENT		@ page present
+		blne	update_pte			@ process pte
+		add	ip, ip, #32768			@ increment virt addr
+		ldr	r2, [r0], #4			@ get entry
+		tst	r2, #PAGE_PRESENT		@ page present
+		blne	update_pte			@ process pte
+		add	ip, ip, #32768			@ increment virt addr
+		ldr	r2, [r0], #4			@ get entry
+		tst	r2, #PAGE_PRESENT		@ page present
+		blne	update_pte			@ process pte
+		add	ip, ip, #32768			@ increment virt addr
+		ldr	r2, [r0], #4			@ get entry
+		tst	r2, #PAGE_PRESENT		@ page present
+		blne	update_pte			@ process pte
+		add	ip, ip, #32768			@ increment virt addr
+		tst	ip, #32768 * 31			@ finished?
+		bne	1b
+		ldmfd	sp!, {r0, pc}^
+
+/*
+ * Function: cpu_memc_update_all(pgd_t *pgd)
+ * Params  : pgd	Page tables/MEMC mapping
+ * Notes   : this is optimised for 32k pages
+ */
+ENTRY(cpu_memc_update_all)
+		stmfd	sp!, {r4, r5, lr}
+		bl	clear_tables
+		sub	r1, r0, #256 * 4		@ start of MEMC tables
+		adr	r5, memc_phys_table_32		@ Convert to logical page number
+		mov	ip, #0				@ virtual address
+1:		ldmia	r0!, {r2, r3}                   @ load two pgd entries
+		tst	r2, #PAGE_PRESENT               @ is pgd entry present?
+		addeq	ip, ip, #1048576        @FIXME - PAGE_PRESENT is for PTEs technically...
+		blne	update_pte_table
+		mov	r2, r3
+		tst	r2, #PAGE_PRESENT		@ is pgd entry present?
+		addeq	ip, ip, #1048576
+		blne	update_pte_table
+		teq	ip, #32 * 1048576
+		bne	1b
+		ldmfd	sp!, {r4, r5, pc}^
+
+/*
+ * Build the table to map from physical page number to memc page number
+ */
+		.type	memc_phys_table_32, #object
+memc_phys_table_32:
+		.irp	b7, 0x00, 0x80
+		.irp	b6, 0x00, 0x02
+		.irp	b5, 0x00, 0x04
+		.irp	b4, 0x00, 0x01
+
+		.irp	b3, 0x00, 0x40
+		.irp	b2, 0x00, 0x20
+		.irp	b1, 0x00, 0x10
+		.irp	b0, 0x00, 0x08
+		.long	0x03800300 + \b7 + \b6 + \b5 + \b4 + \b3 + \b2 + \b1 + \b0
+		.endr
+		.endr
+		.endr
+		.endr
+
+		.endr
+		.endr
+		.endr
+		.endr
+		.size	memc_phys_table_32, . - memc_phys_table_32
+
+/*
+ * helper for cpu_memc_update_all, this clears out all
+ * mappings, setting them close to the top of memory,
+ * and inaccessible (0x01f00000).
+ * Params  : r0 = page table pointer
+ */
+clear_tables:	ldr	r1, _arm3_set_pgd - 4
+		ldr	r2, [r1]
+		sub	r1, r0, #256 * 4		@ start of MEMC tables
+		add	r2, r1, r2, lsl #2		@ end of tables
+		mov	r3, #0x03f00000			@ Default mapping (null mapping)
+		orr	r3, r3, #0x00000f00
+		orr	r4, r3, #1
+		orr	r5, r3, #2
+		orr	ip, r3, #3
+1:		stmia	r1!, {r3, r4, r5, ip}
+		add	r3, r3, #4
+		add	r4, r4, #4
+		add	r5, r5, #4
+		add	ip, ip, #4
+		stmia	r1!, {r3, r4, r5, ip}
+		add	r3, r3, #4
+		add	r4, r4, #4
+		add	r5, r5, #4
+		add	ip, ip, #4
+		teq	r1, r2
+		bne	1b
+		mov	pc, lr
+
+/*
+ * Function: *_set_pgd(pgd_t *pgd)
+ * Params  : pgd	New page tables/MEMC mapping
+ * Purpose : update MEMC hardware with new mapping
+ */
+		.word	page_nr   @ extern - declared in mm-memc.c
+_arm3_set_pgd:	mcr	p15, 0, r1, c1, c0, 0		@ flush cache
+_arm2_set_pgd:	stmfd	sp!, {lr}
+		ldr	r1, _arm3_set_pgd - 4
+		ldr	r2, [r1]
+		sub	r0, r0, #256 * 4		@ start of MEMC tables
+		add	r1, r0, r2, lsl #2		@ end of tables
+1:		ldmia	r0!, {r2, r3, ip, lr}
+		strb	r2, [r2]
+		strb	r3, [r3]
+		strb	ip, [ip]
+		strb	lr, [lr]
+		ldmia	r0!, {r2, r3, ip, lr}
+		strb	r2, [r2]
+		strb	r3, [r3]
+		strb	ip, [ip]
+		strb	lr, [lr]
+		teq	r0, r1
+		bne	1b
+		ldmfd	sp!, {pc}^
+
+/*
+ * Function: *_proc_init (void)
+ * Purpose : Initialise the cache control registers
+ */
+_arm3_proc_init:
+		mov	r0, #0x001f0000
+		orr	r0, r0, #0x0000ff00
+		orr	r0, r0, #0x000000ff
+		mcr	p15, 0, r0, c3, c0		@ ARM3 Cacheable
+		mcr     p15, 0, r0, c4, c0		@ ARM3 Updateable
+		mov	r0, #0
+		mcr     p15, 0, r0, c5, c0		@ ARM3 Disruptive
+		mcr     p15, 0, r0, c1, c0		@ ARM3 Flush
+		mov	r0, #3
+		mcr     p15, 0, r0, c2, c0		@ ARM3 Control
+_arm2_proc_init:
+		movs	pc, lr
+
+/*
+ * Function: *_proc_fin (void)
+ * Purpose : Finalise processor (disable caches)
+ */
+_arm3_proc_fin:	mov	r0, #2
+		mcr	p15, 0, r0, c2, c0
+_arm2_proc_fin:	orrs	pc, lr, #PSR_I_BIT|PSR_F_BIT
+
+/*
+ * Function: *_xchg_1 (int new, volatile void *ptr)
+ * Params  : new	New value to store at...
+ *	   : ptr	pointer to byte-wide location
+ * Purpose : Performs an exchange operation
+ * Returns : Original byte data at 'ptr'
+ */
+_arm2_xchg_1:	mov	r2, pc
+		orr	r2, r2, #PSR_I_BIT
+		teqp	r2, #0
+		ldrb	r2, [r1]
+		strb	r0, [r1]
+		mov	r0, r2
+		movs	pc, lr
+
+_arm3_xchg_1:	swpb	r0, r0, [r1]
+		movs	pc, lr
+
+/*
+ * Function: *_xchg_4 (int new, volatile void *ptr)
+ * Params  : new	New value to store at...
+ *	   : ptr	pointer to word-wide location
+ * Purpose : Performs an exchange operation
+ * Returns : Original word data at 'ptr'
+ */
+_arm2_xchg_4:	mov	r2, pc
+		orr	r2, r2, #PSR_I_BIT
+		teqp	r2, #0
+		ldr	r2, [r1]
+		str	r0, [r1]
+		mov	r0, r2
+		movs	pc, lr
+
+_arm3_xchg_4:	swp	r0, r0, [r1]
+		movs	pc, lr
+
+_arm2_3_check_bugs:
+		bics	pc, lr, #PSR_F_BIT		@ Clear FIQ disable bit
+
+armvlsi_name:	.asciz	"ARM/VLSI"
+_arm2_name:	.asciz	"ARM 2"
+_arm250_name:	.asciz	"ARM 250"
+_arm3_name:	.asciz	"ARM 3"
+
+		.section ".init.text", #alloc, #execinstr
+/*
+ * Purpose : Function pointers used to access above functions - all calls
+ *	     come through these
+ */
+		.globl	arm2_processor_functions
+arm2_processor_functions:
+		.word	_arm2_3_check_bugs
+		.word	_arm2_proc_init
+		.word	_arm2_proc_fin
+		.word	_arm2_set_pgd
+		.word	_arm2_xchg_1
+		.word	_arm2_xchg_4
+
+cpu_arm2_info:
+		.long	armvlsi_name
+		.long	_arm2_name
+
+		.globl	arm250_processor_functions
+arm250_processor_functions:
+		.word	_arm2_3_check_bugs
+		.word	_arm2_proc_init
+		.word	_arm2_proc_fin
+		.word	_arm2_set_pgd
+		.word	_arm3_xchg_1
+		.word	_arm3_xchg_4
+
+cpu_arm250_info:
+		.long	armvlsi_name
+		.long	_arm250_name
+
+		.globl	arm3_processor_functions
+arm3_processor_functions:
+		.word	_arm2_3_check_bugs
+		.word	_arm3_proc_init
+		.word	_arm3_proc_fin
+		.word	_arm3_set_pgd
+		.word	_arm3_xchg_1
+		.word	_arm3_xchg_4
+
+cpu_arm3_info:
+		.long	armvlsi_name
+		.long	_arm3_name
+
+arm2_arch_name:	.asciz	"armv1"
+arm3_arch_name:	.asciz	"armv2"
+arm2_elf_name:	.asciz	"v1"
+arm3_elf_name:	.asciz	"v2"
+		.align
+
+		.section ".proc.info", #alloc, #execinstr
+
+		.long	0x41560200
+		.long	0xfffffff0
+		.long	arm2_arch_name
+		.long	arm2_elf_name
+		.long   0
+		.long	cpu_arm2_info
+		.long	arm2_processor_functions
+
+		.long	0x41560250
+		.long	0xfffffff0
+		.long	arm3_arch_name
+		.long	arm3_elf_name
+		.long   0
+		.long	cpu_arm250_info
+		.long	arm250_processor_functions
+
+		.long	0x41560300
+		.long	0xfffffff0
+		.long	arm3_arch_name
+		.long	arm3_elf_name
+		.long   0
+		.long	cpu_arm3_info
+		.long	arm3_processor_functions
+
diff --git a/arch/arm26/mm/small_page.c b/arch/arm26/mm/small_page.c
new file mode 100644
index 00000000000..77be86cca78
--- /dev/null
+++ b/arch/arm26/mm/small_page.c
@@ -0,0 +1,194 @@
+/*
+ *  linux/arch/arm26/mm/small_page.c
+ *
+ *  Copyright (C) 1996  Russell King
+ *  Copyright (C) 2003  Ian Molton
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Changelog:
+ *   26/01/1996	RMK	Cleaned up various areas to make little more generic
+ *   07/02/1999	RMK	Support added for 16K and 32K page sizes
+ *			containing 8K blocks
+ *   23/05/2004 IM	Fixed to use struct page->lru (thanks wli)
+ *
+ */
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/bitops.h>
+
+#include <asm/pgtable.h>
+
+#define PEDANTIC
+
+/*
+ * Requirement:
+ *  We need to be able to allocate naturally aligned memory of finer
+ *  granularity than the page size.  This is typically used for the
+ *  second level page tables on 32-bit ARMs.
+ *
+ * FIXME - this comment is *out of date*
+ * Theory:
+ *  We "misuse" the Linux memory management system.  We use alloc_page
+ *  to allocate a page and then mark it as reserved.  The Linux memory
+ *  management system will then ignore the "offset", "next_hash" and
+ *  "pprev_hash" entries in the mem_map for this page.
+ *
+ *  We then use a bitstring in the "offset" field to mark which segments
+ *  of the page are in use, and manipulate this as required during the
+ *  allocation and freeing of these small pages.
+ *
+ *  We also maintain a queue of pages being used for this purpose using
+ *  the "next_hash" and "pprev_hash" entries of mem_map;
+ */
+
+struct order {
+	struct list_head queue;
+	unsigned int mask;		/* (1 << shift) - 1		*/
+	unsigned int shift;		/* (1 << shift) size of page	*/
+	unsigned int block_mask;	/* nr_blocks - 1		*/
+	unsigned int all_used;		/* (1 << nr_blocks) - 1		*/
+};
+
+
+static struct order orders[] = {
+#if PAGE_SIZE == 32768
+	{ LIST_HEAD_INIT(orders[0].queue), 2047, 11, 15, 0x0000ffff },
+	{ LIST_HEAD_INIT(orders[1].queue), 8191, 13,  3, 0x0000000f }
+#else
+#error unsupported page size (ARGH!)
+#endif
+};
+
+#define USED_MAP(pg)			((pg)->index)
+#define TEST_AND_CLEAR_USED(pg,off)	(test_and_clear_bit(off, &USED_MAP(pg)))
+#define SET_USED(pg,off)		(set_bit(off, &USED_MAP(pg)))
+
+static DEFINE_SPINLOCK(small_page_lock);
+
+static unsigned long __get_small_page(int priority, struct order *order)
+{
+	unsigned long flags;
+	struct page *page;
+	int offset;
+
+	do {
+		spin_lock_irqsave(&small_page_lock, flags);
+
+		if (list_empty(&order->queue))
+			goto need_new_page;
+
+		page = list_entry(order->queue.next, struct page, lru);
+again:
+#ifdef PEDANTIC
+		if (USED_MAP(page) & ~order->all_used)
+			PAGE_BUG(page);
+#endif
+		offset = ffz(USED_MAP(page));
+		SET_USED(page, offset);
+		if (USED_MAP(page) == order->all_used)
+			list_del_init(&page->lru);
+		spin_unlock_irqrestore(&small_page_lock, flags);
+
+		return (unsigned long) page_address(page) + (offset << order->shift);
+
+need_new_page:
+		spin_unlock_irqrestore(&small_page_lock, flags);
+		page = alloc_page(priority);
+		spin_lock_irqsave(&small_page_lock, flags);
+
+		if (list_empty(&order->queue)) {
+			if (!page)
+				goto no_page;
+			SetPageReserved(page);
+			USED_MAP(page) = 0;
+			list_add(&page->lru, &order->queue);
+			goto again;
+		}
+
+		spin_unlock_irqrestore(&small_page_lock, flags);
+		__free_page(page);
+	} while (1);
+
+no_page:
+	spin_unlock_irqrestore(&small_page_lock, flags);
+	return 0;
+}
+
+static void __free_small_page(unsigned long spage, struct order *order)
+{
+	unsigned long flags;
+	struct page *page;
+
+	if (virt_addr_valid(spage)) {
+		page = virt_to_page(spage);
+
+		/*
+		 * The container-page must be marked Reserved
+		 */
+		if (!PageReserved(page) || spage & order->mask)
+			goto non_small;
+
+#ifdef PEDANTIC
+		if (USED_MAP(page) & ~order->all_used)
+			PAGE_BUG(page);
+#endif
+
+		spage = spage >> order->shift;
+		spage &= order->block_mask;
+
+		/*
+		 * the following must be atomic wrt get_page
+		 */
+		spin_lock_irqsave(&small_page_lock, flags);
+
+		if (USED_MAP(page) == order->all_used)
+			list_add(&page->lru, &order->queue);
+
+		if (!TEST_AND_CLEAR_USED(page, spage))
+			goto already_free;
+
+		if (USED_MAP(page) == 0)
+			goto free_page;
+
+		spin_unlock_irqrestore(&small_page_lock, flags);
+	}
+	return;
+
+free_page:
+	/*
+	 * unlink the page from the small page queue and free it
+	 */
+	list_del_init(&page->lru);
+	spin_unlock_irqrestore(&small_page_lock, flags);
+	ClearPageReserved(page);
+	__free_page(page);
+	return;
+
+non_small:
+	printk("Trying to free non-small page from %p\n", __builtin_return_address(0));
+	return;
+already_free:
+	printk("Trying to free free small page from %p\n", __builtin_return_address(0));
+}
+
+unsigned long get_page_8k(int priority)
+{
+	return __get_small_page(priority, orders+1);
+}
+
+void free_page_8k(unsigned long spage)
+{
+	__free_small_page(spage, orders+1);
+}
diff --git a/arch/arm26/nwfpe/ARM-gcc.h b/arch/arm26/nwfpe/ARM-gcc.h
new file mode 100644
index 00000000000..e6598470b07
--- /dev/null
+++ b/arch/arm26/nwfpe/ARM-gcc.h
@@ -0,0 +1,120 @@
+/*
+-------------------------------------------------------------------------------
+The macro `BITS64' can be defined to indicate that 64-bit integer types are
+supported by the compiler.
+-------------------------------------------------------------------------------
+*/
+#define BITS64
+
+/*
+-------------------------------------------------------------------------------
+Each of the following `typedef's defines the most convenient type that holds
+integers of at least as many bits as specified.  For example, `uint8' should
+be the most convenient type that can hold unsigned integers of as many as
+8 bits.  The `flag' type must be able to hold either a 0 or 1.  For most
+implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed
+to the same as `int'.
+-------------------------------------------------------------------------------
+*/
+typedef char flag;
+typedef unsigned char uint8;
+typedef signed char int8;
+typedef int uint16;
+typedef int int16;
+typedef unsigned int uint32;
+typedef signed int int32;
+#ifdef BITS64
+typedef unsigned long long int bits64;
+typedef signed long long int sbits64;
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Each of the following `typedef's defines a type that holds integers
+of _exactly_ the number of bits specified.  For instance, for most
+implementation of C, `bits16' and `sbits16' should be `typedef'ed to
+`unsigned short int' and `signed short int' (or `short int'), respectively.
+-------------------------------------------------------------------------------
+*/
+typedef unsigned char bits8;
+typedef signed char sbits8;
+typedef unsigned short int bits16;
+typedef signed short int sbits16;
+typedef unsigned int bits32;
+typedef signed int sbits32;
+#ifdef BITS64
+typedef unsigned long long int uint64;
+typedef signed long long int int64;
+#endif
+
+#ifdef BITS64
+/*
+-------------------------------------------------------------------------------
+The `LIT64' macro takes as its argument a textual integer literal and if
+necessary ``marks'' the literal as having a 64-bit integer type.  For
+example, the Gnu C Compiler (`gcc') requires that 64-bit literals be
+appended with the letters `LL' standing for `long long', which is `gcc's
+name for the 64-bit integer type.  Some compilers may allow `LIT64' to be
+defined as the identity macro:  `#define LIT64( a ) a'.
+-------------------------------------------------------------------------------
+*/
+#define LIT64( a ) a##LL
+#endif
+
+/*
+-------------------------------------------------------------------------------
+The macro `INLINE' can be used before functions that should be inlined.  If
+a compiler does not support explicit inlining, this macro should be defined
+to be `static'.
+-------------------------------------------------------------------------------
+*/
+#define INLINE extern __inline__
+
+
+/* For use as a GCC soft-float library we need some special function names. */
+
+#ifdef __LIBFLOAT__
+
+/* Some 32-bit ops can be mapped straight across by just changing the name. */
+#define float32_add			__addsf3
+#define float32_sub			__subsf3
+#define float32_mul			__mulsf3
+#define float32_div			__divsf3
+#define int32_to_float32		__floatsisf
+#define float32_to_int32_round_to_zero	__fixsfsi
+#define float32_to_uint32_round_to_zero	__fixunssfsi
+
+/* These ones go through the glue code.  To avoid namespace pollution
+   we rename the internal functions too.  */
+#define float32_eq			___float32_eq
+#define float32_le			___float32_le
+#define float32_lt			___float32_lt
+
+/* All the 64-bit ops have to go through the glue, so we pull the same
+   trick.  */
+#define float64_add			___float64_add
+#define float64_sub			___float64_sub
+#define float64_mul			___float64_mul
+#define float64_div			___float64_div
+#define int32_to_float64		___int32_to_float64
+#define float64_to_int32_round_to_zero	___float64_to_int32_round_to_zero
+#define float64_to_uint32_round_to_zero	___float64_to_uint32_round_to_zero
+#define float64_to_float32		___float64_to_float32
+#define float32_to_float64		___float32_to_float64
+#define float64_eq			___float64_eq
+#define float64_le			___float64_le
+#define float64_lt			___float64_lt
+
+#if 0
+#define float64_add			__adddf3
+#define float64_sub			__subdf3
+#define float64_mul			__muldf3
+#define float64_div			__divdf3
+#define int32_to_float64		__floatsidf
+#define float64_to_int32_round_to_zero	__fixdfsi
+#define float64_to_uint32_round_to_zero	__fixunsdfsi
+#define float64_to_float32		__truncdfsf2
+#define float32_to_float64		__extendsfdf2
+#endif
+
+#endif
diff --git a/arch/arm26/nwfpe/ChangeLog b/arch/arm26/nwfpe/ChangeLog
new file mode 100644
index 00000000000..0c580f764ba
--- /dev/null
+++ b/arch/arm26/nwfpe/ChangeLog
@@ -0,0 +1,83 @@
+2002-01-19  Russell King <rmk@arm.linux.org.uk>
+
+	* fpa11.h - Add documentation
+	          - remove userRegisters pointer from this structure.
+	          - add new method to obtain integer register values.
+	* softfloat.c - Remove float128
+	* softfloat.h - Remove float128
+	* softfloat-specialize - Remove float128
+
+	* The FPA11 structure is not a kernel-specific data structure.
+	  It is used by users of ptrace to examine the values of the
+	  floating point registers.  Therefore, any changes to the
+	  FPA11 structure (size or position of elements contained
+	  within) have to be well thought out.
+
+	* Since 128-bit float requires the FPA11 structure to change
+	  size, it has been removed.  128-bit float is currently unused,
+	  and needs various things to be re-worked so that we won't
+	  overflow the available space in the task structure.
+
+	* The changes are designed to break any patch that goes on top
+	  of this code, so that the authors properly review their changes.
+
+1999-08-19  Scott Bambrough  <scottb@netwinder.org>
+
+	* fpmodule.c - Changed version number to 0.95
+	* fpa11.h - modified FPA11, FPREG structures
+	* fpa11.c - Changes due to FPA11, FPREG structure alterations.
+	* fpa11_cpdo.c - Changes due to FPA11, FPREG structure alterations.
+	* fpa11_cpdt.c - Changes due to FPA11, FPREG structure alterations.
+	* fpa11_cprt.c - Changes due to FPA11, FPREG structure alterations.
+	* single_cpdo.c - Changes due to FPA11, FPREG structure alterations.
+	* double_cpdo.c - Changes due to FPA11, FPREG structure alterations.
+	* extended_cpdo.c - Changes due to FPA11, FPREG structure alterations.
+
+	* I discovered several bugs.  First and worst is that the kernel
+	  passes in a pointer to the FPE's state area.	This is defined
+	  as a struct user_fp (see user.h).  This pointer was cast to a
+	  FPA11*.  Unfortunately FPA11 and user_fp are of different sizes;
+	  user_fp is smaller.  This meant that the FPE scribbled on things
+	  below its area, which is bad, as the area is in the thread_struct
+	  embedded in the process task structure.  Thus we were scribbling
+	  over one of the most important structures in the entire OS.
+
+	* user_fp and FPA11 have now been harmonized.  Most of the changes
+	  in the above code were dereferencing problems due to moving the
+	  register type out of FPREG, and getting rid of the union variable
+	  fpvalue.
+
+	* Second I noticed resetFPA11 was not always being called for a
+	  task.  This should happen on the first floating point exception
+	  that occurs.	It is controlled by init_flag in FPA11.  The
+	  comment in the code beside init_flag state the kernel guarantees
+	  this to be zero.  Not so.  I found that the kernel recycles task
+	  structures, and that recycled ones may not have init_flag zeroed.
+	  I couldn't even find anything that guarantees it is zeroed when
+	  when the task structure is initially allocated.  In any case
+	  I now initialize the entire FPE state in the thread structure to
+	  zero when allocated and recycled.  See alloc_task_struct() and
+	  flush_thread() in arch/arm/process.c.  The change to
+	  alloc_task_struct() may not be necessary, but I left it in for
+	  completeness (better safe than sorry).
+
+1998-11-23  Scott Bambrough  <scottb@netwinder.org>
+
+	* README.FPE - fix typo in description of lfm/sfm instructions
+	* NOTES - Added file to describe known bugs/problems 
+	* fpmodule.c - Changed version number to 0.94
+
+1998-11-20  Scott Bambrough  <scottb@netwinder.org>
+
+	* README.FPE - fix description of URD, NRM instructions
+	* TODO - remove URD, NRM instructions from TODO list
+	* single_cpdo.c - implement URD, NRM
+	* double_cpdo.c - implement URD, NRM
+	* extended_cpdo.c - implement URD, NRM
+
+1998-11-19  Scott Bambrough  <scottb@netwinder.org>
+
+	* ChangeLog - Added this file to track changes made.
+	* fpa11.c - added code to initialize register types to typeNone
+	* fpa11_cpdt.c - fixed bug in storeExtended (typeExtended changed to
+	  typeDouble in switch statement)
diff --git a/arch/arm26/nwfpe/Makefile b/arch/arm26/nwfpe/Makefile
new file mode 100644
index 00000000000..b39d34dff05
--- /dev/null
+++ b/arch/arm26/nwfpe/Makefile
@@ -0,0 +1,15 @@
+#
+# Copyright (C) 1998, 1999, 2001 Philip Blundell
+#
+
+obj-y			:=
+obj-m			:=
+obj-n			:=
+
+obj-$(CONFIG_FPE_NWFPE)	+= nwfpe.o
+
+nwfpe-objs		:= fpa11.o fpa11_cpdo.o fpa11_cpdt.o fpa11_cprt.o \
+			   fpmodule.o fpopcode.o softfloat.o \
+			   single_cpdo.o double_cpdo.o extended_cpdo.o \
+			   entry.o
+
diff --git a/arch/arm26/nwfpe/double_cpdo.c b/arch/arm26/nwfpe/double_cpdo.c
new file mode 100644
index 00000000000..7f4fef0216c
--- /dev/null
+++ b/arch/arm26/nwfpe/double_cpdo.c
@@ -0,0 +1,288 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.COM, 1998,1999
+
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "fpa11.h"
+#include "softfloat.h"
+#include "fpopcode.h"
+
+float64 float64_exp(float64 Fm);
+float64 float64_ln(float64 Fm);
+float64 float64_sin(float64 rFm);
+float64 float64_cos(float64 rFm);
+float64 float64_arcsin(float64 rFm);
+float64 float64_arctan(float64 rFm);
+float64 float64_log(float64 rFm);
+float64 float64_tan(float64 rFm);
+float64 float64_arccos(float64 rFm);
+float64 float64_pow(float64 rFn,float64 rFm);
+float64 float64_pol(float64 rFn,float64 rFm);
+
+unsigned int DoubleCPDO(const unsigned int opcode)
+{
+   FPA11 *fpa11 = GET_FPA11();
+   float64 rFm, rFn = 0; //FIXME - should be zero?
+   unsigned int Fd, Fm, Fn, nRc = 1;
+
+   //printk("DoubleCPDO(0x%08x)\n",opcode);
+   
+   Fm = getFm(opcode);
+   if (CONSTANT_FM(opcode))
+   {
+     rFm = getDoubleConstant(Fm);
+   }
+   else
+   {  
+     switch (fpa11->fType[Fm])
+     {
+        case typeSingle:
+          rFm = float32_to_float64(fpa11->fpreg[Fm].fSingle);
+        break;
+
+        case typeDouble:
+          rFm = fpa11->fpreg[Fm].fDouble;
+          break;
+
+        case typeExtended:
+            // !! patb
+	    //printk("not implemented! why not?\n");
+            //!! ScottB
+            // should never get here, if extended involved
+            // then other operand should be promoted then
+            // ExtendedCPDO called.
+            break;
+
+        default: return 0;
+     }
+   }
+
+   if (!MONADIC_INSTRUCTION(opcode))
+   {
+      Fn = getFn(opcode);
+      switch (fpa11->fType[Fn])
+      {
+        case typeSingle:
+          rFn = float32_to_float64(fpa11->fpreg[Fn].fSingle);
+        break;
+
+        case typeDouble:
+          rFn = fpa11->fpreg[Fn].fDouble;
+        break;
+        
+        default: return 0;
+      }
+   }
+
+   Fd = getFd(opcode);
+   /* !! this switch isn't optimized; better (opcode & MASK_ARITHMETIC_OPCODE)>>24, sort of */
+   switch (opcode & MASK_ARITHMETIC_OPCODE)
+   {
+      /* dyadic opcodes */
+      case ADF_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_add(rFn,rFm);
+      break;
+
+      case MUF_CODE:
+      case FML_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_mul(rFn,rFm);
+      break;
+
+      case SUF_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_sub(rFn,rFm);
+      break;
+
+      case RSF_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_sub(rFm,rFn);
+      break;
+
+      case DVF_CODE:
+      case FDV_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_div(rFn,rFm);
+      break;
+
+      case RDF_CODE:
+      case FRD_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_div(rFm,rFn);
+      break;
+
+#if 0
+      case POW_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_pow(rFn,rFm);
+      break;
+
+      case RPW_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_pow(rFm,rFn);
+      break;
+#endif
+
+      case RMF_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_rem(rFn,rFm);
+      break;
+
+#if 0
+      case POL_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_pol(rFn,rFm);
+      break;
+#endif
+
+      /* monadic opcodes */
+      case MVF_CODE:
+         fpa11->fpreg[Fd].fDouble = rFm;
+      break;
+
+      case MNF_CODE:
+      {
+         unsigned int *p = (unsigned int*)&rFm;
+         p[1] ^= 0x80000000;
+         fpa11->fpreg[Fd].fDouble = rFm;
+      }
+      break;
+
+      case ABS_CODE:
+      {
+         unsigned int *p = (unsigned int*)&rFm;
+         p[1] &= 0x7fffffff;
+         fpa11->fpreg[Fd].fDouble = rFm;
+      }
+      break;
+
+      case RND_CODE:
+      case URD_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_round_to_int(rFm);
+      break;
+
+      case SQT_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_sqrt(rFm);
+      break;
+
+#if 0
+      case LOG_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_log(rFm);
+      break;
+
+      case LGN_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_ln(rFm);
+      break;
+
+      case EXP_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_exp(rFm);
+      break;
+
+      case SIN_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_sin(rFm);
+      break;
+
+      case COS_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_cos(rFm);
+      break;
+
+      case TAN_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_tan(rFm);
+      break;
+
+      case ASN_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_arcsin(rFm);
+      break;
+
+      case ACS_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_arccos(rFm);
+      break;
+
+      case ATN_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_arctan(rFm);
+      break;
+#endif
+
+      case NRM_CODE:
+      break;
+      
+      default:
+      {
+        nRc = 0;
+      }
+   }
+
+   if (0 != nRc) fpa11->fType[Fd] = typeDouble;
+   return nRc;
+}
+
+#if 0
+float64 float64_exp(float64 rFm)
+{
+  return rFm;
+//series
+}
+
+float64 float64_ln(float64 rFm)
+{
+  return rFm;
+//series
+}
+
+float64 float64_sin(float64 rFm)
+{
+  return rFm;
+//series
+}
+
+float64 float64_cos(float64 rFm)
+{
+   return rFm;
+   //series
+}
+
+#if 0
+float64 float64_arcsin(float64 rFm)
+{
+//series
+}
+
+float64 float64_arctan(float64 rFm)
+{
+  //series
+}
+#endif
+
+float64 float64_log(float64 rFm)
+{
+  return float64_div(float64_ln(rFm),getDoubleConstant(7));
+}
+
+float64 float64_tan(float64 rFm)
+{
+  return float64_div(float64_sin(rFm),float64_cos(rFm));
+}
+
+float64 float64_arccos(float64 rFm)
+{
+return rFm;
+   //return float64_sub(halfPi,float64_arcsin(rFm));
+}
+
+float64 float64_pow(float64 rFn,float64 rFm)
+{
+  return float64_exp(float64_mul(rFm,float64_ln(rFn))); 
+}
+
+float64 float64_pol(float64 rFn,float64 rFm)
+{
+  return float64_arctan(float64_div(rFn,rFm)); 
+}
+#endif
diff --git a/arch/arm26/nwfpe/entry.S b/arch/arm26/nwfpe/entry.S
new file mode 100644
index 00000000000..7d6dfaad80c
--- /dev/null
+++ b/arch/arm26/nwfpe/entry.S
@@ -0,0 +1,114 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.COM, 1998
+    (c) Philip Blundell 1998-1999
+
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include <asm/asm_offsets.h>
+
+/* This is the kernel's entry point into the floating point emulator.
+It is called from the kernel with code similar to this:
+
+	mov	fp, #0
+	teqp	pc, #PSR_I_BIT | MODE_SVC
+	ldr	r4, .LC2
+	ldr	pc, [r4]		@ Call FP module USR entry point
+
+The kernel expects the emulator to return via one of two possible
+points of return it passes to the emulator.  The emulator, if
+successful in its emulation, jumps to ret_from_exception and the
+kernel takes care of returning control from the trap to the user code.
+If the emulator is unable to emulate the instruction, it returns to
+fpundefinstr and the kernel halts the user program with a core dump.
+
+This routine does four things:
+
+1) It saves SP into a variable called userRegisters.  The kernel has
+created a struct pt_regs on the stack and saved the user registers
+into it.  See /usr/include/asm/proc/ptrace.h for details.  The
+emulator code uses userRegisters as the base of an array of words from
+which the contents of the registers can be extracted.
+
+2) It locates the FP emulator work area within the TSS structure and
+points `fpa11' to it.
+
+3) It calls EmulateAll to emulate a floating point instruction.
+EmulateAll returns 1 if the emulation was successful, or 0 if not.
+
+4) If an instruction has been emulated successfully, it looks ahead at
+the next instruction.  If it is a floating point instruction, it
+executes the instruction, without returning to user space.  In this
+way it repeatedly looks ahead and executes floating point instructions
+until it encounters a non floating point instruction, at which time it
+returns via _fpreturn.
+
+This is done to reduce the effect of the trap overhead on each
+floating point instructions.  GCC attempts to group floating point
+instructions to allow the emulator to spread the cost of the trap over
+several floating point instructions.  */
+
+	.globl	nwfpe_enter
+nwfpe_enter:
+	mov	sl, sp
+	bl	FPA11_CheckInit		@ check to see if we are initialised
+
+	ldr	r5, [sp, #60]		@ get contents of PC
+	bic	r5, r5, #0xfc000003
+	ldr	r0, [r5, #-4]		@ get actual instruction into r0
+	bl	EmulateAll		@ emulate the instruction
+1:	cmp	r0, #0			@ was emulation successful
+	beq	fpundefinstr		@ no, return failure
+
+next:
+.Lx1:	ldrt	r6, [r5], #4		@ get the next instruction and
+					@ increment PC
+
+	and	r2, r6, #0x0F000000	@ test for FP insns
+	teq	r2, #0x0C000000
+	teqne	r2, #0x0D000000
+	teqne	r2, #0x0E000000
+	bne	ret_from_exception	@ return ok if not a fp insn
+
+	ldr	r9, [sp, #60]		@ get new condition codes
+	and	r9, r9, #0xfc000003
+	orr	r7, r5, r9
+	str	r7, [sp, #60]		@ update PC copy in regs
+
+	mov	r0, r6			@ save a copy
+	mov	r1, r9			@ fetch the condition codes
+	bl	checkCondition		@ check the condition
+	cmp	r0, #0			@ r0 = 0 ==> condition failed
+
+	@ if condition code failed to match, next insn
+	beq	next			@ get the next instruction;
+	    
+	mov	r0, r6			@ prepare for EmulateAll()
+	adr	lr, 1b
+	orr	lr, lr, #3
+	b	EmulateAll		@ if r0 != 0, goto EmulateAll
+
+.Lret:	b	ret_from_exception	@ let the user eat segfaults
+	
+	@ We need to be prepared for the instruction at .Lx1 to fault.
+	@ Emit the appropriate exception gunk to fix things up.
+	.section __ex_table,"a"
+	.align	3
+	.long	.Lx1
+	ldr	lr, [lr, $(.Lret - .Lx1)/4]
+	.previous
diff --git a/arch/arm26/nwfpe/extended_cpdo.c b/arch/arm26/nwfpe/extended_cpdo.c
new file mode 100644
index 00000000000..331407596d9
--- /dev/null
+++ b/arch/arm26/nwfpe/extended_cpdo.c
@@ -0,0 +1,273 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.COM, 1998,1999
+
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "fpa11.h"
+#include "softfloat.h"
+#include "fpopcode.h"
+
+floatx80 floatx80_exp(floatx80 Fm);
+floatx80 floatx80_ln(floatx80 Fm);
+floatx80 floatx80_sin(floatx80 rFm);
+floatx80 floatx80_cos(floatx80 rFm);
+floatx80 floatx80_arcsin(floatx80 rFm);
+floatx80 floatx80_arctan(floatx80 rFm);
+floatx80 floatx80_log(floatx80 rFm);
+floatx80 floatx80_tan(floatx80 rFm);
+floatx80 floatx80_arccos(floatx80 rFm);
+floatx80 floatx80_pow(floatx80 rFn,floatx80 rFm);
+floatx80 floatx80_pol(floatx80 rFn,floatx80 rFm);
+
+unsigned int ExtendedCPDO(const unsigned int opcode)
+{
+   FPA11 *fpa11 = GET_FPA11();
+   floatx80 rFm, rFn;
+   unsigned int Fd, Fm, Fn, nRc = 1;
+
+   //printk("ExtendedCPDO(0x%08x)\n",opcode);
+   
+   Fm = getFm(opcode);
+   if (CONSTANT_FM(opcode))
+   {
+     rFm = getExtendedConstant(Fm);
+   }
+   else
+   {  
+     switch (fpa11->fType[Fm])
+     {
+        case typeSingle:
+          rFm = float32_to_floatx80(fpa11->fpreg[Fm].fSingle);
+        break;
+
+        case typeDouble:
+          rFm = float64_to_floatx80(fpa11->fpreg[Fm].fDouble);
+        break;
+        
+        case typeExtended:
+          rFm = fpa11->fpreg[Fm].fExtended;
+        break;
+        
+        default: return 0;
+     }
+   }
+   
+   if (!MONADIC_INSTRUCTION(opcode))
+   {
+      Fn = getFn(opcode);
+      switch (fpa11->fType[Fn])
+      {
+        case typeSingle:
+          rFn = float32_to_floatx80(fpa11->fpreg[Fn].fSingle);
+        break;
+
+        case typeDouble:
+          rFn = float64_to_floatx80(fpa11->fpreg[Fn].fDouble);
+        break;
+        
+        case typeExtended:
+          rFn = fpa11->fpreg[Fn].fExtended;
+        break;
+        
+        default: return 0;
+      }
+   }
+
+   Fd = getFd(opcode);
+   switch (opcode & MASK_ARITHMETIC_OPCODE)
+   {
+      /* dyadic opcodes */
+      case ADF_CODE:
+         fpa11->fpreg[Fd].fExtended = floatx80_add(rFn,rFm);
+      break;
+
+      case MUF_CODE:
+      case FML_CODE:
+         fpa11->fpreg[Fd].fExtended = floatx80_mul(rFn,rFm);
+      break;
+
+      case SUF_CODE:
+         fpa11->fpreg[Fd].fExtended = floatx80_sub(rFn,rFm);
+      break;
+
+      case RSF_CODE:
+         fpa11->fpreg[Fd].fExtended = floatx80_sub(rFm,rFn);
+      break;
+
+      case DVF_CODE:
+      case FDV_CODE:
+         fpa11->fpreg[Fd].fExtended = floatx80_div(rFn,rFm);
+      break;
+
+      case RDF_CODE:
+      case FRD_CODE:
+         fpa11->fpreg[Fd].fExtended = floatx80_div(rFm,rFn);
+      break;
+
+#if 0
+      case POW_CODE:
+         fpa11->fpreg[Fd].fExtended = floatx80_pow(rFn,rFm);
+      break;
+
+      case RPW_CODE:
+         fpa11->fpreg[Fd].fExtended = floatx80_pow(rFm,rFn);
+      break;
+#endif
+
+      case RMF_CODE:
+         fpa11->fpreg[Fd].fExtended = floatx80_rem(rFn,rFm);
+      break;
+
+#if 0
+      case POL_CODE:
+         fpa11->fpreg[Fd].fExtended = floatx80_pol(rFn,rFm);
+      break;
+#endif
+
+      /* monadic opcodes */
+      case MVF_CODE:
+         fpa11->fpreg[Fd].fExtended = rFm;
+      break;
+
+      case MNF_CODE:
+         rFm.high ^= 0x8000;
+         fpa11->fpreg[Fd].fExtended = rFm;
+      break;
+
+      case ABS_CODE:
+         rFm.high &= 0x7fff;
+         fpa11->fpreg[Fd].fExtended = rFm;
+      break;
+
+      case RND_CODE:
+      case URD_CODE:
+         fpa11->fpreg[Fd].fExtended = floatx80_round_to_int(rFm);
+      break;
+
+      case SQT_CODE:
+         fpa11->fpreg[Fd].fExtended = floatx80_sqrt(rFm);
+      break;
+
+#if 0
+      case LOG_CODE:
+         fpa11->fpreg[Fd].fExtended = floatx80_log(rFm);
+      break;
+
+      case LGN_CODE:
+         fpa11->fpreg[Fd].fExtended = floatx80_ln(rFm);
+      break;
+
+      case EXP_CODE:
+         fpa11->fpreg[Fd].fExtended = floatx80_exp(rFm);
+      break;
+
+      case SIN_CODE:
+         fpa11->fpreg[Fd].fExtended = floatx80_sin(rFm);
+      break;
+
+      case COS_CODE:
+         fpa11->fpreg[Fd].fExtended = floatx80_cos(rFm);
+      break;
+
+      case TAN_CODE:
+         fpa11->fpreg[Fd].fExtended = floatx80_tan(rFm);
+      break;
+
+      case ASN_CODE:
+         fpa11->fpreg[Fd].fExtended = floatx80_arcsin(rFm);
+      break;
+
+      case ACS_CODE:
+         fpa11->fpreg[Fd].fExtended = floatx80_arccos(rFm);
+      break;
+
+      case ATN_CODE:
+         fpa11->fpreg[Fd].fExtended = floatx80_arctan(rFm);
+      break;
+#endif
+
+      case NRM_CODE:
+      break;
+      
+      default:
+      {
+        nRc = 0;
+      }
+   }
+   
+   if (0 != nRc) fpa11->fType[Fd] = typeExtended;
+   return nRc;
+}
+
+#if 0
+floatx80 floatx80_exp(floatx80 Fm)
+{
+//series
+}
+
+floatx80 floatx80_ln(floatx80 Fm)
+{
+//series
+}
+
+floatx80 floatx80_sin(floatx80 rFm)
+{
+//series
+}
+
+floatx80 floatx80_cos(floatx80 rFm)
+{
+//series
+}
+
+floatx80 floatx80_arcsin(floatx80 rFm)
+{
+//series
+}
+
+floatx80 floatx80_arctan(floatx80 rFm)
+{
+  //series
+}
+
+floatx80 floatx80_log(floatx80 rFm)
+{
+  return floatx80_div(floatx80_ln(rFm),getExtendedConstant(7));
+}
+
+floatx80 floatx80_tan(floatx80 rFm)
+{
+  return floatx80_div(floatx80_sin(rFm),floatx80_cos(rFm));
+}
+
+floatx80 floatx80_arccos(floatx80 rFm)
+{
+   //return floatx80_sub(halfPi,floatx80_arcsin(rFm));
+}
+
+floatx80 floatx80_pow(floatx80 rFn,floatx80 rFm)
+{
+  return floatx80_exp(floatx80_mul(rFm,floatx80_ln(rFn))); 
+}
+
+floatx80 floatx80_pol(floatx80 rFn,floatx80 rFm)
+{
+  return floatx80_arctan(floatx80_div(rFn,rFm)); 
+}
+#endif
diff --git a/arch/arm26/nwfpe/fpa11.c b/arch/arm26/nwfpe/fpa11.c
new file mode 100644
index 00000000000..e954540a946
--- /dev/null
+++ b/arch/arm26/nwfpe/fpa11.c
@@ -0,0 +1,221 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.COM, 1998,1999
+
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "fpa11.h"
+#include "fpopcode.h"
+
+#include "fpmodule.h"
+#include "fpmodule.inl"
+
+#include <linux/compiler.h>
+#include <asm/system.h>
+
+/* forward declarations */
+unsigned int EmulateCPDO(const unsigned int);
+unsigned int EmulateCPDT(const unsigned int);
+unsigned int EmulateCPRT(const unsigned int);
+
+/* Reset the FPA11 chip.  Called to initialize and reset the emulator. */
+void resetFPA11(void)
+{
+  int i;
+  FPA11 *fpa11 = GET_FPA11();
+  
+  /* initialize the register type array */
+  for (i=0;i<=7;i++)
+  {
+    fpa11->fType[i] = typeNone;
+  }
+  
+  /* FPSR: set system id to FP_EMULATOR, set AC, clear all other bits */
+  fpa11->fpsr = FP_EMULATOR | BIT_AC;
+  
+  /* FPCR: set SB, AB and DA bits, clear all others */
+#if MAINTAIN_FPCR
+  fpa11->fpcr = MASK_RESET;
+#endif
+}
+
+void SetRoundingMode(const unsigned int opcode)
+{
+#if MAINTAIN_FPCR
+   FPA11 *fpa11 = GET_FPA11();
+   fpa11->fpcr &= ~MASK_ROUNDING_MODE;
+#endif   
+   switch (opcode & MASK_ROUNDING_MODE)
+   {
+      default:
+      case ROUND_TO_NEAREST:
+         float_rounding_mode = float_round_nearest_even;
+#if MAINTAIN_FPCR         
+         fpa11->fpcr |= ROUND_TO_NEAREST;
+#endif         
+      break;
+      
+      case ROUND_TO_PLUS_INFINITY:
+         float_rounding_mode = float_round_up;
+#if MAINTAIN_FPCR         
+         fpa11->fpcr |= ROUND_TO_PLUS_INFINITY;
+#endif         
+      break;
+      
+      case ROUND_TO_MINUS_INFINITY:
+         float_rounding_mode = float_round_down;
+#if MAINTAIN_FPCR         
+         fpa11->fpcr |= ROUND_TO_MINUS_INFINITY;
+#endif         
+      break;
+      
+      case ROUND_TO_ZERO:
+         float_rounding_mode = float_round_to_zero;
+#if MAINTAIN_FPCR         
+         fpa11->fpcr |= ROUND_TO_ZERO;
+#endif         
+      break;
+  }
+}
+
+void SetRoundingPrecision(const unsigned int opcode)
+{
+#if MAINTAIN_FPCR
+   FPA11 *fpa11 = GET_FPA11();
+   fpa11->fpcr &= ~MASK_ROUNDING_PRECISION;
+#endif   
+   switch (opcode & MASK_ROUNDING_PRECISION)
+   {
+      case ROUND_SINGLE:
+         floatx80_rounding_precision = 32;
+#if MAINTAIN_FPCR         
+         fpa11->fpcr |= ROUND_SINGLE;
+#endif         
+      break;
+      
+      case ROUND_DOUBLE:
+         floatx80_rounding_precision = 64;
+#if MAINTAIN_FPCR         
+         fpa11->fpcr |= ROUND_DOUBLE;
+#endif         
+      break;
+      
+      case ROUND_EXTENDED:
+         floatx80_rounding_precision = 80;
+#if MAINTAIN_FPCR         
+         fpa11->fpcr |= ROUND_EXTENDED;
+#endif         
+      break;
+      
+      default: floatx80_rounding_precision = 80;
+  }
+}
+
+void FPA11_CheckInit(void)
+{
+  FPA11 *fpa11 = GET_FPA11();
+  if (unlikely(fpa11->initflag == 0))
+  {
+    resetFPA11();
+    SetRoundingMode(ROUND_TO_NEAREST);
+    SetRoundingPrecision(ROUND_EXTENDED);
+    fpa11->initflag = 1;
+  }
+}
+
+/* Emulate the instruction in the opcode. */
+unsigned int EmulateAll(unsigned int opcode)
+{
+  unsigned int nRc = 1, code;
+
+  code = opcode & 0x00000f00;
+  if (code == 0x00000100 || code == 0x00000200)
+  {
+    /* For coprocessor 1 or 2 (FPA11) */
+    code = opcode & 0x0e000000;
+    if (code == 0x0e000000)
+    {
+      if (opcode & 0x00000010)
+      {
+        /* Emulate conversion opcodes. */
+        /* Emulate register transfer opcodes. */
+        /* Emulate comparison opcodes. */
+        nRc = EmulateCPRT(opcode);
+      }
+      else
+      {
+        /* Emulate monadic arithmetic opcodes. */
+        /* Emulate dyadic arithmetic opcodes. */
+        nRc = EmulateCPDO(opcode);
+      }
+    }
+    else if (code == 0x0c000000)
+    {
+      /* Emulate load/store opcodes. */
+      /* Emulate load/store multiple opcodes. */
+      nRc = EmulateCPDT(opcode);
+    }
+    else
+    {
+      /* Invalid instruction detected.  Return FALSE. */
+      nRc = 0;
+    }
+  }
+
+  return(nRc);
+}
+
+#if 0
+unsigned int EmulateAll1(unsigned int opcode)
+{
+  switch ((opcode >> 24) & 0xf)
+  {
+     case 0xc:
+     case 0xd:
+       if ((opcode >> 20) & 0x1)
+       {
+          switch ((opcode >> 8) & 0xf)
+          {
+             case 0x1: return PerformLDF(opcode); break;
+             case 0x2: return PerformLFM(opcode); break;
+             default: return 0;
+          }
+       }
+       else
+       {
+          switch ((opcode >> 8) & 0xf)
+          {
+             case 0x1: return PerformSTF(opcode); break;
+             case 0x2: return PerformSFM(opcode); break;
+             default: return 0;
+          }
+      }
+     break;
+     
+     case 0xe: 
+       if (opcode & 0x10)
+         return EmulateCPDO(opcode);
+       else
+         return EmulateCPRT(opcode);
+     break;
+  
+     default: return 0;
+  }
+}
+#endif
+
diff --git a/arch/arm26/nwfpe/fpa11.h b/arch/arm26/nwfpe/fpa11.h
new file mode 100644
index 00000000000..be09902a211
--- /dev/null
+++ b/arch/arm26/nwfpe/fpa11.h
@@ -0,0 +1,87 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.com, 1998-1999
+    
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef __FPA11_H__
+#define __FPA11_H__
+
+#define GET_FPA11() ((FPA11 *)(&current_thread_info()->fpstate))
+
+/*
+ * The processes registers are always at the very top of the 8K
+ * stack+task struct.  Use the same method as 'current' uses to
+ * reach them.
+ */
+register unsigned int *user_registers asm("sl");
+
+#define GET_USERREG() (user_registers)
+
+#include <linux/thread_info.h>
+
+/* includes */
+#include "fpsr.h"		/* FP control and status register definitions */
+#include "softfloat.h"
+
+#define		typeNone		0x00
+#define		typeSingle		0x01
+#define		typeDouble		0x02
+#define		typeExtended		0x03
+
+/*
+ * This must be no more and no less than 12 bytes.
+ */
+typedef union tagFPREG {
+   floatx80 fExtended;
+   float64  fDouble;
+   float32  fSingle;
+} FPREG;
+
+/*
+ * FPA11 device model.
+ *
+ * This structure is exported to user space.  Do not re-order.
+ * Only add new stuff to the end, and do not change the size of
+ * any element.  Elements of this structure are used by user
+ * space, and must match struct user_fp in include/asm-arm/user.h.
+ * We include the byte offsets below for documentation purposes.
+ *
+ * The size of this structure and FPREG are checked by fpmodule.c
+ * on initialisation.  If the rules have been broken, NWFPE will
+ * not initialise.
+ */
+typedef struct tagFPA11 {
+/*   0 */  FPREG fpreg[8];		/* 8 floating point registers */
+/*  96 */  FPSR fpsr;			/* floating point status register */
+/* 100 */  FPCR fpcr;			/* floating point control register */
+/* 104 */  unsigned char fType[8];	/* type of floating point value held in
+					   floating point registers.  One of none
+					   single, double or extended. */
+/* 112 */  int initflag;		/* this is special.  The kernel guarantees
+					   to set it to 0 when a thread is launched,
+					   so we can use it to detect whether this
+					   instance of the emulator needs to be
+					   initialised. */
+} FPA11;
+
+extern void resetFPA11(void);
+extern void SetRoundingMode(const unsigned int);
+extern void SetRoundingPrecision(const unsigned int);
+
+#endif
diff --git a/arch/arm26/nwfpe/fpa11.inl b/arch/arm26/nwfpe/fpa11.inl
new file mode 100644
index 00000000000..1c45cba2de6
--- /dev/null
+++ b/arch/arm26/nwfpe/fpa11.inl
@@ -0,0 +1,51 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.COM, 1998,1999
+
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "fpa11.h"
+
+/* Read and write floating point status register */
+extern __inline__ unsigned int readFPSR(void)
+{
+  FPA11 *fpa11 = GET_FPA11();
+  return(fpa11->fpsr);
+}
+
+extern __inline__ void writeFPSR(FPSR reg)
+{
+  FPA11 *fpa11 = GET_FPA11();
+  /* the sysid byte in the status register is readonly */
+  fpa11->fpsr = (fpa11->fpsr & MASK_SYSID) | (reg & ~MASK_SYSID);
+}
+
+/* Read and write floating point control register */
+extern __inline__ FPCR readFPCR(void)
+{
+  FPA11 *fpa11 = GET_FPA11();
+  /* clear SB, AB and DA bits before returning FPCR */
+  return(fpa11->fpcr & ~MASK_RFC);
+}
+
+extern __inline__ void writeFPCR(FPCR reg)
+{
+  FPA11 *fpa11 = GET_FPA11();
+  fpa11->fpcr &= ~MASK_WFC;		/* clear SB, AB and DA bits */
+  fpa11->fpcr |= (reg & MASK_WFC);	/* write SB, AB and DA bits */
+}
diff --git a/arch/arm26/nwfpe/fpa11_cpdo.c b/arch/arm26/nwfpe/fpa11_cpdo.c
new file mode 100644
index 00000000000..343a6b9fd52
--- /dev/null
+++ b/arch/arm26/nwfpe/fpa11_cpdo.c
@@ -0,0 +1,117 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.COM, 1998,1999
+
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "fpa11.h"
+#include "fpopcode.h"
+
+unsigned int SingleCPDO(const unsigned int opcode);
+unsigned int DoubleCPDO(const unsigned int opcode);
+unsigned int ExtendedCPDO(const unsigned int opcode);
+
+unsigned int EmulateCPDO(const unsigned int opcode)
+{
+   FPA11 *fpa11 = GET_FPA11();
+   unsigned int Fd, nType, nDest, nRc = 1;
+   
+   //printk("EmulateCPDO(0x%08x)\n",opcode);
+
+   /* Get the destination size.  If not valid let Linux perform
+      an invalid instruction trap. */
+   nDest = getDestinationSize(opcode);
+   if (typeNone == nDest) return 0;
+   
+   SetRoundingMode(opcode);
+     
+   /* Compare the size of the operands in Fn and Fm.
+      Choose the largest size and perform operations in that size,
+      in order to make use of all the precision of the operands. 
+      If Fm is a constant, we just grab a constant of a size 
+      matching the size of the operand in Fn. */
+   if (MONADIC_INSTRUCTION(opcode))
+     nType = nDest;
+   else
+     nType = fpa11->fType[getFn(opcode)];
+   
+   if (!CONSTANT_FM(opcode))
+   {
+     register unsigned int Fm = getFm(opcode);
+     if (nType < fpa11->fType[Fm])
+     {
+        nType = fpa11->fType[Fm];
+     }
+   }
+
+   switch (nType)
+   {
+      case typeSingle   : nRc = SingleCPDO(opcode);   break;
+      case typeDouble   : nRc = DoubleCPDO(opcode);   break;
+      case typeExtended : nRc = ExtendedCPDO(opcode); break;
+      default           : nRc = 0;
+   }
+
+   /* If the operation succeeded, check to see if the result in the
+      destination register is the correct size.  If not force it
+      to be. */
+   Fd = getFd(opcode);
+   nType = fpa11->fType[Fd];
+   if ((0 != nRc) && (nDest != nType))
+   {
+     switch (nDest)
+     {
+       case typeSingle:
+       {
+         if (typeDouble == nType)
+           fpa11->fpreg[Fd].fSingle = 
+              float64_to_float32(fpa11->fpreg[Fd].fDouble);
+         else
+           fpa11->fpreg[Fd].fSingle = 
+              floatx80_to_float32(fpa11->fpreg[Fd].fExtended);
+       }
+       break;
+          
+       case typeDouble:
+       {
+         if (typeSingle == nType)
+           fpa11->fpreg[Fd].fDouble = 
+              float32_to_float64(fpa11->fpreg[Fd].fSingle);
+         else
+           fpa11->fpreg[Fd].fDouble = 
+              floatx80_to_float64(fpa11->fpreg[Fd].fExtended);
+       }
+       break;
+          
+       case typeExtended:
+       {
+         if (typeSingle == nType)
+           fpa11->fpreg[Fd].fExtended = 
+              float32_to_floatx80(fpa11->fpreg[Fd].fSingle);
+         else
+           fpa11->fpreg[Fd].fExtended = 
+              float64_to_floatx80(fpa11->fpreg[Fd].fDouble);
+       }
+       break;
+     }
+     
+     fpa11->fType[Fd] = nDest;
+   }
+   
+   return nRc;
+}
diff --git a/arch/arm26/nwfpe/fpa11_cpdt.c b/arch/arm26/nwfpe/fpa11_cpdt.c
new file mode 100644
index 00000000000..e12db7c51a7
--- /dev/null
+++ b/arch/arm26/nwfpe/fpa11_cpdt.c
@@ -0,0 +1,368 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.com, 1998-1999
+    (c) Philip Blundell, 1998
+
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "fpa11.h"
+#include "softfloat.h"
+#include "fpopcode.h"
+#include "fpmodule.h"
+#include "fpmodule.inl"
+
+#include <asm/uaccess.h>
+
+static inline
+void loadSingle(const unsigned int Fn,const unsigned int *pMem)
+{
+   FPA11 *fpa11 = GET_FPA11();
+   fpa11->fType[Fn] = typeSingle;
+   get_user(fpa11->fpreg[Fn].fSingle, pMem);
+}
+
+static inline
+void loadDouble(const unsigned int Fn,const unsigned int *pMem)
+{
+   FPA11 *fpa11 = GET_FPA11();
+   unsigned int *p;
+   p = (unsigned int*)&fpa11->fpreg[Fn].fDouble;
+   fpa11->fType[Fn] = typeDouble;
+   get_user(p[0], &pMem[1]);
+   get_user(p[1], &pMem[0]); /* sign & exponent */
+}
+
+static inline
+void loadExtended(const unsigned int Fn,const unsigned int *pMem)
+{
+   FPA11 *fpa11 = GET_FPA11();
+   unsigned int *p;
+   p = (unsigned int*)&fpa11->fpreg[Fn].fExtended;
+   fpa11->fType[Fn] = typeExtended;
+   get_user(p[0], &pMem[0]);  /* sign & exponent */
+   get_user(p[1], &pMem[2]);  /* ls bits */
+   get_user(p[2], &pMem[1]);  /* ms bits */
+}
+
+static inline
+void loadMultiple(const unsigned int Fn,const unsigned int *pMem)
+{
+   FPA11 *fpa11 = GET_FPA11();
+   register unsigned int *p;
+   unsigned long x;
+
+   p = (unsigned int*)&(fpa11->fpreg[Fn]);
+   get_user(x, &pMem[0]);
+   fpa11->fType[Fn] = (x >> 14) & 0x00000003;
+
+   switch (fpa11->fType[Fn])
+   {
+      case typeSingle:
+      case typeDouble:
+      {
+         get_user(p[0], &pMem[2]);  /* Single */
+         get_user(p[1], &pMem[1]);  /* double msw */
+         p[2] = 0;        /* empty */
+      }
+      break;
+
+      case typeExtended:
+      {
+         get_user(p[1], &pMem[2]);
+         get_user(p[2], &pMem[1]);  /* msw */
+         p[0] = (x & 0x80003fff);
+      }
+      break;
+   }
+}
+
+static inline
+void storeSingle(const unsigned int Fn,unsigned int *pMem)
+{
+   FPA11 *fpa11 = GET_FPA11();
+   union
+   {
+     float32 f;
+     unsigned int i[1];
+   } val;
+
+   switch (fpa11->fType[Fn])
+   {
+      case typeDouble:
+         val.f = float64_to_float32(fpa11->fpreg[Fn].fDouble);
+      break;
+
+      case typeExtended:
+         val.f = floatx80_to_float32(fpa11->fpreg[Fn].fExtended);
+      break;
+
+      default: val.f = fpa11->fpreg[Fn].fSingle;
+   }
+
+   put_user(val.i[0], pMem);
+}
+
+static inline
+void storeDouble(const unsigned int Fn,unsigned int *pMem)
+{
+   FPA11 *fpa11 = GET_FPA11();
+   union
+   {
+     float64 f;
+     unsigned int i[2];
+   } val;
+
+   switch (fpa11->fType[Fn])
+   {
+      case typeSingle:
+         val.f = float32_to_float64(fpa11->fpreg[Fn].fSingle);
+      break;
+
+      case typeExtended:
+         val.f = floatx80_to_float64(fpa11->fpreg[Fn].fExtended);
+      break;
+
+      default: val.f = fpa11->fpreg[Fn].fDouble;
+   }
+
+   put_user(val.i[1], &pMem[0]);	/* msw */
+   put_user(val.i[0], &pMem[1]);	/* lsw */
+}
+
+static inline
+void storeExtended(const unsigned int Fn,unsigned int *pMem)
+{
+   FPA11 *fpa11 = GET_FPA11();
+   union
+   {
+     floatx80 f;
+     unsigned int i[3];
+   } val;
+
+   switch (fpa11->fType[Fn])
+   {
+      case typeSingle:
+         val.f = float32_to_floatx80(fpa11->fpreg[Fn].fSingle);
+      break;
+
+      case typeDouble:
+         val.f = float64_to_floatx80(fpa11->fpreg[Fn].fDouble);
+      break;
+
+      default: val.f = fpa11->fpreg[Fn].fExtended;
+   }
+
+   put_user(val.i[0], &pMem[0]); /* sign & exp */
+   put_user(val.i[1], &pMem[2]);
+   put_user(val.i[2], &pMem[1]); /* msw */
+}
+
+static inline
+void storeMultiple(const unsigned int Fn,unsigned int *pMem)
+{
+   FPA11 *fpa11 = GET_FPA11();
+   register unsigned int nType, *p;
+
+   p = (unsigned int*)&(fpa11->fpreg[Fn]);
+   nType = fpa11->fType[Fn];
+
+   switch (nType)
+   {
+      case typeSingle:
+      case typeDouble:
+      {
+	 put_user(p[0], &pMem[2]); /* single */
+	 put_user(p[1], &pMem[1]); /* double msw */
+	 put_user(nType << 14, &pMem[0]);
+      }
+      break;
+
+      case typeExtended:
+      {
+	 put_user(p[2], &pMem[1]); /* msw */
+	 put_user(p[1], &pMem[2]);
+	 put_user((p[0] & 0x80003fff) | (nType << 14), &pMem[0]);
+      }
+      break;
+   }
+}
+
+unsigned int PerformLDF(const unsigned int opcode)
+{
+   unsigned int *pBase, *pAddress, *pFinal, nRc = 1,
+     write_back = WRITE_BACK(opcode);
+
+   //printk("PerformLDF(0x%08x), Fd = 0x%08x\n",opcode,getFd(opcode));
+
+   pBase = (unsigned int*)readRegister(getRn(opcode));
+   if (REG_PC == getRn(opcode))
+   {
+     pBase += 2;
+     write_back = 0;
+   }
+
+   pFinal = pBase;
+   if (BIT_UP_SET(opcode))
+     pFinal += getOffset(opcode);
+   else
+     pFinal -= getOffset(opcode);
+
+   if (PREINDEXED(opcode)) pAddress = pFinal; else pAddress = pBase;
+
+   switch (opcode & MASK_TRANSFER_LENGTH)
+   {
+      case TRANSFER_SINGLE  : loadSingle(getFd(opcode),pAddress);   break;
+      case TRANSFER_DOUBLE  : loadDouble(getFd(opcode),pAddress);   break;
+      case TRANSFER_EXTENDED: loadExtended(getFd(opcode),pAddress); break;
+      default: nRc = 0;
+   }
+
+   if (write_back) writeRegister(getRn(opcode),(unsigned int)pFinal);
+   return nRc;
+}
+
+unsigned int PerformSTF(const unsigned int opcode)
+{
+   unsigned int *pBase, *pAddress, *pFinal, nRc = 1,
+     write_back = WRITE_BACK(opcode);
+
+   //printk("PerformSTF(0x%08x), Fd = 0x%08x\n",opcode,getFd(opcode));
+   SetRoundingMode(ROUND_TO_NEAREST);
+
+   pBase = (unsigned int*)readRegister(getRn(opcode));
+   if (REG_PC == getRn(opcode))
+   {
+     pBase += 2;
+     write_back = 0;
+   }
+
+   pFinal = pBase;
+   if (BIT_UP_SET(opcode))
+     pFinal += getOffset(opcode);
+   else
+     pFinal -= getOffset(opcode);
+
+   if (PREINDEXED(opcode)) pAddress = pFinal; else pAddress = pBase;
+
+   switch (opcode & MASK_TRANSFER_LENGTH)
+   {
+      case TRANSFER_SINGLE  : storeSingle(getFd(opcode),pAddress);   break;
+      case TRANSFER_DOUBLE  : storeDouble(getFd(opcode),pAddress);   break;
+      case TRANSFER_EXTENDED: storeExtended(getFd(opcode),pAddress); break;
+      default: nRc = 0;
+   }
+
+   if (write_back) writeRegister(getRn(opcode),(unsigned int)pFinal);
+   return nRc;
+}
+
+unsigned int PerformLFM(const unsigned int opcode)
+{
+   unsigned int i, Fd, *pBase, *pAddress, *pFinal,
+     write_back = WRITE_BACK(opcode);
+
+   pBase = (unsigned int*)readRegister(getRn(opcode));
+   if (REG_PC == getRn(opcode))
+   {
+     pBase += 2;
+     write_back = 0;
+   }
+
+   pFinal = pBase;
+   if (BIT_UP_SET(opcode))
+     pFinal += getOffset(opcode);
+   else
+     pFinal -= getOffset(opcode);
+
+   if (PREINDEXED(opcode)) pAddress = pFinal; else pAddress = pBase;
+
+   Fd = getFd(opcode);
+   for (i=getRegisterCount(opcode);i>0;i--)
+   {
+     loadMultiple(Fd,pAddress);
+     pAddress += 3; Fd++;
+     if (Fd == 8) Fd = 0;
+   }
+
+   if (write_back) writeRegister(getRn(opcode),(unsigned int)pFinal);
+   return 1;
+}
+
+unsigned int PerformSFM(const unsigned int opcode)
+{
+   unsigned int i, Fd, *pBase, *pAddress, *pFinal,
+     write_back = WRITE_BACK(opcode);
+
+   pBase = (unsigned int*)readRegister(getRn(opcode));
+   if (REG_PC == getRn(opcode))
+   {
+     pBase += 2;
+     write_back = 0;
+   }
+
+   pFinal = pBase;
+   if (BIT_UP_SET(opcode))
+     pFinal += getOffset(opcode);
+   else
+     pFinal -= getOffset(opcode);
+
+   if (PREINDEXED(opcode)) pAddress = pFinal; else pAddress = pBase;
+
+   Fd = getFd(opcode);
+   for (i=getRegisterCount(opcode);i>0;i--)
+   {
+     storeMultiple(Fd,pAddress);
+     pAddress += 3; Fd++;
+     if (Fd == 8) Fd = 0;
+   }
+
+   if (write_back) writeRegister(getRn(opcode),(unsigned int)pFinal);
+   return 1;
+}
+
+#if 1
+unsigned int EmulateCPDT(const unsigned int opcode)
+{
+  unsigned int nRc = 0;
+
+  //printk("EmulateCPDT(0x%08x)\n",opcode);
+
+  if (LDF_OP(opcode))
+  {
+    nRc = PerformLDF(opcode);
+  }
+  else if (LFM_OP(opcode))
+  {
+    nRc = PerformLFM(opcode);
+  }
+  else if (STF_OP(opcode))
+  {
+    nRc = PerformSTF(opcode);
+  }
+  else if (SFM_OP(opcode))
+  {
+    nRc = PerformSFM(opcode);
+  }
+  else
+  {
+    nRc = 0;
+  }
+
+  return nRc;
+}
+#endif
diff --git a/arch/arm26/nwfpe/fpa11_cprt.c b/arch/arm26/nwfpe/fpa11_cprt.c
new file mode 100644
index 00000000000..a201076c1f1
--- /dev/null
+++ b/arch/arm26/nwfpe/fpa11_cprt.c
@@ -0,0 +1,289 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.COM, 1998,1999
+    (c) Philip Blundell, 1999
+
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "fpa11.h"
+#include "milieu.h"
+#include "softfloat.h"
+#include "fpopcode.h"
+#include "fpa11.inl"
+#include "fpmodule.h"
+#include "fpmodule.inl"
+
+extern flag floatx80_is_nan(floatx80);
+extern flag float64_is_nan( float64);
+extern flag float32_is_nan( float32);
+
+void SetRoundingMode(const unsigned int opcode);
+
+unsigned int PerformFLT(const unsigned int opcode);
+unsigned int PerformFIX(const unsigned int opcode);
+
+static unsigned int
+PerformComparison(const unsigned int opcode);
+
+unsigned int EmulateCPRT(const unsigned int opcode)
+{
+  unsigned int nRc = 1;
+
+  //printk("EmulateCPRT(0x%08x)\n",opcode);
+
+  if (opcode & 0x800000)
+  {
+     /* This is some variant of a comparison (PerformComparison will
+	sort out which one).  Since most of the other CPRT
+	instructions are oddball cases of some sort or other it makes
+	sense to pull this out into a fast path.  */
+     return PerformComparison(opcode);
+  }
+
+  /* Hint to GCC that we'd like a jump table rather than a load of CMPs */
+  switch ((opcode & 0x700000) >> 20)
+  {
+    case  FLT_CODE >> 20: nRc = PerformFLT(opcode); break;
+    case  FIX_CODE >> 20: nRc = PerformFIX(opcode); break;
+    
+    case  WFS_CODE >> 20: writeFPSR(readRegister(getRd(opcode))); break;
+    case  RFS_CODE >> 20: writeRegister(getRd(opcode),readFPSR()); break;
+
+#if 0    /* We currently have no use for the FPCR, so there's no point
+	    in emulating it. */
+    case  WFC_CODE >> 20: writeFPCR(readRegister(getRd(opcode)));
+    case  RFC_CODE >> 20: writeRegister(getRd(opcode),readFPCR()); break;
+#endif
+
+    default: nRc = 0;
+  }
+  
+  return nRc;
+}
+
+unsigned int PerformFLT(const unsigned int opcode)
+{
+   FPA11 *fpa11 = GET_FPA11();
+   
+   unsigned int nRc = 1;
+   SetRoundingMode(opcode);
+
+   switch (opcode & MASK_ROUNDING_PRECISION)
+   {
+      case ROUND_SINGLE:
+      {
+        fpa11->fType[getFn(opcode)] = typeSingle;
+        fpa11->fpreg[getFn(opcode)].fSingle =
+	   int32_to_float32(readRegister(getRd(opcode)));
+      }
+      break;
+
+      case ROUND_DOUBLE:
+      {
+        fpa11->fType[getFn(opcode)] = typeDouble;
+        fpa11->fpreg[getFn(opcode)].fDouble =
+            int32_to_float64(readRegister(getRd(opcode)));
+      }
+      break;
+        
+      case ROUND_EXTENDED:
+      {
+        fpa11->fType[getFn(opcode)] = typeExtended;
+        fpa11->fpreg[getFn(opcode)].fExtended =
+	   int32_to_floatx80(readRegister(getRd(opcode)));
+      }
+      break;
+      
+      default: nRc = 0;
+  }
+  
+  return nRc;
+}
+
+unsigned int PerformFIX(const unsigned int opcode)
+{
+   FPA11 *fpa11 = GET_FPA11();
+   unsigned int nRc = 1;
+   unsigned int Fn = getFm(opcode);
+   
+   SetRoundingMode(opcode);
+
+   switch (fpa11->fType[Fn])
+   {
+      case typeSingle:
+      {
+         writeRegister(getRd(opcode),
+	               float32_to_int32(fpa11->fpreg[Fn].fSingle));
+      }
+      break;
+
+      case typeDouble:
+      {
+         writeRegister(getRd(opcode),
+	               float64_to_int32(fpa11->fpreg[Fn].fDouble));
+      }
+      break;
+      	               
+      case typeExtended:
+      {
+         writeRegister(getRd(opcode),
+	               floatx80_to_int32(fpa11->fpreg[Fn].fExtended));
+      }
+      break;
+      
+      default: nRc = 0;
+  }
+  
+  return nRc;
+}
+
+   
+static unsigned int __inline__
+PerformComparisonOperation(floatx80 Fn, floatx80 Fm)
+{
+   unsigned int flags = 0;
+
+   /* test for less than condition */
+   if (floatx80_lt(Fn,Fm))
+   {
+      flags |= CC_NEGATIVE;
+   }
+  
+   /* test for equal condition */
+   if (floatx80_eq(Fn,Fm))
+   {
+      flags |= CC_ZERO;
+   }
+
+   /* test for greater than or equal condition */
+   if (floatx80_lt(Fm,Fn))
+   {
+      flags |= CC_CARRY;
+   }
+   
+   writeConditionCodes(flags);
+   return 1;
+}
+
+/* This instruction sets the flags N, Z, C, V in the FPSR. */
+   
+static unsigned int PerformComparison(const unsigned int opcode)
+{
+   FPA11 *fpa11 = GET_FPA11();
+   unsigned int Fn, Fm;
+   floatx80 rFn, rFm;
+   int e_flag = opcode & 0x400000;	/* 1 if CxFE */
+   int n_flag = opcode & 0x200000;	/* 1 if CNxx */
+   unsigned int flags = 0;
+
+   //printk("PerformComparison(0x%08x)\n",opcode);
+
+   Fn = getFn(opcode);
+   Fm = getFm(opcode);
+
+   /* Check for unordered condition and convert all operands to 80-bit
+      format.
+      ?? Might be some mileage in avoiding this conversion if possible.
+      Eg, if both operands are 32-bit, detect this and do a 32-bit
+      comparison (cheaper than an 80-bit one).  */
+   switch (fpa11->fType[Fn])
+   {
+      case typeSingle: 
+        //printk("single.\n");
+	if (float32_is_nan(fpa11->fpreg[Fn].fSingle))
+	   goto unordered;
+        rFn = float32_to_floatx80(fpa11->fpreg[Fn].fSingle);
+      break;
+
+      case typeDouble: 
+        //printk("double.\n");
+	if (float64_is_nan(fpa11->fpreg[Fn].fDouble))
+	   goto unordered;
+        rFn = float64_to_floatx80(fpa11->fpreg[Fn].fDouble);
+      break;
+      
+      case typeExtended: 
+        //printk("extended.\n");
+	if (floatx80_is_nan(fpa11->fpreg[Fn].fExtended))
+	   goto unordered;
+        rFn = fpa11->fpreg[Fn].fExtended;
+      break;
+      
+      default: return 0;
+   }
+
+   if (CONSTANT_FM(opcode))
+   {
+     //printk("Fm is a constant: #%d.\n",Fm);
+     rFm = getExtendedConstant(Fm);
+     if (floatx80_is_nan(rFm))
+        goto unordered;
+   }
+   else
+   {
+     //printk("Fm = r%d which contains a ",Fm);
+      switch (fpa11->fType[Fm])
+      {
+         case typeSingle: 
+           //printk("single.\n");
+	   if (float32_is_nan(fpa11->fpreg[Fm].fSingle))
+	      goto unordered;
+           rFm = float32_to_floatx80(fpa11->fpreg[Fm].fSingle);
+         break;
+
+         case typeDouble: 
+           //printk("double.\n");
+	   if (float64_is_nan(fpa11->fpreg[Fm].fDouble))
+	      goto unordered;
+           rFm = float64_to_floatx80(fpa11->fpreg[Fm].fDouble);
+         break;
+      
+         case typeExtended: 
+           //printk("extended.\n");
+	   if (floatx80_is_nan(fpa11->fpreg[Fm].fExtended))
+	      goto unordered;
+           rFm = fpa11->fpreg[Fm].fExtended;
+         break;
+      
+         default: return 0;
+      }
+   }
+
+   if (n_flag)
+   {
+      rFm.high ^= 0x8000;
+   }
+
+   return PerformComparisonOperation(rFn,rFm);
+
+ unordered:
+   /* ?? The FPA data sheet is pretty vague about this, in particular
+      about whether the non-E comparisons can ever raise exceptions.
+      This implementation is based on a combination of what it says in
+      the data sheet, observation of how the Acorn emulator actually
+      behaves (and how programs expect it to) and guesswork.  */
+   flags |= CC_OVERFLOW;
+   flags &= ~(CC_ZERO | CC_NEGATIVE);
+
+   if (BIT_AC & readFPSR()) flags |= CC_CARRY;
+
+   if (e_flag) float_raise(float_flag_invalid);
+
+   writeConditionCodes(flags);
+   return 1;
+}
diff --git a/arch/arm26/nwfpe/fpmodule.c b/arch/arm26/nwfpe/fpmodule.c
new file mode 100644
index 00000000000..528fa710aa3
--- /dev/null
+++ b/arch/arm26/nwfpe/fpmodule.c
@@ -0,0 +1,182 @@
+
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.com, 1998-1999
+    (c) Philip Blundell, 1998-1999
+
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "fpa11.h"
+
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/config.h>
+
+/* XXX */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+/* XXX */
+
+#include "softfloat.h"
+#include "fpopcode.h"
+#include "fpmodule.h"
+#include "fpa11.inl"
+
+/* kernel symbols required for signal handling */
+typedef struct task_struct*	PTASK;
+
+#ifdef MODULE
+void fp_send_sig(unsigned long sig, PTASK p, int priv);
+#if LINUX_VERSION_CODE > 0x20115
+MODULE_AUTHOR("Scott Bambrough <scottb@rebel.com>");
+MODULE_DESCRIPTION("NWFPE floating point emulator");
+#endif
+
+#else
+#define fp_send_sig	send_sig
+#define kern_fp_enter	fp_enter
+
+extern char fpe_type[];
+#endif
+
+/* kernel function prototypes required */
+void fp_setup(void);
+
+/* external declarations for saved kernel symbols */
+extern void (*kern_fp_enter)(void);
+
+/* Original value of fp_enter from kernel before patched by fpe_init. */ 
+static void (*orig_fp_enter)(void);
+
+/* forward declarations */
+extern void nwfpe_enter(void);
+
+#ifdef MODULE
+/*
+ * Return 0 if we can be unloaded.  This can only happen if
+ * kern_fp_enter is still pointing at nwfpe_enter
+ */
+static int fpe_unload(void)
+{
+  return (kern_fp_enter == nwfpe_enter) ? 0 : 1;
+}
+#endif
+
+static int __init fpe_init(void)
+{
+  if (sizeof(FPA11) > sizeof(union fp_state)) {
+    printk(KERN_ERR "nwfpe: bad structure size\n");
+    return -EINVAL;
+  }
+
+  if (sizeof(FPREG) != 12) {
+    printk(KERN_ERR "nwfpe: bad register size\n");
+    return -EINVAL;
+  }
+
+#ifdef MODULE
+  if (!mod_member_present(&__this_module, can_unload))
+    return -EINVAL;
+  __this_module.can_unload = fpe_unload;
+#else
+  if (fpe_type[0] && strcmp(fpe_type, "nwfpe"))
+    return 0;
+#endif
+
+  /* Display title, version and copyright information. */
+  printk(KERN_WARNING "NetWinder Floating Point Emulator V0.95 "
+	 "(c) 1998-1999 Rebel.com\n");
+
+  /* Save pointer to the old FP handler and then patch ourselves in */
+  orig_fp_enter = kern_fp_enter;
+  kern_fp_enter = nwfpe_enter;
+
+  return 0;
+}
+
+static void __exit fpe_exit(void)
+{
+  /* Restore the values we saved earlier. */
+  kern_fp_enter = orig_fp_enter;
+}
+
+/*
+ScottB:  November 4, 1998
+
+Moved this function out of softfloat-specialize into fpmodule.c.
+This effectively isolates all the changes required for integrating with the
+Linux kernel into fpmodule.c.  Porting to NetBSD should only require modifying
+fpmodule.c to integrate with the NetBSD kernel (I hope!).
+
+[1/1/99: Not quite true any more unfortunately.  There is Linux-specific
+code to access data in user space in some other source files at the 
+moment (grep for get_user / put_user calls).  --philb]
+
+float_exception_flags is a global variable in SoftFloat.
+
+This function is called by the SoftFloat routines to raise a floating
+point exception.  We check the trap enable byte in the FPSR, and raise
+a SIGFPE exception if necessary.  If not the relevant bits in the 
+cumulative exceptions flag byte are set and we return.
+*/
+
+void float_raise(signed char flags)
+{
+  register unsigned int fpsr, cumulativeTraps;
+  
+#ifdef CONFIG_DEBUG_USER
+  printk(KERN_DEBUG "NWFPE: %s[%d] takes exception %08x at %p from %08x\n",
+	 current->comm, current->pid, flags,
+	 __builtin_return_address(0), GET_USERREG()[15]);
+#endif
+
+  /* Keep SoftFloat exception flags up to date.  */
+  float_exception_flags |= flags;
+
+  /* Read fpsr and initialize the cumulativeTraps.  */
+  fpsr = readFPSR();
+  cumulativeTraps = 0;
+  
+  /* For each type of exception, the cumulative trap exception bit is only
+     set if the corresponding trap enable bit is not set.  */
+  if ((!(fpsr & BIT_IXE)) && (flags & BIT_IXC))
+     cumulativeTraps |= BIT_IXC;  
+  if ((!(fpsr & BIT_UFE)) && (flags & BIT_UFC))
+     cumulativeTraps |= BIT_UFC;  
+  if ((!(fpsr & BIT_OFE)) && (flags & BIT_OFC))
+     cumulativeTraps |= BIT_OFC;  
+  if ((!(fpsr & BIT_DZE)) && (flags & BIT_DZC))
+     cumulativeTraps |= BIT_DZC;  
+  if ((!(fpsr & BIT_IOE)) && (flags & BIT_IOC))
+     cumulativeTraps |= BIT_IOC;  
+
+  /* Set the cumulative exceptions flags.  */
+  if (cumulativeTraps)
+    writeFPSR(fpsr | cumulativeTraps);
+
+  /* Raise an exception if necessary.  */
+  if (fpsr & (flags << 16))
+    fp_send_sig(SIGFPE, current, 1);
+}
+
+module_init(fpe_init);
+module_exit(fpe_exit);
diff --git a/arch/arm26/nwfpe/fpmodule.h b/arch/arm26/nwfpe/fpmodule.h
new file mode 100644
index 00000000000..ef71aab46a3
--- /dev/null
+++ b/arch/arm26/nwfpe/fpmodule.h
@@ -0,0 +1,47 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.com, 1998-1999
+
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef __FPMODULE_H__
+#define __FPMODULE_H__
+
+#include <linux/config.h>
+
+#define REG_ORIG_R0	16
+#define REG_CPSR	15
+#define REG_PC		15
+#define REG_LR		14
+#define REG_SP		13
+#define REG_IP		12
+#define REG_FP		11
+#define REG_R10		10
+#define REG_R9		9
+#define REG_R9		9
+#define REG_R8		8
+#define REG_R7		7
+#define REG_R6		6
+#define REG_R5		5
+#define REG_R4		4
+#define REG_R3		3
+#define REG_R2		2
+#define REG_R1		1
+#define REG_R0		0
+
+#endif
diff --git a/arch/arm26/nwfpe/fpmodule.inl b/arch/arm26/nwfpe/fpmodule.inl
new file mode 100644
index 00000000000..ef228378ffa
--- /dev/null
+++ b/arch/arm26/nwfpe/fpmodule.inl
@@ -0,0 +1,84 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.COM, 1998,1999
+
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+extern __inline__
+unsigned int readRegister(const unsigned int nReg)
+{
+  /* Note: The CPU thinks it has dealt with the current instruction.  As
+           a result the program counter has been advanced to the next
+           instruction, and points 4 bytes beyond the actual instruction
+           that caused the invalid instruction trap to occur.  We adjust
+           for this in this routine.  LDF/STF instructions with Rn = PC
+           depend on the PC being correct, as they use PC+8 in their 
+           address calculations. */
+  unsigned int *userRegisters = GET_USERREG();
+  unsigned int val = userRegisters[nReg];
+  if (REG_PC == nReg) val -= 4;
+  return val;
+}
+
+extern __inline__
+void writeRegister(const unsigned int nReg, const unsigned int val)
+{
+  unsigned int *userRegisters = GET_USERREG();
+  userRegisters[nReg] = val;
+}
+
+extern __inline__
+unsigned int readCPSR(void)
+{
+  return(readRegister(REG_CPSR));
+}
+
+extern __inline__
+void writeCPSR(const unsigned int val)
+{
+  writeRegister(REG_CPSR,val);
+}
+
+extern __inline__
+unsigned int readConditionCodes(void)
+{
+#ifdef __FPEM_TEST__
+   return(0);
+#else
+   return(readCPSR() & CC_MASK);
+#endif
+}
+
+extern __inline__
+void writeConditionCodes(const unsigned int val)
+{
+  unsigned int *userRegisters = GET_USERREG();
+  unsigned int rval;
+  /*
+   * Operate directly on userRegisters since
+   * the CPSR may be the PC register itself.
+   */
+  rval = userRegisters[REG_CPSR] & ~CC_MASK;
+  userRegisters[REG_CPSR] = rval | (val & CC_MASK);
+}
+
+extern __inline__
+unsigned int readMemoryInt(unsigned int *pMem)
+{
+  return *pMem;
+}
diff --git a/arch/arm26/nwfpe/fpopcode.c b/arch/arm26/nwfpe/fpopcode.c
new file mode 100644
index 00000000000..d81ddd18832
--- /dev/null
+++ b/arch/arm26/nwfpe/fpopcode.c
@@ -0,0 +1,148 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.COM, 1998,1999
+
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "fpa11.h"
+#include "softfloat.h"
+#include "fpopcode.h"
+#include "fpsr.h"
+#include "fpmodule.h"
+#include "fpmodule.inl"
+
+const floatx80 floatx80Constant[] = {
+  { 0x0000, 0x0000000000000000ULL},	/* extended 0.0 */
+  { 0x3fff, 0x8000000000000000ULL},	/* extended 1.0 */
+  { 0x4000, 0x8000000000000000ULL},	/* extended 2.0 */
+  { 0x4000, 0xc000000000000000ULL},	/* extended 3.0 */
+  { 0x4001, 0x8000000000000000ULL},	/* extended 4.0 */
+  { 0x4001, 0xa000000000000000ULL},	/* extended 5.0 */
+  { 0x3ffe, 0x8000000000000000ULL},	/* extended 0.5 */
+  { 0x4002, 0xa000000000000000ULL}	/* extended 10.0 */
+};  
+
+const float64 float64Constant[] = {
+  0x0000000000000000ULL,		/* double 0.0 */
+  0x3ff0000000000000ULL,		/* double 1.0 */
+  0x4000000000000000ULL,		/* double 2.0 */
+  0x4008000000000000ULL,		/* double 3.0 */
+  0x4010000000000000ULL,		/* double 4.0 */
+  0x4014000000000000ULL,		/* double 5.0 */
+  0x3fe0000000000000ULL,		/* double 0.5 */
+  0x4024000000000000ULL			/* double 10.0 */
+};  
+
+const float32 float32Constant[] = {
+  0x00000000,				/* single 0.0 */
+  0x3f800000,				/* single 1.0 */
+  0x40000000,				/* single 2.0 */
+  0x40400000,				/* single 3.0 */
+  0x40800000,				/* single 4.0 */
+  0x40a00000,				/* single 5.0 */
+  0x3f000000,				/* single 0.5 */
+  0x41200000				/* single 10.0 */
+};  
+
+unsigned int getTransferLength(const unsigned int opcode)
+{
+  unsigned int nRc;
+  
+  switch (opcode & MASK_TRANSFER_LENGTH)
+  {
+    case 0x00000000: nRc = 1; break; /* single precision */
+    case 0x00008000: nRc = 2; break; /* double precision */
+    case 0x00400000: nRc = 3; break; /* extended precision */
+    default: nRc = 0;
+  }
+  
+  return(nRc);
+}
+
+unsigned int getRegisterCount(const unsigned int opcode)
+{
+  unsigned int nRc;
+  
+  switch (opcode & MASK_REGISTER_COUNT)
+  {
+    case 0x00000000: nRc = 4; break;
+    case 0x00008000: nRc = 1; break;
+    case 0x00400000: nRc = 2; break;
+    case 0x00408000: nRc = 3; break;
+    default: nRc = 0;
+  }
+  
+  return(nRc);
+}
+
+unsigned int getRoundingPrecision(const unsigned int opcode)
+{
+  unsigned int nRc;
+  
+  switch (opcode & MASK_ROUNDING_PRECISION)
+  {
+    case 0x00000000: nRc = 1; break;
+    case 0x00000080: nRc = 2; break;
+    case 0x00080000: nRc = 3; break;
+    default: nRc = 0;
+  }
+  
+  return(nRc);
+}
+
+unsigned int getDestinationSize(const unsigned int opcode)
+{
+  unsigned int nRc;
+  
+  switch (opcode & MASK_DESTINATION_SIZE)
+  {
+    case 0x00000000: nRc = typeSingle; break;
+    case 0x00000080: nRc = typeDouble; break;
+    case 0x00080000: nRc = typeExtended; break;
+    default: nRc = typeNone;
+  }
+  
+  return(nRc);
+}
+
+/* condition code lookup table
+ index into the table is test code: EQ, NE, ... LT, GT, AL, NV
+ bit position in short is condition code: NZCV */
+static const unsigned short aCC[16] = {
+    0xF0F0, // EQ == Z set
+    0x0F0F, // NE
+    0xCCCC, // CS == C set
+    0x3333, // CC
+    0xFF00, // MI == N set
+    0x00FF, // PL
+    0xAAAA, // VS == V set
+    0x5555, // VC
+    0x0C0C, // HI == C set && Z clear
+    0xF3F3, // LS == C clear || Z set
+    0xAA55, // GE == (N==V)
+    0x55AA, // LT == (N!=V)
+    0x0A05, // GT == (!Z && (N==V))
+    0xF5FA, // LE == (Z || (N!=V))
+    0xFFFF, // AL always
+    0 // NV
+};
+
+unsigned int checkCondition(const unsigned int opcode, const unsigned int ccodes)
+{
+  return (aCC[opcode>>28] >> (ccodes>>28)) & 1;
+}
diff --git a/arch/arm26/nwfpe/fpopcode.h b/arch/arm26/nwfpe/fpopcode.h
new file mode 100644
index 00000000000..13c7419262a
--- /dev/null
+++ b/arch/arm26/nwfpe/fpopcode.h
@@ -0,0 +1,390 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.COM, 1998,1999
+
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef __FPOPCODE_H__
+#define __FPOPCODE_H__
+
+/*
+ARM Floating Point Instruction Classes
+| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 
+|c o n d|1 1 0 P|U|u|W|L|   Rn  |v|  Fd |0|0|0|1|  o f f s e t  | CPDT
+|c o n d|1 1 0 P|U|w|W|L|   Rn  |x|  Fd |0|0|0|1|  o f f s e t  | CPDT
+| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 
+|c o n d|1 1 1 0|a|b|c|d|e|  Fn |j|  Fd |0|0|0|1|f|g|h|0|i|  Fm | CPDO
+|c o n d|1 1 1 0|a|b|c|L|e|  Fn |   Rd  |0|0|0|1|f|g|h|1|i|  Fm | CPRT
+|c o n d|1 1 1 0|a|b|c|1|e|  Fn |1|1|1|1|0|0|0|1|f|g|h|1|i|  Fm | comparisons
+| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 
+
+CPDT		data transfer instructions
+		LDF, STF, LFM, SFM
+		
+CPDO		dyadic arithmetic instructions
+		ADF, MUF, SUF, RSF, DVF, RDF,
+		POW, RPW, RMF, FML, FDV, FRD, POL
+
+CPDO		monadic arithmetic instructions
+		MVF, MNF, ABS, RND, SQT, LOG, LGN, EXP,
+		SIN, COS, TAN, ASN, ACS, ATN, URD, NRM
+		
+CPRT		joint arithmetic/data transfer instructions
+		FIX (arithmetic followed by load/store)
+		FLT (load/store followed by arithmetic)
+		CMF, CNF CMFE, CNFE (comparisons)
+		WFS, RFS (write/read floating point status register)
+		WFC, RFC (write/read floating point control register)
+
+cond		condition codes
+P		pre/post index bit: 0 = postindex, 1 = preindex
+U		up/down bit: 0 = stack grows down, 1 = stack grows up
+W		write back bit: 1 = update base register (Rn)
+L		load/store bit: 0 = store, 1 = load
+Rn		base register
+Rd		destination/source register		
+Fd		floating point destination register
+Fn		floating point source register
+Fm		floating point source register or floating point constant
+
+uv		transfer length (TABLE 1)
+wx		register count (TABLE 2)
+abcd		arithmetic opcode (TABLES 3 & 4)
+ef		destination size (rounding precision) (TABLE 5)
+gh		rounding mode (TABLE 6)
+j		dyadic/monadic bit: 0 = dyadic, 1 = monadic
+i 		constant bit: 1 = constant (TABLE 6)
+*/
+
+/*
+TABLE 1
++-------------------------+---+---+---------+---------+
+|  Precision              | u | v | FPSR.EP | length  |
++-------------------------+---+---+---------+---------+
+| Single                  | 0 � 0 |    x    | 1 words |
+| Double                  | 1 � 1 |    x    | 2 words |
+| Extended                | 1 � 1 |    x    | 3 words |
+| Packed decimal          | 1 � 1 |    0    | 3 words |
+| Expanded packed decimal | 1 � 1 |    1    | 4 words |
++-------------------------+---+---+---------+---------+
+Note: x = don't care
+*/
+
+/*
+TABLE 2
++---+---+---------------------------------+
+| w | x | Number of registers to transfer |
++---+---+---------------------------------+
+| 0 � 1 |  1                              |
+| 1 � 0 |  2                              |
+| 1 � 1 |  3                              |
+| 0 � 0 |  4                              |
++---+---+---------------------------------+
+*/
+
+/*
+TABLE 3: Dyadic Floating Point Opcodes
++---+---+---+---+----------+-----------------------+-----------------------+
+| a | b | c | d | Mnemonic | Description           | Operation             |
++---+---+---+---+----------+-----------------------+-----------------------+
+| 0 | 0 | 0 | 0 | ADF      | Add                   | Fd := Fn + Fm         |
+| 0 | 0 | 0 | 1 | MUF      | Multiply              | Fd := Fn * Fm         |
+| 0 | 0 | 1 | 0 | SUF      | Subtract              | Fd := Fn - Fm         |
+| 0 | 0 | 1 | 1 | RSF      | Reverse subtract      | Fd := Fm - Fn         |
+| 0 | 1 | 0 | 0 | DVF      | Divide                | Fd := Fn / Fm         |
+| 0 | 1 | 0 | 1 | RDF      | Reverse divide        | Fd := Fm / Fn         |
+| 0 | 1 | 1 | 0 | POW      | Power                 | Fd := Fn ^ Fm         |
+| 0 | 1 | 1 | 1 | RPW      | Reverse power         | Fd := Fm ^ Fn         |
+| 1 | 0 | 0 | 0 | RMF      | Remainder             | Fd := IEEE rem(Fn/Fm) |
+| 1 | 0 | 0 | 1 | FML      | Fast Multiply         | Fd := Fn * Fm         |
+| 1 | 0 | 1 | 0 | FDV      | Fast Divide           | Fd := Fn / Fm         |
+| 1 | 0 | 1 | 1 | FRD      | Fast reverse divide   | Fd := Fm / Fn         |
+| 1 | 1 | 0 | 0 | POL      | Polar angle (ArcTan2) | Fd := arctan2(Fn,Fm)  |
+| 1 | 1 | 0 | 1 |          | undefined instruction | trap                  |
+| 1 | 1 | 1 | 0 |          | undefined instruction | trap                  |
+| 1 | 1 | 1 | 1 |          | undefined instruction | trap                  |
++---+---+---+---+----------+-----------------------+-----------------------+
+Note: POW, RPW, POL are deprecated, and are available for backwards
+      compatibility only.
+*/
+
+/*
+TABLE 4: Monadic Floating Point Opcodes
++---+---+---+---+----------+-----------------------+-----------------------+
+| a | b | c | d | Mnemonic | Description           | Operation             |
++---+---+---+---+----------+-----------------------+-----------------------+
+| 0 | 0 | 0 | 0 | MVF      | Move                  | Fd := Fm              |
+| 0 | 0 | 0 | 1 | MNF      | Move negated          | Fd := - Fm            |
+| 0 | 0 | 1 | 0 | ABS      | Absolute value        | Fd := abs(Fm)         |
+| 0 | 0 | 1 | 1 | RND      | Round to integer      | Fd := int(Fm)         |
+| 0 | 1 | 0 | 0 | SQT      | Square root           | Fd := sqrt(Fm)        |
+| 0 | 1 | 0 | 1 | LOG      | Log base 10           | Fd := log10(Fm)       |
+| 0 | 1 | 1 | 0 | LGN      | Log base e            | Fd := ln(Fm)          |
+| 0 | 1 | 1 | 1 | EXP      | Exponent              | Fd := e ^ Fm          |
+| 1 | 0 | 0 | 0 | SIN      | Sine                  | Fd := sin(Fm)         |
+| 1 | 0 | 0 | 1 | COS      | Cosine                | Fd := cos(Fm)         |
+| 1 | 0 | 1 | 0 | TAN      | Tangent               | Fd := tan(Fm)         |
+| 1 | 0 | 1 | 1 | ASN      | Arc Sine              | Fd := arcsin(Fm)      |
+| 1 | 1 | 0 | 0 | ACS      | Arc Cosine            | Fd := arccos(Fm)      |
+| 1 | 1 | 0 | 1 | ATN      | Arc Tangent           | Fd := arctan(Fm)      |
+| 1 | 1 | 1 | 0 | URD      | Unnormalized round    | Fd := int(Fm)         |
+| 1 | 1 | 1 | 1 | NRM      | Normalize             | Fd := norm(Fm)        |
++---+---+---+---+----------+-----------------------+-----------------------+
+Note: LOG, LGN, EXP, SIN, COS, TAN, ASN, ACS, ATN are deprecated, and are
+      available for backwards compatibility only.
+*/
+
+/*
+TABLE 5
++-------------------------+---+---+
+|  Rounding Precision     | e | f |
++-------------------------+---+---+
+| IEEE Single precision   | 0 � 0 |
+| IEEE Double precision   | 0 � 1 |
+| IEEE Extended precision | 1 � 0 |
+| undefined (trap)        | 1 � 1 |
++-------------------------+---+---+
+*/
+
+/*
+TABLE 5
++---------------------------------+---+---+
+|  Rounding Mode                  | g | h |
++---------------------------------+---+---+
+| Round to nearest (default)      | 0 � 0 |
+| Round toward plus infinity      | 0 � 1 |
+| Round toward negative infinity  | 1 � 0 |
+| Round toward zero               | 1 � 1 |
++---------------------------------+---+---+
+*/
+
+/*
+===
+=== Definitions for load and store instructions
+===
+*/
+
+/* bit masks */
+#define BIT_PREINDEX	0x01000000
+#define BIT_UP		0x00800000
+#define BIT_WRITE_BACK	0x00200000
+#define BIT_LOAD	0x00100000
+
+/* masks for load/store */
+#define MASK_CPDT		0x0c000000  /* data processing opcode */
+#define MASK_OFFSET		0x000000ff
+#define MASK_TRANSFER_LENGTH	0x00408000
+#define MASK_REGISTER_COUNT	MASK_TRANSFER_LENGTH
+#define MASK_COPROCESSOR	0x00000f00
+
+/* Tests for transfer length */
+#define TRANSFER_SINGLE		0x00000000
+#define TRANSFER_DOUBLE		0x00008000
+#define TRANSFER_EXTENDED	0x00400000
+#define TRANSFER_PACKED		MASK_TRANSFER_LENGTH
+
+/* Get the coprocessor number from the opcode. */
+#define getCoprocessorNumber(opcode)	((opcode & MASK_COPROCESSOR) >> 8)
+
+/* Get the offset from the opcode. */
+#define getOffset(opcode)		(opcode & MASK_OFFSET)
+
+/* Tests for specific data transfer load/store opcodes. */
+#define TEST_OPCODE(opcode,mask)	(((opcode) & (mask)) == (mask))
+
+#define LOAD_OP(opcode)   TEST_OPCODE((opcode),MASK_CPDT | BIT_LOAD)
+#define STORE_OP(opcode)  ((opcode & (MASK_CPDT | BIT_LOAD)) == MASK_CPDT)
+
+#define LDF_OP(opcode)	(LOAD_OP(opcode) && (getCoprocessorNumber(opcode) == 1))
+#define LFM_OP(opcode)	(LOAD_OP(opcode) && (getCoprocessorNumber(opcode) == 2))
+#define STF_OP(opcode)	(STORE_OP(opcode) && (getCoprocessorNumber(opcode) == 1))
+#define SFM_OP(opcode)	(STORE_OP(opcode) && (getCoprocessorNumber(opcode) == 2))
+
+#define PREINDEXED(opcode)		((opcode & BIT_PREINDEX) != 0)
+#define POSTINDEXED(opcode)		((opcode & BIT_PREINDEX) == 0)
+#define BIT_UP_SET(opcode)		((opcode & BIT_UP) != 0)
+#define BIT_UP_CLEAR(opcode)		((opcode & BIT_DOWN) == 0)
+#define WRITE_BACK(opcode)		((opcode & BIT_WRITE_BACK) != 0)
+#define LOAD(opcode)			((opcode & BIT_LOAD) != 0)
+#define STORE(opcode)			((opcode & BIT_LOAD) == 0)
+
+/*
+===
+=== Definitions for arithmetic instructions
+===
+*/
+/* bit masks */
+#define BIT_MONADIC	0x00008000
+#define BIT_CONSTANT	0x00000008
+
+#define CONSTANT_FM(opcode)		((opcode & BIT_CONSTANT) != 0)
+#define MONADIC_INSTRUCTION(opcode)	((opcode & BIT_MONADIC) != 0)
+
+/* instruction identification masks */
+#define MASK_CPDO		0x0e000000  /* arithmetic opcode */
+#define MASK_ARITHMETIC_OPCODE	0x00f08000
+#define MASK_DESTINATION_SIZE	0x00080080
+
+/* dyadic arithmetic opcodes. */
+#define ADF_CODE	0x00000000
+#define MUF_CODE	0x00100000
+#define SUF_CODE	0x00200000
+#define RSF_CODE	0x00300000
+#define DVF_CODE	0x00400000
+#define RDF_CODE	0x00500000
+#define POW_CODE	0x00600000
+#define RPW_CODE	0x00700000
+#define RMF_CODE	0x00800000
+#define FML_CODE	0x00900000
+#define FDV_CODE	0x00a00000
+#define FRD_CODE	0x00b00000
+#define POL_CODE	0x00c00000
+/* 0x00d00000 is an invalid dyadic arithmetic opcode */
+/* 0x00e00000 is an invalid dyadic arithmetic opcode */
+/* 0x00f00000 is an invalid dyadic arithmetic opcode */
+
+/* monadic arithmetic opcodes. */
+#define MVF_CODE	0x00008000
+#define MNF_CODE	0x00108000
+#define ABS_CODE	0x00208000
+#define RND_CODE	0x00308000
+#define SQT_CODE	0x00408000
+#define LOG_CODE	0x00508000
+#define LGN_CODE	0x00608000
+#define EXP_CODE	0x00708000
+#define SIN_CODE	0x00808000
+#define COS_CODE	0x00908000
+#define TAN_CODE	0x00a08000
+#define ASN_CODE	0x00b08000
+#define ACS_CODE	0x00c08000
+#define ATN_CODE	0x00d08000
+#define URD_CODE	0x00e08000
+#define NRM_CODE	0x00f08000
+
+/*
+===
+=== Definitions for register transfer and comparison instructions
+===
+*/
+
+#define MASK_CPRT		0x0e000010  /* register transfer opcode */
+#define MASK_CPRT_CODE		0x00f00000
+#define FLT_CODE		0x00000000
+#define FIX_CODE		0x00100000
+#define WFS_CODE		0x00200000
+#define RFS_CODE		0x00300000
+#define WFC_CODE		0x00400000
+#define RFC_CODE		0x00500000
+#define CMF_CODE		0x00900000
+#define CNF_CODE		0x00b00000
+#define CMFE_CODE		0x00d00000
+#define CNFE_CODE		0x00f00000
+
+/*
+===
+=== Common definitions
+===
+*/
+
+/* register masks */
+#define MASK_Rd		0x0000f000
+#define MASK_Rn		0x000f0000
+#define MASK_Fd		0x00007000
+#define MASK_Fm		0x00000007
+#define MASK_Fn		0x00070000
+
+/* condition code masks */
+#define CC_MASK		0xf0000000
+#define CC_NEGATIVE	0x80000000
+#define CC_ZERO		0x40000000
+#define CC_CARRY	0x20000000
+#define CC_OVERFLOW	0x10000000
+#define CC_EQ		0x00000000
+#define CC_NE		0x10000000
+#define CC_CS		0x20000000
+#define CC_HS		CC_CS
+#define CC_CC		0x30000000
+#define CC_LO		CC_CC
+#define CC_MI		0x40000000
+#define CC_PL		0x50000000
+#define CC_VS		0x60000000
+#define CC_VC		0x70000000
+#define CC_HI		0x80000000
+#define CC_LS		0x90000000
+#define CC_GE		0xa0000000
+#define CC_LT		0xb0000000
+#define CC_GT		0xc0000000
+#define CC_LE		0xd0000000
+#define CC_AL		0xe0000000
+#define CC_NV		0xf0000000
+
+/* rounding masks/values */
+#define MASK_ROUNDING_MODE	0x00000060
+#define ROUND_TO_NEAREST	0x00000000
+#define ROUND_TO_PLUS_INFINITY	0x00000020
+#define ROUND_TO_MINUS_INFINITY	0x00000040
+#define ROUND_TO_ZERO		0x00000060
+
+#define MASK_ROUNDING_PRECISION	0x00080080
+#define ROUND_SINGLE		0x00000000
+#define ROUND_DOUBLE		0x00000080
+#define ROUND_EXTENDED		0x00080000
+
+/* Get the condition code from the opcode. */
+#define getCondition(opcode)		(opcode >> 28)
+
+/* Get the source register from the opcode. */
+#define getRn(opcode)			((opcode & MASK_Rn) >> 16)
+
+/* Get the destination floating point register from the opcode. */
+#define getFd(opcode)			((opcode & MASK_Fd) >> 12)
+
+/* Get the first source floating point register from the opcode. */
+#define getFn(opcode)		((opcode & MASK_Fn) >> 16)
+
+/* Get the second source floating point register from the opcode. */
+#define getFm(opcode)		(opcode & MASK_Fm)
+
+/* Get the destination register from the opcode. */
+#define getRd(opcode)		((opcode & MASK_Rd) >> 12)
+
+/* Get the rounding mode from the opcode. */
+#define getRoundingMode(opcode)		((opcode & MASK_ROUNDING_MODE) >> 5)
+
+static inline const floatx80 getExtendedConstant(const unsigned int nIndex)
+{
+   extern const floatx80 floatx80Constant[];
+   return floatx80Constant[nIndex];
+} 
+
+static inline const float64 getDoubleConstant(const unsigned int nIndex)
+{
+   extern const float64 float64Constant[];
+   return float64Constant[nIndex];
+} 
+
+static inline const float32 getSingleConstant(const unsigned int nIndex)
+{
+   extern const float32 float32Constant[];
+   return float32Constant[nIndex];
+} 
+
+extern unsigned int getRegisterCount(const unsigned int opcode);
+extern unsigned int getDestinationSize(const unsigned int opcode);
+
+#endif
diff --git a/arch/arm26/nwfpe/fpsr.h b/arch/arm26/nwfpe/fpsr.h
new file mode 100644
index 00000000000..6dafb0f5243
--- /dev/null
+++ b/arch/arm26/nwfpe/fpsr.h
@@ -0,0 +1,108 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.com, 1998-1999
+
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef __FPSR_H__
+#define __FPSR_H__
+
+/*
+The FPSR is a 32 bit register consisting of 4 parts, each exactly
+one byte.
+
+	SYSTEM ID
+	EXCEPTION TRAP ENABLE BYTE
+	SYSTEM CONTROL BYTE
+	CUMULATIVE EXCEPTION FLAGS BYTE
+	
+The FPCR is a 32 bit register consisting of bit flags.
+*/
+
+/* SYSTEM ID
+------------
+Note: the system id byte is read only  */
+
+typedef unsigned int FPSR;  /* type for floating point status register */
+typedef unsigned int FPCR;  /* type for floating point control register */
+
+#define MASK_SYSID		0xff000000
+#define BIT_HARDWARE		0x80000000
+#define FP_EMULATOR		0x01000000	/* System ID for emulator */ 
+#define FP_ACCELERATOR		0x81000000	/* System ID for FPA11 */
+
+/* EXCEPTION TRAP ENABLE BYTE
+----------------------------- */
+
+#define MASK_TRAP_ENABLE	0x00ff0000
+#define MASK_TRAP_ENABLE_STRICT	0x001f0000
+#define BIT_IXE		0x00100000   /* inexact exception enable */
+#define BIT_UFE		0x00080000   /* underflow exception enable */
+#define BIT_OFE		0x00040000   /* overflow exception enable */
+#define BIT_DZE		0x00020000   /* divide by zero exception enable */
+#define BIT_IOE		0x00010000   /* invalid operation exception enable */
+
+/* SYSTEM CONTROL BYTE
+---------------------- */
+
+#define MASK_SYSTEM_CONTROL	0x0000ff00
+#define MASK_TRAP_STRICT	0x00001f00
+
+#define BIT_AC	0x00001000	/* use alternative C-flag definition
+				   for compares */
+#define BIT_EP	0x00000800	/* use expanded packed decimal format */
+#define BIT_SO	0x00000400	/* select synchronous operation of FPA */
+#define BIT_NE	0x00000200	/* NaN exception bit */
+#define BIT_ND	0x00000100	/* no denormalized numbers bit */
+
+/* CUMULATIVE EXCEPTION FLAGS BYTE
+---------------------------------- */
+
+#define MASK_EXCEPTION_FLAGS		0x000000ff
+#define MASK_EXCEPTION_FLAGS_STRICT	0x0000001f
+
+#define BIT_IXC		0x00000010	/* inexact exception flag */
+#define BIT_UFC		0x00000008	/* underflow exception flag */
+#define BIT_OFC		0x00000004	/* overfloat exception flag */
+#define BIT_DZC		0x00000002	/* divide by zero exception flag */
+#define BIT_IOC		0x00000001	/* invalid operation exception flag */
+
+/* Floating Point Control Register
+----------------------------------*/
+
+#define BIT_RU		0x80000000	/* rounded up bit */
+#define BIT_IE		0x10000000	/* inexact bit */
+#define BIT_MO		0x08000000	/* mantissa overflow bit */
+#define BIT_EO		0x04000000	/* exponent overflow bit */
+#define BIT_SB		0x00000800	/* store bounce */
+#define BIT_AB		0x00000400	/* arithmetic bounce */
+#define BIT_RE		0x00000200	/* rounding exception */
+#define BIT_DA		0x00000100	/* disable FPA */
+
+#define MASK_OP		0x00f08010	/* AU operation code */
+#define MASK_PR		0x00080080	/* AU precision */
+#define MASK_S1		0x00070000	/* AU source register 1 */
+#define MASK_S2		0x00000007	/* AU source register 2 */
+#define MASK_DS		0x00007000	/* AU destination register */
+#define MASK_RM		0x00000060	/* AU rounding mode */
+#define MASK_ALU	0x9cfff2ff	/* only ALU can write these bits */
+#define MASK_RESET	0x00000d00	/* bits set on reset, all others cleared */
+#define MASK_WFC	MASK_RESET
+#define MASK_RFC	~MASK_RESET
+
+#endif
diff --git a/arch/arm26/nwfpe/milieu.h b/arch/arm26/nwfpe/milieu.h
new file mode 100644
index 00000000000..a3892ab2dca
--- /dev/null
+++ b/arch/arm26/nwfpe/milieu.h
@@ -0,0 +1,48 @@
+
+/*
+===============================================================================
+
+This C header file is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
+arithmetic/softfloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) they include prominent notice that the work is derivative, and (2) they
+include prominent notice akin to these three paragraphs for those parts of
+this code that are retained.
+
+===============================================================================
+*/
+
+/*
+-------------------------------------------------------------------------------
+Include common integer types and flags.
+-------------------------------------------------------------------------------
+*/
+#include "ARM-gcc.h"
+
+/*
+-------------------------------------------------------------------------------
+Symbolic Boolean literals.
+-------------------------------------------------------------------------------
+*/
+enum {
+    FALSE = 0,
+    TRUE  = 1
+};
+
diff --git a/arch/arm26/nwfpe/single_cpdo.c b/arch/arm26/nwfpe/single_cpdo.c
new file mode 100644
index 00000000000..5cdcddbb899
--- /dev/null
+++ b/arch/arm26/nwfpe/single_cpdo.c
@@ -0,0 +1,255 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.COM, 1998,1999
+
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "fpa11.h"
+#include "softfloat.h"
+#include "fpopcode.h"
+
+float32 float32_exp(float32 Fm);
+float32 float32_ln(float32 Fm);
+float32 float32_sin(float32 rFm);
+float32 float32_cos(float32 rFm);
+float32 float32_arcsin(float32 rFm);
+float32 float32_arctan(float32 rFm);
+float32 float32_log(float32 rFm);
+float32 float32_tan(float32 rFm);
+float32 float32_arccos(float32 rFm);
+float32 float32_pow(float32 rFn,float32 rFm);
+float32 float32_pol(float32 rFn,float32 rFm);
+
+unsigned int SingleCPDO(const unsigned int opcode)
+{
+   FPA11 *fpa11 = GET_FPA11();
+   float32 rFm, rFn = 0; //FIXME - should be zero?
+   unsigned int Fd, Fm, Fn, nRc = 1;
+
+   Fm = getFm(opcode);
+   if (CONSTANT_FM(opcode))
+   {
+     rFm = getSingleConstant(Fm);
+   }
+   else
+   {  
+     switch (fpa11->fType[Fm])
+     {
+        case typeSingle:
+          rFm = fpa11->fpreg[Fm].fSingle;
+        break;
+        
+        default: return 0;
+     }
+   }
+
+   if (!MONADIC_INSTRUCTION(opcode))
+   {
+      Fn = getFn(opcode);
+      switch (fpa11->fType[Fn])
+      {
+        case typeSingle:
+          rFn = fpa11->fpreg[Fn].fSingle;
+        break;
+
+        default: return 0;
+      }
+   }
+
+   Fd = getFd(opcode);
+   switch (opcode & MASK_ARITHMETIC_OPCODE)
+   {
+      /* dyadic opcodes */
+      case ADF_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_add(rFn,rFm);
+      break;
+
+      case MUF_CODE:
+      case FML_CODE:
+        fpa11->fpreg[Fd].fSingle = float32_mul(rFn,rFm);
+      break;
+
+      case SUF_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_sub(rFn,rFm);
+      break;
+
+      case RSF_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_sub(rFm,rFn);
+      break;
+
+      case DVF_CODE:
+      case FDV_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_div(rFn,rFm);
+      break;
+
+      case RDF_CODE:
+      case FRD_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_div(rFm,rFn);
+      break;
+
+#if 0
+      case POW_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_pow(rFn,rFm);
+      break;
+
+      case RPW_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_pow(rFm,rFn);
+      break;
+#endif
+
+      case RMF_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_rem(rFn,rFm);
+      break;
+
+#if 0
+      case POL_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_pol(rFn,rFm);
+      break;
+#endif
+
+      /* monadic opcodes */
+      case MVF_CODE:
+         fpa11->fpreg[Fd].fSingle = rFm;
+      break;
+
+      case MNF_CODE:
+         rFm ^= 0x80000000;
+         fpa11->fpreg[Fd].fSingle = rFm;
+      break;
+
+      case ABS_CODE:
+         rFm &= 0x7fffffff;
+         fpa11->fpreg[Fd].fSingle = rFm;
+      break;
+
+      case RND_CODE:
+      case URD_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_round_to_int(rFm);
+      break;
+
+      case SQT_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_sqrt(rFm);
+      break;
+
+#if 0
+      case LOG_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_log(rFm);
+      break;
+
+      case LGN_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_ln(rFm);
+      break;
+
+      case EXP_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_exp(rFm);
+      break;
+
+      case SIN_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_sin(rFm);
+      break;
+
+      case COS_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_cos(rFm);
+      break;
+
+      case TAN_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_tan(rFm);
+      break;
+
+      case ASN_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_arcsin(rFm);
+      break;
+
+      case ACS_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_arccos(rFm);
+      break;
+
+      case ATN_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_arctan(rFm);
+      break;
+#endif
+
+      case NRM_CODE:
+      break;
+      
+      default:
+      {
+        nRc = 0;
+      }
+   }
+
+   if (0 != nRc) fpa11->fType[Fd] = typeSingle;
+   return nRc;
+}
+
+#if 0
+float32 float32_exp(float32 Fm)
+{
+//series
+}
+
+float32 float32_ln(float32 Fm)
+{
+//series
+}
+
+float32 float32_sin(float32 rFm)
+{
+//series
+}
+
+float32 float32_cos(float32 rFm)
+{
+//series
+}
+
+float32 float32_arcsin(float32 rFm)
+{
+//series
+}
+
+float32 float32_arctan(float32 rFm)
+{
+  //series
+}
+
+float32 float32_arccos(float32 rFm)
+{
+   //return float32_sub(halfPi,float32_arcsin(rFm));
+}
+
+float32 float32_log(float32 rFm)
+{
+  return float32_div(float32_ln(rFm),getSingleConstant(7));
+}
+
+float32 float32_tan(float32 rFm)
+{
+  return float32_div(float32_sin(rFm),float32_cos(rFm));
+}
+
+float32 float32_pow(float32 rFn,float32 rFm)
+{
+  return float32_exp(float32_mul(rFm,float32_ln(rFn))); 
+}
+
+float32 float32_pol(float32 rFn,float32 rFm)
+{
+  return float32_arctan(float32_div(rFn,rFm)); 
+}
+#endif
diff --git a/arch/arm26/nwfpe/softfloat-macros b/arch/arm26/nwfpe/softfloat-macros
new file mode 100644
index 00000000000..5469989f2c5
--- /dev/null
+++ b/arch/arm26/nwfpe/softfloat-macros
@@ -0,0 +1,740 @@
+
+/*
+===============================================================================
+
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
+arithmetic/softfloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) they include prominent notice that the work is derivative, and (2) they
+include prominent notice akin to these three paragraphs for those parts of
+this code that are retained.
+
+===============================================================================
+*/
+
+/*
+-------------------------------------------------------------------------------
+Shifts `a' right by the number of bits given in `count'.  If any nonzero
+bits are shifted off, they are ``jammed'' into the least significant bit of
+the result by setting the least significant bit to 1.  The value of `count'
+can be arbitrarily large; in particular, if `count' is greater than 32, the
+result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+The result is stored in the location pointed to by `zPtr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
+{
+    bits32 z;
+    if ( count == 0 ) {
+        z = a;
+    }
+    else if ( count < 32 ) {
+        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
+    }
+    else {
+        z = ( a != 0 );
+    }
+    *zPtr = z;
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts `a' right by the number of bits given in `count'.  If any nonzero
+bits are shifted off, they are ``jammed'' into the least significant bit of
+the result by setting the least significant bit to 1.  The value of `count'
+can be arbitrarily large; in particular, if `count' is greater than 64, the
+result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+The result is stored in the location pointed to by `zPtr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
+{
+    bits64 z;
+
+ __asm__("@shift64RightJamming -- start");   
+    if ( count == 0 ) {
+        z = a;
+    }
+    else if ( count < 64 ) {
+        z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
+    }
+    else {
+        z = ( a != 0 );
+    }
+ __asm__("@shift64RightJamming -- end");   
+    *zPtr = z;
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
+_plus_ the number of bits given in `count'.  The shifted result is at most
+64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
+bits shifted off form a second 64-bit result as follows:  The _last_ bit
+shifted off is the most-significant bit of the extra result, and the other
+63 bits of the extra result are all zero if and only if _all_but_the_last_
+bits shifted off were all zero.  This extra result is stored in the location
+pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
+    (This routine makes more sense if `a0' and `a1' are considered to form a
+fixed-point value with binary point between `a0' and `a1'.  This fixed-point
+value is shifted right by the number of bits given in `count', and the
+integer part of the result is returned at the location pointed to by
+`z0Ptr'.  The fractional part of the result may be slightly corrupted as
+described above, and is returned at the location pointed to by `z1Ptr'.)
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shift64ExtraRightJamming(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z0, z1;
+    int8 negCount = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 64 ) {
+        z1 = ( a0<<negCount ) | ( a1 != 0 );
+        z0 = a0>>count;
+    }
+    else {
+        if ( count == 64 ) {
+            z1 = a0 | ( a1 != 0 );
+        }
+        else {
+            z1 = ( ( a0 | a1 ) != 0 );
+        }
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+number of bits given in `count'.  Any bits shifted off are lost.  The value
+of `count' can be arbitrarily large; in particular, if `count' is greater
+than 128, the result will be 0.  The result is broken into two 64-bit pieces
+which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shift128Right(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z0, z1;
+    int8 negCount = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 64 ) {
+        z1 = ( a0<<negCount ) | ( a1>>count );
+        z0 = a0>>count;
+    }
+    else {
+        z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+number of bits given in `count'.  If any nonzero bits are shifted off, they
+are ``jammed'' into the least significant bit of the result by setting the
+least significant bit to 1.  The value of `count' can be arbitrarily large;
+in particular, if `count' is greater than 128, the result will be either 0
+or 1, depending on whether the concatenation of `a0' and `a1' is zero or
+nonzero.  The result is broken into two 64-bit pieces which are stored at
+the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shift128RightJamming(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z0, z1;
+    int8 negCount = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 64 ) {
+        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
+        z0 = a0>>count;
+    }
+    else {
+        if ( count == 64 ) {
+            z1 = a0 | ( a1 != 0 );
+        }
+        else if ( count < 128 ) {
+            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
+        }
+        else {
+            z1 = ( ( a0 | a1 ) != 0 );
+        }
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
+by 64 _plus_ the number of bits given in `count'.  The shifted result is
+at most 128 nonzero bits; these are broken into two 64-bit pieces which are
+stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
+off form a third 64-bit result as follows:  The _last_ bit shifted off is
+the most-significant bit of the extra result, and the other 63 bits of the
+extra result are all zero if and only if _all_but_the_last_ bits shifted off
+were all zero.  This extra result is stored in the location pointed to by
+`z2Ptr'.  The value of `count' can be arbitrarily large.
+    (This routine makes more sense if `a0', `a1', and `a2' are considered
+to form a fixed-point value with binary point between `a1' and `a2'.  This
+fixed-point value is shifted right by the number of bits given in `count',
+and the integer part of the result is returned at the locations pointed to
+by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
+corrupted as described above, and is returned at the location pointed to by
+`z2Ptr'.)
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shift128ExtraRightJamming(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     int16 count,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    int8 negCount = ( - count ) & 63;
+
+    if ( count == 0 ) {
+        z2 = a2;
+        z1 = a1;
+        z0 = a0;
+    }
+    else {
+        if ( count < 64 ) {
+            z2 = a1<<negCount;
+            z1 = ( a0<<negCount ) | ( a1>>count );
+            z0 = a0>>count;
+        }
+        else {
+            if ( count == 64 ) {
+                z2 = a1;
+                z1 = a0;
+            }
+            else {
+                a2 |= a1;
+                if ( count < 128 ) {
+                    z2 = a0<<negCount;
+                    z1 = a0>>( count & 63 );
+                }
+                else {
+                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
+                    z1 = 0;
+                }
+            }
+            z0 = 0;
+        }
+        z2 |= ( a2 != 0 );
+    }
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
+number of bits given in `count'.  Any bits shifted off are lost.  The value
+of `count' must be less than 64.  The result is broken into two 64-bit
+pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shortShift128Left(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+
+    *z1Ptr = a1<<count;
+    *z0Ptr =
+        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
+by the number of bits given in `count'.  Any bits shifted off are lost.
+The value of `count' must be less than 64.  The result is broken into three
+64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+`z1Ptr', and `z2Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shortShift192Left(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     int16 count,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    int8 negCount;
+
+    z2 = a2<<count;
+    z1 = a1<<count;
+    z0 = a0<<count;
+    if ( 0 < count ) {
+        negCount = ( ( - count ) & 63 );
+        z1 |= a2>>negCount;
+        z0 |= a1>>negCount;
+    }
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
+value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
+any carry out is lost.  The result is broken into two 64-bit pieces which
+are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ add128(
+     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z1;
+
+    z1 = a1 + b1;
+    *z1Ptr = z1;
+    *z0Ptr = a0 + b0 + ( z1 < a1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
+192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
+modulo 2^192, so any carry out is lost.  The result is broken into three
+64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+`z1Ptr', and `z2Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ add192(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     bits64 b0,
+     bits64 b1,
+     bits64 b2,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    int8 carry0, carry1;
+
+    z2 = a2 + b2;
+    carry1 = ( z2 < a2 );
+    z1 = a1 + b1;
+    carry0 = ( z1 < a1 );
+    z0 = a0 + b0;
+    z1 += carry1;
+    z0 += ( z1 < carry1 );
+    z0 += carry0;
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
+128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
+2^128, so any borrow out (carry out) is lost.  The result is broken into two
+64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
+`z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ sub128(
+     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+
+    *z1Ptr = a1 - b1;
+    *z0Ptr = a0 - b0 - ( a1 < b1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
+from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
+Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
+result is broken into three 64-bit pieces which are stored at the locations
+pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ sub192(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     bits64 b0,
+     bits64 b1,
+     bits64 b2,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    int8 borrow0, borrow1;
+
+    z2 = a2 - b2;
+    borrow1 = ( a2 < b2 );
+    z1 = a1 - b1;
+    borrow0 = ( a1 < b1 );
+    z0 = a0 - b0;
+    z0 -= ( z1 < borrow1 );
+    z1 -= borrow1;
+    z0 -= borrow0;
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
+into two 64-bit pieces which are stored at the locations pointed to by
+`z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits32 aHigh, aLow, bHigh, bLow;
+    bits64 z0, zMiddleA, zMiddleB, z1;
+
+    aLow = a;
+    aHigh = a>>32;
+    bLow = b;
+    bHigh = b>>32;
+    z1 = ( (bits64) aLow ) * bLow;
+    zMiddleA = ( (bits64) aLow ) * bHigh;
+    zMiddleB = ( (bits64) aHigh ) * bLow;
+    z0 = ( (bits64) aHigh ) * bHigh;
+    zMiddleA += zMiddleB;
+    z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
+    zMiddleA <<= 32;
+    z1 += zMiddleA;
+    z0 += ( z1 < zMiddleA );
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Multiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to
+obtain a 192-bit product.  The product is broken into three 64-bit pieces
+which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
+`z2Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ mul128By64To192(
+     bits64 a0,
+     bits64 a1,
+     bits64 b,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2, more1;
+
+    mul64To128( a1, b, &z1, &z2 );
+    mul64To128( a0, b, &z0, &more1 );
+    add128( z0, more1, 0, z1, &z0, &z1 );
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
+128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
+product.  The product is broken into four 64-bit pieces which are stored at
+the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ mul128To256(
+     bits64 a0,
+     bits64 a1,
+     bits64 b0,
+     bits64 b1,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr,
+     bits64 *z3Ptr
+ )
+{
+    bits64 z0, z1, z2, z3;
+    bits64 more1, more2;
+
+    mul64To128( a1, b1, &z2, &z3 );
+    mul64To128( a1, b0, &z1, &more2 );
+    add128( z1, more2, 0, z2, &z1, &z2 );
+    mul64To128( a0, b0, &z0, &more1 );
+    add128( z0, more1, 0, z1, &z0, &z1 );
+    mul64To128( a0, b1, &more1, &more2 );
+    add128( more1, more2, 0, z2, &more1, &z2 );
+    add128( z0, z1, 0, more1, &z0, &z1 );
+    *z3Ptr = z3;
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns an approximation to the 64-bit integer quotient obtained by dividing
+`b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
+divisor `b' must be at least 2^63.  If q is the exact quotient truncated
+toward zero, the approximation returned lies between q and q + 2 inclusive.
+If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
+unsigned integer is returned.
+-------------------------------------------------------------------------------
+*/
+static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
+{
+    bits64 b0, b1;
+    bits64 rem0, rem1, term0, term1;
+    bits64 z;
+    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
+    b0 = b>>32;
+    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
+    mul64To128( b, z, &term0, &term1 );
+    sub128( a0, a1, term0, term1, &rem0, &rem1 );
+    while ( ( (sbits64) rem0 ) < 0 ) {
+        z -= LIT64( 0x100000000 );
+        b1 = b<<32;
+        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
+    }
+    rem0 = ( rem0<<32 ) | ( rem1>>32 );
+    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns an approximation to the square root of the 32-bit significand given
+by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
+`aExp' (the least significant bit) is 1, the integer returned approximates
+2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
+is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
+case, the approximation returned lies strictly within +/-2 of the exact
+value.
+-------------------------------------------------------------------------------
+*/
+static bits32 estimateSqrt32( int16 aExp, bits32 a )
+{
+    static const bits16 sqrtOddAdjustments[] = {
+        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
+        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
+    };
+    static const bits16 sqrtEvenAdjustments[] = {
+        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
+        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
+    };
+    int8 index;
+    bits32 z;
+
+    index = ( a>>27 ) & 15;
+    if ( aExp & 1 ) {
+        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
+        z = ( ( a / z )<<14 ) + ( z<<15 );
+        a >>= 1;
+    }
+    else {
+        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
+        z = a / z + z;
+        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
+        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
+    }
+    return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the number of leading 0 bits before the most-significant 1 bit
+of `a'.  If `a' is zero, 32 is returned.
+-------------------------------------------------------------------------------
+*/
+static int8 countLeadingZeros32( bits32 a )
+{
+    static const int8 countLeadingZerosHigh[] = {
+        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+    };
+    int8 shiftCount;
+
+    shiftCount = 0;
+    if ( a < 0x10000 ) {
+        shiftCount += 16;
+        a <<= 16;
+    }
+    if ( a < 0x1000000 ) {
+        shiftCount += 8;
+        a <<= 8;
+    }
+    shiftCount += countLeadingZerosHigh[ a>>24 ];
+    return shiftCount;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the number of leading 0 bits before the most-significant 1 bit
+of `a'.  If `a' is zero, 64 is returned.
+-------------------------------------------------------------------------------
+*/
+static int8 countLeadingZeros64( bits64 a )
+{
+    int8 shiftCount;
+
+    shiftCount = 0;
+    if ( a < ( (bits64) 1 )<<32 ) {
+        shiftCount += 32;
+    }
+    else {
+        a >>= 32;
+    }
+    shiftCount += countLeadingZeros32( a );
+    return shiftCount;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
+is equal to the 128-bit value formed by concatenating `b0' and `b1'.
+Otherwise, returns 0.
+-------------------------------------------------------------------------------
+*/
+INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+    return ( a0 == b0 ) && ( a1 == b1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
+Otherwise, returns 0.
+-------------------------------------------------------------------------------
+*/
+INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
+returns 0.
+-------------------------------------------------------------------------------
+*/
+INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
+not equal to the 128-bit value formed by concatenating `b0' and `b1'.
+Otherwise, returns 0.
+-------------------------------------------------------------------------------
+*/
+INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+
+    return ( a0 != b0 ) || ( a1 != b1 );
+
+}
+
diff --git a/arch/arm26/nwfpe/softfloat-specialize b/arch/arm26/nwfpe/softfloat-specialize
new file mode 100644
index 00000000000..acf40914476
--- /dev/null
+++ b/arch/arm26/nwfpe/softfloat-specialize
@@ -0,0 +1,366 @@
+
+/*
+===============================================================================
+
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
+arithmetic/softfloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) they include prominent notice that the work is derivative, and (2) they
+include prominent notice akin to these three paragraphs for those parts of
+this code that are retained.
+
+===============================================================================
+*/
+
+/*
+-------------------------------------------------------------------------------
+Underflow tininess-detection mode, statically initialized to default value.
+(The declaration in `softfloat.h' must match the `int8' type here.)
+-------------------------------------------------------------------------------
+*/
+int8 float_detect_tininess = float_tininess_after_rounding;
+
+/*
+-------------------------------------------------------------------------------
+Raises the exceptions specified by `flags'.  Floating-point traps can be
+defined here if desired.  It is currently not possible for such a trap to
+substitute a result value.  If traps are not implemented, this routine
+should be simply `float_exception_flags |= flags;'.
+
+ScottB:  November 4, 1998
+Moved this function out of softfloat-specialize into fpmodule.c.
+This effectively isolates all the changes required for integrating with the
+Linux kernel into fpmodule.c.  Porting to NetBSD should only require modifying
+fpmodule.c to integrate with the NetBSD kernel (I hope!).
+-------------------------------------------------------------------------------
+void float_raise( int8 flags )
+{
+    float_exception_flags |= flags;
+}
+*/
+
+/*
+-------------------------------------------------------------------------------
+Internal canonical NaN format.
+-------------------------------------------------------------------------------
+*/
+typedef struct {
+    flag sign;
+    bits64 high, low;
+} commonNaNT;
+
+/*
+-------------------------------------------------------------------------------
+The pattern for a default generated single-precision NaN.
+-------------------------------------------------------------------------------
+*/
+#define float32_default_nan 0xFFFFFFFF
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is a NaN;
+otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag float32_is_nan( float32 a )
+{
+
+    return ( 0xFF000000 < (bits32) ( a<<1 ) );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is a signaling
+NaN; otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag float32_is_signaling_nan( float32 a )
+{
+
+    return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point NaN
+`a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+exception is raised.
+-------------------------------------------------------------------------------
+*/
+static commonNaNT float32ToCommonNaN( float32 a )
+{
+    commonNaNT z;
+
+    if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    z.sign = a>>31;
+    z.low = 0;
+    z.high = ( (bits64) a )<<41;
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the canonical NaN `a' to the single-
+precision floating-point format.
+-------------------------------------------------------------------------------
+*/
+static float32 commonNaNToFloat32( commonNaNT a )
+{
+
+    return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes two single-precision floating-point values `a' and `b', one of which
+is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+signaling NaN, the invalid exception is raised.
+-------------------------------------------------------------------------------
+*/
+static float32 propagateFloat32NaN( float32 a, float32 b )
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    aIsNaN = float32_is_nan( a );
+    aIsSignalingNaN = float32_is_signaling_nan( a );
+    bIsNaN = float32_is_nan( b );
+    bIsSignalingNaN = float32_is_signaling_nan( b );
+    a |= 0x00400000;
+    b |= 0x00400000;
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
+    if ( aIsNaN ) {
+        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
+    }
+    else {
+        return b;
+    }
+
+}
+
+/*
+-------------------------------------------------------------------------------
+The pattern for a default generated double-precision NaN.
+-------------------------------------------------------------------------------
+*/
+#define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF )
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is a NaN;
+otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag float64_is_nan( float64 a )
+{
+
+    return ( LIT64( 0xFFE0000000000000 ) < (bits64) ( a<<1 ) );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is a signaling
+NaN; otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag float64_is_signaling_nan( float64 a )
+{
+
+    return
+           ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
+        && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point NaN
+`a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+exception is raised.
+-------------------------------------------------------------------------------
+*/
+static commonNaNT float64ToCommonNaN( float64 a )
+{
+    commonNaNT z;
+
+    if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    z.sign = a>>63;
+    z.low = 0;
+    z.high = a<<12;
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the canonical NaN `a' to the double-
+precision floating-point format.
+-------------------------------------------------------------------------------
+*/
+static float64 commonNaNToFloat64( commonNaNT a )
+{
+
+    return
+          ( ( (bits64) a.sign )<<63 )
+        | LIT64( 0x7FF8000000000000 )
+        | ( a.high>>12 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes two double-precision floating-point values `a' and `b', one of which
+is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+signaling NaN, the invalid exception is raised.
+-------------------------------------------------------------------------------
+*/
+static float64 propagateFloat64NaN( float64 a, float64 b )
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    aIsNaN = float64_is_nan( a );
+    aIsSignalingNaN = float64_is_signaling_nan( a );
+    bIsNaN = float64_is_nan( b );
+    bIsSignalingNaN = float64_is_signaling_nan( b );
+    a |= LIT64( 0x0008000000000000 );
+    b |= LIT64( 0x0008000000000000 );
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
+    if ( aIsNaN ) {
+        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
+    }
+    else {
+        return b;
+    }
+
+}
+
+#ifdef FLOATX80
+
+/*
+-------------------------------------------------------------------------------
+The pattern for a default generated extended double-precision NaN.  The
+`high' and `low' values hold the most- and least-significant bits,
+respectively.
+-------------------------------------------------------------------------------
+*/
+#define floatx80_default_nan_high 0xFFFF
+#define floatx80_default_nan_low  LIT64( 0xFFFFFFFFFFFFFFFF )
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is a
+NaN; otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_is_nan( floatx80 a )
+{
+
+    return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is a
+signaling NaN; otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_is_signaling_nan( floatx80 a )
+{
+    //register int lr;
+    bits64 aLow;
+
+    //__asm__("mov %0, lr" : : "g" (lr));
+    //fp_printk("floatx80_is_signalling_nan() called from 0x%08x\n",lr);
+    aLow = a.low & ~ LIT64( 0x4000000000000000 );
+    return
+           ( ( a.high & 0x7FFF ) == 0x7FFF )
+        && (bits64) ( aLow<<1 )
+        && ( a.low == aLow );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the extended double-precision floating-
+point NaN `a' to the canonical NaN format.  If `a' is a signaling NaN, the
+invalid exception is raised.
+-------------------------------------------------------------------------------
+*/
+static commonNaNT floatx80ToCommonNaN( floatx80 a )
+{
+    commonNaNT z;
+
+    if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    z.sign = a.high>>15;
+    z.low = 0;
+    z.high = a.low<<1;
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the canonical NaN `a' to the extended
+double-precision floating-point format.
+-------------------------------------------------------------------------------
+*/
+static floatx80 commonNaNToFloatx80( commonNaNT a )
+{
+    floatx80 z;
+
+    z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
+    z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes two extended double-precision floating-point values `a' and `b', one
+of which is a NaN, and returns the appropriate NaN result.  If either `a' or
+`b' is a signaling NaN, the invalid exception is raised.
+-------------------------------------------------------------------------------
+*/
+static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b )
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    aIsNaN = floatx80_is_nan( a );
+    aIsSignalingNaN = floatx80_is_signaling_nan( a );
+    bIsNaN = floatx80_is_nan( b );
+    bIsSignalingNaN = floatx80_is_signaling_nan( b );
+    a.low |= LIT64( 0xC000000000000000 );
+    b.low |= LIT64( 0xC000000000000000 );
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
+    if ( aIsNaN ) {
+        return ( aIsSignalingNaN & bIsNaN ) ? b : a;
+    }
+    else {
+        return b;
+    }
+
+}
+
+#endif
diff --git a/arch/arm26/nwfpe/softfloat.c b/arch/arm26/nwfpe/softfloat.c
new file mode 100644
index 00000000000..26c1b916e52
--- /dev/null
+++ b/arch/arm26/nwfpe/softfloat.c
@@ -0,0 +1,3439 @@
+/*
+===============================================================================
+
+This C source file is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
+arithmetic/softfloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) they include prominent notice that the work is derivative, and (2) they
+include prominent notice akin to these three paragraphs for those parts of
+this code that are retained.
+
+===============================================================================
+*/
+
+#include "fpa11.h"
+#include "milieu.h"
+#include "softfloat.h"
+
+/*
+-------------------------------------------------------------------------------
+Floating-point rounding mode, extended double-precision rounding precision,
+and exception flags.
+-------------------------------------------------------------------------------
+*/
+int8 float_rounding_mode = float_round_nearest_even;
+int8 floatx80_rounding_precision = 80;
+int8 float_exception_flags;
+
+/*
+-------------------------------------------------------------------------------
+Primitive arithmetic functions, including multi-word arithmetic, and
+division and square root approximations.  (Can be specialized to target if
+desired.)
+-------------------------------------------------------------------------------
+*/
+#include "softfloat-macros"
+
+/*
+-------------------------------------------------------------------------------
+Functions and definitions to determine:  (1) whether tininess for underflow
+is detected before or after rounding by default, (2) what (if anything)
+happens when exceptions are raised, (3) how signaling NaNs are distinguished
+from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
+are propagated from function inputs to output.  These details are target-
+specific.
+-------------------------------------------------------------------------------
+*/
+#include "softfloat-specialize"
+
+/*
+-------------------------------------------------------------------------------
+Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
+and 7, and returns the properly rounded 32-bit integer corresponding to the
+input.  If `zSign' is nonzero, the input is negated before being converted
+to an integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point
+input is simply rounded to an integer, with the inexact exception raised if
+the input cannot be represented exactly as an integer.  If the fixed-point
+input is too large, however, the invalid exception is raised and the largest
+positive or negative integer is returned.
+-------------------------------------------------------------------------------
+*/
+static int32 roundAndPackInt32( flag zSign, bits64 absZ )
+{
+    int8 roundingMode;
+    flag roundNearestEven;
+    int8 roundIncrement, roundBits;
+    int32 z;
+
+    roundingMode = float_rounding_mode;
+    roundNearestEven = ( roundingMode == float_round_nearest_even );
+    roundIncrement = 0x40;
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            roundIncrement = 0;
+        }
+        else {
+            roundIncrement = 0x7F;
+            if ( zSign ) {
+                if ( roundingMode == float_round_up ) roundIncrement = 0;
+            }
+            else {
+                if ( roundingMode == float_round_down ) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = absZ & 0x7F;
+    absZ = ( absZ + roundIncrement )>>7;
+    absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
+    z = absZ;
+    if ( zSign ) z = - z;
+    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
+        float_exception_flags |= float_flag_invalid;
+        return zSign ? 0x80000000 : 0x7FFFFFFF;
+    }
+    if ( roundBits ) float_exception_flags |= float_flag_inexact;
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the fraction bits of the single-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE bits32 extractFloat32Frac( float32 a )
+{
+
+    return a & 0x007FFFFF;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the exponent bits of the single-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE int16 extractFloat32Exp( float32 a )
+{
+
+    return ( a>>23 ) & 0xFF;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the sign bit of the single-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE flag extractFloat32Sign( float32 a )
+{
+
+    return a>>31;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Normalizes the subnormal single-precision floating-point value represented
+by the denormalized significand `aSig'.  The normalized exponent and
+significand are stored at the locations pointed to by `zExpPtr' and
+`zSigPtr', respectively.
+-------------------------------------------------------------------------------
+*/
+static void
+ normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
+{
+    int8 shiftCount;
+
+    shiftCount = countLeadingZeros32( aSig ) - 8;
+    *zSigPtr = aSig<<shiftCount;
+    *zExpPtr = 1 - shiftCount;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+single-precision floating-point value, returning the result.  After being
+shifted into the proper positions, the three fields are simply added
+together to form the result.  This means that any integer portion of `zSig'
+will be added into the exponent.  Since a properly normalized significand
+will have an integer portion equal to 1, the `zExp' input should be 1 less
+than the desired result exponent whenever `zSig' is a complete, normalized
+significand.
+-------------------------------------------------------------------------------
+*/
+INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+#if 0
+   float32 f;
+   __asm__("@ packFloat32;		\n\
+   	    mov %0, %1, asl #31;	\n\
+   	    orr %0, %2, asl #23;	\n\
+   	    orr %0, %3"
+   	    : /* no outputs */
+   	    : "g" (f), "g" (zSign), "g" (zExp), "g" (zSig)
+   	    : "cc");
+   return f;
+#else
+    return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
+#endif 
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and significand `zSig', and returns the proper single-precision floating-
+point value corresponding to the abstract input.  Ordinarily, the abstract
+value is simply rounded and packed into the single-precision format, with
+the inexact exception raised if the abstract input cannot be represented
+exactly.  If the abstract value is too large, however, the overflow and
+inexact exceptions are raised and an infinity or maximal finite value is
+returned.  If the abstract value is too small, the input value is rounded to
+a subnormal number, and the underflow and inexact exceptions are raised if
+the abstract input cannot be represented exactly as a subnormal single-
+precision floating-point number.
+    The input significand `zSig' has its binary point between bits 30
+and 29, which is 7 bits to the left of the usual location.  This shifted
+significand must be normalized or smaller.  If `zSig' is not normalized,
+`zExp' must be 0; in that case, the result returned is a subnormal number,
+and it must not require rounding.  In the usual case that `zSig' is
+normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+The handling of underflow and overflow follows the IEC/IEEE Standard for
+Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+    int8 roundingMode;
+    flag roundNearestEven;
+    int8 roundIncrement, roundBits;
+    flag isTiny;
+
+    roundingMode = float_rounding_mode;
+    roundNearestEven = ( roundingMode == float_round_nearest_even );
+    roundIncrement = 0x40;
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            roundIncrement = 0;
+        }
+        else {
+            roundIncrement = 0x7F;
+            if ( zSign ) {
+                if ( roundingMode == float_round_up ) roundIncrement = 0;
+            }
+            else {
+                if ( roundingMode == float_round_down ) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = zSig & 0x7F;
+    if ( 0xFD <= (bits16) zExp ) {
+        if (    ( 0xFD < zExp )
+             || (    ( zExp == 0xFD )
+                  && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
+           ) {
+            float_raise( float_flag_overflow | float_flag_inexact );
+            return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
+        }
+        if ( zExp < 0 ) {
+            isTiny =
+                   ( float_detect_tininess == float_tininess_before_rounding )
+                || ( zExp < -1 )
+                || ( zSig + roundIncrement < 0x80000000 );
+            shift32RightJamming( zSig, - zExp, &zSig );
+            zExp = 0;
+            roundBits = zSig & 0x7F;
+            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+        }
+    }
+    if ( roundBits ) float_exception_flags |= float_flag_inexact;
+    zSig = ( zSig + roundIncrement )>>7;
+    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
+    if ( zSig == 0 ) zExp = 0;
+    return packFloat32( zSign, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and significand `zSig', and returns the proper single-precision floating-
+point value corresponding to the abstract input.  This routine is just like
+`roundAndPackFloat32' except that `zSig' does not have to be normalized in
+any way.  In all cases, `zExp' must be 1 less than the ``true'' floating-
+point exponent.
+-------------------------------------------------------------------------------
+*/
+static float32
+ normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+    int8 shiftCount;
+
+    shiftCount = countLeadingZeros32( zSig ) - 1;
+    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the fraction bits of the double-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE bits64 extractFloat64Frac( float64 a )
+{
+
+    return a & LIT64( 0x000FFFFFFFFFFFFF );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the exponent bits of the double-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE int16 extractFloat64Exp( float64 a )
+{
+
+    return ( a>>52 ) & 0x7FF;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the sign bit of the double-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE flag extractFloat64Sign( float64 a )
+{
+
+    return a>>63;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Normalizes the subnormal double-precision floating-point value represented
+by the denormalized significand `aSig'.  The normalized exponent and
+significand are stored at the locations pointed to by `zExpPtr' and
+`zSigPtr', respectively.
+-------------------------------------------------------------------------------
+*/
+static void
+ normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
+{
+    int8 shiftCount;
+
+    shiftCount = countLeadingZeros64( aSig ) - 11;
+    *zSigPtr = aSig<<shiftCount;
+    *zExpPtr = 1 - shiftCount;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+double-precision floating-point value, returning the result.  After being
+shifted into the proper positions, the three fields are simply added
+together to form the result.  This means that any integer portion of `zSig'
+will be added into the exponent.  Since a properly normalized significand
+will have an integer portion equal to 1, the `zExp' input should be 1 less
+than the desired result exponent whenever `zSig' is a complete, normalized
+significand.
+-------------------------------------------------------------------------------
+*/
+INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
+{
+
+    return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and significand `zSig', and returns the proper double-precision floating-
+point value corresponding to the abstract input.  Ordinarily, the abstract
+value is simply rounded and packed into the double-precision format, with
+the inexact exception raised if the abstract input cannot be represented
+exactly.  If the abstract value is too large, however, the overflow and
+inexact exceptions are raised and an infinity or maximal finite value is
+returned.  If the abstract value is too small, the input value is rounded to
+a subnormal number, and the underflow and inexact exceptions are raised if
+the abstract input cannot be represented exactly as a subnormal double-
+precision floating-point number.
+    The input significand `zSig' has its binary point between bits 62
+and 61, which is 10 bits to the left of the usual location.  This shifted
+significand must be normalized or smaller.  If `zSig' is not normalized,
+`zExp' must be 0; in that case, the result returned is a subnormal number,
+and it must not require rounding.  In the usual case that `zSig' is
+normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+The handling of underflow and overflow follows the IEC/IEEE Standard for
+Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
+{
+    int8 roundingMode;
+    flag roundNearestEven;
+    int16 roundIncrement, roundBits;
+    flag isTiny;
+
+    roundingMode = float_rounding_mode;
+    roundNearestEven = ( roundingMode == float_round_nearest_even );
+    roundIncrement = 0x200;
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            roundIncrement = 0;
+        }
+        else {
+            roundIncrement = 0x3FF;
+            if ( zSign ) {
+                if ( roundingMode == float_round_up ) roundIncrement = 0;
+            }
+            else {
+                if ( roundingMode == float_round_down ) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = zSig & 0x3FF;
+    if ( 0x7FD <= (bits16) zExp ) {
+        if (    ( 0x7FD < zExp )
+             || (    ( zExp == 0x7FD )
+                  && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
+           ) {
+            //register int lr = __builtin_return_address(0);
+            //printk("roundAndPackFloat64 called from 0x%08x\n",lr);
+            float_raise( float_flag_overflow | float_flag_inexact );
+            return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
+        }
+        if ( zExp < 0 ) {
+            isTiny =
+                   ( float_detect_tininess == float_tininess_before_rounding )
+                || ( zExp < -1 )
+                || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
+            shift64RightJamming( zSig, - zExp, &zSig );
+            zExp = 0;
+            roundBits = zSig & 0x3FF;
+            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+        }
+    }
+    if ( roundBits ) float_exception_flags |= float_flag_inexact;
+    zSig = ( zSig + roundIncrement )>>10;
+    zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
+    if ( zSig == 0 ) zExp = 0;
+    return packFloat64( zSign, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and significand `zSig', and returns the proper double-precision floating-
+point value corresponding to the abstract input.  This routine is just like
+`roundAndPackFloat64' except that `zSig' does not have to be normalized in
+any way.  In all cases, `zExp' must be 1 less than the ``true'' floating-
+point exponent.
+-------------------------------------------------------------------------------
+*/
+static float64
+ normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
+{
+    int8 shiftCount;
+
+    shiftCount = countLeadingZeros64( zSig ) - 1;
+    return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
+
+}
+
+#ifdef FLOATX80
+
+/*
+-------------------------------------------------------------------------------
+Returns the fraction bits of the extended double-precision floating-point
+value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE bits64 extractFloatx80Frac( floatx80 a )
+{
+
+    return a.low;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the exponent bits of the extended double-precision floating-point
+value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE int32 extractFloatx80Exp( floatx80 a )
+{
+
+    return a.high & 0x7FFF;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the sign bit of the extended double-precision floating-point value
+`a'.
+-------------------------------------------------------------------------------
+*/
+INLINE flag extractFloatx80Sign( floatx80 a )
+{
+
+    return a.high>>15;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Normalizes the subnormal extended double-precision floating-point value
+represented by the denormalized significand `aSig'.  The normalized exponent
+and significand are stored at the locations pointed to by `zExpPtr' and
+`zSigPtr', respectively.
+-------------------------------------------------------------------------------
+*/
+static void
+ normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
+{
+    int8 shiftCount;
+
+    shiftCount = countLeadingZeros64( aSig );
+    *zSigPtr = aSig<<shiftCount;
+    *zExpPtr = 1 - shiftCount;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
+extended double-precision floating-point value, returning the result.
+-------------------------------------------------------------------------------
+*/
+INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
+{
+    floatx80 z;
+
+    z.low = zSig;
+    z.high = ( ( (bits16) zSign )<<15 ) + zExp;
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and extended significand formed by the concatenation of `zSig0' and `zSig1',
+and returns the proper extended double-precision floating-point value
+corresponding to the abstract input.  Ordinarily, the abstract value is
+rounded and packed into the extended double-precision format, with the
+inexact exception raised if the abstract input cannot be represented
+exactly.  If the abstract value is too large, however, the overflow and
+inexact exceptions are raised and an infinity or maximal finite value is
+returned.  If the abstract value is too small, the input value is rounded to
+a subnormal number, and the underflow and inexact exceptions are raised if
+the abstract input cannot be represented exactly as a subnormal extended
+double-precision floating-point number.
+    If `roundingPrecision' is 32 or 64, the result is rounded to the same
+number of bits as single or double precision, respectively.  Otherwise, the
+result is rounded to the full precision of the extended double-precision
+format.
+    The input significand must be normalized or smaller.  If the input
+significand is not normalized, `zExp' must be 0; in that case, the result
+returned is a subnormal number, and it must not require rounding.  The
+handling of underflow and overflow follows the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static floatx80
+ roundAndPackFloatx80(
+     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+ )
+{
+    int8 roundingMode;
+    flag roundNearestEven, increment, isTiny;
+    int64 roundIncrement, roundMask, roundBits;
+
+    roundingMode = float_rounding_mode;
+    roundNearestEven = ( roundingMode == float_round_nearest_even );
+    if ( roundingPrecision == 80 ) goto precision80;
+    if ( roundingPrecision == 64 ) {
+        roundIncrement = LIT64( 0x0000000000000400 );
+        roundMask = LIT64( 0x00000000000007FF );
+    }
+    else if ( roundingPrecision == 32 ) {
+        roundIncrement = LIT64( 0x0000008000000000 );
+        roundMask = LIT64( 0x000000FFFFFFFFFF );
+    }
+    else {
+        goto precision80;
+    }
+    zSig0 |= ( zSig1 != 0 );
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            roundIncrement = 0;
+        }
+        else {
+            roundIncrement = roundMask;
+            if ( zSign ) {
+                if ( roundingMode == float_round_up ) roundIncrement = 0;
+            }
+            else {
+                if ( roundingMode == float_round_down ) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = zSig0 & roundMask;
+    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
+        if (    ( 0x7FFE < zExp )
+             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
+           ) {
+            goto overflow;
+        }
+        if ( zExp <= 0 ) {
+            isTiny =
+                   ( float_detect_tininess == float_tininess_before_rounding )
+                || ( zExp < 0 )
+                || ( zSig0 <= zSig0 + roundIncrement );
+            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
+            zExp = 0;
+            roundBits = zSig0 & roundMask;
+            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+            if ( roundBits ) float_exception_flags |= float_flag_inexact;
+            zSig0 += roundIncrement;
+            if ( (sbits64) zSig0 < 0 ) zExp = 1;
+            roundIncrement = roundMask + 1;
+            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
+                roundMask |= roundIncrement;
+            }
+            zSig0 &= ~ roundMask;
+            return packFloatx80( zSign, zExp, zSig0 );
+        }
+    }
+    if ( roundBits ) float_exception_flags |= float_flag_inexact;
+    zSig0 += roundIncrement;
+    if ( zSig0 < roundIncrement ) {
+        ++zExp;
+        zSig0 = LIT64( 0x8000000000000000 );
+    }
+    roundIncrement = roundMask + 1;
+    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
+        roundMask |= roundIncrement;
+    }
+    zSig0 &= ~ roundMask;
+    if ( zSig0 == 0 ) zExp = 0;
+    return packFloatx80( zSign, zExp, zSig0 );
+ precision80:
+    increment = ( (sbits64) zSig1 < 0 );
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            increment = 0;
+        }
+        else {
+            if ( zSign ) {
+                increment = ( roundingMode == float_round_down ) && zSig1;
+            }
+            else {
+                increment = ( roundingMode == float_round_up ) && zSig1;
+            }
+        }
+    }
+    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
+        if (    ( 0x7FFE < zExp )
+             || (    ( zExp == 0x7FFE )
+                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
+                  && increment
+                )
+           ) {
+            roundMask = 0;
+ overflow:
+            float_raise( float_flag_overflow | float_flag_inexact );
+            if (    ( roundingMode == float_round_to_zero )
+                 || ( zSign && ( roundingMode == float_round_up ) )
+                 || ( ! zSign && ( roundingMode == float_round_down ) )
+               ) {
+                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
+            }
+            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+        }
+        if ( zExp <= 0 ) {
+            isTiny =
+                   ( float_detect_tininess == float_tininess_before_rounding )
+                || ( zExp < 0 )
+                || ! increment
+                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
+            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
+            zExp = 0;
+            if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
+            if ( zSig1 ) float_exception_flags |= float_flag_inexact;
+            if ( roundNearestEven ) {
+                increment = ( (sbits64) zSig1 < 0 );
+            }
+            else {
+                if ( zSign ) {
+                    increment = ( roundingMode == float_round_down ) && zSig1;
+                }
+                else {
+                    increment = ( roundingMode == float_round_up ) && zSig1;
+                }
+            }
+            if ( increment ) {
+                ++zSig0;
+                zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
+                if ( (sbits64) zSig0 < 0 ) zExp = 1;
+            }
+            return packFloatx80( zSign, zExp, zSig0 );
+        }
+    }
+    if ( zSig1 ) float_exception_flags |= float_flag_inexact;
+    if ( increment ) {
+        ++zSig0;
+        if ( zSig0 == 0 ) {
+            ++zExp;
+            zSig0 = LIT64( 0x8000000000000000 );
+        }
+        else {
+            zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
+        }
+    }
+    else {
+        if ( zSig0 == 0 ) zExp = 0;
+    }
+    
+    return packFloatx80( zSign, zExp, zSig0 );
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent
+`zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
+and returns the proper extended double-precision floating-point value
+corresponding to the abstract input.  This routine is just like
+`roundAndPackFloatx80' except that the input significand does not have to be
+normalized.
+-------------------------------------------------------------------------------
+*/
+static floatx80
+ normalizeRoundAndPackFloatx80(
+     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+ )
+{
+    int8 shiftCount;
+
+    if ( zSig0 == 0 ) {
+        zSig0 = zSig1;
+        zSig1 = 0;
+        zExp -= 64;
+    }
+    shiftCount = countLeadingZeros64( zSig0 );
+    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
+    zExp -= shiftCount;
+    return
+        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the 32-bit two's complement integer `a' to
+the single-precision floating-point format.  The conversion is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 int32_to_float32( int32 a )
+{
+    flag zSign;
+
+    if ( a == 0 ) return 0;
+    if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
+    zSign = ( a < 0 );
+    return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the 32-bit two's complement integer `a' to
+the double-precision floating-point format.  The conversion is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 int32_to_float64( int32 a )
+{
+    flag aSign;
+    uint32 absA;
+    int8 shiftCount;
+    bits64 zSig;
+
+    if ( a == 0 ) return 0;
+    aSign = ( a < 0 );
+    absA = aSign ? - a : a;
+    shiftCount = countLeadingZeros32( absA ) + 21;
+    zSig = absA;
+    return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount );
+
+}
+
+#ifdef FLOATX80
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the 32-bit two's complement integer `a'
+to the extended double-precision floating-point format.  The conversion
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 int32_to_floatx80( int32 a )
+{
+    flag zSign;
+    uint32 absA;
+    int8 shiftCount;
+    bits64 zSig;
+
+    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
+    zSign = ( a < 0 );
+    absA = zSign ? - a : a;
+    shiftCount = countLeadingZeros32( absA ) + 32;
+    zSig = absA;
+    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
+
+}
+
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic---which means in particular that the conversion is rounded
+according to the current rounding mode.  If `a' is a NaN, the largest
+positive integer is returned.  Otherwise, if the conversion overflows, the
+largest integer with the same sign as `a' is returned.
+-------------------------------------------------------------------------------
+*/
+int32 float32_to_int32( float32 a )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits32 aSig;
+    bits64 zSig;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
+    if ( aExp ) aSig |= 0x00800000;
+    shiftCount = 0xAF - aExp;
+    zSig = aSig;
+    zSig <<= 32;
+    if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig );
+    return roundAndPackInt32( aSign, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic, except that the conversion is always rounded toward zero.  If
+`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
+conversion overflows, the largest integer with the same sign as `a' is
+returned.
+-------------------------------------------------------------------------------
+*/
+int32 float32_to_int32_round_to_zero( float32 a )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits32 aSig;
+    int32 z;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    shiftCount = aExp - 0x9E;
+    if ( 0 <= shiftCount ) {
+        if ( a == 0xCF000000 ) return 0x80000000;
+        float_raise( float_flag_invalid );
+        if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
+        return 0x80000000;
+    }
+    else if ( aExp <= 0x7E ) {
+        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
+        return 0;
+    }
+    aSig = ( aSig | 0x00800000 )<<8;
+    z = aSig>>( - shiftCount );
+    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
+        float_exception_flags |= float_flag_inexact;
+    }
+    return aSign ? - z : z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point value
+`a' to the double-precision floating-point format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 float32_to_float64( float32 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 aSig;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    if ( aExp == 0xFF ) {
+        if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
+        return packFloat64( aSign, 0x7FF, 0 );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+        --aExp;
+    }
+    return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
+
+}
+
+#ifdef FLOATX80
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point value
+`a' to the extended double-precision floating-point format.  The conversion
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 float32_to_floatx80( float32 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 aSig;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    if ( aExp == 0xFF ) {
+        if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
+        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    aSig |= 0x00800000;
+    return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
+
+}
+
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Rounds the single-precision floating-point value `a' to an integer, and
+returns the result as a single-precision floating-point value.  The
+operation is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_round_to_int( float32 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 lastBitMask, roundBitsMask;
+    int8 roundingMode;
+    float32 z;
+
+    aExp = extractFloat32Exp( a );
+    if ( 0x96 <= aExp ) {
+        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
+            return propagateFloat32NaN( a, a );
+        }
+        return a;
+    }
+    if ( aExp <= 0x7E ) {
+        if ( (bits32) ( a<<1 ) == 0 ) return a;
+        float_exception_flags |= float_flag_inexact;
+        aSign = extractFloat32Sign( a );
+        switch ( float_rounding_mode ) {
+         case float_round_nearest_even:
+            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
+                return packFloat32( aSign, 0x7F, 0 );
+            }
+            break;
+         case float_round_down:
+            return aSign ? 0xBF800000 : 0;
+         case float_round_up:
+            return aSign ? 0x80000000 : 0x3F800000;
+        }
+        return packFloat32( aSign, 0, 0 );
+    }
+    lastBitMask = 1;
+    lastBitMask <<= 0x96 - aExp;
+    roundBitsMask = lastBitMask - 1;
+    z = a;
+    roundingMode = float_rounding_mode;
+    if ( roundingMode == float_round_nearest_even ) {
+        z += lastBitMask>>1;
+        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
+    }
+    else if ( roundingMode != float_round_to_zero ) {
+        if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+            z += roundBitsMask;
+        }
+    }
+    z &= ~ roundBitsMask;
+    if ( z != a ) float_exception_flags |= float_flag_inexact;
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the absolute values of the single-precision
+floating-point values `a' and `b'.  If `zSign' is true, the sum is negated
+before being returned.  `zSign' is ignored if the result is a NaN.  The
+addition is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
+{
+    int16 aExp, bExp, zExp;
+    bits32 aSig, bSig, zSig;
+    int16 expDiff;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    bSig = extractFloat32Frac( b );
+    bExp = extractFloat32Exp( b );
+    expDiff = aExp - bExp;
+    aSig <<= 6;
+    bSig <<= 6;
+    if ( 0 < expDiff ) {
+        if ( aExp == 0xFF ) {
+            if ( aSig ) return propagateFloat32NaN( a, b );
+            return a;
+        }
+        if ( bExp == 0 ) {
+            --expDiff;
+        }
+        else {
+            bSig |= 0x20000000;
+        }
+        shift32RightJamming( bSig, expDiff, &bSig );
+        zExp = aExp;
+    }
+    else if ( expDiff < 0 ) {
+        if ( bExp == 0xFF ) {
+            if ( bSig ) return propagateFloat32NaN( a, b );
+            return packFloat32( zSign, 0xFF, 0 );
+        }
+        if ( aExp == 0 ) {
+            ++expDiff;
+        }
+        else {
+            aSig |= 0x20000000;
+        }
+        shift32RightJamming( aSig, - expDiff, &aSig );
+        zExp = bExp;
+    }
+    else {
+        if ( aExp == 0xFF ) {
+            if ( aSig | bSig ) return propagateFloat32NaN( a, b );
+            return a;
+        }
+        if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
+        zSig = 0x40000000 + aSig + bSig;
+        zExp = aExp;
+        goto roundAndPack;
+    }
+    aSig |= 0x20000000;
+    zSig = ( aSig + bSig )<<1;
+    --zExp;
+    if ( (sbits32) zSig < 0 ) {
+        zSig = aSig + bSig;
+        ++zExp;
+    }
+ roundAndPack:
+    return roundAndPackFloat32( zSign, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the absolute values of the single-
+precision floating-point values `a' and `b'.  If `zSign' is true, the
+difference is negated before being returned.  `zSign' is ignored if the
+result is a NaN.  The subtraction is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
+{
+    int16 aExp, bExp, zExp;
+    bits32 aSig, bSig, zSig;
+    int16 expDiff;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    bSig = extractFloat32Frac( b );
+    bExp = extractFloat32Exp( b );
+    expDiff = aExp - bExp;
+    aSig <<= 7;
+    bSig <<= 7;
+    if ( 0 < expDiff ) goto aExpBigger;
+    if ( expDiff < 0 ) goto bExpBigger;
+    if ( aExp == 0xFF ) {
+        if ( aSig | bSig ) return propagateFloat32NaN( a, b );
+        float_raise( float_flag_invalid );
+        return float32_default_nan;
+    }
+    if ( aExp == 0 ) {
+        aExp = 1;
+        bExp = 1;
+    }
+    if ( bSig < aSig ) goto aBigger;
+    if ( aSig < bSig ) goto bBigger;
+    return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
+ bExpBigger:
+    if ( bExp == 0xFF ) {
+        if ( bSig ) return propagateFloat32NaN( a, b );
+        return packFloat32( zSign ^ 1, 0xFF, 0 );
+    }
+    if ( aExp == 0 ) {
+        ++expDiff;
+    }
+    else {
+        aSig |= 0x40000000;
+    }
+    shift32RightJamming( aSig, - expDiff, &aSig );
+    bSig |= 0x40000000;
+ bBigger:
+    zSig = bSig - aSig;
+    zExp = bExp;
+    zSign ^= 1;
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp == 0xFF ) {
+        if ( aSig ) return propagateFloat32NaN( a, b );
+        return a;
+    }
+    if ( bExp == 0 ) {
+        --expDiff;
+    }
+    else {
+        bSig |= 0x40000000;
+    }
+    shift32RightJamming( bSig, expDiff, &bSig );
+    aSig |= 0x40000000;
+ aBigger:
+    zSig = aSig - bSig;
+    zExp = aExp;
+ normalizeRoundAndPack:
+    --zExp;
+    return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the single-precision floating-point values `a'
+and `b'.  The operation is performed according to the IEC/IEEE Standard for
+Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_add( float32 a, float32 b )
+{
+    flag aSign, bSign;
+
+    aSign = extractFloat32Sign( a );
+    bSign = extractFloat32Sign( b );
+    if ( aSign == bSign ) {
+        return addFloat32Sigs( a, b, aSign );
+    }
+    else {
+        return subFloat32Sigs( a, b, aSign );
+    }
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the single-precision floating-point values
+`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_sub( float32 a, float32 b )
+{
+    flag aSign, bSign;
+
+    aSign = extractFloat32Sign( a );
+    bSign = extractFloat32Sign( b );
+    if ( aSign == bSign ) {
+        return subFloat32Sigs( a, b, aSign );
+    }
+    else {
+        return addFloat32Sigs( a, b, aSign );
+    }
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of multiplying the single-precision floating-point values
+`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_mul( float32 a, float32 b )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, zExp;
+    bits32 aSig, bSig;
+    bits64 zSig64;
+    bits32 zSig;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    bSig = extractFloat32Frac( b );
+    bExp = extractFloat32Exp( b );
+    bSign = extractFloat32Sign( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0xFF ) {
+        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
+            return propagateFloat32NaN( a, b );
+        }
+        if ( ( bExp | bSig ) == 0 ) {
+            float_raise( float_flag_invalid );
+            return float32_default_nan;
+        }
+        return packFloat32( zSign, 0xFF, 0 );
+    }
+    if ( bExp == 0xFF ) {
+        if ( bSig ) return propagateFloat32NaN( a, b );
+        if ( ( aExp | aSig ) == 0 ) {
+            float_raise( float_flag_invalid );
+            return float32_default_nan;
+        }
+        return packFloat32( zSign, 0xFF, 0 );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
+        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+    }
+    zExp = aExp + bExp - 0x7F;
+    aSig = ( aSig | 0x00800000 )<<7;
+    bSig = ( bSig | 0x00800000 )<<8;
+    shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
+    zSig = zSig64;
+    if ( 0 <= (sbits32) ( zSig<<1 ) ) {
+        zSig <<= 1;
+        --zExp;
+    }
+    return roundAndPackFloat32( zSign, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of dividing the single-precision floating-point value `a'
+by the corresponding value `b'.  The operation is performed according to the
+IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_div( float32 a, float32 b )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, zExp;
+    bits32 aSig, bSig, zSig;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    bSig = extractFloat32Frac( b );
+    bExp = extractFloat32Exp( b );
+    bSign = extractFloat32Sign( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0xFF ) {
+        if ( aSig ) return propagateFloat32NaN( a, b );
+        if ( bExp == 0xFF ) {
+            if ( bSig ) return propagateFloat32NaN( a, b );
+            float_raise( float_flag_invalid );
+            return float32_default_nan;
+        }
+        return packFloat32( zSign, 0xFF, 0 );
+    }
+    if ( bExp == 0xFF ) {
+        if ( bSig ) return propagateFloat32NaN( a, b );
+        return packFloat32( zSign, 0, 0 );
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {
+            if ( ( aExp | aSig ) == 0 ) {
+                float_raise( float_flag_invalid );
+                return float32_default_nan;
+            }
+            float_raise( float_flag_divbyzero );
+            return packFloat32( zSign, 0xFF, 0 );
+        }
+        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    zExp = aExp - bExp + 0x7D;
+    aSig = ( aSig | 0x00800000 )<<7;
+    bSig = ( bSig | 0x00800000 )<<8;
+    if ( bSig <= ( aSig + aSig ) ) {
+        aSig >>= 1;
+        ++zExp;
+    }
+    zSig = ( ( (bits64) aSig )<<32 ) / bSig;
+    if ( ( zSig & 0x3F ) == 0 ) {
+        zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 );
+    }
+    return roundAndPackFloat32( zSign, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the remainder of the single-precision floating-point value `a'
+with respect to the corresponding value `b'.  The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_rem( float32 a, float32 b )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, expDiff;
+    bits32 aSig, bSig;
+    bits32 q;
+    bits64 aSig64, bSig64, q64;
+    bits32 alternateASig;
+    sbits32 sigMean;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    bSig = extractFloat32Frac( b );
+    bExp = extractFloat32Exp( b );
+    bSign = extractFloat32Sign( b );
+    if ( aExp == 0xFF ) {
+        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
+            return propagateFloat32NaN( a, b );
+        }
+        float_raise( float_flag_invalid );
+        return float32_default_nan;
+    }
+    if ( bExp == 0xFF ) {
+        if ( bSig ) return propagateFloat32NaN( a, b );
+        return a;
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {
+            float_raise( float_flag_invalid );
+            return float32_default_nan;
+        }
+        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return a;
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    expDiff = aExp - bExp;
+    aSig |= 0x00800000;
+    bSig |= 0x00800000;
+    if ( expDiff < 32 ) {
+        aSig <<= 8;
+        bSig <<= 8;
+        if ( expDiff < 0 ) {
+            if ( expDiff < -1 ) return a;
+            aSig >>= 1;
+        }
+        q = ( bSig <= aSig );
+        if ( q ) aSig -= bSig;
+        if ( 0 < expDiff ) {
+            q = ( ( (bits64) aSig )<<32 ) / bSig;
+            q >>= 32 - expDiff;
+            bSig >>= 2;
+            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
+        }
+        else {
+            aSig >>= 2;
+            bSig >>= 2;
+        }
+    }
+    else {
+        if ( bSig <= aSig ) aSig -= bSig;
+        aSig64 = ( (bits64) aSig )<<40;
+        bSig64 = ( (bits64) bSig )<<40;
+        expDiff -= 64;
+        while ( 0 < expDiff ) {
+            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
+            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
+            aSig64 = - ( ( bSig * q64 )<<38 );
+            expDiff -= 62;
+        }
+        expDiff += 64;
+        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
+        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
+        q = q64>>( 64 - expDiff );
+        bSig <<= 6;
+        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
+    }
+    do {
+        alternateASig = aSig;
+        ++q;
+        aSig -= bSig;
+    } while ( 0 <= (sbits32) aSig );
+    sigMean = aSig + alternateASig;
+    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
+        aSig = alternateASig;
+    }
+    zSign = ( (sbits32) aSig < 0 );
+    if ( zSign ) aSig = - aSig;
+    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the square root of the single-precision floating-point value `a'.
+The operation is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_sqrt( float32 a )
+{
+    flag aSign;
+    int16 aExp, zExp;
+    bits32 aSig, zSig;
+    bits64 rem, term;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    if ( aExp == 0xFF ) {
+        if ( aSig ) return propagateFloat32NaN( a, 0 );
+        if ( ! aSign ) return a;
+        float_raise( float_flag_invalid );
+        return float32_default_nan;
+    }
+    if ( aSign ) {
+        if ( ( aExp | aSig ) == 0 ) return a;
+        float_raise( float_flag_invalid );
+        return float32_default_nan;
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return 0;
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
+    aSig = ( aSig | 0x00800000 )<<8;
+    zSig = estimateSqrt32( aExp, aSig ) + 2;
+    if ( ( zSig & 0x7F ) <= 5 ) {
+        if ( zSig < 2 ) {
+            zSig = 0xFFFFFFFF;
+        }
+        else {
+            aSig >>= aExp & 1;
+            term = ( (bits64) zSig ) * zSig;
+            rem = ( ( (bits64) aSig )<<32 ) - term;
+            while ( (sbits64) rem < 0 ) {
+                --zSig;
+                rem += ( ( (bits64) zSig )<<1 ) | 1;
+            }
+            zSig |= ( rem != 0 );
+        }
+    }
+    shift32RightJamming( zSig, 1, &zSig );
+    return roundAndPackFloat32( 0, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is equal to the
+corresponding value `b', and 0 otherwise.  The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_eq( float32 a, float32 b )
+{
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than or
+equal to the corresponding value `b', and 0 otherwise.  The comparison is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_le( float32 a, float32 b )
+{
+    flag aSign, bSign;
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aSign = extractFloat32Sign( a );
+    bSign = extractFloat32Sign( b );
+    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
+    return ( a == b ) || ( aSign ^ ( a < b ) );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise.  The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_lt( float32 a, float32 b )
+{
+    flag aSign, bSign;
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aSign = extractFloat32Sign( a );
+    bSign = extractFloat32Sign( b );
+    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
+    return ( a != b ) && ( aSign ^ ( a < b ) );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is equal to the
+corresponding value `b', and 0 otherwise.  The invalid exception is raised
+if either operand is a NaN.  Otherwise, the comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_eq_signaling( float32 a, float32 b )
+{
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than or
+equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
+cause an exception.  Otherwise, the comparison is performed according to the
+IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_le_quiet( float32 a, float32 b )
+{
+    flag aSign, bSign;
+    //int16 aExp, bExp;
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    aSign = extractFloat32Sign( a );
+    bSign = extractFloat32Sign( b );
+    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
+    return ( a == b ) || ( aSign ^ ( a < b ) );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+exception.  Otherwise, the comparison is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_lt_quiet( float32 a, float32 b )
+{
+    flag aSign, bSign;
+
+    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+       ) {
+        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    aSign = extractFloat32Sign( a );
+    bSign = extractFloat32Sign( b );
+    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
+    return ( a != b ) && ( aSign ^ ( a < b ) );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic---which means in particular that the conversion is rounded
+according to the current rounding mode.  If `a' is a NaN, the largest
+positive integer is returned.  Otherwise, if the conversion overflows, the
+largest integer with the same sign as `a' is returned.
+-------------------------------------------------------------------------------
+*/
+int32 float64_to_int32( float64 a )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits64 aSig;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
+    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
+    shiftCount = 0x42C - aExp;
+    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
+    return roundAndPackInt32( aSign, aSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic, except that the conversion is always rounded toward zero.  If
+`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
+conversion overflows, the largest integer with the same sign as `a' is
+returned.
+-------------------------------------------------------------------------------
+*/
+int32 float64_to_int32_round_to_zero( float64 a )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits64 aSig, savedASig;
+    int32 z;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    shiftCount = 0x433 - aExp;
+    if ( shiftCount < 21 ) {
+        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
+        goto invalid;
+    }
+    else if ( 52 < shiftCount ) {
+        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
+        return 0;
+    }
+    aSig |= LIT64( 0x0010000000000000 );
+    savedASig = aSig;
+    aSig >>= shiftCount;
+    z = aSig;
+    if ( aSign ) z = - z;
+    if ( ( z < 0 ) ^ aSign ) {
+ invalid:
+        float_exception_flags |= float_flag_invalid;
+        return aSign ? 0x80000000 : 0x7FFFFFFF;
+    }
+    if ( ( aSig<<shiftCount ) != savedASig ) {
+        float_exception_flags |= float_flag_inexact;
+    }
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the 32-bit two's complement unsigned integer format.  The conversion
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic---which means in particular that the conversion is rounded
+according to the current rounding mode.  If `a' is a NaN, the largest
+positive integer is returned.  Otherwise, if the conversion overflows, the
+largest positive integer is returned.
+-------------------------------------------------------------------------------
+*/
+int32 float64_to_uint32( float64 a )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits64 aSig;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = 0; //extractFloat64Sign( a );
+    //if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
+    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
+    shiftCount = 0x42C - aExp;
+    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
+    return roundAndPackInt32( aSign, aSig );
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic, except that the conversion is always rounded toward zero.  If
+`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
+conversion overflows, the largest positive integer is returned.
+-------------------------------------------------------------------------------
+*/
+int32 float64_to_uint32_round_to_zero( float64 a )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits64 aSig, savedASig;
+    int32 z;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    shiftCount = 0x433 - aExp;
+    if ( shiftCount < 21 ) {
+        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
+        goto invalid;
+    }
+    else if ( 52 < shiftCount ) {
+        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
+        return 0;
+    }
+    aSig |= LIT64( 0x0010000000000000 );
+    savedASig = aSig;
+    aSig >>= shiftCount;
+    z = aSig;
+    if ( aSign ) z = - z;
+    if ( ( z < 0 ) ^ aSign ) {
+ invalid:
+        float_exception_flags |= float_flag_invalid;
+        return aSign ? 0x80000000 : 0x7FFFFFFF;
+    }
+    if ( ( aSig<<shiftCount ) != savedASig ) {
+        float_exception_flags |= float_flag_inexact;
+    }
+    return z;
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the single-precision floating-point format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float64_to_float32( float64 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits64 aSig;
+    bits32 zSig;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    if ( aExp == 0x7FF ) {
+        if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
+        return packFloat32( aSign, 0xFF, 0 );
+    }
+    shift64RightJamming( aSig, 22, &aSig );
+    zSig = aSig;
+    if ( aExp || zSig ) {
+        zSig |= 0x40000000;
+        aExp -= 0x381;
+    }
+    return roundAndPackFloat32( aSign, aExp, zSig );
+
+}
+
+#ifdef FLOATX80
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the extended double-precision floating-point format.  The conversion
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 float64_to_floatx80( float64 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits64 aSig;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    if ( aExp == 0x7FF ) {
+        if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
+        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
+        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+    }
+    return
+        packFloatx80(
+            aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
+
+}
+
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Rounds the double-precision floating-point value `a' to an integer, and
+returns the result as a double-precision floating-point value.  The
+operation is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 float64_round_to_int( float64 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits64 lastBitMask, roundBitsMask;
+    int8 roundingMode;
+    float64 z;
+
+    aExp = extractFloat64Exp( a );
+    if ( 0x433 <= aExp ) {
+        if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
+            return propagateFloat64NaN( a, a );
+        }
+        return a;
+    }
+    if ( aExp <= 0x3FE ) {
+        if ( (bits64) ( a<<1 ) == 0 ) return a;
+        float_exception_flags |= float_flag_inexact;
+        aSign = extractFloat64Sign( a );
+        switch ( float_rounding_mode ) {
+         case float_round_nearest_even:
+            if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
+                return packFloat64( aSign, 0x3FF, 0 );
+            }
+            break;
+         case float_round_down:
+            return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
+         case float_round_up:
+            return
+            aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
+        }
+        return packFloat64( aSign, 0, 0 );
+    }
+    lastBitMask = 1;
+    lastBitMask <<= 0x433 - aExp;
+    roundBitsMask = lastBitMask - 1;
+    z = a;
+    roundingMode = float_rounding_mode;
+    if ( roundingMode == float_round_nearest_even ) {
+        z += lastBitMask>>1;
+        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
+    }
+    else if ( roundingMode != float_round_to_zero ) {
+        if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+            z += roundBitsMask;
+        }
+    }
+    z &= ~ roundBitsMask;
+    if ( z != a ) float_exception_flags |= float_flag_inexact;
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the absolute values of the double-precision
+floating-point values `a' and `b'.  If `zSign' is true, the sum is negated
+before being returned.  `zSign' is ignored if the result is a NaN.  The
+addition is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
+{
+    int16 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig;
+    int16 expDiff;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    bSig = extractFloat64Frac( b );
+    bExp = extractFloat64Exp( b );
+    expDiff = aExp - bExp;
+    aSig <<= 9;
+    bSig <<= 9;
+    if ( 0 < expDiff ) {
+        if ( aExp == 0x7FF ) {
+            if ( aSig ) return propagateFloat64NaN( a, b );
+            return a;
+        }
+        if ( bExp == 0 ) {
+            --expDiff;
+        }
+        else {
+            bSig |= LIT64( 0x2000000000000000 );
+        }
+        shift64RightJamming( bSig, expDiff, &bSig );
+        zExp = aExp;
+    }
+    else if ( expDiff < 0 ) {
+        if ( bExp == 0x7FF ) {
+            if ( bSig ) return propagateFloat64NaN( a, b );
+            return packFloat64( zSign, 0x7FF, 0 );
+        }
+        if ( aExp == 0 ) {
+            ++expDiff;
+        }
+        else {
+            aSig |= LIT64( 0x2000000000000000 );
+        }
+        shift64RightJamming( aSig, - expDiff, &aSig );
+        zExp = bExp;
+    }
+    else {
+        if ( aExp == 0x7FF ) {
+            if ( aSig | bSig ) return propagateFloat64NaN( a, b );
+            return a;
+        }
+        if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
+        zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
+        zExp = aExp;
+        goto roundAndPack;
+    }
+    aSig |= LIT64( 0x2000000000000000 );
+    zSig = ( aSig + bSig )<<1;
+    --zExp;
+    if ( (sbits64) zSig < 0 ) {
+        zSig = aSig + bSig;
+        ++zExp;
+    }
+ roundAndPack:
+    return roundAndPackFloat64( zSign, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the absolute values of the double-
+precision floating-point values `a' and `b'.  If `zSign' is true, the
+difference is negated before being returned.  `zSign' is ignored if the
+result is a NaN.  The subtraction is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
+{
+    int16 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig;
+    int16 expDiff;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    bSig = extractFloat64Frac( b );
+    bExp = extractFloat64Exp( b );
+    expDiff = aExp - bExp;
+    aSig <<= 10;
+    bSig <<= 10;
+    if ( 0 < expDiff ) goto aExpBigger;
+    if ( expDiff < 0 ) goto bExpBigger;
+    if ( aExp == 0x7FF ) {
+        if ( aSig | bSig ) return propagateFloat64NaN( a, b );
+        float_raise( float_flag_invalid );
+        return float64_default_nan;
+    }
+    if ( aExp == 0 ) {
+        aExp = 1;
+        bExp = 1;
+    }
+    if ( bSig < aSig ) goto aBigger;
+    if ( aSig < bSig ) goto bBigger;
+    return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
+ bExpBigger:
+    if ( bExp == 0x7FF ) {
+        if ( bSig ) return propagateFloat64NaN( a, b );
+        return packFloat64( zSign ^ 1, 0x7FF, 0 );
+    }
+    if ( aExp == 0 ) {
+        ++expDiff;
+    }
+    else {
+        aSig |= LIT64( 0x4000000000000000 );
+    }
+    shift64RightJamming( aSig, - expDiff, &aSig );
+    bSig |= LIT64( 0x4000000000000000 );
+ bBigger:
+    zSig = bSig - aSig;
+    zExp = bExp;
+    zSign ^= 1;
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp == 0x7FF ) {
+        if ( aSig ) return propagateFloat64NaN( a, b );
+        return a;
+    }
+    if ( bExp == 0 ) {
+        --expDiff;
+    }
+    else {
+        bSig |= LIT64( 0x4000000000000000 );
+    }
+    shift64RightJamming( bSig, expDiff, &bSig );
+    aSig |= LIT64( 0x4000000000000000 );
+ aBigger:
+    zSig = aSig - bSig;
+    zExp = aExp;
+ normalizeRoundAndPack:
+    --zExp;
+    return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the double-precision floating-point values `a'
+and `b'.  The operation is performed according to the IEC/IEEE Standard for
+Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 float64_add( float64 a, float64 b )
+{
+    flag aSign, bSign;
+
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    if ( aSign == bSign ) {
+        return addFloat64Sigs( a, b, aSign );
+    }
+    else {
+        return subFloat64Sigs( a, b, aSign );
+    }
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the double-precision floating-point values
+`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 float64_sub( float64 a, float64 b )
+{
+    flag aSign, bSign;
+
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    if ( aSign == bSign ) {
+        return subFloat64Sigs( a, b, aSign );
+    }
+    else {
+        return addFloat64Sigs( a, b, aSign );
+    }
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of multiplying the double-precision floating-point values
+`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 float64_mul( float64 a, float64 b )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    bSig = extractFloat64Frac( b );
+    bExp = extractFloat64Exp( b );
+    bSign = extractFloat64Sign( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0x7FF ) {
+        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
+            return propagateFloat64NaN( a, b );
+        }
+        if ( ( bExp | bSig ) == 0 ) {
+            float_raise( float_flag_invalid );
+            return float64_default_nan;
+        }
+        return packFloat64( zSign, 0x7FF, 0 );
+    }
+    if ( bExp == 0x7FF ) {
+        if ( bSig ) return propagateFloat64NaN( a, b );
+        if ( ( aExp | aSig ) == 0 ) {
+            float_raise( float_flag_invalid );
+            return float64_default_nan;
+        }
+        return packFloat64( zSign, 0x7FF, 0 );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
+        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
+        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+    }
+    zExp = aExp + bExp - 0x3FF;
+    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
+    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+    mul64To128( aSig, bSig, &zSig0, &zSig1 );
+    zSig0 |= ( zSig1 != 0 );
+    if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
+        zSig0 <<= 1;
+        --zExp;
+    }
+    return roundAndPackFloat64( zSign, zExp, zSig0 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of dividing the double-precision floating-point value `a'
+by the corresponding value `b'.  The operation is performed according to
+the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 float64_div( float64 a, float64 b )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig;
+    bits64 rem0, rem1;
+    bits64 term0, term1;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    bSig = extractFloat64Frac( b );
+    bExp = extractFloat64Exp( b );
+    bSign = extractFloat64Sign( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0x7FF ) {
+        if ( aSig ) return propagateFloat64NaN( a, b );
+        if ( bExp == 0x7FF ) {
+            if ( bSig ) return propagateFloat64NaN( a, b );
+            float_raise( float_flag_invalid );
+            return float64_default_nan;
+        }
+        return packFloat64( zSign, 0x7FF, 0 );
+    }
+    if ( bExp == 0x7FF ) {
+        if ( bSig ) return propagateFloat64NaN( a, b );
+        return packFloat64( zSign, 0, 0 );
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {
+            if ( ( aExp | aSig ) == 0 ) {
+                float_raise( float_flag_invalid );
+                return float64_default_nan;
+            }
+            float_raise( float_flag_divbyzero );
+            return packFloat64( zSign, 0x7FF, 0 );
+        }
+        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
+        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+    }
+    zExp = aExp - bExp + 0x3FD;
+    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
+    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+    if ( bSig <= ( aSig + aSig ) ) {
+        aSig >>= 1;
+        ++zExp;
+    }
+    zSig = estimateDiv128To64( aSig, 0, bSig );
+    if ( ( zSig & 0x1FF ) <= 2 ) {
+        mul64To128( bSig, zSig, &term0, &term1 );
+        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
+        while ( (sbits64) rem0 < 0 ) {
+            --zSig;
+            add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
+        }
+        zSig |= ( rem1 != 0 );
+    }
+    return roundAndPackFloat64( zSign, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the remainder of the double-precision floating-point value `a'
+with respect to the corresponding value `b'.  The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 float64_rem( float64 a, float64 b )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, expDiff;
+    bits64 aSig, bSig;
+    bits64 q, alternateASig;
+    sbits64 sigMean;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    bSig = extractFloat64Frac( b );
+    bExp = extractFloat64Exp( b );
+    bSign = extractFloat64Sign( b );
+    if ( aExp == 0x7FF ) {
+        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
+            return propagateFloat64NaN( a, b );
+        }
+        float_raise( float_flag_invalid );
+        return float64_default_nan;
+    }
+    if ( bExp == 0x7FF ) {
+        if ( bSig ) return propagateFloat64NaN( a, b );
+        return a;
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {
+            float_raise( float_flag_invalid );
+            return float64_default_nan;
+        }
+        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return a;
+        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+    }
+    expDiff = aExp - bExp;
+    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
+    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+    if ( expDiff < 0 ) {
+        if ( expDiff < -1 ) return a;
+        aSig >>= 1;
+    }
+    q = ( bSig <= aSig );
+    if ( q ) aSig -= bSig;
+    expDiff -= 64;
+    while ( 0 < expDiff ) {
+        q = estimateDiv128To64( aSig, 0, bSig );
+        q = ( 2 < q ) ? q - 2 : 0;
+        aSig = - ( ( bSig>>2 ) * q );
+        expDiff -= 62;
+    }
+    expDiff += 64;
+    if ( 0 < expDiff ) {
+        q = estimateDiv128To64( aSig, 0, bSig );
+        q = ( 2 < q ) ? q - 2 : 0;
+        q >>= 64 - expDiff;
+        bSig >>= 2;
+        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
+    }
+    else {
+        aSig >>= 2;
+        bSig >>= 2;
+    }
+    do {
+        alternateASig = aSig;
+        ++q;
+        aSig -= bSig;
+    } while ( 0 <= (sbits64) aSig );
+    sigMean = aSig + alternateASig;
+    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
+        aSig = alternateASig;
+    }
+    zSign = ( (sbits64) aSig < 0 );
+    if ( zSign ) aSig = - aSig;
+    return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the square root of the double-precision floating-point value `a'.
+The operation is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 float64_sqrt( float64 a )
+{
+    flag aSign;
+    int16 aExp, zExp;
+    bits64 aSig, zSig;
+    bits64 rem0, rem1, term0, term1; //, shiftedRem;
+    //float64 z;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    if ( aExp == 0x7FF ) {
+        if ( aSig ) return propagateFloat64NaN( a, a );
+        if ( ! aSign ) return a;
+        float_raise( float_flag_invalid );
+        return float64_default_nan;
+    }
+    if ( aSign ) {
+        if ( ( aExp | aSig ) == 0 ) return a;
+        float_raise( float_flag_invalid );
+        return float64_default_nan;
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return 0;
+        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+    }
+    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
+    aSig |= LIT64( 0x0010000000000000 );
+    zSig = estimateSqrt32( aExp, aSig>>21 );
+    zSig <<= 31;
+    aSig <<= 9 - ( aExp & 1 );
+    zSig = estimateDiv128To64( aSig, 0, zSig ) + zSig + 2;
+    if ( ( zSig & 0x3FF ) <= 5 ) {
+        if ( zSig < 2 ) {
+            zSig = LIT64( 0xFFFFFFFFFFFFFFFF );
+        }
+        else {
+            aSig <<= 2;
+            mul64To128( zSig, zSig, &term0, &term1 );
+            sub128( aSig, 0, term0, term1, &rem0, &rem1 );
+            while ( (sbits64) rem0 < 0 ) {
+                --zSig;
+                shortShift128Left( 0, zSig, 1, &term0, &term1 );
+                term1 |= 1;
+                add128( rem0, rem1, term0, term1, &rem0, &rem1 );
+            }
+            zSig |= ( ( rem0 | rem1 ) != 0 );
+        }
+    }
+    shift64RightJamming( zSig, 1, &zSig );
+    return roundAndPackFloat64( 0, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is equal to the
+corresponding value `b', and 0 otherwise.  The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float64_eq( float64 a, float64 b )
+{
+
+    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+       ) {
+        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is less than or
+equal to the corresponding value `b', and 0 otherwise.  The comparison is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float64_le( float64 a, float64 b )
+{
+    flag aSign, bSign;
+
+    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
+    return ( a == b ) || ( aSign ^ ( a < b ) );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise.  The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float64_lt( float64 a, float64 b )
+{
+    flag aSign, bSign;
+
+    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
+    return ( a != b ) && ( aSign ^ ( a < b ) );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is equal to the
+corresponding value `b', and 0 otherwise.  The invalid exception is raised
+if either operand is a NaN.  Otherwise, the comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float64_eq_signaling( float64 a, float64 b )
+{
+
+    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+       ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is less than or
+equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
+cause an exception.  Otherwise, the comparison is performed according to the
+IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float64_le_quiet( float64 a, float64 b )
+{
+    flag aSign, bSign;
+    //int16 aExp, bExp;
+
+    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+       ) {
+        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
+    return ( a == b ) || ( aSign ^ ( a < b ) );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+exception.  Otherwise, the comparison is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float64_lt_quiet( float64 a, float64 b )
+{
+    flag aSign, bSign;
+
+    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+       ) {
+        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    aSign = extractFloat64Sign( a );
+    bSign = extractFloat64Sign( b );
+    if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
+    return ( a != b ) && ( aSign ^ ( a < b ) );
+
+}
+
+#ifdef FLOATX80
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the extended double-precision floating-
+point value `a' to the 32-bit two's complement integer format.  The
+conversion is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic---which means in particular that the conversion
+is rounded according to the current rounding mode.  If `a' is a NaN, the
+largest positive integer is returned.  Otherwise, if the conversion
+overflows, the largest integer with the same sign as `a' is returned.
+-------------------------------------------------------------------------------
+*/
+int32 floatx80_to_int32( floatx80 a )
+{
+    flag aSign;
+    int32 aExp, shiftCount;
+    bits64 aSig;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
+    shiftCount = 0x4037 - aExp;
+    if ( shiftCount <= 0 ) shiftCount = 1;
+    shift64RightJamming( aSig, shiftCount, &aSig );
+    return roundAndPackInt32( aSign, aSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the extended double-precision floating-
+point value `a' to the 32-bit two's complement integer format.  The
+conversion is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic, except that the conversion is always rounded
+toward zero.  If `a' is a NaN, the largest positive integer is returned.
+Otherwise, if the conversion overflows, the largest integer with the same
+sign as `a' is returned.
+-------------------------------------------------------------------------------
+*/
+int32 floatx80_to_int32_round_to_zero( floatx80 a )
+{
+    flag aSign;
+    int32 aExp, shiftCount;
+    bits64 aSig, savedASig;
+    int32 z;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    shiftCount = 0x403E - aExp;
+    if ( shiftCount < 32 ) {
+        if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
+        goto invalid;
+    }
+    else if ( 63 < shiftCount ) {
+        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
+        return 0;
+    }
+    savedASig = aSig;
+    aSig >>= shiftCount;
+    z = aSig;
+    if ( aSign ) z = - z;
+    if ( ( z < 0 ) ^ aSign ) {
+ invalid:
+        float_exception_flags |= float_flag_invalid;
+        return aSign ? 0x80000000 : 0x7FFFFFFF;
+    }
+    if ( ( aSig<<shiftCount ) != savedASig ) {
+        float_exception_flags |= float_flag_inexact;
+    }
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the extended double-precision floating-
+point value `a' to the single-precision floating-point format.  The
+conversion is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 floatx80_to_float32( floatx80 a )
+{
+    flag aSign;
+    int32 aExp;
+    bits64 aSig;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( aSig<<1 ) ) {
+            return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
+        }
+        return packFloat32( aSign, 0xFF, 0 );
+    }
+    shift64RightJamming( aSig, 33, &aSig );
+    if ( aExp || aSig ) aExp -= 0x3F81;
+    return roundAndPackFloat32( aSign, aExp, aSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the extended double-precision floating-
+point value `a' to the double-precision floating-point format.  The
+conversion is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 floatx80_to_float64( floatx80 a )
+{
+    flag aSign;
+    int32 aExp;
+    bits64 aSig, zSig;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( aSig<<1 ) ) {
+            return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
+        }
+        return packFloat64( aSign, 0x7FF, 0 );
+    }
+    shift64RightJamming( aSig, 1, &zSig );
+    if ( aExp || aSig ) aExp -= 0x3C01;
+    return roundAndPackFloat64( aSign, aExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Rounds the extended double-precision floating-point value `a' to an integer,
+and returns the result as an extended quadruple-precision floating-point
+value.  The operation is performed according to the IEC/IEEE Standard for
+Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_round_to_int( floatx80 a )
+{
+    flag aSign;
+    int32 aExp;
+    bits64 lastBitMask, roundBitsMask;
+    int8 roundingMode;
+    floatx80 z;
+
+    aExp = extractFloatx80Exp( a );
+    if ( 0x403E <= aExp ) {
+        if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
+            return propagateFloatx80NaN( a, a );
+        }
+        return a;
+    }
+    if ( aExp <= 0x3FFE ) {
+        if (    ( aExp == 0 )
+             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
+            return a;
+        }
+        float_exception_flags |= float_flag_inexact;
+        aSign = extractFloatx80Sign( a );
+        switch ( float_rounding_mode ) {
+         case float_round_nearest_even:
+            if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
+               ) {
+                return
+                    packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
+            }
+            break;
+         case float_round_down:
+            return
+                  aSign ?
+                      packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
+                : packFloatx80( 0, 0, 0 );
+         case float_round_up:
+            return
+                  aSign ? packFloatx80( 1, 0, 0 )
+                : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
+        }
+        return packFloatx80( aSign, 0, 0 );
+    }
+    lastBitMask = 1;
+    lastBitMask <<= 0x403E - aExp;
+    roundBitsMask = lastBitMask - 1;
+    z = a;
+    roundingMode = float_rounding_mode;
+    if ( roundingMode == float_round_nearest_even ) {
+        z.low += lastBitMask>>1;
+        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
+    }
+    else if ( roundingMode != float_round_to_zero ) {
+        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+            z.low += roundBitsMask;
+        }
+    }
+    z.low &= ~ roundBitsMask;
+    if ( z.low == 0 ) {
+        ++z.high;
+        z.low = LIT64( 0x8000000000000000 );
+    }
+    if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the absolute values of the extended double-
+precision floating-point values `a' and `b'.  If `zSign' is true, the sum is
+negated before being returned.  `zSign' is ignored if the result is a NaN.
+The addition is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+{
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    int32 expDiff;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    expDiff = aExp - bExp;
+    if ( 0 < expDiff ) {
+        if ( aExp == 0x7FFF ) {
+            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+            return a;
+        }
+        if ( bExp == 0 ) --expDiff;
+        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+        zExp = aExp;
+    }
+    else if ( expDiff < 0 ) {
+        if ( bExp == 0x7FFF ) {
+            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+        }
+        if ( aExp == 0 ) ++expDiff;
+        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+        zExp = bExp;
+    }
+    else {
+        if ( aExp == 0x7FFF ) {
+            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
+                return propagateFloatx80NaN( a, b );
+            }
+            return a;
+        }
+        zSig1 = 0;
+        zSig0 = aSig + bSig;
+        if ( aExp == 0 ) {
+            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
+            goto roundAndPack;
+        }
+        zExp = aExp;
+        goto shiftRight1;
+    }
+    
+    zSig0 = aSig + bSig;
+
+    if ( (sbits64) zSig0 < 0 ) goto roundAndPack; 
+ shiftRight1:
+    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
+    zSig0 |= LIT64( 0x8000000000000000 );
+    ++zExp;
+ roundAndPack:
+    return
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the absolute values of the extended
+double-precision floating-point values `a' and `b'.  If `zSign' is true,
+the difference is negated before being returned.  `zSign' is ignored if the
+result is a NaN.  The subtraction is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+{
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    int32 expDiff;
+    floatx80 z;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    expDiff = aExp - bExp;
+    if ( 0 < expDiff ) goto aExpBigger;
+    if ( expDiff < 0 ) goto bExpBigger;
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
+            return propagateFloatx80NaN( a, b );
+        }
+        float_raise( float_flag_invalid );
+        z.low = floatx80_default_nan_low;
+        z.high = floatx80_default_nan_high;
+        return z;
+    }
+    if ( aExp == 0 ) {
+        aExp = 1;
+        bExp = 1;
+    }
+    zSig1 = 0;
+    if ( bSig < aSig ) goto aBigger;
+    if ( aSig < bSig ) goto bBigger;
+    return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
+ bExpBigger:
+    if ( bExp == 0x7FFF ) {
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( aExp == 0 ) ++expDiff;
+    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+ bBigger:
+    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
+    zExp = bExp;
+    zSign ^= 1;
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        return a;
+    }
+    if ( bExp == 0 ) --expDiff;
+    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+ aBigger:
+    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
+    zExp = aExp;
+ normalizeRoundAndPack:
+    return
+        normalizeRoundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the extended double-precision floating-point
+values `a' and `b'.  The operation is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_add( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign;
+    
+    aSign = extractFloatx80Sign( a );
+    bSign = extractFloatx80Sign( b );
+    if ( aSign == bSign ) {
+        return addFloatx80Sigs( a, b, aSign );
+    }
+    else {
+        return subFloatx80Sigs( a, b, aSign );
+    }
+    
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the extended double-precision floating-
+point values `a' and `b'.  The operation is performed according to the
+IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_sub( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign;
+
+    aSign = extractFloatx80Sign( a );
+    bSign = extractFloatx80Sign( b );
+    if ( aSign == bSign ) {
+        return subFloatx80Sigs( a, b, aSign );
+    }
+    else {
+        return addFloatx80Sigs( a, b, aSign );
+    }
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of multiplying the extended double-precision floating-
+point values `a' and `b'.  The operation is performed according to the
+IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_mul( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    floatx80 z;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    bSign = extractFloatx80Sign( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0x7FFF ) {
+        if (    (bits64) ( aSig<<1 )
+             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
+            return propagateFloatx80NaN( a, b );
+        }
+        if ( ( bExp | bSig ) == 0 ) goto invalid;
+        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( bExp == 0x7FFF ) {
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        if ( ( aExp | aSig ) == 0 ) {
+ invalid:
+            float_raise( float_flag_invalid );
+            z.low = floatx80_default_nan_low;
+            z.high = floatx80_default_nan_high;
+            return z;
+        }
+        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
+        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
+        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+    }
+    zExp = aExp + bExp - 0x3FFE;
+    mul64To128( aSig, bSig, &zSig0, &zSig1 );
+    if ( 0 < (sbits64) zSig0 ) {
+        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
+        --zExp;
+    }
+    return
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of dividing the extended double-precision floating-point
+value `a' by the corresponding value `b'.  The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_div( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    bits64 rem0, rem1, rem2, term0, term1, term2;
+    floatx80 z;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    bSign = extractFloatx80Sign( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        if ( bExp == 0x7FFF ) {
+            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+            goto invalid;
+        }
+        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( bExp == 0x7FFF ) {
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        return packFloatx80( zSign, 0, 0 );
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {
+            if ( ( aExp | aSig ) == 0 ) {
+ invalid:
+                float_raise( float_flag_invalid );
+                z.low = floatx80_default_nan_low;
+                z.high = floatx80_default_nan_high;
+                return z;
+            }
+            float_raise( float_flag_divbyzero );
+            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+        }
+        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
+        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+    }
+    zExp = aExp - bExp + 0x3FFE;
+    rem1 = 0;
+    if ( bSig <= aSig ) {
+        shift128Right( aSig, 0, 1, &aSig, &rem1 );
+        ++zExp;
+    }
+    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
+    mul64To128( bSig, zSig0, &term0, &term1 );
+    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
+    while ( (sbits64) rem0 < 0 ) {
+        --zSig0;
+        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
+    }
+    zSig1 = estimateDiv128To64( rem1, 0, bSig );
+    if ( (bits64) ( zSig1<<1 ) <= 8 ) {
+        mul64To128( bSig, zSig1, &term1, &term2 );
+        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+        while ( (sbits64) rem1 < 0 ) {
+            --zSig1;
+            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
+        }
+        zSig1 |= ( ( rem1 | rem2 ) != 0 );
+    }
+    return
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the remainder of the extended double-precision floating-point value
+`a' with respect to the corresponding value `b'.  The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_rem( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, expDiff;
+    bits64 aSig0, aSig1, bSig;
+    bits64 q, term0, term1, alternateASig0, alternateASig1;
+    floatx80 z;
+
+    aSig0 = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    bSign = extractFloatx80Sign( b );
+    if ( aExp == 0x7FFF ) {
+        if (    (bits64) ( aSig0<<1 )
+             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
+            return propagateFloatx80NaN( a, b );
+        }
+        goto invalid;
+    }
+    if ( bExp == 0x7FFF ) {
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        return a;
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {
+ invalid:
+            float_raise( float_flag_invalid );
+            z.low = floatx80_default_nan_low;
+            z.high = floatx80_default_nan_high;
+            return z;
+        }
+        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
+        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+    }
+    bSig |= LIT64( 0x8000000000000000 );
+    zSign = aSign;
+    expDiff = aExp - bExp;
+    aSig1 = 0;
+    if ( expDiff < 0 ) {
+        if ( expDiff < -1 ) return a;
+        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
+        expDiff = 0;
+    }
+    q = ( bSig <= aSig0 );
+    if ( q ) aSig0 -= bSig;
+    expDiff -= 64;
+    while ( 0 < expDiff ) {
+        q = estimateDiv128To64( aSig0, aSig1, bSig );
+        q = ( 2 < q ) ? q - 2 : 0;
+        mul64To128( bSig, q, &term0, &term1 );
+        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
+        expDiff -= 62;
+    }
+    expDiff += 64;
+    if ( 0 < expDiff ) {
+        q = estimateDiv128To64( aSig0, aSig1, bSig );
+        q = ( 2 < q ) ? q - 2 : 0;
+        q >>= 64 - expDiff;
+        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
+        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
+        while ( le128( term0, term1, aSig0, aSig1 ) ) {
+            ++q;
+            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        }
+    }
+    else {
+        term1 = 0;
+        term0 = bSig;
+    }
+    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
+    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
+         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
+              && ( q & 1 ) )
+       ) {
+        aSig0 = alternateASig0;
+        aSig1 = alternateASig1;
+        zSign = ! zSign;
+    }
+    return
+        normalizeRoundAndPackFloatx80(
+            80, zSign, bExp + expDiff, aSig0, aSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the square root of the extended double-precision floating-point
+value `a'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_sqrt( floatx80 a )
+{
+    flag aSign;
+    int32 aExp, zExp;
+    bits64 aSig0, aSig1, zSig0, zSig1;
+    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+    bits64 shiftedRem0, shiftedRem1;
+    floatx80 z;
+
+    aSig0 = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
+        if ( ! aSign ) return a;
+        goto invalid;
+    }
+    if ( aSign ) {
+        if ( ( aExp | aSig0 ) == 0 ) return a;
+ invalid:
+        float_raise( float_flag_invalid );
+        z.low = floatx80_default_nan_low;
+        z.high = floatx80_default_nan_high;
+        return z;
+    }
+    if ( aExp == 0 ) {
+        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
+        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+    }
+    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
+    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
+    zSig0 <<= 31;
+    aSig1 = 0;
+    shift128Right( aSig0, 0, ( aExp & 1 ) + 2, &aSig0, &aSig1 );
+    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4;
+    if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64( 0xFFFFFFFFFFFFFFFF );
+    shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 );
+    mul64To128( zSig0, zSig0, &term0, &term1 );
+    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
+    while ( (sbits64) rem0 < 0 ) {
+        --zSig0;
+        shortShift128Left( 0, zSig0, 1, &term0, &term1 );
+        term1 |= 1;
+        add128( rem0, rem1, term0, term1, &rem0, &rem1 );
+    }
+    shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 );
+    zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 );
+    if ( (bits64) ( zSig1<<1 ) <= 10 ) {
+        if ( zSig1 == 0 ) zSig1 = 1;
+        mul64To128( zSig0, zSig1, &term1, &term2 );
+        shortShift128Left( term1, term2, 1, &term1, &term2 );
+        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+        mul64To128( zSig1, zSig1, &term2, &term3 );
+        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+        while ( (sbits64) rem1 < 0 ) {
+            --zSig1;
+            shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 );
+            term3 |= 1;
+            add192(
+                rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 );
+        }
+        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+    }
+    return
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is
+equal to the corresponding value `b', and 0 otherwise.  The comparison is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_eq( floatx80 a, floatx80 b )
+{
+
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+       ) {
+        if (    floatx80_is_signaling_nan( a )
+             || floatx80_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    return
+           ( a.low == b.low )
+        && (    ( a.high == b.high )
+             || (    ( a.low == 0 )
+                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
+           );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is
+less than or equal to the corresponding value `b', and 0 otherwise.  The
+comparison is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_le( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign;
+
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+       ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aSign = extractFloatx80Sign( a );
+    bSign = extractFloatx80Sign( b );
+    if ( aSign != bSign ) {
+        return
+               aSign
+            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                 == 0 );
+    }
+    return
+          aSign ? le128( b.high, b.low, a.high, a.low )
+        : le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is
+less than the corresponding value `b', and 0 otherwise.  The comparison
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_lt( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign;
+
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+       ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aSign = extractFloatx80Sign( a );
+    bSign = extractFloatx80Sign( b );
+    if ( aSign != bSign ) {
+        return
+               aSign
+            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                 != 0 );
+    }
+    return
+          aSign ? lt128( b.high, b.low, a.high, a.low )
+        : lt128( a.high, a.low, b.high, b.low );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is equal
+to the corresponding value `b', and 0 otherwise.  The invalid exception is
+raised if either operand is a NaN.  Otherwise, the comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_eq_signaling( floatx80 a, floatx80 b )
+{
+
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+       ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    return
+           ( a.low == b.low )
+        && (    ( a.high == b.high )
+             || (    ( a.low == 0 )
+                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
+           );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is less
+than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
+do not cause an exception.  Otherwise, the comparison is performed according
+to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_le_quiet( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign;
+
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+       ) {
+        if (    floatx80_is_signaling_nan( a )
+             || floatx80_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    aSign = extractFloatx80Sign( a );
+    bSign = extractFloatx80Sign( b );
+    if ( aSign != bSign ) {
+        return
+               aSign
+            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                 == 0 );
+    }
+    return
+          aSign ? le128( b.high, b.low, a.high, a.low )
+        : le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is less
+than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
+an exception.  Otherwise, the comparison is performed according to the
+IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_lt_quiet( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign;
+
+    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
+         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
+              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
+       ) {
+        if (    floatx80_is_signaling_nan( a )
+             || floatx80_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    aSign = extractFloatx80Sign( a );
+    bSign = extractFloatx80Sign( b );
+    if ( aSign != bSign ) {
+        return
+               aSign
+            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+                 != 0 );
+    }
+    return
+          aSign ? lt128( b.high, b.low, a.high, a.low )
+        : lt128( a.high, a.low, b.high, b.low );
+
+}
+
+#endif
+
diff --git a/arch/arm26/nwfpe/softfloat.h b/arch/arm26/nwfpe/softfloat.h
new file mode 100644
index 00000000000..22c2193a499
--- /dev/null
+++ b/arch/arm26/nwfpe/softfloat.h
@@ -0,0 +1,232 @@
+
+/*
+===============================================================================
+
+This C header file is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
+arithmetic/softfloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) they include prominent notice that the work is derivative, and (2) they
+include prominent notice akin to these three paragraphs for those parts of
+this code that are retained.
+
+===============================================================================
+*/
+
+#ifndef __SOFTFLOAT_H__
+#define __SOFTFLOAT_H__
+
+/*
+-------------------------------------------------------------------------------
+The macro `FLOATX80' must be defined to enable the extended double-precision
+floating-point format `floatx80'.  If this macro is not defined, the
+`floatx80' type will not be defined, and none of the functions that either
+input or output the `floatx80' type will be defined.
+-------------------------------------------------------------------------------
+*/
+#define FLOATX80
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE floating-point types.
+-------------------------------------------------------------------------------
+*/
+typedef unsigned long int float32;
+typedef unsigned long long float64;
+typedef struct {
+    unsigned short high;
+    unsigned long long low;
+} floatx80;
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE floating-point underflow tininess-detection mode.
+-------------------------------------------------------------------------------
+*/
+extern signed char float_detect_tininess;
+enum {
+    float_tininess_after_rounding  = 0,
+    float_tininess_before_rounding = 1
+};
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE floating-point rounding mode.
+-------------------------------------------------------------------------------
+*/
+extern signed char float_rounding_mode;
+enum {
+    float_round_nearest_even = 0,
+    float_round_to_zero      = 1,
+    float_round_down         = 2,
+    float_round_up           = 3
+};
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE floating-point exception flags.
+-------------------------------------------------------------------------------
+extern signed char float_exception_flags;
+enum {
+    float_flag_inexact   =  1,
+    float_flag_underflow =  2,
+    float_flag_overflow  =  4,
+    float_flag_divbyzero =  8,
+    float_flag_invalid   = 16
+};
+
+ScottB: November 4, 1998
+Changed the enumeration to match the bit order in the FPA11.
+*/
+
+extern signed char float_exception_flags;
+enum {
+    float_flag_invalid   =  1,
+    float_flag_divbyzero =  2,
+    float_flag_overflow  =  4,
+    float_flag_underflow =  8,
+    float_flag_inexact   = 16
+};
+
+/*
+-------------------------------------------------------------------------------
+Routine to raise any or all of the software IEC/IEEE floating-point
+exception flags.
+-------------------------------------------------------------------------------
+*/
+void float_raise( signed char );
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE integer-to-floating-point conversion routines.
+-------------------------------------------------------------------------------
+*/
+float32 int32_to_float32( signed int );
+float64 int32_to_float64( signed int );
+#ifdef FLOATX80
+floatx80 int32_to_floatx80( signed int );
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE single-precision conversion routines.
+-------------------------------------------------------------------------------
+*/
+signed int float32_to_int32( float32 );
+signed int float32_to_int32_round_to_zero( float32 );
+float64 float32_to_float64( float32 );
+#ifdef FLOATX80
+floatx80 float32_to_floatx80( float32 );
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE single-precision operations.
+-------------------------------------------------------------------------------
+*/
+float32 float32_round_to_int( float32 );
+float32 float32_add( float32, float32 );
+float32 float32_sub( float32, float32 );
+float32 float32_mul( float32, float32 );
+float32 float32_div( float32, float32 );
+float32 float32_rem( float32, float32 );
+float32 float32_sqrt( float32 );
+char float32_eq( float32, float32 );
+char float32_le( float32, float32 );
+char float32_lt( float32, float32 );
+char float32_eq_signaling( float32, float32 );
+char float32_le_quiet( float32, float32 );
+char float32_lt_quiet( float32, float32 );
+char float32_is_signaling_nan( float32 );
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE double-precision conversion routines.
+-------------------------------------------------------------------------------
+*/
+signed int float64_to_int32( float64 );
+signed int float64_to_int32_round_to_zero( float64 );
+float32 float64_to_float32( float64 );
+#ifdef FLOATX80
+floatx80 float64_to_floatx80( float64 );
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE double-precision operations.
+-------------------------------------------------------------------------------
+*/
+float64 float64_round_to_int( float64 );
+float64 float64_add( float64, float64 );
+float64 float64_sub( float64, float64 );
+float64 float64_mul( float64, float64 );
+float64 float64_div( float64, float64 );
+float64 float64_rem( float64, float64 );
+float64 float64_sqrt( float64 );
+char float64_eq( float64, float64 );
+char float64_le( float64, float64 );
+char float64_lt( float64, float64 );
+char float64_eq_signaling( float64, float64 );
+char float64_le_quiet( float64, float64 );
+char float64_lt_quiet( float64, float64 );
+char float64_is_signaling_nan( float64 );
+
+#ifdef FLOATX80
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE extended double-precision conversion routines.
+-------------------------------------------------------------------------------
+*/
+signed int floatx80_to_int32( floatx80 );
+signed int floatx80_to_int32_round_to_zero( floatx80 );
+float32 floatx80_to_float32( floatx80 );
+float64 floatx80_to_float64( floatx80 );
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE extended double-precision rounding precision.  Valid
+values are 32, 64, and 80.
+-------------------------------------------------------------------------------
+*/
+extern signed char floatx80_rounding_precision;
+
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE extended double-precision operations.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_round_to_int( floatx80 );
+floatx80 floatx80_add( floatx80, floatx80 );
+floatx80 floatx80_sub( floatx80, floatx80 );
+floatx80 floatx80_mul( floatx80, floatx80 );
+floatx80 floatx80_div( floatx80, floatx80 );
+floatx80 floatx80_rem( floatx80, floatx80 );
+floatx80 floatx80_sqrt( floatx80 );
+char floatx80_eq( floatx80, floatx80 );
+char floatx80_le( floatx80, floatx80 );
+char floatx80_lt( floatx80, floatx80 );
+char floatx80_eq_signaling( floatx80, floatx80 );
+char floatx80_le_quiet( floatx80, floatx80 );
+char floatx80_lt_quiet( floatx80, floatx80 );
+char floatx80_is_signaling_nan( floatx80 );
+
+#endif
+
+#endif